Source code for brew.metrics.diversity.paired

import numpy as np


def __coefficients(oracle):
    A = np.asarray(oracle[:, 0], dtype=bool)
    B = np.asarray(oracle[:, 1], dtype=bool)

    a = np.sum(A * B)           # A right, B right
    b = np.sum(~A * B)          # A wrong, B right
    c = np.sum(A * ~B)          # A right, B wrong
    d = np.sum(~A * ~B)         # A wrong, B wrong

    return a, b, c, d


def kuncheva_q_statistics(oracle):
    """Mean pairwise Q statistic over all classifier pairs in the oracle matrix."""
    L = oracle.shape[1]
    div = np.zeros((L * (L - 1)) // 2)
    div_i = 0
    for i in range(L):
        for j in range(i + 1, L):
            a, b, c, d = __coefficients(oracle[:, [i, j]])
            # Q = (ad - bc) / (ad + bc); the small constant avoids division by zero
            div[div_i] = float(a * d - b * c) / ((a * d + b * c) + 10e-24)
            div_i += 1
    return np.mean(div)
def kuncheva_correlation_coefficient_p(oracle):
    """Mean pairwise correlation coefficient rho over all classifier pairs."""
    L = oracle.shape[1]
    div = np.zeros((L * (L - 1)) // 2)
    div_i = 0
    for i in range(L):
        for j in range(i + 1, L):
            a, b, c, d = __coefficients(oracle[:, [i, j]])
            # rho = (ad - bc) / sqrt((a+b)(c+d)(a+c)(b+d))
            div[div_i] = float(a * d - b * c) / \
                np.sqrt((a + b) * (c + d) * (a + c) * (b + d))
            div_i += 1
    return np.mean(div)
def kuncheva_disagreement_measure(oracle):
    """Mean pairwise disagreement (fraction of samples where exactly one classifier is correct)."""
    L = oracle.shape[1]
    div = np.zeros((L * (L - 1)) // 2)
    div_i = 0
    for i in range(L):
        for j in range(i + 1, L):
            a, b, c, d = __coefficients(oracle[:, [i, j]])
            div[div_i] = float(b + c) / (a + b + c + d)
            div_i += 1
    return np.mean(div)
def kuncheva_agreement_measure(oracle):
    """Reciprocal of the mean pairwise disagreement measure (larger means more agreement)."""
    return 1.0 / (kuncheva_disagreement_measure(oracle) + 10e-24)
def kuncheva_double_fault_measure(oracle):
    """Mean pairwise double-fault measure (fraction of samples both classifiers get wrong)."""
    L = oracle.shape[1]
    div = np.zeros((L * (L - 1)) // 2)
    div_i = 0
    for i in range(L):
        for j in range(i + 1, L):
            a, b, c, d = __coefficients(oracle[:, [i, j]])
            div[div_i] = float(d) / (a + b + c + d)
            div_i += 1
    return np.mean(div)
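
The oracle-matrix functions above expect a 2-D array with one row per sample and one column per classifier, where entry (i, j) is 1 if classifier j labeled sample i correctly and 0 otherwise. A minimal usage sketch (the oracle values below are made up for illustration):

import numpy as np
from brew.metrics.diversity.paired import (
    kuncheva_q_statistics,
    kuncheva_disagreement_measure,
    kuncheva_double_fault_measure,
)

# Hypothetical oracle: 6 samples x 3 classifiers, 1 = correct prediction.
oracle = np.array([[1, 1, 0],
                   [1, 0, 1],
                   [0, 1, 1],
                   [1, 1, 1],
                   [0, 0, 1],
                   [1, 0, 0]])

print(kuncheva_q_statistics(oracle))          # mean pairwise Q statistic
print(kuncheva_disagreement_measure(oracle))  # mean pairwise disagreement
print(kuncheva_double_fault_measure(oracle))  # mean pairwise double fault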
def __get_coefficients(y_true, y_pred_a, y_pred_b):
    a, b, c, d = 0, 0, 0, 0
    for i in range(y_true.shape[0]):
        if y_pred_a[i] == y_true[i] and y_pred_b[i] == y_true[i]:
            a = a + 1           # both correct
        elif y_pred_a[i] != y_true[i] and y_pred_b[i] == y_true[i]:
            b = b + 1           # A wrong, B right
        elif y_pred_a[i] == y_true[i] and y_pred_b[i] != y_true[i]:
            c = c + 1           # A right, B wrong
        else:
            d = d + 1           # both wrong
    return a, b, c, d
def q_statistics(y_true, y_pred_a, y_pred_b):
    """Q statistic for one pair of classifiers, given the true labels and both prediction vectors."""
    a, b, c, d = __get_coefficients(y_true, y_pred_a, y_pred_b)
    q = float(a * d - b * c) / (a * d + b * c)
    return q
def correlation_coefficient_p(y_true, y_pred_a, y_pred_b):
    """Correlation coefficient rho for one pair of classifiers."""
    a, b, c, d = __get_coefficients(y_true, y_pred_a, y_pred_b)
    p = float(a * d - b * c) / np.sqrt((a + b) * (c + d) * (a + c) * (b + d))
    return p
def disagreement_measure(y_true, y_pred_a, y_pred_b):
    """Fraction of samples on which exactly one of the two classifiers is correct."""
    a, b, c, d = __get_coefficients(y_true, y_pred_a, y_pred_b)
    disagreement = float(b + c) / (a + b + c + d)
    return disagreement
def agreement_measure(y_true, y_pred_a, y_pred_b):
    """Reciprocal of the disagreement measure for one pair of classifiers."""
    return 1.0 / disagreement_measure(y_true, y_pred_a, y_pred_b)
def double_fault_measure(y_true, y_pred_a, y_pred_b):
    """Fraction of samples on which both classifiers are wrong."""
    a, b, c, d = __get_coefficients(y_true, y_pred_a, y_pred_b)
    df = float(d) / (a + b + c + d)
    return df
def paired_metric_ensemble(ensemble, X, y, paired_metric=q_statistics):
    """Mean of a paired diversity metric over every pair of classifiers in the ensemble."""
    classifiers = ensemble.classifiers
    size = len(classifiers)
    diversities = []
    for i in range(size):
        for j in range(i):
            y_pred_a = classifiers[i].predict(X)
            y_pred_b = classifiers[j].predict(X)
            diversity = paired_metric(y, y_pred_a, y_pred_b)
            diversities.append(diversity)
    return np.mean(diversities)
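
A usage sketch for paired_metric_ensemble. The ThresholdClassifier and SimpleEnsemble classes below are hypothetical stand-ins, not part of brew: the only thing paired_metric_ensemble reads from its ensemble argument is the classifiers list, and each classifier only needs a predict method. Note that the default metric, q_statistics, divides by a*d + b*c without a guard and can raise ZeroDivisionError for classifiers that never err on the same samples, which is why the sketch passes disagreement_measure and double_fault_measure explicitly.

import numpy as np
from brew.metrics.diversity.paired import (
    paired_metric_ensemble,
    disagreement_measure,
    double_fault_measure,
)


class ThresholdClassifier:
    """Toy stand-in classifier: predicts 1 when one feature exceeds a threshold."""

    def __init__(self, feature, threshold):
        self.feature = feature
        self.threshold = threshold

    def predict(self, X):
        return (X[:, self.feature] > self.threshold).astype(int)


class SimpleEnsemble:
    """Hypothetical container exposing the only attribute paired_metric_ensemble reads."""

    def __init__(self, classifiers):
        self.classifiers = classifiers


rng = np.random.RandomState(0)
X = rng.rand(200, 2)
y = (X[:, 0] + X[:, 1] > 1.0).astype(int)

ensemble = SimpleEnsemble([
    ThresholdClassifier(0, 0.5),
    ThresholdClassifier(1, 0.5),
    ThresholdClassifier(0, 0.3),
])

# Mean pairwise diversity of the ensemble under two different paired metrics.
print(paired_metric_ensemble(ensemble, X, y, paired_metric=disagreement_measure))
print(paired_metric_ensemble(ensemble, X, y, paired_metric=double_fault_measure))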