import numpy as np
def __coefficients(oracle):
    """Count the joint right/wrong outcomes of two classifiers.

    Parameters
    ----------
    oracle : array-like, two-dimensional
        Only columns 0 and 1 are read. Column 0 is classifier A and
        column 1 is classifier B; every entry is cast to ``bool`` and a
        truthy entry means the classifier was right on that sample.

    Returns
    -------
    a, b, c, d : numpy integers
        ``a`` -- both right, ``b`` -- A wrong and B right,
        ``c`` -- A right and B wrong, ``d`` -- both wrong.
    """
    # Converting first lets nested lists/tuples be sliced by column, not
    # just ndarrays.
    oracle = np.asarray(oracle)
    a_right = np.asarray(oracle[:, 0], dtype=bool)
    b_right = np.asarray(oracle[:, 1], dtype=bool)

    a = np.sum(a_right & b_right)    # A right, B right
    b = np.sum(~a_right & b_right)   # A wrong, B right
    c = np.sum(a_right & ~b_right)   # A right, B wrong
    d = np.sum(~a_right & ~b_right)  # A wrong, B wrong
    return a, b, c, d
def kuncheva_q_statistics(oracle):
    """Return the Q statistic averaged over every pair of classifiers.

    For each pair of columns the counts ``a, b, c, d`` are obtained from
    ``__coefficients`` and ``Q = (a*d - b*c) / (a*d + b*c)`` is evaluated.

    Parameters
    ----------
    oracle : array-like, two-dimensional
        One column per classifier; a truthy entry marks a sample the
        classifier got right.

    Returns
    -------
    float
        Mean of the pairwise Q values. With fewer than two columns there
        are no pairs and ``np.mean`` of the empty list yields ``nan``.
    """
    oracle = np.asarray(oracle)
    n_classifiers = oracle.shape[1]

    # Collecting into a list avoids pre-sizing an array with L*(L-1)/2,
    # which is a float (and an invalid shape) under true division.
    pairwise = []
    for i in range(n_classifiers):
        for j in range(i + 1, n_classifiers):
            a, b, c, d = __coefficients(oracle[:, [i, j]])
            # The tiny constant keeps the ratio finite when a*d + b*c == 0.
            pairwise.append(
                float(a * d - b * c) / ((a * d + b * c) + 10e-24))
    return np.mean(pairwise)
def kuncheva_correlation_coefficient_p(oracle):
    """Return the correlation coefficient averaged over classifier pairs.

    For each pair of columns the counts ``a, b, c, d`` are obtained from
    ``__coefficients`` and
    ``(a*d - b*c) / sqrt((a+b) * (c+d) * (a+c) * (b+d))`` is evaluated.

    Parameters
    ----------
    oracle : array-like, two-dimensional
        One column per classifier; a truthy entry marks a sample the
        classifier got right.

    Returns
    -------
    float
        Mean of the pairwise coefficients. No guard is applied to the
        denominator: a pair with a zero marginal contributes a
        non-finite value, which propagates into the mean.
    """
    oracle = np.asarray(oracle)
    n_classifiers = oracle.shape[1]

    # Collecting into a list avoids pre-sizing an array with L*(L-1)/2,
    # which is a float (and an invalid shape) under true division.
    pairwise = []
    for i in range(n_classifiers):
        for j in range(i + 1, n_classifiers):
            a, b, c, d = __coefficients(oracle[:, [i, j]])
            # Multiply the marginals as floats: the product of four
            # fixed-width integer sums can wrap around silently for
            # large sample counts.
            marginals = (float(a + b) * float(c + d)
                         * float(a + c) * float(b + d))
            pairwise.append(float(a * d - b * c) / np.sqrt(marginals))
    return np.mean(pairwise)
def kuncheva_disagreement_measure(oracle):
    """Return the disagreement measure averaged over classifier pairs.

    For each pair of columns the counts ``a, b, c, d`` are obtained from
    ``__coefficients`` and ``(b + c) / (a + b + c + d)`` is evaluated,
    i.e. the fraction of samples on which exactly one of the two
    classifiers is right.

    Parameters
    ----------
    oracle : array-like, two-dimensional
        One column per classifier; a truthy entry marks a sample the
        classifier got right.

    Returns
    -------
    float
        Mean of the pairwise disagreement values.
    """
    oracle = np.asarray(oracle)
    n_classifiers = oracle.shape[1]

    # Collecting into a list avoids pre-sizing an array with L*(L-1)/2,
    # which is a float (and an invalid shape) under true division.
    pairwise = []
    for i in range(n_classifiers):
        for j in range(i + 1, n_classifiers):
            a, b, c, d = __coefficients(oracle[:, [i, j]])
            pairwise.append(float(b + c) / (a + b + c + d))
    return np.mean(pairwise)
def kuncheva_agreement_measure(oracle):
    """Return the reciprocal of ``kuncheva_disagreement_measure(oracle)``.

    Parameters
    ----------
    oracle : array-like, two-dimensional
        Passed unchanged to ``kuncheva_disagreement_measure``.

    Returns
    -------
    float
        ``1 / (disagreement + 10e-24)``; the tiny constant keeps the
        result finite when the disagreement is exactly zero.
    """
    return 1.0 / (kuncheva_disagreement_measure(oracle) + 10e-24)
def kuncheva_double_fault_measure(oracle):
    """Return the double-fault measure averaged over classifier pairs.

    For each pair of columns the counts ``a, b, c, d`` are obtained from
    ``__coefficients`` and ``d / (a + b + c + d)`` is evaluated, i.e.
    the fraction of samples on which both classifiers are wrong.

    Parameters
    ----------
    oracle : array-like, two-dimensional
        One column per classifier; a truthy entry marks a sample the
        classifier got right.

    Returns
    -------
    float
        Mean of the pairwise double-fault values.
    """
    oracle = np.asarray(oracle)
    n_classifiers = oracle.shape[1]

    # Collecting into a list avoids pre-sizing an array with L*(L-1)/2,
    # which is a float (and an invalid shape) under true division.
    pairwise = []
    for i in range(n_classifiers):
        for j in range(i + 1, n_classifiers):
            a, b, c, d = __coefficients(oracle[:, [i, j]])
            pairwise.append(float(d) / (a + b + c + d))
    return np.mean(pairwise)
def __get_coefficients(y_true, y_pred_a, y_pred_b):
    """Count the joint right/wrong outcomes of two prediction vectors.

    Parameters
    ----------
    y_true : sequence
        Reference labels; its length determines how many samples are
        examined.
    y_pred_a, y_pred_b : sequence
        Predictions of classifiers A and B, indexed with the same
        positions as ``y_true``.

    Returns
    -------
    a, b, c, d : int
        ``a`` -- both right, ``b`` -- A wrong and B right,
        ``c`` -- A right and B wrong, ``d`` -- both wrong.
    """
    a, b, c, d = 0, 0, 0, 0
    # len() instead of .shape[0] so plain lists/tuples work as well as
    # arrays. Positional indexing is kept deliberately: a prediction
    # vector shorter than y_true still fails loudly instead of being
    # silently truncated.
    for i in range(len(y_true)):
        a_right = y_pred_a[i] == y_true[i]
        b_right = y_pred_b[i] == y_true[i]
        if a_right and b_right:
            a += 1
        elif b_right:
            b += 1
        elif a_right:
            c += 1
        else:
            d += 1
    return a, b, c, d
def q_statistics(y_true, y_pred_a, y_pred_b):
    """Return the Q statistic between two classifiers' predictions.

    With ``a, b, c, d`` the counts from ``__get_coefficients``, the
    value is ``(a*d - b*c) / (a*d + b*c)``.

    Parameters
    ----------
    y_true : sequence
        Reference labels.
    y_pred_a, y_pred_b : sequence
        Predictions of the two classifiers being compared.

    Returns
    -------
    float
        The Q statistic; ``0.0`` when ``a*d + b*c`` is zero.
    """
    a, b, c, d = __get_coefficients(y_true, y_pred_a, y_pred_b)
    # Same tiny guard as kuncheva_q_statistics: it is far below float
    # resolution for any non-zero integer denominator, and avoids a
    # ZeroDivisionError when a*d + b*c == 0.
    q = float(a * d - b * c) / ((a * d + b * c) + 10e-24)
    return q
def correlation_coefficient_p(y_true, y_pred_a, y_pred_b):
    """Return the correlation coefficient between two classifiers.

    With ``a, b, c, d`` the counts from ``__get_coefficients``, the
    value is ``(a*d - b*c) / sqrt((a+b) * (c+d) * (a+c) * (b+d))``.

    Parameters
    ----------
    y_true : sequence
        Reference labels.
    y_pred_a, y_pred_b : sequence
        Predictions of the two classifiers being compared.

    Returns
    -------
    float
        The correlation coefficient. No guard is applied to the
        denominator, so the result is non-finite when any of the four
        marginal sums is zero.
    """
    a, b, c, d = __get_coefficients(y_true, y_pred_a, y_pred_b)
    # Convert the exact integer product to float before the square root:
    # a Python int too large for a fixed-width integer cannot be handled
    # by np.sqrt directly.
    marginals = float((a + b) * (c + d) * (a + c) * (b + d))
    p = float(a * d - b * c) / np.sqrt(marginals)
    return p
def disagreement_measure(y_true, y_pred_a, y_pred_b):
    """Return the fraction of samples where exactly one classifier is right.

    With ``a, b, c, d`` the counts from ``__get_coefficients``, the
    value is ``(b + c) / (a + b + c + d)``.

    Parameters
    ----------
    y_true : sequence
        Reference labels.
    y_pred_a, y_pred_b : sequence
        Predictions of the two classifiers being compared.

    Returns
    -------
    float
        The disagreement measure, between 0 and 1.

    Raises
    ------
    ZeroDivisionError
        If no samples are counted (``y_true`` is empty).
    """
    a, b, c, d = __get_coefficients(y_true, y_pred_a, y_pred_b)
    disagreement = float(b + c) / (a + b + c + d)
    return disagreement
def agreement_measure(y_true, y_pred_a, y_pred_b):
    """Return the reciprocal of the disagreement measure.

    Parameters
    ----------
    y_true : sequence
        Reference labels.
    y_pred_a, y_pred_b : sequence
        Predictions of the two classifiers being compared.

    Returns
    -------
    float
        ``1 / (disagreement + 10e-24)``.
    """
    # Same tiny guard as kuncheva_agreement_measure: two classifiers
    # that never disagree yield a very large finite value instead of a
    # ZeroDivisionError.
    return 1.0 / (disagreement_measure(y_true, y_pred_a, y_pred_b) + 10e-24)
def double_fault_measure(y_true, y_pred_a, y_pred_b):
    """Return the fraction of samples that both classifiers get wrong.

    With ``a, b, c, d`` the counts from ``__get_coefficients``, the
    value is ``d / (a + b + c + d)``.

    Parameters
    ----------
    y_true : sequence
        Reference labels.
    y_pred_a, y_pred_b : sequence
        Predictions of the two classifiers being compared.

    Returns
    -------
    float
        The double-fault measure, between 0 and 1.

    Raises
    ------
    ZeroDivisionError
        If no samples are counted (``y_true`` is empty).
    """
    a, b, c, d = __get_coefficients(y_true, y_pred_a, y_pred_b)
    df = float(d) / (a + b + c + d)
    return df
def paired_metric_ensemble(ensemble, X, y, paired_metric=q_statistics):
    """Average a pairwise diversity metric over all classifier pairs.

    Parameters
    ----------
    ensemble : object
        Must expose a ``classifiers`` sequence whose items implement
        ``predict(X)``.
    X : object
        Samples, passed unchanged to every ``predict`` call.
    y : sequence
        Reference labels, passed as the first argument of
        ``paired_metric``.
    paired_metric : callable, optional
        Called as ``paired_metric(y, y_pred_a, y_pred_b)``; defaults to
        ``q_statistics``.

    Returns
    -------
    float
        ``np.mean`` of the metric over every pair ``(i, j)`` with
        ``j < i``.
    """
    # Predict once per classifier instead of once per pair: the
    # predictions do not depend on which partner they are compared with.
    predictions = [clf.predict(X) for clf in ensemble.classifiers]

    diversities = []
    for i, y_pred_a in enumerate(predictions):
        for y_pred_b in predictions[:i]:
            diversities.append(paired_metric(y, y_pred_a, y_pred_b))
    return np.mean(diversities)