Source code for brew.selection.dynamic.dsknn

import numpy as np

from brew.base import Ensemble
from brew.metrics.diversity.paired import kuncheva_double_fault_measure
from .base import DCS
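
# A minimal sketch of the pairwise double-fault measure used below,
# assuming kuncheva_double_fault_measure receives an oracle matrix of
# shape (n_samples, 2) whose columns flag whether each of two classifiers
# labeled a sample correctly. This helper is illustrative only; it is
# neither the library implementation nor used by the class.
def _double_fault_sketch(oracle_pair):
    # Fraction of samples misclassified by both classifiers at once;
    # lower values mean the pair rarely fails together, i.e. more diverse.
    both_wrong = np.logical_and(~oracle_pair[:, 0], ~oracle_pair[:, 1])
    return np.mean(both_wrong)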


class DSKNN(DCS):
    """DS-KNN

    The DS-KNN method selects an ensemble of classifiers based on their
    accuracy and diversity in the neighborhood of the test sample.

    Attributes
    ----------
    `Xval` : array-like, shape = [indeterminate, n_features]
        Validation set.

    `yval` : array-like, shape = [indeterminate]
        Labels of the validation set.

    `knn` : sklearn KNeighborsClassifier,
        Classifier used to find the neighborhood.

    Examples
    --------
    >>> from brew.selection.dynamic import DSKNN
    >>> from brew.generation.bagging import Bagging
    >>> from brew.base import EnsembleClassifier
    >>>
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> import numpy as np
    >>>
    >>> X = np.array([[-1, 0], [-0.8, 1], [-0.8, -1], [-0.5, 0],
    ...               [0.5, 0], [1, 0], [0.8, 1], [0.8, -1]])
    >>> y = np.array([1, 1, 1, 2, 1, 2, 2, 2])
    >>> tree = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1)
    >>> bag = Bagging(base_classifier=tree, n_classifiers=10)
    >>> bag.fit(X, y)
    >>>
    >>> sel = DSKNN(X, y, K=3)
    >>>
    >>> clf = EnsembleClassifier(bag.ensemble, selector=sel)
    >>> clf.predict([-1.1, -0.5])
    [1]

    See also
    --------
    brew.selection.dynamic.ola.OLA: Overall Local Accuracy.
    brew.selection.dynamic.lca.LCA: Local Class Accuracy.

    References
    ----------
    Santana, Alixandre, et al. "A dynamic classifier selection method
    to build ensembles using accuracy and diversity." 2006 Ninth Brazilian
    Symposium on Neural Networks (SBRN'06). IEEE, 2006.
    """

    def __init__(self, Xval, yval, K=5, weighted=False, knn=None,
                 n_1=0.7, n_2=0.3):
        if n_1 < 0 or n_2 < 0 or n_1 <= n_2:
            raise ValueError('n_1 and n_2 must be non-negative, '
                             'with n_1 greater than n_2')

        self.n_1 = n_1
        self.n_2 = n_2
        super(DSKNN, self).__init__(
            Xval, yval, K=K, weighted=weighted, knn=knn)
    def select(self, ensemble, x):
        # If every classifier agrees on x, any single one will do.
        if ensemble.in_agreement(x):
            return Ensemble([ensemble.classifiers[0]]), None

        # Float values of n_1/n_2 are interpreted as fractions of the
        # pool size; at least one classifier is kept in each stage.
        n_sel_1, n_sel_2 = self.n_1, self.n_2
        if isinstance(self.n_1, float):
            n_sel_1 = int(n_sel_1 * len(ensemble))

        if isinstance(self.n_2, float):
            n_sel_2 = int(n_sel_2 * len(ensemble))

        n_sel_1 = max(n_sel_1, 1)
        n_sel_2 = max(n_sel_2, 1)

        # Get the indexes of the K nearest neighbors of x
        # in the validation set.
        classifiers = ensemble.classifiers
        [idx] = self.knn.kneighbors(x, return_distance=False)
        X, y = self.Xval[idx], self.yval[idx]

        # Accuracy of each classifier on the neighborhood.
        acc_scores = np.array([clf.score(X, y) for clf in classifiers])

        # Oracle matrix: oracle[m, i] is True if classifier i
        # labels neighbor m correctly.
        out = ensemble.output(X, mode='labels')
        oracle = np.equal(out, y[:, np.newaxis])
        div_scores = np.zeros(len(ensemble), dtype=float)

        # Diversity of each classifier: mean pairwise double-fault
        # measure against every other classifier (lower = more diverse).
        for i in range(len(ensemble)):
            tmp = []
            for j in range(len(ensemble)):
                if i != j:
                    d = kuncheva_double_fault_measure(oracle[:, [i, j]])
                    tmp.append(d)
            div_scores[i] = np.mean(tmp)

        # Keep the n_sel_1 most accurate classifiers, then the
        # n_sel_2 most diverse among those.
        z = list(zip(np.arange(len(ensemble)), acc_scores, div_scores))
        z = sorted(z, key=lambda e: e[1], reverse=True)[:n_sel_1]
        z = sorted(z, key=lambda e: e[2], reverse=False)[:n_sel_2]
        z = list(zip(*z))[0]

        classifiers = [classifiers[i] for i in z]

        return Ensemble(classifiers=classifiers), None
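
# A minimal usage sketch of the two-stage ranking performed by select(),
# on made-up accuracy/diversity scores (the values below are assumed for
# illustration, not taken from a real run):
if __name__ == '__main__':
    acc_scores = np.array([0.9, 0.6, 0.8, 0.7, 0.5])
    div_scores = np.array([0.4, 0.1, 0.3, 0.2, 0.5])

    # Stage 1: keep the 3 most accurate classifiers (indexes 0, 2, 3).
    z = list(zip(np.arange(5), acc_scores, div_scores))
    z = sorted(z, key=lambda e: e[1], reverse=True)[:3]

    # Stage 2: among those, keep the 2 with the lowest mean double-fault
    # score, i.e. the most diverse (indexes 3 and 2).
    z = sorted(z, key=lambda e: e[2], reverse=False)[:2]

    print([i for i, _, _ in z])  # -> [3, 2]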