Source code for brew.selection.dynamic.mcb

import numpy as np

from brew.base import Ensemble
from .base import DCS


class MCB(DCS):
    """Multiple Classifier Behavior.

    The Multiple Classifier Behavior (MCB) selects the best classifier
    using the similarity of the classifications on the K neighbors of the
    test sample in the validation set.

    Parameters
    ----------
    Xval : array-like, shape = [indeterminated, n_features]
        Validation set (forwarded to the ``DCS`` base class).
    yval : array-like, shape = [indeterminated]
        Labels of the validation set (forwarded to the ``DCS`` base class).
    K : int, default 5
        Forwarded to the ``DCS`` base class.
    weighted : bool, default False
        Forwarded to the ``DCS`` base class.
    knn : object or None, default None
        Forwarded to the ``DCS`` base class.
    similarity_threshold : float, default 0.7
        A neighbor is kept only when the fraction of classifiers that give
        it the same label as the query sample is strictly greater than
        this value.
    significance_threshold : float, default 0.3
        Minimum margin between the best and the second-best classifier
        score required to select the best classifier alone.

    Attributes
    ----------
    `Xval` : array-like, shape = [indeterminated, n_features]
        Validation set.

    `yval` : array-like, shape = [indeterminated]
        Labels of the validation set.

    `knn` : sklearn KNeighborsClassifier,
        Classifier used to find neighborhood.

    `similarity_threshold` : float
        Threshold used to filter the neighbors by behavior similarity.

    `significance_threshold` : float
        Margin required to pick a single best classifier.

    Examples
    --------
    >>> from brew.selection.dynamic.mcb import MCB
    >>> from brew.generation.bagging import Bagging
    >>> from brew.base import EnsembleClassifier
    >>>
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> import numpy as np
    >>>
    >>> X = np.array([[-1, 0], [-0.8, 1], [-0.8, -1], [-0.5, 0],
    ...               [0.5, 0], [1, 0], [0.8, 1], [0.8, -1]])
    >>> y = np.array([1, 1, 1, 2, 1, 2, 2, 2])
    >>> tree = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1)
    >>> bag = Bagging(base_classifier=tree, n_classifiers=10)
    >>> bag.fit(X, y)
    >>>
    >>> mcb = MCB(X, y, K=3)
    >>>
    >>> clf = EnsembleClassifier(bag.ensemble, selector=mcb)
    >>> clf.predict([-1.1,-0.5])
    [1]

    See also
    --------
    brew.selection.dynamic.lca.OLA: Overall Local Accuracy.
    brew.selection.dynamic.lca.LCA: Local Class Accuracy.

    References
    ----------
    Giacinto, Giorgio, and Fabio Roli. "Dynamic classifier selection
    based on multiple classifier behaviour." Pattern Recognition 34.9
    (2001): 1879-1881.
    """

    def __init__(self, Xval, yval, K=5, weighted=False, knn=None,
                 similarity_threshold=0.7, significance_threshold=0.3):
        self.similarity_threshold = similarity_threshold
        self.significance_threshold = significance_threshold
        super(MCB, self).__init__(
            Xval, yval, K=K, weighted=weighted, knn=knn)

    def select(self, ensemble, x):
        """Select the classifier(s) to use for the query sample ``x``.

        Parameters
        ----------
        ensemble : Ensemble
            Pool of classifiers to select from.
        x : array-like
            Query sample; passed as-is to ``ensemble.in_agreement``,
            ``ensemble.output`` and ``self.knn.kneighbors``.

        Returns
        -------
        tuple
            ``(Ensemble, None)``. The ensemble holds a single classifier
            when the whole pool agrees on ``x`` or when one classifier
            beats the runner-up by at least ``significance_threshold``;
            otherwise it holds every classifier of the pool. The second
            element is always ``None`` (presumably a weights slot shared
            with other selectors -- confirm against the caller).
        """
        # All classifiers agree on x, so any of them will do.
        if ensemble.in_agreement(x):
            return Ensemble([ensemble.classifiers[0]]), None

        # Behavior of the pool on the query: one label per classifier.
        mcb_x = ensemble.output(x, mode='labels')[0, :]

        # Indexes of the K nearest neighbors of x in the validation set.
        [neighbors] = self.knn.kneighbors(x, return_distance=False)
        X, y = self.Xval[neighbors], self.yval[neighbors]

        # Behavior of the pool on each neighbor.
        mcb_v = ensemble.output(X, mode='labels')

        # Keep only the neighbors on which the pool behaves like it does
        # on x (fraction of matching labels above the threshold).
        n_neighbors = X.shape[0]
        selected = [
            i for i in range(n_neighbors)
            if np.mean(mcb_x == mcb_v[i, :]) > self.similarity_threshold
        ]
        # No neighbor is similar enough: fall back to the whole region.
        if not selected:
            selected = np.arange(n_neighbors)

        scores = np.array([
            clf.score(X[selected], y[selected])
            for clf in ensemble.classifiers
        ])

        # A runner-up is needed for the significance test; with fewer than
        # two classifiers there is nothing to compare against.
        if scores.size < 2:
            return Ensemble(classifiers=ensemble.classifiers), None

        # If the best classifier is significantly better than the
        # second best, use it alone.
        best_i = np.argmax(scores)
        runner_up_score = np.max(np.delete(scores, best_i))
        if scores[best_i] - runner_up_score >= self.significance_threshold:
            best_classifier = ensemble.classifiers[best_i]
            return Ensemble(classifiers=[best_classifier]), None

        return Ensemble(classifiers=ensemble.classifiers), None