"""Source code for brew.selection.dynamic.lca: Local Class Accuracy selectors."""

import numpy as np

from brew.base import Ensemble
from .base import DCS


class LCA2(DCS):
    """Local Class Accuracy with tie-breaking.

    The Local Class Accuracy selects the best classifier for
    a sample using its K nearest neighbors: each classifier is scored by
    the fraction of neighbors it labels correctly among the neighbors it
    assigns to the same class it predicts for the sample. When several
    classes are tied inside the best-scoring group of classifiers, the
    lower-scoring groups are consulted to break the tie.

    Attributes
    ----------
    `Xval` : array-like, shape = [n_samples, n_features]
        Validation set.

    `yval` : array-like, shape = [n_samples]
        Labels of the validation set.

    `knn`  : sklearn KNeighborsClassifier,
        Classifier used to find neighborhood.


    Examples
    --------
    >>> from brew.selection.dynamic.lca import LCA
    >>> from brew.generation.bagging import Bagging
    >>> from brew.base import EnsembleClassifier
    >>>
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> import numpy as np
    >>>
    >>> X = np.array([[-1, 0], [-0.8, 1], [-0.8, -1], [-0.5, 0],
    ...               [0.5, 0], [1, 0], [0.8, 1], [0.8, -1]])
    >>> y = np.array([1, 1, 1, 2, 1, 2, 2, 2])
    >>> tree = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1)
    >>> bag = Bagging(base_classifier=tree, n_classifiers=10)
    >>> bag.fit(X, y)
    >>>
    >>> lca = LCA(X, y, K=3)
    >>>
    >>> clf = EnsembleClassifier(bag.ensemble, selector=lca)
    >>> clf.predict([-1.1,-0.5])
    [1]

    See also
    --------
    brew.selection.dynamic.ola.OLA: Overall Local Accuracy.

    References
    ----------
    Woods, Kevin, Kevin Bowyer, and W. Philip Kegelmeyer Jr. "Combination
    of multiple classifiers using local accuracy estimates." Computer Vision
    and Pattern Recognition, 1996. Proceedings CVPR'96, 1996 IEEE Computer
    Society Conference on. IEEE, 1996.

    Ko, Albert HR, Robert Sabourin, and Alceu Souza Britto Jr.
    "From dynamic classifier selection to dynamic ensemble selection."
    Pattern Recognition 41.5 (2008): 1718-1731.
    """

    def __init__(self, Xval, yval, K=5, weighted=False, knn=None):
        """Forward the validation data and neighborhood settings to DCS.

        Parameters
        ----------
        Xval : Numpy 2d-array with rows representing each sample.
        yval : Numpy 1d-array representing the target classes of
            the samples in Xval.
        K : int (default=5), the size of the neighborhood used to select the
            classifier.
        weighted : bool (default=False), if the selected classifiers are
            weighted; passed to the base class and not read by ``select``.
        knn : sklearn KNeighborsClassifier (default=None), a classifier
            to find the neighborhood of each sample.
        """
        super(LCA2, self).__init__(Xval, yval, K, weighted, knn)

    def select(self, ensemble, x):
        """Select a single classifier of `ensemble` for the sample `x`.

        Parameters
        ----------
        ensemble : pool of classifiers; must expose ``classifiers``,
            ``in_agreement(x)`` and ``len()``.
        x : the query sample, in whatever form ``self.knn.kneighbors`` and
            the classifiers' ``predict`` accept for one sample.

        Returns
        -------
        (Ensemble, None) : an Ensemble wrapping the single selected
            classifier; the second element is always None.

        Notes
        -----
        Class labels are cast to int and fed to ``np.bincount``, so they
        must be non-negative integers.
        """
        # Nothing to choose when the pool agrees: any member will do.
        if ensemble.in_agreement(x):
            return Ensemble([ensemble.classifiers[0]]), None

        # Obtain the K nearest neighbors in the validation set. The
        # single-element unpacking requires exactly one query sample.
        [idx] = self.knn.kneighbors(
            x, n_neighbors=self.K, return_distance=False)
        neighbors_X = self.Xval[idx]  # k neighbors
        neighbors_y = self.yval[idx]  # k neighbors target

        classifiers = ensemble.classifiers

        # pool_output[s, j] is the label classifier j gives to neighbor s.
        pool_output = np.zeros((neighbors_X.shape[0], len(ensemble)))
        for j, clf in enumerate(classifiers):
            pool_output[:, j] = clf.predict(neighbors_X)

        # x_outputs[j] is the label classifier j gives to the sample itself.
        x_outputs = np.asarray(
            [clf.predict(x) for clf in classifiers]).flatten()

        scores = np.zeros(len(ensemble))
        groups = {}  # score -> indices (into the pool) of classifiers
        for j in range(pool_output.shape[1]):
            # neighbors assigned to the same class as 'x'
            same_class = pool_output[:, j] == x_outputs[j]
            # ... of which the ones that are classified correctly
            correct_same_class = same_class & (pool_output[:, j] == neighbors_y)
            # The tiny constant avoids a division by zero (score becomes 0)
            # when no neighbor receives the class predicted for 'x'.
            scores[j] = (float(correct_same_class.sum()) /
                         (same_class.sum() + 10e-24))
            groups.setdefault(float(scores[j]), []).append(j)

        # Walk the groups from best to worst score. The first group fixes
        # the set of tied classes; later groups only vote among those.
        options = None
        for score in sorted(groups, reverse=True):
            members = groups[score]
            pred = x_outputs[members].astype(int)

            bincount = np.bincount(pred)
            if options is not None:
                for label in range(len(bincount)):
                    if label not in options:
                        bincount[label] = 0

            imx = np.argmax(bincount)
            if bincount[imx] == 0:
                # No classifier in this group predicts one of the tied
                # classes, so the group cannot break the tie.
                continue

            votes = np.argwhere(bincount == bincount[imx]).flatten()
            if len(votes) == 1:
                # Translate the position inside the group back to the
                # classifier's index in the pool before selecting it.
                winner = members[int(np.argmax(pred == imx))]
                return Ensemble([classifiers[winner]]), None
            elif options is None:
                options = votes

        # Tie could not be broken: fall back to the first best-scoring one.
        return Ensemble([classifiers[np.argmax(scores)]]), None

class LCA(DCS):
    """Local Class Accuracy.

    Selects, for a query sample, the single classifier with the highest
    local class accuracy over the sample's K nearest neighbors in the
    validation set. The constructor is inherited from DCS; ``select`` reads
    ``self.knn``, ``self.K``, ``self.Xval`` and ``self.yval``.

    Ties between classifiers are not broken explicitly: ``np.argmax``
    returns the first classifier that reaches the highest score.
    """

    def select(self, ensemble, x):
        """Select a single classifier of `ensemble` for the sample `x`.

        Parameters
        ----------
        ensemble : pool of classifiers; must expose ``classifiers``,
            ``in_agreement(x)`` and ``len()``.
        x : the query sample, in whatever form ``self.knn.kneighbors`` and
            the classifiers' ``predict`` accept for one sample.

        Returns
        -------
        (Ensemble, None) : an Ensemble wrapping the single selected
            classifier; the second element is always None.
        """
        # Nothing to choose when the pool agrees: any member will do.
        if ensemble.in_agreement(x):
            return Ensemble([ensemble.classifiers[0]]), None

        # Obtain the K nearest neighbors in the validation set. The
        # single-element unpacking requires exactly one query sample.
        [idx] = self.knn.kneighbors(
            x, n_neighbors=self.K, return_distance=False)
        neighbors_X = self.Xval[idx]  # k neighbors
        neighbors_y = self.yval[idx]  # k neighbors target

        classifiers = ensemble.classifiers

        # pool_output[s, j] is the label classifier j gives to neighbor s.
        pool_output = np.zeros((neighbors_X.shape[0], len(ensemble)))
        for j, clf in enumerate(classifiers):
            pool_output[:, j] = clf.predict(neighbors_X)

        # x_outputs[j] is the label classifier j gives to the sample itself.
        x_outputs = np.asarray(
            [clf.predict(x) for clf in classifiers]).flatten()

        scores = np.zeros(len(ensemble))
        for j in range(pool_output.shape[1]):
            # neighbors assigned to the same class as 'x'
            same_class = pool_output[:, j] == x_outputs[j]
            # ... of which the ones that are classified correctly
            correct_same_class = same_class & (pool_output[:, j] == neighbors_y)
            # The tiny constant avoids a division by zero (score becomes 0)
            # when no neighbor receives the class predicted for 'x'.
            scores[j] = (float(correct_same_class.sum()) /
                         (same_class.sum() + 10e-24))

        return Ensemble([classifiers[np.argmax(scores)]]), None