# Source code for brew.selection.dynamic.knora

# -*- coding: utf-8 -*-

import numpy as np

from .base import DCS

from brew.base import Ensemble


# Do not use this class directly; use its subclasses instead
# (e.g. KNORA_ELIMINATE).
class KNORA(DCS):

    def _get_best_classifiers(self, ensemble, neighbors_X, neighbors_y, x):
        ensemble_out = ensemble.output(neighbors_X, mode='labels')
        ensemble_mask = ensemble_out == neighbors_y[:, np.newaxis]

        # number of correct predictions in the neighborhood, per classifier
        correct = np.sum(ensemble_mask, axis=0)
        idx = np.argmax(correct)  # best classifier idx

        # keep every classifier tied with the best one
        all_idx = np.where(correct == correct[idx])[0]

        pool = [ensemble.classifiers[i] for i in all_idx]

        return pool
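
# A minimal sketch (illustrative only, not part of the library) of the
# tie-breaking rule above: every classifier whose number of correct
# neighborhood predictions equals the maximum is kept in the pool.
if __name__ == '__main__':
    correct = np.array([3, 5, 5, 2])  # correct predictions per classifier
    best = np.where(correct == correct.max())[0]
    assert best.tolist() == [1, 2]    # both tied top classifiers are kept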
class KNORA_ELIMINATE(KNORA):
    """K-nearest-oracles Eliminate.

    KNORA Eliminate shrinks the neighborhood until it finds an ensemble
    of classifiers that correctly classifies all remaining neighbors.

    Attributes
    ----------
    `Xval` : array-like, shape = [n_samples, n_features]
        Validation set.

    `yval` : array-like, shape = [n_samples]
        Labels of the validation set.

    `knn` : sklearn KNeighborsClassifier,
        Classifier used to find the neighborhood.

    `weighted` : bool (makes no difference in KNORA Eliminate)
        Whether the selected classifiers are weighted.

    Examples
    --------
    >>> from brew.selection.dynamic.knora import KNORA_ELIMINATE
    >>> from brew.generation.bagging import Bagging
    >>> from brew.base import EnsembleClassifier
    >>>
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> import numpy as np
    >>>
    >>> X = np.array([[-1, 0], [-0.8, 1], [-0.8, -1], [-0.5, 0],
    ...               [0.5, 0], [1, 0], [0.8, 1], [0.8, -1]])
    >>> y = np.array([1, 1, 1, 2, 1, 2, 2, 2])
    >>>
    >>> dt = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1)
    >>> bag = Bagging(base_classifier=dt, n_classifiers=10)
    >>> bag.fit(X, y)
    >>>
    >>> ke = KNORA_ELIMINATE(X, y, K=5)
    >>>
    >>> clf = EnsembleClassifier(bag.ensemble, selector=ke)
    >>> clf.predict([-1.1, -0.5])
    [1]

    See also
    --------
    brew.selection.dynamic.knora.KNORA_UNION: KNORA Union.
    brew.selection.dynamic.lca.LCA: Local Class Accuracy.
    brew.selection.dynamic.ola.OLA: Overall Local Accuracy.

    References
    ----------
    Ko, Albert H. R., Robert Sabourin, and Alceu Souza Britto Jr. "From
    dynamic classifier selection to dynamic ensemble selection." Pattern
    Recognition 41.5 (2008): 1718-1731.

    Britto, Alceu S., Robert Sabourin, and Luiz E. S. Oliveira. "Dynamic
    selection of classifiers—A comprehensive review." Pattern Recognition
    47.11 (2014): 3665-3680.

    Ko, Albert Hung-Ren, Robert Sabourin, and Alceu de Souza Britto.
    "K-nearest oracle for dynamic ensemble selection." Document Analysis
    and Recognition, 2007. ICDAR 2007. Ninth International Conference on.
    Vol. 1. IEEE, 2007.
    """

    def __init__(self, Xval, yval, K=5, weighted=False, knn=None,
                 v2007=False):
        self.v2007 = v2007
        super(KNORA_ELIMINATE, self).__init__(
            Xval, yval, K=K, weighted=weighted, knn=knn)
    def select(self, ensemble, x):
        ensemble_mask = None

        neighbors_X, neighbors_y = self.get_neighbors(x)
        pool_output = ensemble.output(neighbors_X, mode='labels')

        # gradually decrease the neighborhood size while no
        # classifier predicts ALL the neighbors correctly
        for i in range(self.K, 0, -1):
            pool_mask = _get_pool_mask(
                pool_output[:i], neighbors_y[:i], np.all)

            # if at least one classifier gets all neighbors right
            if pool_mask is not None:
                ensemble_mask = pool_mask
                break

        # if NO classifier gets even the nearest neighbor correctly
        if ensemble_mask is None and self.v2007:
            # increase the neighborhood until one classifier
            # gets at least ONE (i.e. ANY) neighbor correctly;
            # starts at 2 because mask_all with k=1 is
            # the same as mask_any with k=1
            for i in range(2, self.K + 1):
                pool_mask = _get_pool_mask(
                    pool_output[:i], neighbors_y[:i], np.any)

                if pool_mask is not None:
                    ensemble_mask = pool_mask
                    break

        if ensemble_mask is not None:
            [selected_idx] = np.where(ensemble_mask)
        else:
            selected_idx = np.array([], dtype=int)

        if selected_idx.size > 0:
            pool = Ensemble(
                classifiers=[ensemble.classifiers[i] for i in selected_idx])
        else:
            # no classifier was selected: fall back to the classifiers
            # with the most correct predictions in the neighborhood
            classifiers = self._get_best_classifiers(
                ensemble, neighbors_X, neighbors_y, x)
            pool = Ensemble(classifiers=classifiers)

        # KNORA-ELIMINATE-W, which supposedly uses weights, does not
        # make sense, so even if self.weighted is True, always return
        # None for the weights
        return pool, None
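
# A minimal sketch (illustrative only, not part of the library) of the
# shrinking neighborhood above: with K=3 no classifier is correct on all
# neighbors, but on the 2 nearest ones classifier 0 is, so the search
# stops at k=2.
if __name__ == '__main__':
    hits = np.array([[True, False],    # classifiers 0/1 on neighbor 0
                     [True, True],     # ... on neighbor 1
                     [False, True]])   # ... on neighbor 2
    for k in range(3, 0, -1):
        mask = np.all(hits[:k], axis=0)
        if mask.any():
            break
    assert k == 2 and mask.tolist() == [True, False]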
class KNORA_UNION(KNORA):
    """K-nearest-oracles Union.

    KNORA Union selects every classifier that correctly classifies at
    least one of the K nearest neighbors; each selected classifier gets
    one vote per neighbor it classifies correctly.

    Attributes
    ----------
    `Xval` : array-like, shape = [n_samples, n_features]
        Validation set.

    `yval` : array-like, shape = [n_samples]
        Labels of the validation set.

    `knn` : sklearn KNeighborsClassifier,
        Classifier used to find the neighborhood.

    `weighted` : bool
        If True, the votes are weighted by the inverse of the distance
        between the test pattern and each neighbor.

    Examples
    --------
    >>> from brew.selection.dynamic.knora import KNORA_UNION
    >>> from brew.generation.bagging import Bagging
    >>> from brew.base import EnsembleClassifier
    >>>
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> import numpy as np
    >>>
    >>> X = np.array([[-1, 0], [-0.8, 1], [-0.8, -1], [-0.5, 0],
    ...               [0.5, 0], [1, 0], [0.8, 1], [0.8, -1]])
    >>> y = np.array([1, 1, 1, 2, 1, 2, 2, 2])
    >>>
    >>> dt = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1)
    >>> bag = Bagging(base_classifier=dt, n_classifiers=10)
    >>> bag.fit(X, y)
    >>>
    >>> ku = KNORA_UNION(X, y, K=5)
    >>>
    >>> clf = EnsembleClassifier(bag.ensemble, selector=ku)
    >>> clf.predict([-1.1, -0.5])
    [1]

    See also
    --------
    brew.selection.dynamic.knora.KNORA_ELIMINATE: KNORA Eliminate.
    brew.selection.dynamic.lca.LCA: Local Class Accuracy.
    brew.selection.dynamic.ola.OLA: Overall Local Accuracy.

    References
    ----------
    Ko, Albert H. R., Robert Sabourin, and Alceu Souza Britto Jr. "From
    dynamic classifier selection to dynamic ensemble selection." Pattern
    Recognition 41.5 (2008): 1718-1731.

    Britto, Alceu S., Robert Sabourin, and Luiz E. S. Oliveira. "Dynamic
    selection of classifiers—A comprehensive review." Pattern Recognition
    47.11 (2014): 3665-3680.

    Ko, Albert Hung-Ren, Robert Sabourin, and Alceu de Souza Britto.
    "K-nearest oracle for dynamic ensemble selection." Document Analysis
    and Recognition, 2007. ICDAR 2007. Ninth International Conference on.
    Vol. 1. IEEE, 2007.
    """
    def select(self, ensemble, x):
        neighbors_X, neighbors_y = self.get_neighbors(x)
        pool_output = ensemble.output(neighbors_X, mode='labels')

        # True where classifier j predicts neighbor i correctly
        output_mask = (pool_output == neighbors_y[:, np.newaxis])

        # select every classifier that gets at least one neighbor right
        [selected_idx] = np.where(np.any(output_mask, axis=0))

        if selected_idx.size > 0:
            if self.weighted:
                # weight each neighbor's vote by the inverse of its
                # distance to x (the small constant avoids division by zero)
                weights = 1.0 / (
                    np.sqrt(np.sum((x - neighbors_X) ** 2, axis=1)) + 10e-8)
                weighted_votes = np.dot(weights, output_mask[:, selected_idx])
            else:
                # one vote per correctly classified neighbor
                weighted_votes = np.sum(output_mask[:, selected_idx], axis=0)

            pool = Ensemble(
                classifiers=[ensemble.classifiers[i] for i in selected_idx])

        # if no classifiers are selected,
        # use all classifiers with no weights
        else:
            pool = ensemble
            weighted_votes = None

        return pool, weighted_votes
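
# A minimal sketch (illustrative only) of the unweighted union vote:
# classifiers with zero correct neighbors are dropped, and each kept
# classifier receives one vote per neighbor it labels correctly.
if __name__ == '__main__':
    output_mask = np.array([[True, False, False],
                            [True, True, False],
                            [False, True, False]])
    [selected] = np.where(np.any(output_mask, axis=0))
    votes = np.sum(output_mask[:, selected], axis=0)
    assert selected.tolist() == [0, 1] and votes.tolist() == [2, 2]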
def _get_pool_mask(pool_output, neighbors_target, func):
    pool_mask = func(pool_output == neighbors_target[:, np.newaxis], axis=0)

    if np.sum(pool_mask) > 0:
        return pool_mask

    return None
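
# Minimal usage sketch (illustrative only): with np.all the mask keeps
# classifiers that are right on every neighbor; with np.any, those right
# on at least one neighbor.
if __name__ == '__main__':
    out = np.array([[1, 2],            # 2 classifiers (columns) ...
                    [1, 1]])           # ... on 2 neighbors (rows)
    target = np.array([1, 1])
    assert _get_pool_mask(out, target, np.all).tolist() == [True, False]
    assert _get_pool_mask(out, target, np.any).tolist() == [True, True]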