first commit
This commit is contained in:
@@ -0,0 +1,999 @@
|
||||
"""
|
||||
Linear Discriminant Analysis and Quadratic Discriminant Analysis
|
||||
"""
|
||||
|
||||
# Authors: Clemens Brunner
|
||||
# Martin Billinger
|
||||
# Matthieu Perrot
|
||||
# Mathieu Blondel
|
||||
|
||||
# License: BSD 3-Clause
|
||||
|
||||
import warnings
|
||||
import numpy as np
|
||||
from scipy import linalg
|
||||
from scipy.special import expit
|
||||
from numbers import Real
|
||||
|
||||
from .base import BaseEstimator, TransformerMixin, ClassifierMixin
|
||||
from .base import _ClassNamePrefixFeaturesOutMixin
|
||||
from .linear_model._base import LinearClassifierMixin
|
||||
from .covariance import ledoit_wolf, empirical_covariance, shrunk_covariance
|
||||
from .utils.multiclass import unique_labels
|
||||
from .utils.validation import check_is_fitted
|
||||
from .utils.multiclass import check_classification_targets
|
||||
from .utils.extmath import softmax
|
||||
from .preprocessing import StandardScaler
|
||||
|
||||
|
||||
__all__ = ["LinearDiscriminantAnalysis", "QuadraticDiscriminantAnalysis"]
|
||||
|
||||
|
||||
def _cov(X, shrinkage=None, covariance_estimator=None):
|
||||
"""Estimate covariance matrix (using optional covariance_estimator).
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Input data.
|
||||
|
||||
shrinkage : {'empirical', 'auto'} or float, default=None
|
||||
Shrinkage parameter, possible values:
|
||||
- None or 'empirical': no shrinkage (default).
|
||||
- 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
|
||||
- float between 0 and 1: fixed shrinkage parameter.
|
||||
|
||||
Shrinkage parameter is ignored if `covariance_estimator`
|
||||
is not None.
|
||||
|
||||
covariance_estimator : estimator, default=None
|
||||
If not None, `covariance_estimator` is used to estimate
|
||||
the covariance matrices instead of relying on the empirical
|
||||
covariance estimator (with potential shrinkage).
|
||||
The object should have a fit method and a ``covariance_`` attribute
|
||||
like the estimators in :mod:`sklearn.covariance``.
|
||||
if None the shrinkage parameter drives the estimate.
|
||||
|
||||
.. versionadded:: 0.24
|
||||
|
||||
Returns
|
||||
-------
|
||||
s : ndarray of shape (n_features, n_features)
|
||||
Estimated covariance matrix.
|
||||
"""
|
||||
if covariance_estimator is None:
|
||||
shrinkage = "empirical" if shrinkage is None else shrinkage
|
||||
if isinstance(shrinkage, str):
|
||||
if shrinkage == "auto":
|
||||
sc = StandardScaler() # standardize features
|
||||
X = sc.fit_transform(X)
|
||||
s = ledoit_wolf(X)[0]
|
||||
# rescale
|
||||
s = sc.scale_[:, np.newaxis] * s * sc.scale_[np.newaxis, :]
|
||||
elif shrinkage == "empirical":
|
||||
s = empirical_covariance(X)
|
||||
else:
|
||||
raise ValueError("unknown shrinkage parameter")
|
||||
elif isinstance(shrinkage, Real):
|
||||
if shrinkage < 0 or shrinkage > 1:
|
||||
raise ValueError("shrinkage parameter must be between 0 and 1")
|
||||
s = shrunk_covariance(empirical_covariance(X), shrinkage)
|
||||
else:
|
||||
raise TypeError("shrinkage must be a float or a string")
|
||||
else:
|
||||
if shrinkage is not None and shrinkage != 0:
|
||||
raise ValueError(
|
||||
"covariance_estimator and shrinkage parameters "
|
||||
"are not None. Only one of the two can be set."
|
||||
)
|
||||
covariance_estimator.fit(X)
|
||||
if not hasattr(covariance_estimator, "covariance_"):
|
||||
raise ValueError(
|
||||
"%s does not have a covariance_ attribute"
|
||||
% covariance_estimator.__class__.__name__
|
||||
)
|
||||
s = covariance_estimator.covariance_
|
||||
return s
|
||||
|
||||
|
||||
def _class_means(X, y):
|
||||
"""Compute class means.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Input data.
|
||||
|
||||
y : array-like of shape (n_samples,) or (n_samples, n_targets)
|
||||
Target values.
|
||||
|
||||
Returns
|
||||
-------
|
||||
means : array-like of shape (n_classes, n_features)
|
||||
Class means.
|
||||
"""
|
||||
classes, y = np.unique(y, return_inverse=True)
|
||||
cnt = np.bincount(y)
|
||||
means = np.zeros(shape=(len(classes), X.shape[1]))
|
||||
np.add.at(means, y, X)
|
||||
means /= cnt[:, None]
|
||||
return means
|
||||
|
||||
|
||||
def _class_cov(X, y, priors, shrinkage=None, covariance_estimator=None):
|
||||
"""Compute weighted within-class covariance matrix.
|
||||
|
||||
The per-class covariance are weighted by the class priors.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Input data.
|
||||
|
||||
y : array-like of shape (n_samples,) or (n_samples, n_targets)
|
||||
Target values.
|
||||
|
||||
priors : array-like of shape (n_classes,)
|
||||
Class priors.
|
||||
|
||||
shrinkage : 'auto' or float, default=None
|
||||
Shrinkage parameter, possible values:
|
||||
- None: no shrinkage (default).
|
||||
- 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
|
||||
- float between 0 and 1: fixed shrinkage parameter.
|
||||
|
||||
Shrinkage parameter is ignored if `covariance_estimator` is not None.
|
||||
|
||||
covariance_estimator : estimator, default=None
|
||||
If not None, `covariance_estimator` is used to estimate
|
||||
the covariance matrices instead of relying the empirical
|
||||
covariance estimator (with potential shrinkage).
|
||||
The object should have a fit method and a ``covariance_`` attribute
|
||||
like the estimators in sklearn.covariance.
|
||||
If None, the shrinkage parameter drives the estimate.
|
||||
|
||||
.. versionadded:: 0.24
|
||||
|
||||
Returns
|
||||
-------
|
||||
cov : array-like of shape (n_features, n_features)
|
||||
Weighted within-class covariance matrix
|
||||
"""
|
||||
classes = np.unique(y)
|
||||
cov = np.zeros(shape=(X.shape[1], X.shape[1]))
|
||||
for idx, group in enumerate(classes):
|
||||
Xg = X[y == group, :]
|
||||
cov += priors[idx] * np.atleast_2d(_cov(Xg, shrinkage, covariance_estimator))
|
||||
return cov
|
||||
|
||||
|
||||
class LinearDiscriminantAnalysis(
|
||||
_ClassNamePrefixFeaturesOutMixin,
|
||||
LinearClassifierMixin,
|
||||
TransformerMixin,
|
||||
BaseEstimator,
|
||||
):
|
||||
"""Linear Discriminant Analysis.
|
||||
|
||||
A classifier with a linear decision boundary, generated by fitting class
|
||||
conditional densities to the data and using Bayes' rule.
|
||||
|
||||
The model fits a Gaussian density to each class, assuming that all classes
|
||||
share the same covariance matrix.
|
||||
|
||||
The fitted model can also be used to reduce the dimensionality of the input
|
||||
by projecting it to the most discriminative directions, using the
|
||||
`transform` method.
|
||||
|
||||
.. versionadded:: 0.17
|
||||
*LinearDiscriminantAnalysis*.
|
||||
|
||||
Read more in the :ref:`User Guide <lda_qda>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
solver : {'svd', 'lsqr', 'eigen'}, default='svd'
|
||||
Solver to use, possible values:
|
||||
- 'svd': Singular value decomposition (default).
|
||||
Does not compute the covariance matrix, therefore this solver is
|
||||
recommended for data with a large number of features.
|
||||
- 'lsqr': Least squares solution.
|
||||
Can be combined with shrinkage or custom covariance estimator.
|
||||
- 'eigen': Eigenvalue decomposition.
|
||||
Can be combined with shrinkage or custom covariance estimator.
|
||||
|
||||
shrinkage : 'auto' or float, default=None
|
||||
Shrinkage parameter, possible values:
|
||||
- None: no shrinkage (default).
|
||||
- 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
|
||||
- float between 0 and 1: fixed shrinkage parameter.
|
||||
|
||||
This should be left to None if `covariance_estimator` is used.
|
||||
Note that shrinkage works only with 'lsqr' and 'eigen' solvers.
|
||||
|
||||
priors : array-like of shape (n_classes,), default=None
|
||||
The class prior probabilities. By default, the class proportions are
|
||||
inferred from the training data.
|
||||
|
||||
n_components : int, default=None
|
||||
Number of components (<= min(n_classes - 1, n_features)) for
|
||||
dimensionality reduction. If None, will be set to
|
||||
min(n_classes - 1, n_features). This parameter only affects the
|
||||
`transform` method.
|
||||
|
||||
store_covariance : bool, default=False
|
||||
If True, explicitly compute the weighted within-class covariance
|
||||
matrix when solver is 'svd'. The matrix is always computed
|
||||
and stored for the other solvers.
|
||||
|
||||
.. versionadded:: 0.17
|
||||
|
||||
tol : float, default=1.0e-4
|
||||
Absolute threshold for a singular value of X to be considered
|
||||
significant, used to estimate the rank of X. Dimensions whose
|
||||
singular values are non-significant are discarded. Only used if
|
||||
solver is 'svd'.
|
||||
|
||||
.. versionadded:: 0.17
|
||||
|
||||
covariance_estimator : covariance estimator, default=None
|
||||
If not None, `covariance_estimator` is used to estimate
|
||||
the covariance matrices instead of relying on the empirical
|
||||
covariance estimator (with potential shrinkage).
|
||||
The object should have a fit method and a ``covariance_`` attribute
|
||||
like the estimators in :mod:`sklearn.covariance`.
|
||||
if None the shrinkage parameter drives the estimate.
|
||||
|
||||
This should be left to None if `shrinkage` is used.
|
||||
Note that `covariance_estimator` works only with 'lsqr' and 'eigen'
|
||||
solvers.
|
||||
|
||||
.. versionadded:: 0.24
|
||||
|
||||
Attributes
|
||||
----------
|
||||
coef_ : ndarray of shape (n_features,) or (n_classes, n_features)
|
||||
Weight vector(s).
|
||||
|
||||
intercept_ : ndarray of shape (n_classes,)
|
||||
Intercept term.
|
||||
|
||||
covariance_ : array-like of shape (n_features, n_features)
|
||||
Weighted within-class covariance matrix. It corresponds to
|
||||
`sum_k prior_k * C_k` where `C_k` is the covariance matrix of the
|
||||
samples in class `k`. The `C_k` are estimated using the (potentially
|
||||
shrunk) biased estimator of covariance. If solver is 'svd', only
|
||||
exists when `store_covariance` is True.
|
||||
|
||||
explained_variance_ratio_ : ndarray of shape (n_components,)
|
||||
Percentage of variance explained by each of the selected components.
|
||||
If ``n_components`` is not set then all components are stored and the
|
||||
sum of explained variances is equal to 1.0. Only available when eigen
|
||||
or svd solver is used.
|
||||
|
||||
means_ : array-like of shape (n_classes, n_features)
|
||||
Class-wise means.
|
||||
|
||||
priors_ : array-like of shape (n_classes,)
|
||||
Class priors (sum to 1).
|
||||
|
||||
scalings_ : array-like of shape (rank, n_classes - 1)
|
||||
Scaling of the features in the space spanned by the class centroids.
|
||||
Only available for 'svd' and 'eigen' solvers.
|
||||
|
||||
xbar_ : array-like of shape (n_features,)
|
||||
Overall mean. Only present if solver is 'svd'.
|
||||
|
||||
classes_ : array-like of shape (n_classes,)
|
||||
Unique class labels.
|
||||
|
||||
n_features_in_ : int
|
||||
Number of features seen during :term:`fit`.
|
||||
|
||||
.. versionadded:: 0.24
|
||||
|
||||
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
||||
Names of features seen during :term:`fit`. Defined only when `X`
|
||||
has feature names that are all strings.
|
||||
|
||||
.. versionadded:: 1.0
|
||||
|
||||
See Also
|
||||
--------
|
||||
QuadraticDiscriminantAnalysis : Quadratic Discriminant Analysis.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import numpy as np
|
||||
>>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
|
||||
>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
|
||||
>>> y = np.array([1, 1, 1, 2, 2, 2])
|
||||
>>> clf = LinearDiscriminantAnalysis()
|
||||
>>> clf.fit(X, y)
|
||||
LinearDiscriminantAnalysis()
|
||||
>>> print(clf.predict([[-0.8, -1]]))
|
||||
[1]
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
solver="svd",
|
||||
shrinkage=None,
|
||||
priors=None,
|
||||
n_components=None,
|
||||
store_covariance=False,
|
||||
tol=1e-4,
|
||||
covariance_estimator=None,
|
||||
):
|
||||
self.solver = solver
|
||||
self.shrinkage = shrinkage
|
||||
self.priors = priors
|
||||
self.n_components = n_components
|
||||
self.store_covariance = store_covariance # used only in svd solver
|
||||
self.tol = tol # used only in svd solver
|
||||
self.covariance_estimator = covariance_estimator
|
||||
|
||||
def _solve_lsqr(self, X, y, shrinkage, covariance_estimator):
|
||||
"""Least squares solver.
|
||||
|
||||
The least squares solver computes a straightforward solution of the
|
||||
optimal decision rule based directly on the discriminant functions. It
|
||||
can only be used for classification (with any covariance estimator),
|
||||
because
|
||||
estimation of eigenvectors is not performed. Therefore, dimensionality
|
||||
reduction with the transform is not supported.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Training data.
|
||||
|
||||
y : array-like of shape (n_samples,) or (n_samples, n_classes)
|
||||
Target values.
|
||||
|
||||
shrinkage : 'auto', float or None
|
||||
Shrinkage parameter, possible values:
|
||||
- None: no shrinkage.
|
||||
- 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
|
||||
- float between 0 and 1: fixed shrinkage parameter.
|
||||
|
||||
Shrinkage parameter is ignored if `covariance_estimator` i
|
||||
not None
|
||||
|
||||
covariance_estimator : estimator, default=None
|
||||
If not None, `covariance_estimator` is used to estimate
|
||||
the covariance matrices instead of relying the empirical
|
||||
covariance estimator (with potential shrinkage).
|
||||
The object should have a fit method and a ``covariance_`` attribute
|
||||
like the estimators in sklearn.covariance.
|
||||
if None the shrinkage parameter drives the estimate.
|
||||
|
||||
.. versionadded:: 0.24
|
||||
|
||||
Notes
|
||||
-----
|
||||
This solver is based on [1]_, section 2.6.2, pp. 39-41.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification
|
||||
(Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN
|
||||
0-471-05669-3.
|
||||
"""
|
||||
self.means_ = _class_means(X, y)
|
||||
self.covariance_ = _class_cov(
|
||||
X, y, self.priors_, shrinkage, covariance_estimator
|
||||
)
|
||||
self.coef_ = linalg.lstsq(self.covariance_, self.means_.T)[0].T
|
||||
self.intercept_ = -0.5 * np.diag(np.dot(self.means_, self.coef_.T)) + np.log(
|
||||
self.priors_
|
||||
)
|
||||
|
||||
def _solve_eigen(self, X, y, shrinkage, covariance_estimator):
|
||||
"""Eigenvalue solver.
|
||||
|
||||
The eigenvalue solver computes the optimal solution of the Rayleigh
|
||||
coefficient (basically the ratio of between class scatter to within
|
||||
class scatter). This solver supports both classification and
|
||||
dimensionality reduction (with any covariance estimator).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Training data.
|
||||
|
||||
y : array-like of shape (n_samples,) or (n_samples, n_targets)
|
||||
Target values.
|
||||
|
||||
shrinkage : 'auto', float or None
|
||||
Shrinkage parameter, possible values:
|
||||
- None: no shrinkage.
|
||||
- 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
|
||||
- float between 0 and 1: fixed shrinkage constant.
|
||||
|
||||
Shrinkage parameter is ignored if `covariance_estimator` i
|
||||
not None
|
||||
|
||||
covariance_estimator : estimator, default=None
|
||||
If not None, `covariance_estimator` is used to estimate
|
||||
the covariance matrices instead of relying the empirical
|
||||
covariance estimator (with potential shrinkage).
|
||||
The object should have a fit method and a ``covariance_`` attribute
|
||||
like the estimators in sklearn.covariance.
|
||||
if None the shrinkage parameter drives the estimate.
|
||||
|
||||
.. versionadded:: 0.24
|
||||
|
||||
Notes
|
||||
-----
|
||||
This solver is based on [1]_, section 3.8.3, pp. 121-124.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification
|
||||
(Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN
|
||||
0-471-05669-3.
|
||||
"""
|
||||
self.means_ = _class_means(X, y)
|
||||
self.covariance_ = _class_cov(
|
||||
X, y, self.priors_, shrinkage, covariance_estimator
|
||||
)
|
||||
|
||||
Sw = self.covariance_ # within scatter
|
||||
St = _cov(X, shrinkage, covariance_estimator) # total scatter
|
||||
Sb = St - Sw # between scatter
|
||||
|
||||
evals, evecs = linalg.eigh(Sb, Sw)
|
||||
self.explained_variance_ratio_ = np.sort(evals / np.sum(evals))[::-1][
|
||||
: self._max_components
|
||||
]
|
||||
evecs = evecs[:, np.argsort(evals)[::-1]] # sort eigenvectors
|
||||
|
||||
self.scalings_ = evecs
|
||||
self.coef_ = np.dot(self.means_, evecs).dot(evecs.T)
|
||||
self.intercept_ = -0.5 * np.diag(np.dot(self.means_, self.coef_.T)) + np.log(
|
||||
self.priors_
|
||||
)
|
||||
|
||||
def _solve_svd(self, X, y):
|
||||
"""SVD solver.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Training data.
|
||||
|
||||
y : array-like of shape (n_samples,) or (n_samples, n_targets)
|
||||
Target values.
|
||||
"""
|
||||
n_samples, n_features = X.shape
|
||||
n_classes = len(self.classes_)
|
||||
|
||||
self.means_ = _class_means(X, y)
|
||||
if self.store_covariance:
|
||||
self.covariance_ = _class_cov(X, y, self.priors_)
|
||||
|
||||
Xc = []
|
||||
for idx, group in enumerate(self.classes_):
|
||||
Xg = X[y == group, :]
|
||||
Xc.append(Xg - self.means_[idx])
|
||||
|
||||
self.xbar_ = np.dot(self.priors_, self.means_)
|
||||
|
||||
Xc = np.concatenate(Xc, axis=0)
|
||||
|
||||
# 1) within (univariate) scaling by with classes std-dev
|
||||
std = Xc.std(axis=0)
|
||||
# avoid division by zero in normalization
|
||||
std[std == 0] = 1.0
|
||||
fac = 1.0 / (n_samples - n_classes)
|
||||
|
||||
# 2) Within variance scaling
|
||||
X = np.sqrt(fac) * (Xc / std)
|
||||
# SVD of centered (within)scaled data
|
||||
U, S, Vt = linalg.svd(X, full_matrices=False)
|
||||
|
||||
rank = np.sum(S > self.tol)
|
||||
# Scaling of within covariance is: V' 1/S
|
||||
scalings = (Vt[:rank] / std).T / S[:rank]
|
||||
fac = 1.0 if n_classes == 1 else 1.0 / (n_classes - 1)
|
||||
|
||||
# 3) Between variance scaling
|
||||
# Scale weighted centers
|
||||
X = np.dot(
|
||||
(
|
||||
(np.sqrt((n_samples * self.priors_) * fac))
|
||||
* (self.means_ - self.xbar_).T
|
||||
).T,
|
||||
scalings,
|
||||
)
|
||||
# Centers are living in a space with n_classes-1 dim (maximum)
|
||||
# Use SVD to find projection in the space spanned by the
|
||||
# (n_classes) centers
|
||||
_, S, Vt = linalg.svd(X, full_matrices=0)
|
||||
|
||||
if self._max_components == 0:
|
||||
self.explained_variance_ratio_ = np.empty((0,), dtype=S.dtype)
|
||||
else:
|
||||
self.explained_variance_ratio_ = (S**2 / np.sum(S**2))[
|
||||
: self._max_components
|
||||
]
|
||||
|
||||
rank = np.sum(S > self.tol * S[0])
|
||||
self.scalings_ = np.dot(scalings, Vt.T[:, :rank])
|
||||
coef = np.dot(self.means_ - self.xbar_, self.scalings_)
|
||||
self.intercept_ = -0.5 * np.sum(coef**2, axis=1) + np.log(self.priors_)
|
||||
self.coef_ = np.dot(coef, self.scalings_.T)
|
||||
self.intercept_ -= np.dot(self.xbar_, self.coef_.T)
|
||||
|
||||
def fit(self, X, y):
|
||||
"""Fit the Linear Discriminant Analysis model.
|
||||
|
||||
.. versionchanged:: 0.19
|
||||
*store_covariance* has been moved to main constructor.
|
||||
|
||||
.. versionchanged:: 0.19
|
||||
*tol* has been moved to main constructor.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Training data.
|
||||
|
||||
y : array-like of shape (n_samples,)
|
||||
Target values.
|
||||
|
||||
Returns
|
||||
-------
|
||||
self : object
|
||||
Fitted estimator.
|
||||
"""
|
||||
X, y = self._validate_data(
|
||||
X, y, ensure_min_samples=2, dtype=[np.float64, np.float32]
|
||||
)
|
||||
self.classes_ = unique_labels(y)
|
||||
n_samples, _ = X.shape
|
||||
n_classes = len(self.classes_)
|
||||
|
||||
if n_samples == n_classes:
|
||||
raise ValueError(
|
||||
"The number of samples must be more than the number of classes."
|
||||
)
|
||||
|
||||
if self.priors is None: # estimate priors from sample
|
||||
_, y_t = np.unique(y, return_inverse=True) # non-negative ints
|
||||
self.priors_ = np.bincount(y_t) / float(len(y))
|
||||
else:
|
||||
self.priors_ = np.asarray(self.priors)
|
||||
|
||||
if (self.priors_ < 0).any():
|
||||
raise ValueError("priors must be non-negative")
|
||||
if not np.isclose(self.priors_.sum(), 1.0):
|
||||
warnings.warn("The priors do not sum to 1. Renormalizing", UserWarning)
|
||||
self.priors_ = self.priors_ / self.priors_.sum()
|
||||
|
||||
# Maximum number of components no matter what n_components is
|
||||
# specified:
|
||||
max_components = min(len(self.classes_) - 1, X.shape[1])
|
||||
|
||||
if self.n_components is None:
|
||||
self._max_components = max_components
|
||||
else:
|
||||
if self.n_components > max_components:
|
||||
raise ValueError(
|
||||
"n_components cannot be larger than min(n_features, n_classes - 1)."
|
||||
)
|
||||
self._max_components = self.n_components
|
||||
|
||||
if self.solver == "svd":
|
||||
if self.shrinkage is not None:
|
||||
raise NotImplementedError("shrinkage not supported")
|
||||
if self.covariance_estimator is not None:
|
||||
raise ValueError(
|
||||
"covariance estimator "
|
||||
"is not supported "
|
||||
"with svd solver. Try another solver"
|
||||
)
|
||||
self._solve_svd(X, y)
|
||||
elif self.solver == "lsqr":
|
||||
self._solve_lsqr(
|
||||
X,
|
||||
y,
|
||||
shrinkage=self.shrinkage,
|
||||
covariance_estimator=self.covariance_estimator,
|
||||
)
|
||||
elif self.solver == "eigen":
|
||||
self._solve_eigen(
|
||||
X,
|
||||
y,
|
||||
shrinkage=self.shrinkage,
|
||||
covariance_estimator=self.covariance_estimator,
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"unknown solver {} (valid solvers are 'svd', "
|
||||
"'lsqr', and 'eigen').".format(self.solver)
|
||||
)
|
||||
if self.classes_.size == 2: # treat binary case as a special case
|
||||
self.coef_ = np.array(
|
||||
self.coef_[1, :] - self.coef_[0, :], ndmin=2, dtype=X.dtype
|
||||
)
|
||||
self.intercept_ = np.array(
|
||||
self.intercept_[1] - self.intercept_[0], ndmin=1, dtype=X.dtype
|
||||
)
|
||||
self._n_features_out = self._max_components
|
||||
return self
|
||||
|
||||
def transform(self, X):
|
||||
"""Project data to maximize class separation.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Input data.
|
||||
|
||||
Returns
|
||||
-------
|
||||
X_new : ndarray of shape (n_samples, n_components) or \
|
||||
(n_samples, min(rank, n_components))
|
||||
Transformed data. In the case of the 'svd' solver, the shape
|
||||
is (n_samples, min(rank, n_components)).
|
||||
"""
|
||||
if self.solver == "lsqr":
|
||||
raise NotImplementedError(
|
||||
"transform not implemented for 'lsqr' solver (use 'svd' or 'eigen')."
|
||||
)
|
||||
check_is_fitted(self)
|
||||
|
||||
X = self._validate_data(X, reset=False)
|
||||
if self.solver == "svd":
|
||||
X_new = np.dot(X - self.xbar_, self.scalings_)
|
||||
elif self.solver == "eigen":
|
||||
X_new = np.dot(X, self.scalings_)
|
||||
|
||||
return X_new[:, : self._max_components]
|
||||
|
||||
def predict_proba(self, X):
|
||||
"""Estimate probability.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Input data.
|
||||
|
||||
Returns
|
||||
-------
|
||||
C : ndarray of shape (n_samples, n_classes)
|
||||
Estimated probabilities.
|
||||
"""
|
||||
check_is_fitted(self)
|
||||
|
||||
decision = self.decision_function(X)
|
||||
if self.classes_.size == 2:
|
||||
proba = expit(decision)
|
||||
return np.vstack([1 - proba, proba]).T
|
||||
else:
|
||||
return softmax(decision)
|
||||
|
||||
def predict_log_proba(self, X):
|
||||
"""Estimate log probability.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Input data.
|
||||
|
||||
Returns
|
||||
-------
|
||||
C : ndarray of shape (n_samples, n_classes)
|
||||
Estimated log probabilities.
|
||||
"""
|
||||
prediction = self.predict_proba(X)
|
||||
prediction[prediction == 0.0] += np.finfo(prediction.dtype).tiny
|
||||
return np.log(prediction)
|
||||
|
||||
def decision_function(self, X):
|
||||
"""Apply decision function to an array of samples.
|
||||
|
||||
The decision function is equal (up to a constant factor) to the
|
||||
log-posterior of the model, i.e. `log p(y = k | x)`. In a binary
|
||||
classification setting this instead corresponds to the difference
|
||||
`log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Array of samples (test vectors).
|
||||
|
||||
Returns
|
||||
-------
|
||||
C : ndarray of shape (n_samples,) or (n_samples, n_classes)
|
||||
Decision function values related to each class, per sample.
|
||||
In the two-class case, the shape is (n_samples,), giving the
|
||||
log likelihood ratio of the positive class.
|
||||
"""
|
||||
# Only override for the doc
|
||||
return super().decision_function(X)
|
||||
|
||||
|
||||
class QuadraticDiscriminantAnalysis(ClassifierMixin, BaseEstimator):
|
||||
"""Quadratic Discriminant Analysis.
|
||||
|
||||
A classifier with a quadratic decision boundary, generated
|
||||
by fitting class conditional densities to the data
|
||||
and using Bayes' rule.
|
||||
|
||||
The model fits a Gaussian density to each class.
|
||||
|
||||
.. versionadded:: 0.17
|
||||
*QuadraticDiscriminantAnalysis*
|
||||
|
||||
Read more in the :ref:`User Guide <lda_qda>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
priors : ndarray of shape (n_classes,), default=None
|
||||
Class priors. By default, the class proportions are inferred from the
|
||||
training data.
|
||||
|
||||
reg_param : float, default=0.0
|
||||
Regularizes the per-class covariance estimates by transforming S2 as
|
||||
``S2 = (1 - reg_param) * S2 + reg_param * np.eye(n_features)``,
|
||||
where S2 corresponds to the `scaling_` attribute of a given class.
|
||||
|
||||
store_covariance : bool, default=False
|
||||
If True, the class covariance matrices are explicitly computed and
|
||||
stored in the `self.covariance_` attribute.
|
||||
|
||||
.. versionadded:: 0.17
|
||||
|
||||
tol : float, default=1.0e-4
|
||||
Absolute threshold for a singular value to be considered significant,
|
||||
used to estimate the rank of `Xk` where `Xk` is the centered matrix
|
||||
of samples in class k. This parameter does not affect the
|
||||
predictions. It only controls a warning that is raised when features
|
||||
are considered to be colinear.
|
||||
|
||||
.. versionadded:: 0.17
|
||||
|
||||
Attributes
|
||||
----------
|
||||
covariance_ : list of len n_classes of ndarray \
|
||||
of shape (n_features, n_features)
|
||||
For each class, gives the covariance matrix estimated using the
|
||||
samples of that class. The estimations are unbiased. Only present if
|
||||
`store_covariance` is True.
|
||||
|
||||
means_ : array-like of shape (n_classes, n_features)
|
||||
Class-wise means.
|
||||
|
||||
priors_ : array-like of shape (n_classes,)
|
||||
Class priors (sum to 1).
|
||||
|
||||
rotations_ : list of len n_classes of ndarray of shape (n_features, n_k)
|
||||
For each class k an array of shape (n_features, n_k), where
|
||||
``n_k = min(n_features, number of elements in class k)``
|
||||
It is the rotation of the Gaussian distribution, i.e. its
|
||||
principal axis. It corresponds to `V`, the matrix of eigenvectors
|
||||
coming from the SVD of `Xk = U S Vt` where `Xk` is the centered
|
||||
matrix of samples from class k.
|
||||
|
||||
scalings_ : list of len n_classes of ndarray of shape (n_k,)
|
||||
For each class, contains the scaling of
|
||||
the Gaussian distributions along its principal axes, i.e. the
|
||||
variance in the rotated coordinate system. It corresponds to `S^2 /
|
||||
(n_samples - 1)`, where `S` is the diagonal matrix of singular values
|
||||
from the SVD of `Xk`, where `Xk` is the centered matrix of samples
|
||||
from class k.
|
||||
|
||||
classes_ : ndarray of shape (n_classes,)
|
||||
Unique class labels.
|
||||
|
||||
n_features_in_ : int
|
||||
Number of features seen during :term:`fit`.
|
||||
|
||||
.. versionadded:: 0.24
|
||||
|
||||
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
||||
Names of features seen during :term:`fit`. Defined only when `X`
|
||||
has feature names that are all strings.
|
||||
|
||||
.. versionadded:: 1.0
|
||||
|
||||
See Also
|
||||
--------
|
||||
LinearDiscriminantAnalysis : Linear Discriminant Analysis.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
|
||||
>>> import numpy as np
|
||||
>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
|
||||
>>> y = np.array([1, 1, 1, 2, 2, 2])
|
||||
>>> clf = QuadraticDiscriminantAnalysis()
|
||||
>>> clf.fit(X, y)
|
||||
QuadraticDiscriminantAnalysis()
|
||||
>>> print(clf.predict([[-0.8, -1]]))
|
||||
[1]
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, *, priors=None, reg_param=0.0, store_covariance=False, tol=1.0e-4
|
||||
):
|
||||
self.priors = np.asarray(priors) if priors is not None else None
|
||||
self.reg_param = reg_param
|
||||
self.store_covariance = store_covariance
|
||||
self.tol = tol
|
||||
|
||||
def fit(self, X, y):
|
||||
"""Fit the model according to the given training data and parameters.
|
||||
|
||||
.. versionchanged:: 0.19
|
||||
``store_covariances`` has been moved to main constructor as
|
||||
``store_covariance``
|
||||
|
||||
.. versionchanged:: 0.19
|
||||
``tol`` has been moved to main constructor.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Training vector, where `n_samples` is the number of samples and
|
||||
`n_features` is the number of features.
|
||||
|
||||
y : array-like of shape (n_samples,)
|
||||
Target values (integers).
|
||||
|
||||
Returns
|
||||
-------
|
||||
self : object
|
||||
Fitted estimator.
|
||||
"""
|
||||
X, y = self._validate_data(X, y)
|
||||
check_classification_targets(y)
|
||||
self.classes_, y = np.unique(y, return_inverse=True)
|
||||
n_samples, n_features = X.shape
|
||||
n_classes = len(self.classes_)
|
||||
if n_classes < 2:
|
||||
raise ValueError(
|
||||
"The number of classes has to be greater than one; got %d class"
|
||||
% (n_classes)
|
||||
)
|
||||
if self.priors is None:
|
||||
self.priors_ = np.bincount(y) / float(n_samples)
|
||||
else:
|
||||
self.priors_ = self.priors
|
||||
|
||||
cov = None
|
||||
store_covariance = self.store_covariance
|
||||
if store_covariance:
|
||||
cov = []
|
||||
means = []
|
||||
scalings = []
|
||||
rotations = []
|
||||
for ind in range(n_classes):
|
||||
Xg = X[y == ind, :]
|
||||
meang = Xg.mean(0)
|
||||
means.append(meang)
|
||||
if len(Xg) == 1:
|
||||
raise ValueError(
|
||||
"y has only 1 sample in class %s, covariance is ill defined."
|
||||
% str(self.classes_[ind])
|
||||
)
|
||||
Xgc = Xg - meang
|
||||
# Xgc = U * S * V.T
|
||||
_, S, Vt = np.linalg.svd(Xgc, full_matrices=False)
|
||||
rank = np.sum(S > self.tol)
|
||||
if rank < n_features:
|
||||
warnings.warn("Variables are collinear")
|
||||
S2 = (S**2) / (len(Xg) - 1)
|
||||
S2 = ((1 - self.reg_param) * S2) + self.reg_param
|
||||
if self.store_covariance or store_covariance:
|
||||
# cov = V * (S^2 / (n-1)) * V.T
|
||||
cov.append(np.dot(S2 * Vt.T, Vt))
|
||||
scalings.append(S2)
|
||||
rotations.append(Vt.T)
|
||||
if self.store_covariance or store_covariance:
|
||||
self.covariance_ = cov
|
||||
self.means_ = np.asarray(means)
|
||||
self.scalings_ = scalings
|
||||
self.rotations_ = rotations
|
||||
return self
|
||||
|
||||
def _decision_function(self, X):
|
||||
# return log posterior, see eq (4.12) p. 110 of the ESL.
|
||||
check_is_fitted(self)
|
||||
|
||||
X = self._validate_data(X, reset=False)
|
||||
norm2 = []
|
||||
for i in range(len(self.classes_)):
|
||||
R = self.rotations_[i]
|
||||
S = self.scalings_[i]
|
||||
Xm = X - self.means_[i]
|
||||
X2 = np.dot(Xm, R * (S ** (-0.5)))
|
||||
norm2.append(np.sum(X2**2, axis=1))
|
||||
norm2 = np.array(norm2).T # shape = [len(X), n_classes]
|
||||
u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
|
||||
return -0.5 * (norm2 + u) + np.log(self.priors_)
|
||||
|
||||
def decision_function(self, X):
|
||||
"""Apply decision function to an array of samples.
|
||||
|
||||
The decision function is equal (up to a constant factor) to the
|
||||
log-posterior of the model, i.e. `log p(y = k | x)`. In a binary
|
||||
classification setting this instead corresponds to the difference
|
||||
`log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Array of samples (test vectors).
|
||||
|
||||
Returns
|
||||
-------
|
||||
C : ndarray of shape (n_samples,) or (n_samples, n_classes)
|
||||
Decision function values related to each class, per sample.
|
||||
In the two-class case, the shape is (n_samples,), giving the
|
||||
log likelihood ratio of the positive class.
|
||||
"""
|
||||
dec_func = self._decision_function(X)
|
||||
# handle special case of two classes
|
||||
if len(self.classes_) == 2:
|
||||
return dec_func[:, 1] - dec_func[:, 0]
|
||||
return dec_func
|
||||
|
||||
def predict(self, X):
|
||||
"""Perform classification on an array of test vectors X.
|
||||
|
||||
The predicted class C for each sample in X is returned.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Vector to be scored, where `n_samples` is the number of samples and
|
||||
`n_features` is the number of features.
|
||||
|
||||
Returns
|
||||
-------
|
||||
C : ndarray of shape (n_samples,)
|
||||
Estimated probabilities.
|
||||
"""
|
||||
d = self._decision_function(X)
|
||||
y_pred = self.classes_.take(d.argmax(1))
|
||||
return y_pred
|
||||
|
||||
def predict_proba(self, X):
|
||||
"""Return posterior probabilities of classification.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Array of samples/test vectors.
|
||||
|
||||
Returns
|
||||
-------
|
||||
C : ndarray of shape (n_samples, n_classes)
|
||||
Posterior probabilities of classification per class.
|
||||
"""
|
||||
values = self._decision_function(X)
|
||||
# compute the likelihood of the underlying gaussian models
|
||||
# up to a multiplicative constant.
|
||||
likelihood = np.exp(values - values.max(axis=1)[:, np.newaxis])
|
||||
# compute posterior probabilities
|
||||
return likelihood / likelihood.sum(axis=1)[:, np.newaxis]
|
||||
|
||||
def predict_log_proba(self, X):
|
||||
"""Return log of posterior probabilities of classification.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Array of samples/test vectors.
|
||||
|
||||
Returns
|
||||
-------
|
||||
C : ndarray of shape (n_samples, n_classes)
|
||||
Posterior log-probabilities of classification per class.
|
||||
"""
|
||||
# XXX : can do better to avoid precision overflows
|
||||
probas_ = self.predict_proba(X)
|
||||
return np.log(probas_)
|
||||
Reference in New Issue
Block a user