first commit

2022-08-02 09:52:52 -04:00
parent 417ea8660b
commit 05e52aa52b
10444 changed files with 2300232 additions and 0 deletions
--- a/dashboard/flask-server/venv/Lib/site-packages/sklearn/discriminant_analysis.py
+++ b/dashboard/flask-server/venv/Lib/site-packages/sklearn/discriminant_analysis.py
@@ -0,0 +1,999 @@
+"""
+Linear Discriminant Analysis and Quadratic Discriminant Analysis
+"""
+
+# Authors: Clemens Brunner
+#          Martin Billinger
+#          Matthieu Perrot
+#          Mathieu Blondel
+
+# License: BSD 3-Clause
+
+import warnings
+import numpy as np
+from scipy import linalg
+from scipy.special import expit
+from numbers import Real
+
+from .base import BaseEstimator, TransformerMixin, ClassifierMixin
+from .base import _ClassNamePrefixFeaturesOutMixin
+from .linear_model._base import LinearClassifierMixin
+from .covariance import ledoit_wolf, empirical_covariance, shrunk_covariance
+from .utils.multiclass import unique_labels
+from .utils.validation import check_is_fitted
+from .utils.multiclass import check_classification_targets
+from .utils.extmath import softmax
+from .preprocessing import StandardScaler
+
+
+__all__ = ["LinearDiscriminantAnalysis", "QuadraticDiscriminantAnalysis"]
+
+
+def _cov(X, shrinkage=None, covariance_estimator=None):
+    """Estimate a covariance matrix from ``X``.
+
+    The estimate is either the empirical covariance (optionally shrunk) or,
+    when ``covariance_estimator`` is given, whatever that estimator exposes
+    as ``covariance_`` after being fitted on ``X``.
+
+    Parameters
+    ----------
+    X : array-like of shape (n_samples, n_features)
+        Input data.
+
+    shrinkage : {'empirical', 'auto'} or float, default=None
+        Shrinkage parameter, possible values:
+          - None or 'empirical': no shrinkage (default).
+          - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
+          - float between 0 and 1: fixed shrinkage parameter.
+
+        Must be None or 0 when `covariance_estimator` is not None;
+        any other value raises a ValueError.
+
+    covariance_estimator : estimator, default=None
+        If not None, `covariance_estimator` is fitted on `X` (in place) and
+        its ``covariance_`` attribute is returned instead of the empirical
+        covariance (with potential shrinkage).
+        The object should have a fit method and a ``covariance_`` attribute
+        like the estimators in :mod:`sklearn.covariance`.
+        If None, the shrinkage parameter drives the estimate.
+
+        .. versionadded:: 0.24
+
+    Returns
+    -------
+    s : ndarray of shape (n_features, n_features)
+        Estimated covariance matrix.
+
+    Raises
+    ------
+    ValueError
+        If `shrinkage` is an unknown string or a number outside [0, 1], if
+        both `shrinkage` and `covariance_estimator` are set, or if the fitted
+        estimator has no ``covariance_`` attribute.
+    TypeError
+        If `shrinkage` is neither a string nor a real number.
+    """
+    # --- Custom estimator path -------------------------------------------
+    if covariance_estimator is not None:
+        if shrinkage is not None and shrinkage != 0:
+            raise ValueError(
+                "covariance_estimator and shrinkage parameters "
+                "are not None. Only one of the two can be set."
+            )
+        covariance_estimator.fit(X)
+        # The attribute can only be checked once the estimator is fitted.
+        if not hasattr(covariance_estimator, "covariance_"):
+            raise ValueError(
+                "%s does not have a covariance_ attribute"
+                % covariance_estimator.__class__.__name__
+            )
+        return covariance_estimator.covariance_
+
+    # --- Built-in estimators ---------------------------------------------
+    if shrinkage is None:
+        shrinkage = "empirical"
+
+    if isinstance(shrinkage, str):
+        if shrinkage == "empirical":
+            return empirical_covariance(X)
+        if shrinkage != "auto":
+            raise ValueError("unknown shrinkage parameter")
+        # Ledoit-Wolf is applied to standardized features; the estimate is
+        # then mapped back to the original feature scales.
+        scaler = StandardScaler()
+        X_std = scaler.fit_transform(X)
+        shrunk = ledoit_wolf(X_std)[0]
+        return scaler.scale_[:, np.newaxis] * shrunk * scaler.scale_[np.newaxis, :]
+
+    if not isinstance(shrinkage, Real):
+        raise TypeError("shrinkage must be a float or a string")
+    if shrinkage < 0 or shrinkage > 1:
+        raise ValueError("shrinkage parameter must be between 0 and 1")
+    return shrunk_covariance(empirical_covariance(X), shrinkage)
+
+
+def _class_means(X, y):
+    """Compute the mean feature vector of every class.
+
+    Parameters
+    ----------
+    X : array-like of shape (n_samples, n_features)
+        Input data.
+
+    y : array-like of shape (n_samples,) or (n_samples, n_targets)
+        Target values.
+
+    Returns
+    -------
+    means : array-like of shape (n_classes, n_features)
+        Class means, one row per class in the sorted order returned by
+        ``np.unique(y)``.
+    """
+    # `class_index[i]` is the row of the output that sample i belongs to.
+    labels, class_index = np.unique(y, return_inverse=True)
+    sums = np.zeros(shape=(len(labels), X.shape[1]))
+    # Unbuffered scatter-add: accumulate each sample into its class row.
+    np.add.at(sums, class_index, X)
+    sums /= np.bincount(class_index)[:, None]
+    return sums
+
+
+def _class_cov(X, y, priors, shrinkage=None, covariance_estimator=None):
+    """Compute the prior-weighted within-class covariance matrix.
+
+    A covariance matrix is estimated separately for the samples of each
+    class and the results are summed, each weighted by its class prior.
+
+    Parameters
+    ----------
+    X : array-like of shape (n_samples, n_features)
+        Input data.
+
+    y : array-like of shape (n_samples,) or (n_samples, n_targets)
+        Target values.
+
+    priors : array-like of shape (n_classes,)
+        Class priors, indexed in the sorted order of ``np.unique(y)``.
+
+    shrinkage : 'auto' or float, default=None
+        Shrinkage parameter, possible values:
+          - None: no shrinkage (default).
+          - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
+          - float between 0 and 1: fixed shrinkage parameter.
+
+        Forwarded unchanged to `_cov` for every class.
+
+    covariance_estimator : estimator, default=None
+        If not None, `covariance_estimator` is used to estimate
+        the covariance matrices instead of relying on the empirical
+        covariance estimator (with potential shrinkage).
+        The object should have a fit method and a ``covariance_`` attribute
+        like the estimators in sklearn.covariance.
+        If None, the shrinkage parameter drives the estimate.
+
+        .. versionadded:: 0.24
+
+    Returns
+    -------
+    cov : array-like of shape (n_features, n_features)
+        Weighted within-class covariance matrix.
+    """
+    n_features = X.shape[1]
+    weighted = np.zeros(shape=(n_features, n_features))
+    for k, label in enumerate(np.unique(y)):
+        members = X[y == label, :]
+        class_cov = _cov(members, shrinkage, covariance_estimator)
+        # atleast_2d keeps the accumulation valid for a single feature.
+        weighted += priors[k] * np.atleast_2d(class_cov)
+    return weighted
+
+
+class LinearDiscriminantAnalysis(
+    _ClassNamePrefixFeaturesOutMixin,
+    LinearClassifierMixin,
+    TransformerMixin,
+    BaseEstimator,
+):
+    """Linear Discriminant Analysis.
+
+    A classifier with a linear decision boundary, generated by fitting class
+    conditional densities to the data and using Bayes' rule.
+
+    The model fits a Gaussian density to each class, assuming that all classes
+    share the same covariance matrix.
+
+    The fitted model can also be used to reduce the dimensionality of the input
+    by projecting it to the most discriminative directions, using the
+    `transform` method.
+
+    .. versionadded:: 0.17
+       *LinearDiscriminantAnalysis*.
+
+    Read more in the :ref:`User Guide <lda_qda>`.
+
+    Parameters
+    ----------
+    solver : {'svd', 'lsqr', 'eigen'}, default='svd'
+        Solver to use, possible values:
+          - 'svd': Singular value decomposition (default).
+            Does not compute the covariance matrix, therefore this solver is
+            recommended for data with a large number of features.
+          - 'lsqr': Least squares solution.
+            Can be combined with shrinkage or custom covariance estimator.
+          - 'eigen': Eigenvalue decomposition.
+            Can be combined with shrinkage or custom covariance estimator.
+
+    shrinkage : 'auto' or float, default=None
+        Shrinkage parameter, possible values:
+          - None: no shrinkage (default).
+          - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
+          - float between 0 and 1: fixed shrinkage parameter.
+
+        This should be left to None if `covariance_estimator` is used.
+        Note that shrinkage works only with 'lsqr' and 'eigen' solvers.
+
+    priors : array-like of shape (n_classes,), default=None
+        The class prior probabilities. By default, the class proportions are
+        inferred from the training data.
+
+    n_components : int, default=None
+        Number of components (<= min(n_classes - 1, n_features)) for
+        dimensionality reduction. If None, will be set to
+        min(n_classes - 1, n_features). This parameter only affects the
+        `transform` method.
+
+    store_covariance : bool, default=False
+        If True, explicitly compute the weighted within-class covariance
+        matrix when solver is 'svd'. The matrix is always computed
+        and stored for the other solvers.
+
+        .. versionadded:: 0.17
+
+    tol : float, default=1.0e-4
+        Absolute threshold for a singular value of X to be considered
+        significant, used to estimate the rank of X. Dimensions whose
+        singular values are non-significant are discarded. Only used if
+        solver is 'svd'.
+
+        .. versionadded:: 0.17
+
+    covariance_estimator : covariance estimator, default=None
+        If not None, `covariance_estimator` is used to estimate
+        the covariance matrices instead of relying on the empirical
+        covariance estimator (with potential shrinkage).
+        The object should have a fit method and a ``covariance_`` attribute
+        like the estimators in :mod:`sklearn.covariance`.
+        if None the shrinkage parameter drives the estimate.
+
+        This should be left to None if `shrinkage` is used.
+        Note that `covariance_estimator` works only with 'lsqr' and 'eigen'
+        solvers.
+
+        .. versionadded:: 0.24
+
+    Attributes
+    ----------
+    coef_ : ndarray of shape (n_features,) or (n_classes, n_features)
+        Weight vector(s).
+
+    intercept_ : ndarray of shape (n_classes,)
+        Intercept term.
+
+    covariance_ : array-like of shape (n_features, n_features)
+        Weighted within-class covariance matrix. It corresponds to
+        `sum_k prior_k * C_k` where `C_k` is the covariance matrix of the
+        samples in class `k`. The `C_k` are estimated using the (potentially
+        shrunk) biased estimator of covariance. If solver is 'svd', only
+        exists when `store_covariance` is True.
+
+    explained_variance_ratio_ : ndarray of shape (n_components,)
+        Percentage of variance explained by each of the selected components.
+        If ``n_components`` is not set then all components are stored and the
+        sum of explained variances is equal to 1.0. Only available when eigen
+        or svd solver is used.
+
+    means_ : array-like of shape (n_classes, n_features)
+        Class-wise means.
+
+    priors_ : array-like of shape (n_classes,)
+        Class priors (sum to 1).
+
+    scalings_ : array-like of shape (rank, n_classes - 1)
+        Scaling of the features in the space spanned by the class centroids.
+        Only available for 'svd' and 'eigen' solvers.
+
+    xbar_ : array-like of shape (n_features,)
+        Overall mean. Only present if solver is 'svd'.
+
+    classes_ : array-like of shape (n_classes,)
+        Unique class labels.
+
+    n_features_in_ : int
+        Number of features seen during :term:`fit`.
+
+        .. versionadded:: 0.24
+
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+        .. versionadded:: 1.0
+
+    See Also
+    --------
+    QuadraticDiscriminantAnalysis : Quadratic Discriminant Analysis.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
+    >>> y = np.array([1, 1, 1, 2, 2, 2])
+    >>> clf = LinearDiscriminantAnalysis()
+    >>> clf.fit(X, y)
+    LinearDiscriminantAnalysis()
+    >>> print(clf.predict([[-0.8, -1]]))
+    [1]
+    """
+
+    def __init__(
+        self,
+        solver="svd",
+        shrinkage=None,
+        priors=None,
+        n_components=None,
+        store_covariance=False,
+        tol=1e-4,
+        covariance_estimator=None,
+    ):
+        # Hyper-parameters are stored untouched; all checks happen in `fit`.
+
+        # Solver selection and dimensionality of `transform`.
+        self.solver = solver
+        self.n_components = n_components
+
+        # Class priors (None means: estimate them from the training data).
+        self.priors = priors
+
+        # Covariance estimation for the 'lsqr' and 'eigen' solvers.
+        self.shrinkage = shrinkage
+        self.covariance_estimator = covariance_estimator
+
+        # Options that only matter for the 'svd' solver.
+        self.store_covariance = store_covariance
+        self.tol = tol
+
+    def _solve_lsqr(self, X, y, shrinkage, covariance_estimator):
+        """Least squares solver.
+
+        Solves directly for the coefficients of the linear discriminant
+        functions from the class means and the weighted within-class
+        covariance. No eigenvectors are estimated, so this solver can only
+        be used for classification (with any covariance estimator);
+        dimensionality reduction through `transform` is not supported.
+
+        Sets the attributes `means_`, `covariance_`, `coef_` and
+        `intercept_`.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training data.
+
+        y : array-like of shape (n_samples,) or (n_samples, n_classes)
+            Target values.
+
+        shrinkage : 'auto', float or None
+            Shrinkage parameter, possible values:
+              - None: no shrinkage.
+              - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
+              - float between 0 and 1: fixed shrinkage parameter.
+
+            Should be left to None if `covariance_estimator` is not None.
+
+        covariance_estimator : estimator, default=None
+            If not None, `covariance_estimator` is used to estimate
+            the covariance matrices instead of relying on the empirical
+            covariance estimator (with potential shrinkage).
+            The object should have a fit method and a ``covariance_`` attribute
+            like the estimators in sklearn.covariance.
+            If None, the shrinkage parameter drives the estimate.
+
+            .. versionadded:: 0.24
+
+        Notes
+        -----
+        This solver is based on [1]_, section 2.6.2, pp. 39-41.
+
+        References
+        ----------
+        .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification
+           (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN
+           0-471-05669-3.
+        """
+        self.means_ = class_means = _class_means(X, y)
+        self.covariance_ = within_cov = _class_cov(
+            X, y, self.priors_, shrinkage, covariance_estimator
+        )
+
+        # Least-squares solution of `within_cov @ coef.T = class_means.T`.
+        self.coef_ = coef = linalg.lstsq(within_cov, class_means.T)[0].T
+
+        # Entry k of the diagonal is the dot product of mean k with coef k.
+        log_priors = np.log(self.priors_)
+        self.intercept_ = -0.5 * np.diag(np.dot(class_means, coef.T)) + log_priors
+
+    def _solve_eigen(self, X, y, shrinkage, covariance_estimator):
+        """Eigenvalue solver.
+
+        Maximizes the Rayleigh coefficient (basically the ratio of between
+        class scatter to within class scatter) by solving a generalized
+        symmetric eigenvalue problem. This solver supports both
+        classification and dimensionality reduction (with any covariance
+        estimator).
+
+        Sets the attributes `means_`, `covariance_`,
+        `explained_variance_ratio_`, `scalings_`, `coef_` and `intercept_`.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training data.
+
+        y : array-like of shape (n_samples,) or (n_samples, n_targets)
+            Target values.
+
+        shrinkage : 'auto', float or None
+            Shrinkage parameter, possible values:
+              - None: no shrinkage.
+              - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
+              - float between 0 and 1: fixed shrinkage constant.
+
+            Should be left to None if `covariance_estimator` is not None.
+
+        covariance_estimator : estimator, default=None
+            If not None, `covariance_estimator` is used to estimate
+            the covariance matrices instead of relying on the empirical
+            covariance estimator (with potential shrinkage).
+            The object should have a fit method and a ``covariance_`` attribute
+            like the estimators in sklearn.covariance.
+            If None, the shrinkage parameter drives the estimate.
+
+            .. versionadded:: 0.24
+
+        Notes
+        -----
+        This solver is based on [1]_, section 3.8.3, pp. 121-124.
+
+        References
+        ----------
+        .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification
+           (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN
+           0-471-05669-3.
+        """
+        self.means_ = class_means = _class_means(X, y)
+        self.covariance_ = within = _class_cov(
+            X, y, self.priors_, shrinkage, covariance_estimator
+        )
+
+        # Between-class scatter is what remains of the total scatter once
+        # the within-class part has been removed.
+        total = _cov(X, shrinkage, covariance_estimator)
+        between = total - within
+
+        # Generalized problem: between @ v = lambda * within @ v.
+        evals, evecs = linalg.eigh(between, within)
+
+        ratios = evals / np.sum(evals)
+        self.explained_variance_ratio_ = np.sort(ratios)[::-1][: self._max_components]
+
+        # Reorder the eigenvectors by decreasing eigenvalue.
+        descending = np.argsort(evals)[::-1]
+        evecs = evecs[:, descending]
+
+        self.scalings_ = evecs
+        self.coef_ = coef = np.dot(class_means, evecs).dot(evecs.T)
+        log_priors = np.log(self.priors_)
+        self.intercept_ = -0.5 * np.diag(np.dot(class_means, coef.T)) + log_priors
+
+    def _solve_svd(self, X, y):
+        """SVD solver.
+
+        Works on the data centered per class rather than on an explicit
+        covariance matrix: a first SVD of the within-class centered, scaled
+        data yields a scaling of the features, and a second SVD of the
+        scaled, prior-weighted class centers yields the discriminant
+        directions.
+
+        Sets the attributes `means_`, `xbar_`, `explained_variance_ratio_`,
+        `scalings_`, `intercept_` and `coef_`, plus `covariance_` when
+        `store_covariance` is True. Reads `classes_`, `priors_`, `tol` and
+        `_max_components`, which must have been set beforehand.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training data.
+
+        y : array-like of shape (n_samples,) or (n_samples, n_targets)
+            Target values.
+        """
+        n_samples, n_features = X.shape
+        n_classes = len(self.classes_)
+
+        self.means_ = _class_means(X, y)
+        if self.store_covariance:
+            # Only computed on request; the solver itself does not need it.
+            self.covariance_ = _class_cov(X, y, self.priors_)
+
+        # Center every sample on the mean of its own class.
+        Xc = []
+        for idx, group in enumerate(self.classes_):
+            Xg = X[y == group, :]
+            Xc.append(Xg - self.means_[idx])
+
+        # Overall mean, computed as the prior-weighted average of class means.
+        self.xbar_ = np.dot(self.priors_, self.means_)
+
+        Xc = np.concatenate(Xc, axis=0)
+
+        # 1) within (univariate) scaling by with classes std-dev
+        std = Xc.std(axis=0)
+        # avoid division by zero in normalization
+        std[std == 0] = 1.0
+        fac = 1.0 / (n_samples - n_classes)
+
+        # 2) Within variance scaling
+        # NOTE: `X` is rebound here; the original training data is no longer
+        # referenced below this line.
+        X = np.sqrt(fac) * (Xc / std)
+        # SVD of centered (within)scaled data
+        # (`U` is not used afterwards.)
+        U, S, Vt = linalg.svd(X, full_matrices=False)
+
+        # Number of singular values above the absolute threshold `tol`.
+        rank = np.sum(S > self.tol)
+        # Scaling of within covariance is: V' 1/S
+        scalings = (Vt[:rank] / std).T / S[:rank]
+        # `fac` is reused with a new meaning for the between-class step; the
+        # single-class case is special-cased to avoid dividing by zero.
+        fac = 1.0 if n_classes == 1 else 1.0 / (n_classes - 1)
+
+        # 3) Between variance scaling
+        # Scale weighted centers
+        X = np.dot(
+            (
+                (np.sqrt((n_samples * self.priors_) * fac))
+                * (self.means_ - self.xbar_).T
+            ).T,
+            scalings,
+        )
+        # Centers are living in a space with n_classes-1 dim (maximum)
+        # Use SVD to find projection in the space spanned by the
+        # (n_classes) centers
+        _, S, Vt = linalg.svd(X, full_matrices=0)
+
+        if self._max_components == 0:
+            # No component is kept: store an empty array instead of
+            # evaluating the ratio below.
+            self.explained_variance_ratio_ = np.empty((0,), dtype=S.dtype)
+        else:
+            self.explained_variance_ratio_ = (S**2 / np.sum(S**2))[
+                : self._max_components
+            ]
+
+        # Unlike the first rank estimate, this threshold is relative to the
+        # largest singular value.
+        rank = np.sum(S > self.tol * S[0])
+        self.scalings_ = np.dot(scalings, Vt.T[:, :rank])
+        # Class centers (relative to the overall mean) in the projected space.
+        coef = np.dot(self.means_ - self.xbar_, self.scalings_)
+        self.intercept_ = -0.5 * np.sum(coef**2, axis=1) + np.log(self.priors_)
+        # Map the coefficients back to the original feature space.
+        self.coef_ = np.dot(coef, self.scalings_.T)
+        # Fold the centering on `xbar_` into the intercept so the decision
+        # function can be applied to uncentered inputs.
+        self.intercept_ -= np.dot(self.xbar_, self.coef_.T)
+
+    def fit(self, X, y):
+        """Fit the Linear Discriminant Analysis model.
+
+           .. versionchanged:: 0.19
+              *store_covariance* has been moved to main constructor.
+
+           .. versionchanged:: 0.19
+              *tol* has been moved to main constructor.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training data.
+
+        y : array-like of shape (n_samples,)
+            Target values.
+
+        Returns
+        -------
+        self : object
+            Fitted estimator.
+
+        Raises
+        ------
+        ValueError
+            If there are as many classes as samples, if `priors` does not
+            hold exactly one value per class or contains negative values,
+            if `n_components` is negative or larger than
+            min(n_features, n_classes - 1), if `covariance_estimator` is
+            combined with the 'svd' solver, or if `solver` is unknown.
+        NotImplementedError
+            If `shrinkage` is combined with the 'svd' solver.
+        """
+        X, y = self._validate_data(
+            X, y, ensure_min_samples=2, dtype=[np.float64, np.float32]
+        )
+        self.classes_ = unique_labels(y)
+        n_samples, n_features = X.shape
+        n_classes = len(self.classes_)
+
+        if n_samples == n_classes:
+            raise ValueError(
+                "The number of samples must be more than the number of classes."
+            )
+
+        if self.priors is None:  # estimate priors from sample
+            _, y_t = np.unique(y, return_inverse=True)  # non-negative ints
+            self.priors_ = np.bincount(y_t) / float(len(y))
+        else:
+            priors = np.asarray(self.priors)
+            # A mismatch would otherwise only surface later as an obscure
+            # indexing or broadcasting error inside the solvers.
+            if priors.size != n_classes:
+                raise ValueError(
+                    "priors must contain one value per class; got {} values "
+                    "for {} classes.".format(priors.size, n_classes)
+                )
+            self.priors_ = priors
+
+        if (self.priors_ < 0).any():
+            raise ValueError("priors must be non-negative")
+        if not np.isclose(self.priors_.sum(), 1.0):
+            warnings.warn("The priors do not sum to 1. Renormalizing", UserWarning)
+            self.priors_ = self.priors_ / self.priors_.sum()
+
+        # Maximum number of components no matter what n_components is
+        # specified:
+        max_components = min(n_classes - 1, n_features)
+
+        if self.n_components is None:
+            self._max_components = max_components
+        else:
+            # `_max_components` is used as a slice bound, so a negative
+            # value would silently drop trailing components instead of
+            # failing.
+            if self.n_components < 0:
+                raise ValueError(
+                    "n_components must be non-negative; got {}.".format(
+                        self.n_components
+                    )
+                )
+            if self.n_components > max_components:
+                raise ValueError(
+                    "n_components cannot be larger than min(n_features, n_classes - 1)."
+                )
+            self._max_components = self.n_components
+
+        if self.solver == "svd":
+            if self.shrinkage is not None:
+                raise NotImplementedError("shrinkage not supported")
+            if self.covariance_estimator is not None:
+                raise ValueError(
+                    "covariance estimator "
+                    "is not supported "
+                    "with svd solver. Try another solver"
+                )
+            self._solve_svd(X, y)
+        elif self.solver == "lsqr":
+            self._solve_lsqr(
+                X,
+                y,
+                shrinkage=self.shrinkage,
+                covariance_estimator=self.covariance_estimator,
+            )
+        elif self.solver == "eigen":
+            self._solve_eigen(
+                X,
+                y,
+                shrinkage=self.shrinkage,
+                covariance_estimator=self.covariance_estimator,
+            )
+        else:
+            raise ValueError(
+                "unknown solver {} (valid solvers are 'svd', "
+                "'lsqr', and 'eigen').".format(self.solver)
+            )
+        if self.classes_.size == 2:  # treat binary case as a special case
+            # Collapse the two per-class discriminants into their difference
+            # (second class minus first class).
+            self.coef_ = np.array(
+                self.coef_[1, :] - self.coef_[0, :], ndmin=2, dtype=X.dtype
+            )
+            self.intercept_ = np.array(
+                self.intercept_[1] - self.intercept_[0], ndmin=1, dtype=X.dtype
+            )
+        self._n_features_out = self._max_components
+        return self
+
+    def transform(self, X):
+        """Project data to maximize class separation.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Input data.
+
+        Returns
+        -------
+        X_new : ndarray of shape (n_samples, n_components) or \
+            (n_samples, min(rank, n_components))
+            Transformed data. In the case of the 'svd' solver, the shape
+            is (n_samples, min(rank, n_components)).
+
+        Raises
+        ------
+        NotImplementedError
+            If the estimator uses the 'lsqr' solver, which does not compute
+            the projection.
+        """
+        solver = self.solver
+        if solver == "lsqr":
+            raise NotImplementedError(
+                "transform not implemented for 'lsqr' solver (use 'svd' or 'eigen')."
+            )
+        check_is_fitted(self)
+
+        X = self._validate_data(X, reset=False)
+        if solver == "svd":
+            # The 'svd' scalings apply to data centered on the overall mean.
+            projected = np.dot(X - self.xbar_, self.scalings_)
+        elif solver == "eigen":
+            projected = np.dot(X, self.scalings_)
+
+        # Keep only the leading components.
+        n_kept = self._max_components
+        return projected[:, :n_kept]
+
+    def predict_proba(self, X):
+        """Estimate class membership probabilities.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Input data.
+
+        Returns
+        -------
+        C : ndarray of shape (n_samples, n_classes)
+            Estimated probabilities.
+        """
+        check_is_fitted(self)
+
+        scores = self.decision_function(X)
+        if self.classes_.size != 2:
+            # Multiclass: normalize the per-class scores with a softmax.
+            return softmax(scores)
+
+        # Binary: a single score per sample, squashed by the logistic
+        # function into the probability of the second class.
+        positive = expit(scores)
+        return np.vstack([1 - positive, positive]).T
+
+    def predict_log_proba(self, X):
+        """Estimate log class membership probabilities.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Input data.
+
+        Returns
+        -------
+        C : ndarray of shape (n_samples, n_classes)
+            Estimated log probabilities.
+        """
+        proba = self.predict_proba(X)
+        # Replace exact zeros by the smallest positive normal float of the
+        # array's dtype so that the logarithm stays finite.
+        smallest = np.finfo(proba.dtype).tiny
+        proba[proba == 0.0] += smallest
+        return np.log(proba)
+
+    def decision_function(self, X):
+        """Apply decision function to an array of samples.
+
+        The decision function is equal (up to a constant factor) to the
+        log-posterior of the model, i.e. `log p(y = k | x)`. In a binary
+        classification setting this instead corresponds to the difference
+        `log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Array of samples (test vectors).
+
+        Returns
+        -------
+        C : ndarray of shape (n_samples,) or (n_samples, n_classes)
+            Decision function values related to each class, per sample.
+            In the two-class case, the shape is (n_samples,), giving the
+            log likelihood ratio of the positive class.
+        """
+        # The computation is inherited unchanged from the parent class; this
+        # override exists only to attach the LDA-specific docstring above.
+        return super().decision_function(X)
+
+
+class QuadraticDiscriminantAnalysis(ClassifierMixin, BaseEstimator):
+    """Quadratic Discriminant Analysis.
+
+    A classifier with a quadratic decision boundary, generated
+    by fitting class conditional densities to the data
+    and using Bayes' rule.
+
+    The model fits a Gaussian density to each class.
+
+    .. versionadded:: 0.17
+       *QuadraticDiscriminantAnalysis*
+
+    Read more in the :ref:`User Guide <lda_qda>`.
+
+    Parameters
+    ----------
+    priors : ndarray of shape (n_classes,), default=None
+        Class priors. By default, the class proportions are inferred from the
+        training data.
+
+    reg_param : float, default=0.0
+        Regularizes the per-class covariance estimates by transforming S2 as
+        ``S2 = (1 - reg_param) * S2 + reg_param * np.eye(n_features)``,
+        where S2 corresponds to the `scaling_` attribute of a given class.
+
+    store_covariance : bool, default=False
+        If True, the class covariance matrices are explicitly computed and
+        stored in the `self.covariance_` attribute.
+
+        .. versionadded:: 0.17
+
+    tol : float, default=1.0e-4
+        Absolute threshold for a singular value to be considered significant,
+        used to estimate the rank of `Xk` where `Xk` is the centered matrix
+        of samples in class k. This parameter does not affect the
+        predictions. It only controls a warning that is raised when features
+        are considered to be colinear.
+
+        .. versionadded:: 0.17
+
+    Attributes
+    ----------
+    covariance_ : list of len n_classes of ndarray \
+            of shape (n_features, n_features)
+        For each class, gives the covariance matrix estimated using the
+        samples of that class. The estimations are unbiased. Only present if
+        `store_covariance` is True.
+
+    means_ : array-like of shape (n_classes, n_features)
+        Class-wise means.
+
+    priors_ : array-like of shape (n_classes,)
+        Class priors (sum to 1).
+
+    rotations_ : list of len n_classes of ndarray of shape (n_features, n_k)
+        For each class k an array of shape (n_features, n_k), where
+        ``n_k = min(n_features, number of elements in class k)``
+        It is the rotation of the Gaussian distribution, i.e. its
+        principal axis. It corresponds to `V`, the matrix of eigenvectors
+        coming from the SVD of `Xk = U S Vt` where `Xk` is the centered
+        matrix of samples from class k.
+
+    scalings_ : list of len n_classes of ndarray of shape (n_k,)
+        For each class, contains the scaling of
+        the Gaussian distributions along its principal axes, i.e. the
+        variance in the rotated coordinate system. It corresponds to `S^2 /
+        (n_samples - 1)`, where `S` is the diagonal matrix of singular values
+        from the SVD of `Xk`, where `Xk` is the centered matrix of samples
+        from class k.
+
+    classes_ : ndarray of shape (n_classes,)
+        Unique class labels.
+
+    n_features_in_ : int
+        Number of features seen during :term:`fit`.
+
+        .. versionadded:: 0.24
+
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+        .. versionadded:: 1.0
+
+    See Also
+    --------
+    LinearDiscriminantAnalysis : Linear Discriminant Analysis.
+
+    Examples
+    --------
+    >>> from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
+    >>> import numpy as np
+    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
+    >>> y = np.array([1, 1, 1, 2, 2, 2])
+    >>> clf = QuadraticDiscriminantAnalysis()
+    >>> clf.fit(X, y)
+    QuadraticDiscriminantAnalysis()
+    >>> print(clf.predict([[-0.8, -1]]))
+    [1]
+    """
+
+    def __init__(
+        self, *, priors=None, reg_param=0.0, store_covariance=False, tol=1.0e-4
+    ):
+        # Unlike the other parameters, user-supplied priors are converted to
+        # an ndarray right away; None is kept as None.
+        # NOTE(review): this conversion is presumably skipped when `priors`
+        # is assigned after construction (e.g. through `set_params`) -- confirm.
+        if priors is None:
+            self.priors = None
+        else:
+            self.priors = np.asarray(priors)
+
+        self.reg_param = reg_param
+        self.store_covariance = store_covariance
+        self.tol = tol
+
+    def fit(self, X, y):
+        """Fit the model according to the given training data and parameters.
+
+            .. versionchanged:: 0.19
+               ``store_covariances`` has been moved to main constructor as
+               ``store_covariance``
+
+            .. versionchanged:: 0.19
+               ``tol`` has been moved to main constructor.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training vector, where `n_samples` is the number of samples and
+            `n_features` is the number of features.
+
+        y : array-like of shape (n_samples,)
+            Target values (class labels).
+
+        Returns
+        -------
+        self : object
+            Fitted estimator.
+
+        Raises
+        ------
+        ValueError
+            If `y` contains fewer than two classes, or if a class has a
+            single sample (its covariance is then ill defined).
+        """
+        X, y = self._validate_data(X, y)
+        check_classification_targets(y)
+        # From here on `y` holds the index of each sample's class in
+        # `classes_`, not the original label.
+        self.classes_, y = np.unique(y, return_inverse=True)
+        n_samples, n_features = X.shape
+        n_classes = len(self.classes_)
+        if n_classes < 2:
+            raise ValueError(
+                "The number of classes has to be greater than one; got %d class"
+                % (n_classes)
+            )
+        if self.priors is None:
+            self.priors_ = np.bincount(y) / float(n_samples)
+        else:
+            # Convert here as well: `priors` may have been assigned after
+            # construction, bypassing the conversion done in `__init__`.
+            self.priors_ = np.asarray(self.priors)
+
+        store_covariance = self.store_covariance
+        cov = [] if store_covariance else None
+        means = []
+        scalings = []
+        rotations = []
+        for ind in range(n_classes):
+            Xg = X[y == ind, :]
+            meang = Xg.mean(0)
+            means.append(meang)
+            if len(Xg) == 1:
+                raise ValueError(
+                    "y has only 1 sample in class %s, covariance is ill defined."
+                    % str(self.classes_[ind])
+                )
+            Xgc = Xg - meang
+            # Xgc = U * S * V.T
+            _, S, Vt = np.linalg.svd(Xgc, full_matrices=False)
+            # `tol` only drives this warning; the fitted values below do not
+            # depend on the estimated rank.
+            rank = np.sum(S > self.tol)
+            if rank < n_features:
+                warnings.warn("Variables are collinear")
+            # Variances along the principal axes, then regularized by
+            # blending with 1 (i.e. with the identity covariance).
+            S2 = (S**2) / (len(Xg) - 1)
+            S2 = ((1 - self.reg_param) * S2) + self.reg_param
+            if store_covariance:
+                # cov = V * (S^2 / (n-1)) * V.T
+                cov.append(np.dot(S2 * Vt.T, Vt))
+            scalings.append(S2)
+            rotations.append(Vt.T)
+        if store_covariance:
+            self.covariance_ = cov
+        self.means_ = np.asarray(means)
+        self.scalings_ = scalings
+        self.rotations_ = rotations
+        return self
+
+    def _decision_function(self, X):
+        # return log posterior, see eq (4.12) p. 110 of the ESL.
+        check_is_fitted(self)
+
+        X = self._validate_data(X, reset=False)
+        norm2 = []
+        for i in range(len(self.classes_)):
+            R = self.rotations_[i]
+            S = self.scalings_[i]
+            Xm = X - self.means_[i]
+            X2 = np.dot(Xm, R * (S ** (-0.5)))
+            norm2.append(np.sum(X2**2, axis=1))
+        norm2 = np.array(norm2).T  # shape = [len(X), n_classes]
+        u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])
+        return -0.5 * (norm2 + u) + np.log(self.priors_)
+
+    def decision_function(self, X):
+        """Apply decision function to an array of samples.
+
+        The decision function is equal (up to a constant factor) to the
+        log-posterior of the model, i.e. `log p(y = k | x)`. In a binary
+        classification setting this instead corresponds to the difference
+        `log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Array of samples (test vectors).
+
+        Returns
+        -------
+        C : ndarray of shape (n_samples,) or (n_samples, n_classes)
+            Decision function values related to each class, per sample.
+            In the two-class case, the shape is (n_samples,), giving the
+            log likelihood ratio of the positive class.
+        """
+        dec_func = self._decision_function(X)
+        # handle special case of two classes
+        if len(self.classes_) == 2:
+            return dec_func[:, 1] - dec_func[:, 0]
+        return dec_func
+
+    def predict(self, X):
+        """Perform classification on an array of test vectors X.
+
+        The predicted class C for each sample in X is returned.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Vector to be scored, where `n_samples` is the number of samples and
+            `n_features` is the number of features.
+
+        Returns
+        -------
+        C : ndarray of shape (n_samples,)
+            Estimated probabilities.
+        """
+        d = self._decision_function(X)
+        y_pred = self.classes_.take(d.argmax(1))
+        return y_pred
+
+    def predict_proba(self, X):
+        """Return posterior probabilities of classification.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Array of samples/test vectors.
+
+        Returns
+        -------
+        C : ndarray of shape (n_samples, n_classes)
+            Posterior probabilities of classification per class.
+        """
+        values = self._decision_function(X)
+        # compute the likelihood of the underlying gaussian models
+        # up to a multiplicative constant.
+        likelihood = np.exp(values - values.max(axis=1)[:, np.newaxis])
+        # compute posterior probabilities
+        return likelihood / likelihood.sum(axis=1)[:, np.newaxis]
+
+    def predict_log_proba(self, X):
+        """Return log of posterior probabilities of classification.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Array of samples/test vectors.
+
+        Returns
+        -------
+        C : ndarray of shape (n_samples, n_classes)
+            Posterior log-probabilities of classification per class.
+        """
+        # XXX : can do better to avoid precision overflows
+        probas_ = self.predict_proba(X)
+        return np.log(probas_)