first commit
This commit is contained in:
@@ -0,0 +1,29 @@
|
||||
"""
|
||||
The :mod:`sklearn._loss` module includes loss function classes suitable for
|
||||
fitting classification and regression tasks.
|
||||
"""
|
||||
|
||||
from .loss import (
|
||||
HalfSquaredError,
|
||||
AbsoluteError,
|
||||
PinballLoss,
|
||||
HalfPoissonLoss,
|
||||
HalfGammaLoss,
|
||||
HalfTweedieLoss,
|
||||
HalfTweedieLossIdentity,
|
||||
HalfBinomialLoss,
|
||||
HalfMultinomialLoss,
|
||||
)
|
||||
|
||||
|
||||
# Public names of the package; each one is imported from the ``.loss``
# submodule above.
__all__ = [
    "HalfSquaredError",
    "AbsoluteError",
    "PinballLoss",
    "HalfPoissonLoss",
    "HalfGammaLoss",
    "HalfTweedieLoss",
    "HalfTweedieLossIdentity",
    "HalfBinomialLoss",
    "HalfMultinomialLoss",
]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,82 @@
|
||||
# cython: language_level=3
|
||||
|
||||
import numpy as np
|
||||
cimport numpy as np
|
||||
|
||||
np.import_array()
|
||||
|
||||
|
||||
# Fused types for y_true, y_pred, raw_prediction
ctypedef fused Y_DTYPE_C:
    np.npy_float64
    np.npy_float32


# Fused types for gradient and hessian
ctypedef fused G_DTYPE_C:
    np.npy_float64
    np.npy_float32


# Struct to return 2 doubles
ctypedef struct double_pair:
    double val1
    double val2
|
||||
|
||||
|
||||
# C base class for loss functions
# Every subclass below re-declares the same three nogil methods; each takes a
# single (y_true, raw_prediction) pair of doubles.
cdef class CyLossFunction:
    cdef double cy_loss(self, double y_true, double raw_prediction) nogil
    cdef double cy_gradient(self, double y_true, double raw_prediction) nogil
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) nogil


cdef class CyHalfSquaredError(CyLossFunction):
    cdef double cy_loss(self, double y_true, double raw_prediction) nogil
    cdef double cy_gradient(self, double y_true, double raw_prediction) nogil
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) nogil


cdef class CyAbsoluteError(CyLossFunction):
    cdef double cy_loss(self, double y_true, double raw_prediction) nogil
    cdef double cy_gradient(self, double y_true, double raw_prediction) nogil
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) nogil


cdef class CyPinballLoss(CyLossFunction):
    cdef readonly double quantile  # readonly makes it accessible from Python
    cdef double cy_loss(self, double y_true, double raw_prediction) nogil
    cdef double cy_gradient(self, double y_true, double raw_prediction) nogil
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) nogil


cdef class CyHalfPoissonLoss(CyLossFunction):
    cdef double cy_loss(self, double y_true, double raw_prediction) nogil
    cdef double cy_gradient(self, double y_true, double raw_prediction) nogil
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) nogil


cdef class CyHalfGammaLoss(CyLossFunction):
    cdef double cy_loss(self, double y_true, double raw_prediction) nogil
    cdef double cy_gradient(self, double y_true, double raw_prediction) nogil
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) nogil


cdef class CyHalfTweedieLoss(CyLossFunction):
    cdef readonly double power  # readonly makes it accessible from Python
    cdef double cy_loss(self, double y_true, double raw_prediction) nogil
    cdef double cy_gradient(self, double y_true, double raw_prediction) nogil
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) nogil


cdef class CyHalfTweedieLossIdentity(CyLossFunction):
    cdef readonly double power  # readonly makes it accessible from Python
    cdef double cy_loss(self, double y_true, double raw_prediction) nogil
    cdef double cy_gradient(self, double y_true, double raw_prediction) nogil
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) nogil


cdef class CyHalfBinomialLoss(CyLossFunction):
    cdef double cy_loss(self, double y_true, double raw_prediction) nogil
    cdef double cy_gradient(self, double y_true, double raw_prediction) nogil
    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) nogil
|
||||
@@ -0,0 +1,373 @@
|
||||
"""
|
||||
Distribution functions used in GLM
|
||||
"""
|
||||
|
||||
# Author: Christian Lorentzen <lorentzen.ch@googlemail.com>
|
||||
# License: BSD 3 clause
|
||||
#
|
||||
# TODO(1.3): remove file
|
||||
# This is only used for backward compatibility in _GeneralizedLinearRegressor
|
||||
# for the deprecated family attribute.
|
||||
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from collections import namedtuple
|
||||
import numbers
|
||||
|
||||
import numpy as np
|
||||
from scipy.special import xlogy
|
||||
|
||||
|
||||
# Lower bound of a distribution's valid target range: ``value`` is the bound
# itself and ``inclusive`` tells whether the bound belongs to the range.
DistributionBoundary = namedtuple("DistributionBoundary", ("value", "inclusive"))
|
||||
|
||||
|
||||
class ExponentialDispersionModel(metaclass=ABCMeta):
    r"""Base class for reproductive Exponential Dispersion Models (EDM).

    The pdf of :math:`Y\sim \mathrm{EDM}(y_\textrm{pred}, \phi)` is given by

    .. math:: p(y| \theta, \phi) = c(y, \phi)
        \exp\left(\frac{\theta y-A(\theta)}{\phi}\right)
        = \tilde{c}(y, \phi)
        \exp\left(-\frac{d(y, y_\textrm{pred})}{2\phi}\right)

    with mean :math:`\mathrm{E}[Y] = A'(\theta) = y_\textrm{pred}`,
    variance :math:`\mathrm{Var}[Y] = \phi \cdot v(y_\textrm{pred})`,
    unit variance :math:`v(y_\textrm{pred})` and
    unit deviance :math:`d(y,y_\textrm{pred})`.

    Subclasses must implement ``unit_variance`` and ``unit_deviance`` and
    provide a ``_lower_bound`` attribute of type ``DistributionBoundary``.

    Methods
    -------
    deviance
    deviance_derivative
    in_y_range
    unit_deviance
    unit_deviance_derivative
    unit_variance

    References
    ----------
    https://en.wikipedia.org/wiki/Exponential_dispersion_model.
    """

    def in_y_range(self, y):
        """Check element-wise whether y is in the valid range of Y~EDM.

        Parameters
        ----------
        y : array of shape (n_samples,)
            Target values.

        Returns
        -------
        in_range : array of bool of shape (n_samples,)
            ``True`` where the corresponding value of ``y`` respects the
            lower bound of the distribution.

        Raises
        ------
        TypeError
            If ``self._lower_bound`` is not a ``DistributionBoundary``.
        """
        # Note that currently supported distributions have +inf upper bound

        if not isinstance(self._lower_bound, DistributionBoundary):
            raise TypeError(
                "_lower_bound attribute must be of type DistributionBoundary"
            )

        if self._lower_bound.inclusive:
            return np.greater_equal(y, self._lower_bound.value)
        else:
            return np.greater(y, self._lower_bound.value)

    @abstractmethod
    def unit_variance(self, y_pred):
        r"""Compute the unit variance function.

        The unit variance :math:`v(y_\textrm{pred})` determines the variance as
        a function of the mean :math:`y_\textrm{pred}` by
        :math:`\mathrm{Var}[Y_i] = \phi/s_i*v(y_{\textrm{pred},i})`.
        It can also be derived from the unit deviance
        :math:`d(y,y_\textrm{pred})` as

        .. math:: v(y_\textrm{pred}) = \frac{2}{
            \frac{\partial^2 d(y,y_\textrm{pred})}{
            \partial y_\textrm{pred}^2}}\big|_{y=y_\textrm{pred}}

        Parameters
        ----------
        y_pred : array of shape (n_samples,)
            Predicted mean.
        """

    @abstractmethod
    def unit_deviance(self, y, y_pred, check_input=False):
        r"""Compute the unit deviance.

        The unit_deviance :math:`d(y,y_\textrm{pred})` can be defined by the
        log-likelihood as
        :math:`d(y,y_\textrm{pred}) = -2\phi\cdot
        \left(loglike(y,y_\textrm{pred},\phi) - loglike(y,y,\phi)\right).`

        Parameters
        ----------
        y : array of shape (n_samples,)
            Target values.

        y_pred : array of shape (n_samples,)
            Predicted mean.

        check_input : bool, default=False
            If True raise an exception on invalid y or y_pred values, otherwise
            they will be propagated as NaN.

        Returns
        -------
        deviance: array of shape (n_samples,)
            Computed deviance
        """

    def unit_deviance_derivative(self, y, y_pred):
        r"""Compute the derivative of the unit deviance w.r.t. y_pred.

        The derivative of the unit deviance is given by
        :math:`\frac{\partial}{\partial y_\textrm{pred}}d(y,y_\textrm{pred})
             = -2\frac{y-y_\textrm{pred}}{v(y_\textrm{pred})}`
        with unit variance :math:`v(y_\textrm{pred})`.

        Parameters
        ----------
        y : array of shape (n_samples,)
            Target values.

        y_pred : array of shape (n_samples,)
            Predicted mean.
        """
        return -2 * (y - y_pred) / self.unit_variance(y_pred)

    def deviance(self, y, y_pred, weights=1):
        r"""Compute the deviance.

        The deviance is a weighted sum of the per sample unit deviances,
        :math:`D = \sum_i s_i \cdot d(y_i, y_{\textrm{pred},i})`
        with weights :math:`s_i` and unit deviance
        :math:`d(y,y_\textrm{pred})`.
        In terms of the log-likelihood it is :math:`D = -2\phi\cdot
        \left(loglike(y,y_\textrm{pred},\frac{\phi}{s})
        - loglike(y,y,\frac{\phi}{s})\right)`.

        Parameters
        ----------
        y : array of shape (n_samples,)
            Target values.

        y_pred : array of shape (n_samples,)
            Predicted mean.

        weights : {int, array of shape (n_samples,)}, default=1
            Weights or exposure to which variance is inverse proportional.
        """
        return np.sum(weights * self.unit_deviance(y, y_pred))

    def deviance_derivative(self, y, y_pred, weights=1):
        r"""Compute the derivative of the deviance w.r.t. y_pred.

        It gives :math:`\frac{\partial}{\partial y_\textrm{pred}}
        D(y, y_\textrm{pred}; weights)`.

        Parameters
        ----------
        y : array, shape (n_samples,)
            Target values.

        y_pred : array, shape (n_samples,)
            Predicted mean.

        weights : {int, array of shape (n_samples,)}, default=1
            Weights or exposure to which variance is inverse proportional.
        """
        return weights * self.unit_deviance_derivative(y, y_pred)
|
||||
|
||||
|
||||
class TweedieDistribution(ExponentialDispersionModel):
    r"""A class for the Tweedie distribution.

    A Tweedie distribution with mean :math:`y_\textrm{pred}=\mathrm{E}[Y]`
    is uniquely defined by its mean-variance relationship
    :math:`\mathrm{Var}[Y] \propto y_\textrm{pred}^{power}`.

    Special cases are:

    ===== ================
    Power Distribution
    ===== ================
    0     Normal
    1     Poisson
    (1,2) Compound Poisson
    2     Gamma
    3     Inverse Gaussian
    ===== ================

    Parameters
    ----------
    power : float, default=0
        The variance power of the `unit_variance`
        :math:`v(y_\textrm{pred}) = y_\textrm{pred}^{power}`.
        For ``0<power<1``, no distribution exists.
    """

    def __init__(self, power=0):
        self.power = power

    @property
    def power(self):
        return self._power

    @power.setter
    def power(self, power):
        # We use a property with a setter, to update lower and
        # upper bound when the power parameter is updated e.g. in grid
        # search.
        if not isinstance(power, numbers.Real):
            raise TypeError("power must be a real number, input was {0}".format(power))

        if power <= 0:
            # Extreme Stable or Normal distribution
            # ``np.inf`` rather than the ``np.Inf`` alias, which NumPy 2.0
            # removed.
            self._lower_bound = DistributionBoundary(-np.inf, inclusive=False)
        elif 0 < power < 1:
            raise ValueError(
                "Tweedie distribution is only defined for power<=0 and power>=1."
            )
        elif 1 <= power < 2:
            # Poisson or Compound Poisson distribution
            self._lower_bound = DistributionBoundary(0, inclusive=True)
        elif power >= 2:
            # Gamma, Positive Stable, Inverse Gaussian distributions
            self._lower_bound = DistributionBoundary(0, inclusive=False)
        else:  # pragma: no cover
            # Only reached when every comparison above is False, which is the
            # case for NaN.
            raise ValueError(
                "power must be a real number that is not NaN, input was {0}".format(
                    power
                )
            )

        self._power = power

    def unit_variance(self, y_pred):
        r"""Compute the unit variance of a Tweedie distribution
        v(y_\textrm{pred})=y_\textrm{pred}**power.

        Parameters
        ----------
        y_pred : array of shape (n_samples,)
            Predicted mean.
        """
        return np.power(y_pred, self.power)

    def unit_deviance(self, y, y_pred, check_input=False):
        r"""Compute the unit deviance.

        The unit_deviance :math:`d(y,y_\textrm{pred})` can be defined by the
        log-likelihood as
        :math:`d(y,y_\textrm{pred}) = -2\phi\cdot
        \left(loglike(y,y_\textrm{pred},\phi) - loglike(y,y,\phi)\right).`

        Parameters
        ----------
        y : array of shape (n_samples,)
            Target values.

        y_pred : array of shape (n_samples,)
            Predicted mean.

        check_input : bool, default=False
            If True raise an exception on invalid y or y_pred values, otherwise
            they will be propagated as NaN.

        Returns
        -------
        deviance: array of shape (n_samples,)
            Computed deviance

        Raises
        ------
        ValueError
            If ``0 < power < 1``, or if ``check_input`` is True and ``y`` or
            ``y_pred`` lie outside the range required for the current power.
        """
        p = self.power

        if check_input:
            message = (
                "Mean Tweedie deviance error with power={} can only be used on ".format(
                    p
                )
            )
            if p < 0:
                # 'Extreme stable', y any real number, y_pred > 0
                if (y_pred <= 0).any():
                    raise ValueError(message + "strictly positive y_pred.")
            elif p == 0:
                # Normal, y and y_pred can be any real number
                pass
            elif 0 < p < 1:
                raise ValueError(
                    "Tweedie deviance is only defined for power<=0 and power>=1."
                )
            elif 1 <= p < 2:
                # Poisson and compound Poisson distribution, y >= 0, y_pred > 0
                if (y < 0).any() or (y_pred <= 0).any():
                    raise ValueError(
                        message + "non-negative y and strictly positive y_pred."
                    )
            elif p >= 2:
                # Gamma and Extreme stable distribution, y and y_pred > 0
                if (y <= 0).any() or (y_pred <= 0).any():
                    raise ValueError(message + "strictly positive y and y_pred.")
            else:  # pragma: no cover
                # Unreachable statement
                raise ValueError

        if p < 0:
            # 'Extreme stable', y any real number, y_pred > 0
            # Negative y is clipped to 0 before taking the power 2 - p.
            dev = 2 * (
                np.power(np.maximum(y, 0), 2 - p) / ((1 - p) * (2 - p))
                - y * np.power(y_pred, 1 - p) / (1 - p)
                + np.power(y_pred, 2 - p) / (2 - p)
            )

        elif p == 0:
            # Normal distribution, y and y_pred any real number
            dev = (y - y_pred) ** 2
        elif p < 1:
            raise ValueError(
                "Tweedie deviance is only defined for power<=0 and power>=1."
            )
        elif p == 1:
            # Poisson distribution
            dev = 2 * (xlogy(y, y / y_pred) - y + y_pred)
        elif p == 2:
            # Gamma distribution
            dev = 2 * (np.log(y_pred / y) + y / y_pred - 1)
        else:
            # Generic formula for the remaining powers (1 < p < 2 and p > 2).
            dev = 2 * (
                np.power(y, 2 - p) / ((1 - p) * (2 - p))
                - y * np.power(y_pred, 1 - p) / (1 - p)
                + np.power(y_pred, 2 - p) / (2 - p)
            )
        return dev
|
||||
|
||||
|
||||
class NormalDistribution(TweedieDistribution):
    """Class for the Normal (aka Gaussian) distribution.

    This is the Tweedie distribution with ``power=0``.
    """

    def __init__(self):
        super().__init__(power=0)
|
||||
|
||||
|
||||
class PoissonDistribution(TweedieDistribution):
    """Class for the scaled Poisson distribution.

    This is the Tweedie distribution with ``power=1``.
    """

    def __init__(self):
        super().__init__(power=1)
|
||||
|
||||
|
||||
class GammaDistribution(TweedieDistribution):
    """Class for the Gamma distribution.

    This is the Tweedie distribution with ``power=2``.
    """

    def __init__(self):
        super().__init__(power=2)
|
||||
|
||||
|
||||
class InverseGaussianDistribution(TweedieDistribution):
    """Class for the scaled Inverse Gaussian distribution.

    This is the Tweedie distribution with ``power=3``.
    """

    def __init__(self):
        super().__init__(power=3)
|
||||
|
||||
|
||||
# Maps a distribution name to the distribution class (not an instance).
EDM_DISTRIBUTIONS = {
    "normal": NormalDistribution,
    "poisson": PoissonDistribution,
    "gamma": GammaDistribution,
    "inverse-gaussian": InverseGaussianDistribution,
}
|
||||
@@ -0,0 +1,261 @@
|
||||
"""
|
||||
Module contains classes for invertible (and differentiable) link functions.
|
||||
"""
|
||||
# Author: Christian Lorentzen <lorentzen.ch@gmail.com>
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
|
||||
import numpy as np
|
||||
from scipy.special import expit, logit
|
||||
from scipy.stats import gmean
|
||||
from ..utils.extmath import softmax
|
||||
|
||||
|
||||
@dataclass
class Interval:
    """Numeric interval whose end points can each be open or closed.

    Parameters
    ----------
    low : float
        Lower end point.
    high : float
        Upper end point.
    low_inclusive : bool
        Whether ``low`` itself belongs to the interval.
    high_inclusive : bool
        Whether ``high`` itself belongs to the interval.

    Raises
    ------
    ValueError
        If ``low > high``.
    """

    low: float
    high: float
    low_inclusive: bool
    high_inclusive: bool

    def __post_init__(self):
        """Check that low <= high"""
        if self.low > self.high:
            raise ValueError(
                f"One must have low <= high; got low={self.low}, high={self.high}."
            )

    def includes(self, x):
        """Test whether all values of x are in interval range.

        Parameters
        ----------
        x : ndarray
            Array whose elements are tested to be in interval range.

        Returns
        -------
        result : bool
        """
        if self.low_inclusive:
            low = np.greater_equal(x, self.low)
        else:
            low = np.greater(x, self.low)

        # Skip the upper-bound check once the lower bound is violated.
        if not np.all(low):
            return False

        if self.high_inclusive:
            high = np.less_equal(x, self.high)
        else:
            high = np.less(x, self.high)

        # Note: np.all returns numpy.bool_
        return bool(np.all(high))
|
||||
|
||||
|
||||
def _inclusive_low_high(interval, dtype=np.float64):
    """Generate values low and high to be within the interval range.

    This is used in tests only.

    Parameters
    ----------
    interval : Interval
        Interval whose ``low`` and ``high`` attributes are read.
    dtype : numpy dtype, default=np.float64
        Floating point type whose machine epsilon sets the margin kept from
        finite end points.

    Returns
    -------
    low, high : tuple
        The returned values low and high lie within the interval.
    """
    eps = 10 * np.finfo(dtype).eps

    # Infinite end points are replaced by a large finite value; finite ones
    # are nudged towards the inside of the interval.
    if interval.low == -np.inf:
        low = -1e10
    elif interval.low < 0:
        low = interval.low * (1 - eps) + eps
    else:
        low = interval.low * (1 + eps) + eps

    if interval.high == np.inf:
        high = 1e10
    elif interval.high < 0:
        high = interval.high * (1 + eps) - eps
    else:
        high = interval.high * (1 - eps) - eps

    return low, high
|
||||
|
||||
|
||||
class BaseLink(ABC):
    """Abstract base class for differentiable, invertible link functions.

    Convention:
        - link function g: raw_prediction = g(y_pred)
        - inverse link h: y_pred = h(raw_prediction)

    For (generalized) linear models, `raw_prediction = X @ coef` is the so
    called linear predictor, and `y_pred = h(raw_prediction)` is the predicted
    conditional (on X) expected value of the target `y_true`.

    The methods are not implemented as staticmethods in case a link function needs
    parameters.
    """

    is_multiclass = False  # used for testing only

    # Usually, raw_prediction may be any real number and y_pred is an open
    # interval.
    # interval_raw_prediction = Interval(-np.inf, np.inf, False, False)
    interval_y_pred = Interval(-np.inf, np.inf, False, False)

    @abstractmethod
    def link(self, y_pred, out=None):
        """Compute the link function g(y_pred).

        The link function maps (predicted) target values to raw predictions,
        i.e. `g(y_pred) = raw_prediction`.

        Parameters
        ----------
        y_pred : array
            Predicted target values.
        out : array
            A location into which the result is stored. If provided, it must
            have a shape that the inputs broadcast to. If not provided or None,
            a freshly-allocated array is returned.

        Returns
        -------
        out : array
            Output array, element-wise link function.
        """

    @abstractmethod
    def inverse(self, raw_prediction, out=None):
        """Compute the inverse link function h(raw_prediction).

        The inverse link function maps raw predictions to predicted target
        values, i.e. `h(raw_prediction) = y_pred`.

        Parameters
        ----------
        raw_prediction : array
            Raw prediction values (in link space).
        out : array
            A location into which the result is stored. If provided, it must
            have a shape that the inputs broadcast to. If not provided or None,
            a freshly-allocated array is returned.

        Returns
        -------
        out : array
            Output array, element-wise inverse link function.
        """
|
||||
|
||||
|
||||
class IdentityLink(BaseLink):
    """The identity link function g(x)=x."""

    def link(self, y_pred, out=None):
        """Return ``y_pred`` unchanged, copying it into ``out`` when given.

        Without ``out`` the input object itself is returned, not a copy.
        """
        if out is not None:
            np.copyto(out, y_pred)
            return out
        else:
            return y_pred

    # The identity is its own inverse.
    inverse = link
|
||||
|
||||
|
||||
class LogLink(BaseLink):
    """The log link function g(x)=log(x)."""

    interval_y_pred = Interval(0, np.inf, False, False)

    def link(self, y_pred, out=None):
        """Return ``log(y_pred)`` element-wise, written to ``out`` if given."""
        return np.log(y_pred, out=out)

    def inverse(self, raw_prediction, out=None):
        """Return ``exp(raw_prediction)`` element-wise, written to ``out`` if given."""
        return np.exp(raw_prediction, out=out)
|
||||
|
||||
|
||||
class LogitLink(BaseLink):
    """The logit link function g(x)=logit(x)."""

    interval_y_pred = Interval(0, 1, False, False)

    def link(self, y_pred, out=None):
        """Return ``logit(y_pred)`` element-wise, written to ``out`` if given."""
        return logit(y_pred, out=out)

    def inverse(self, raw_prediction, out=None):
        """Return ``expit(raw_prediction)`` element-wise, written to ``out`` if given."""
        return expit(raw_prediction, out=out)
|
||||
|
||||
|
||||
class MultinomialLogit(BaseLink):
    """The symmetric multinomial logit function.

    Convention:
        - y_pred.shape = raw_prediction.shape = (n_samples, n_classes)

    Notes:
        - The inverse link h is the softmax function.
        - The sum is over the second axis, i.e. axis=1 (n_classes).

    We have to choose additional constraints in order to make

        y_pred[k] = exp(raw_pred[k]) / sum(exp(raw_pred[k]), k=0..n_classes-1)

    for n_classes classes identifiable and invertible.
    We choose the symmetric side constraint where the geometric mean response
    is set as reference category, see [2]:

    The symmetric multinomial logit link function for a single data point is
    then defined as

        raw_prediction[k] = g(y_pred[k]) = log(y_pred[k]/gmean(y_pred))
        = log(y_pred[k]) - mean(log(y_pred)).

    Note that this is equivalent to the definition in [1] and implies mean
    centered raw predictions:

        sum(raw_prediction[k], k=0..n_classes-1) = 0.

    For linear models with raw_prediction = X @ coef, this corresponds to
    sum(coef[k], k=0..n_classes-1) = 0, i.e. the sum over classes for every
    feature is zero.

    References
    ----------
    .. [1] Friedman, Jerome; Hastie, Trevor; Tibshirani, Robert. "Additive
        logistic regression: a statistical view of boosting" Ann. Statist.
        28 (2000), no. 2, 337--407. doi:10.1214/aos/1016218223.
        https://projecteuclid.org/euclid.aos/1016218223

    .. [2] Zahid, Faisal Maqbool and Gerhard Tutz. "Ridge estimation for
        multinomial logit models with symmetric side constraints."
        Computational Statistics 28 (2013): 1017-1034.
        http://epub.ub.uni-muenchen.de/11001/1/tr067.pdf
    """

    is_multiclass = True
    interval_y_pred = Interval(0, 1, False, False)

    def symmetrize_raw_prediction(self, raw_prediction):
        """Return raw_prediction with its mean over axis=1 subtracted per row."""
        return raw_prediction - np.mean(raw_prediction, axis=1)[:, np.newaxis]

    def link(self, y_pred, out=None):
        """Return ``log(y_pred / gmean(y_pred))``, the mean taken over axis=1."""
        # geometric mean as reference category
        gm = gmean(y_pred, axis=1)
        return np.log(y_pred / gm[:, np.newaxis], out=out)

    def inverse(self, raw_prediction, out=None):
        """Return the softmax of raw_prediction, written to ``out`` if given."""
        if out is None:
            return softmax(raw_prediction, copy=True)
        else:
            # Copy first so that softmax can then work in place on ``out``.
            np.copyto(out, raw_prediction)
            softmax(out, copy=False)
            return out
|
||||
|
||||
|
||||
# Maps a link name to the link class (not an instance).
_LINKS = {
    "identity": IdentityLink,
    "log": LogLink,
    "logit": LogitLink,
    "multinomial_logit": MultinomialLogit,
}
|
||||
1035
dashboard/flask-server/venv/Lib/site-packages/sklearn/_loss/loss.py
Normal file
1035
dashboard/flask-server/venv/Lib/site-packages/sklearn/_loss/loss.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,25 @@
|
||||
import numpy
|
||||
from numpy.distutils.misc_util import Configuration
|
||||
from sklearn._build_utils import gen_from_templates
|
||||
|
||||
|
||||
def configuration(parent_package="", top_path=None):
    """Return the ``numpy.distutils`` configuration of the ``_loss`` package.

    Parameters
    ----------
    parent_package : str, default=""
        Passed through to ``Configuration``.
    top_path : str or None, default=None
        Passed through to ``Configuration``.

    Returns
    -------
    config : Configuration
        Configuration holding the ``_loss`` extension.
    """
    config = Configuration("_loss", parent_package, top_path)

    # generate _loss.pyx from template
    templates = ["sklearn/_loss/_loss.pyx.tp"]
    gen_from_templates(templates)

    config.add_extension(
        "_loss",
        sources=["_loss.pyx"],
        include_dirs=[numpy.get_include()],
        # define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
    )
    return config
|
||||
|
||||
|
||||
# Allow building this subpackage on its own by running the file as a script.
if __name__ == "__main__":
    from numpy.distutils.core import setup

    setup(**configuration().todict())
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,123 @@
|
||||
# Authors: Christian Lorentzen <lorentzen.ch@gmail.com>
|
||||
#
|
||||
# License: BSD 3 clause
|
||||
#
|
||||
# TODO(1.3): remove file
|
||||
import numpy as np
|
||||
from numpy.testing import (
|
||||
assert_allclose,
|
||||
assert_array_equal,
|
||||
)
|
||||
from scipy.optimize import check_grad
|
||||
import pytest
|
||||
|
||||
from sklearn._loss.glm_distribution import (
|
||||
TweedieDistribution,
|
||||
NormalDistribution,
|
||||
PoissonDistribution,
|
||||
GammaDistribution,
|
||||
InverseGaussianDistribution,
|
||||
DistributionBoundary,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "family, expected",
    [
        (NormalDistribution(), [True, True, True]),
        (PoissonDistribution(), [False, True, True]),
        (TweedieDistribution(power=1.5), [False, True, True]),
        (GammaDistribution(), [False, False, True]),
        (InverseGaussianDistribution(), [False, False, True]),
        (TweedieDistribution(power=4.5), [False, False, True]),
    ],
)
def test_family_bounds(family, expected):
    """Test the valid range of distributions at -1, 0, 1."""
    result = family.in_y_range([-1, 0, 1])
    assert_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_invalid_distribution_bound():
    """Test that a _lower_bound of the wrong type is rejected by in_y_range."""
    dist = TweedieDistribution()
    # Bypass the power setter and plant a plain number as bound.
    dist._lower_bound = 0
    with pytest.raises(TypeError, match="must be of type DistributionBoundary"):
        dist.in_y_range([-1, 0, 1])
|
||||
|
||||
|
||||
def test_tweedie_distribution_power():
    """Test validation of power and the lower bound derived from it."""
    msg = "distribution is only defined for power<=0 and power>=1"
    with pytest.raises(ValueError, match=msg):
        TweedieDistribution(power=0.5)

    with pytest.raises(TypeError, match="must be a real number"):
        TweedieDistribution(power=1j)

    # Construct outside the ``raises`` block so that only the assignment can
    # satisfy the expected TypeError.
    dist = TweedieDistribution()
    with pytest.raises(TypeError, match="must be a real number"):
        dist.power = 1j

    dist = TweedieDistribution()
    assert isinstance(dist._lower_bound, DistributionBoundary)

    assert dist._lower_bound.inclusive is False
    dist.power = 1
    assert dist._lower_bound.value == 0.0
    assert dist._lower_bound.inclusive is True
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "family, chk_values",
    [
        (NormalDistribution(), [-1.5, -0.1, 0.1, 2.5]),
        (PoissonDistribution(), [0.1, 1.5]),
        (GammaDistribution(), [0.1, 1.5]),
        (InverseGaussianDistribution(), [0.1, 1.5]),
        (TweedieDistribution(power=-2.5), [0.1, 1.5]),
        (TweedieDistribution(power=-1), [0.1, 1.5]),
        (TweedieDistribution(power=1.5), [0.1, 1.5]),
        (TweedieDistribution(power=2.5), [0.1, 1.5]),
        (TweedieDistribution(power=-4), [0.1, 1.5]),
    ],
)
def test_deviance_zero(family, chk_values):
    """Test deviance(y,y) = 0 for different families."""
    for x in chk_values:
        assert_allclose(family.deviance(x, x), 0, atol=1e-9)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "family",
    [
        NormalDistribution(),
        PoissonDistribution(),
        GammaDistribution(),
        InverseGaussianDistribution(),
        TweedieDistribution(power=-2.5),
        TweedieDistribution(power=-1),
        TweedieDistribution(power=1.5),
        TweedieDistribution(power=2.5),
        TweedieDistribution(power=-4),
    ],
    ids=lambda x: x.__class__.__name__,
)
def test_deviance_derivative(family):
    """Test deviance derivative for different families."""
    rng = np.random.RandomState(0)
    y_true = rng.rand(10)
    # make data positive
    y_true += np.abs(y_true.min()) + 1e-2

    y_pred = y_true + np.fmax(rng.rand(10), 0.0)

    dev = family.deviance(y_true, y_pred)
    assert isinstance(dev, float)
    dev_derivative = family.deviance_derivative(y_true, y_pred)
    assert dev_derivative.shape == y_pred.shape

    # Compare the analytic derivative with a finite-difference estimate,
    # relative to the norm of the analytic derivative.
    err = check_grad(
        lambda y_pred: family.deviance(y_true, y_pred),
        lambda y_pred: family.deviance_derivative(y_true, y_pred),
        y_pred,
    ) / np.linalg.norm(dev_derivative)
    assert abs(err) < 1e-6
|
||||
@@ -0,0 +1,108 @@
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose, assert_array_equal
|
||||
import pytest
|
||||
|
||||
from sklearn._loss.link import (
|
||||
_LINKS,
|
||||
_inclusive_low_high,
|
||||
MultinomialLogit,
|
||||
Interval,
|
||||
)
|
||||
|
||||
|
||||
# All link classes registered in ``_LINKS``; used to parametrize the link tests.
LINK_FUNCTIONS = list(_LINKS.values())
|
||||
|
||||
|
||||
def test_interval_raises():
    """Test that interval with low > high raises ValueError."""
    # ``match`` is a regular expression, so the final dot is escaped.
    with pytest.raises(
        ValueError, match=r"One must have low <= high; got low=1, high=0\."
    ):
        Interval(1, 0, False, False)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "interval",
    [
        Interval(0, 1, False, False),
        Interval(0, 1, False, True),
        Interval(0, 1, True, False),
        Interval(0, 1, True, True),
        Interval(-np.inf, np.inf, False, False),
        Interval(-np.inf, np.inf, False, True),
        Interval(-np.inf, np.inf, True, False),
        Interval(-np.inf, np.inf, True, True),
        Interval(-10, -1, False, False),
        Interval(-10, -1, False, True),
        Interval(-10, -1, True, False),
        Interval(-10, -1, True, True),
    ],
)
def test_is_in_range(interval):
    """Test Interval.includes with and without the end points in x."""
    # make sure low and high are always within the interval, used for linspace
    low, high = _inclusive_low_high(interval)

    x = np.linspace(low, high, num=10)
    assert interval.includes(x)

    # x contains lower bound
    assert interval.includes(np.r_[x, interval.low]) == interval.low_inclusive

    # x contains upper bound
    assert interval.includes(np.r_[x, interval.high]) == interval.high_inclusive

    # x contains upper and lower bound
    assert interval.includes(np.r_[x, interval.low, interval.high]) == (
        interval.low_inclusive and interval.high_inclusive
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("link", LINK_FUNCTIONS)
def test_link_inverse_identity(link):
    """Test that link and inverse undo each other, in both orders."""
    rng = np.random.RandomState(42)
    link = link()
    n_samples, n_classes = 100, None
    if link.is_multiclass:
        n_classes = 10
        raw_prediction = rng.normal(loc=0, scale=10, size=(n_samples, n_classes))
        if isinstance(link, MultinomialLogit):
            raw_prediction = link.symmetrize_raw_prediction(raw_prediction)
    else:
        # So far, the valid interval of raw_prediction is (-inf, inf) and
        # we do not need to distinguish.
        raw_prediction = rng.normal(loc=0, scale=10, size=(n_samples))

    assert_allclose(link.link(link.inverse(raw_prediction)), raw_prediction)
    y_pred = link.inverse(raw_prediction)
    assert_allclose(link.inverse(link.link(y_pred)), y_pred)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("link", LINK_FUNCTIONS)
def test_link_out_argument(link):
    """Test that out argument gets assigned the result."""
    rng = np.random.RandomState(42)
    link = link()
    n_samples, n_classes = 100, None
    if link.is_multiclass:
        n_classes = 10
        raw_prediction = rng.normal(loc=0, scale=10, size=(n_samples, n_classes))
        if isinstance(link, MultinomialLogit):
            raw_prediction = link.symmetrize_raw_prediction(raw_prediction)
    else:
        # So far, the valid interval of raw_prediction is (-inf, inf) and
        # we do not need to distinguish.
        raw_prediction = rng.normal(loc=0, scale=10, size=(n_samples))

    # inverse: result with ``out`` must equal the result without it and be
    # stored in the array that was passed in.
    y_pred = link.inverse(raw_prediction, out=None)
    out = np.empty_like(raw_prediction)
    y_pred_2 = link.inverse(raw_prediction, out=out)
    assert_allclose(y_pred, out)
    assert_array_equal(out, y_pred_2)
    assert np.shares_memory(out, y_pred_2)

    # link: same checks in the other direction.
    out = np.empty_like(y_pred)
    raw_prediction_2 = link.link(y_pred, out=out)
    assert_allclose(raw_prediction, out)
    assert_array_equal(out, raw_prediction_2)
    assert np.shares_memory(out, raw_prediction_2)
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user