first commit
This commit is contained in:
@@ -0,0 +1,25 @@
|
||||
"""
|
||||
The :mod:`sklearn.svm` module includes Support Vector Machine algorithms.
|
||||
"""
|
||||
|
||||
# See http://scikit-learn.sourceforge.net/modules/svm.html for complete
|
||||
# documentation.
|
||||
|
||||
# Author: Fabian Pedregosa <fabian.pedregosa@inria.fr> with help from
|
||||
# the scikit-learn community. LibSVM and LibLinear are copyright
|
||||
# of their respective owners.
|
||||
# License: BSD 3 clause (C) INRIA 2010
|
||||
|
||||
from ._classes import SVC, NuSVC, SVR, NuSVR, OneClassSVM, LinearSVC, LinearSVR
|
||||
from ._bounds import l1_min_c
|
||||
|
||||
__all__ = [
|
||||
"LinearSVC",
|
||||
"LinearSVR",
|
||||
"NuSVC",
|
||||
"NuSVR",
|
||||
"OneClassSVM",
|
||||
"SVC",
|
||||
"SVR",
|
||||
"l1_min_c",
|
||||
]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
1237
dashboard/flask-server/venv/Lib/site-packages/sklearn/svm/_base.py
Normal file
1237
dashboard/flask-server/venv/Lib/site-packages/sklearn/svm/_base.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,74 @@
|
||||
"""Determination of parameter bounds"""
|
||||
# Author: Paolo Losi
|
||||
# License: BSD 3 clause
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ..preprocessing import LabelBinarizer
|
||||
from ..utils.validation import check_consistent_length, check_array
|
||||
from ..utils.extmath import safe_sparse_dot
|
||||
|
||||
|
||||
def l1_min_c(X, y, *, loss="squared_hinge", fit_intercept=True, intercept_scaling=1.0):
    """Return the lowest bound for C of an l1-penalized classifier.

    For any C in (l1_min_C, infinity) the model is guaranteed not to be
    empty. This applies to l1 penalized classifiers, such as LinearSVC with
    penalty='l1' and linear_model.LogisticRegression with penalty='l1'.

    This value is valid if class_weight parameter in fit() is not set.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Training vector, where `n_samples` is the number of samples and
        `n_features` is the number of features.

    y : array-like of shape (n_samples,)
        Target vector relative to X.

    loss : {'squared_hinge', 'log'}, default='squared_hinge'
        Specifies the loss function.
        With 'squared_hinge' it is the squared hinge loss (a.k.a. L2 loss).
        With 'log' it is the loss of logistic regression models.

    fit_intercept : bool, default=True
        Specifies if the intercept should be fitted by the model.
        It must match the fit() method parameter.

    intercept_scaling : float, default=1.0
        When fit_intercept is True, instance vector x becomes
        [x, intercept_scaling],
        i.e. a "synthetic" feature with constant value equal to
        intercept_scaling is appended to the instance vector.
        It must match the fit() method parameter.

    Returns
    -------
    l1_min_c : float
        Minimum value for C.

    Raises
    ------
    ValueError
        If `loss` is not one of the supported values, or if the computed
        denominator is zero (l1 would always select zero coefficients).
    """
    if loss not in ("squared_hinge", "log"):
        raise ValueError('loss type not in ("squared_hinge", "log")')

    X = check_array(X, accept_sparse="csc")
    check_consistent_length(X, y)

    # Binarized targets (negative label encoded as -1), transposed so that
    # the product with X yields one row per binarized class.
    signed_labels = LabelBinarizer(neg_label=-1).fit_transform(y).T

    # maximum absolute value over classes and features
    largest = np.max(np.abs(safe_sparse_dot(signed_labels, X)))

    if fit_intercept:
        # The synthetic intercept feature is a constant column that takes
        # part in the maximum just like a regular feature.
        scaling_dtype = np.array(intercept_scaling).dtype
        intercept_column = np.full(
            (np.size(y), 1), intercept_scaling, dtype=scaling_dtype
        )
        largest = max(largest, abs(np.dot(signed_labels, intercept_column)).max())

    if largest == 0.0:
        raise ValueError(
            "Ill-posed l1_min_c calculation: l1 will always "
            "select zero coefficients for this data"
        )

    # Only the numerator depends on the loss: 0.5 for squared hinge, 2.0 for log.
    numerator = 0.5 if loss == "squared_hinge" else 2.0
    return numerator / largest
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,134 @@
|
||||
import os
|
||||
from os.path import join
|
||||
import numpy
|
||||
|
||||
|
||||
def configuration(parent_package="", top_path=None):
    """Build the numpy.distutils configuration of the ``svm`` subpackage.

    Registers, in this order: the ``tests`` subpackage, the ``_newrand``
    extension, the ``libsvm-skl`` library and the ``_libsvm`` extension,
    the ``liblinear-skl`` library and the ``_liblinear`` extension, and
    finally the ``_libsvm_sparse`` extension.

    Parameters
    ----------
    parent_package : str, default=""
        Forwarded to ``Configuration``.
    top_path : str or None, default=None
        Forwarded to ``Configuration``.

    Returns
    -------
    config : Configuration
        The populated configuration object.
    """
    from numpy.distutils.misc_util import Configuration

    # Paths and values reused by several targets below.
    numpy_include = numpy.get_include()
    libsvm_dir = join("src", "libsvm")
    liblinear_dir = join("src", "liblinear")
    newrand_dir = join("src", "newrand")
    newrand_header = join(newrand_dir, "newrand.h")

    config = Configuration("svm", parent_package, top_path)
    config.add_subpackage("tests")

    # newrand wrappers
    config.add_extension(
        "_newrand",
        sources=["_newrand.pyx"],
        include_dirs=[numpy_include, newrand_dir],
        depends=[newrand_header],
        language="c++",
        # Use C++11 random number generator fix
        extra_compile_args=["-std=c++11"],
    )

    # Section LibSVM: the library is shared by _libsvm and _libsvm_sparse.
    config.add_library(
        "libsvm-skl",
        sources=[join(libsvm_dir, "libsvm_template.cpp")],
        depends=[
            join(libsvm_dir, "svm.cpp"),
            join(libsvm_dir, "svm.h"),
            newrand_header,
        ],
        # Force C++ linking in case gcc is picked up instead
        # of g++ under windows with some versions of MinGW
        extra_link_args=["-lstdc++"],
        # Use C++11 to use the random number generator fix
        extra_compiler_args=["-std=c++11"],
    )

    config.add_extension(
        "_libsvm",
        sources=["_libsvm.pyx"],
        include_dirs=[numpy_include, libsvm_dir, newrand_dir],
        libraries=["libsvm-skl"],
        depends=[
            join(libsvm_dir, "libsvm_helper.c"),
            join(libsvm_dir, "libsvm_template.cpp"),
            join(libsvm_dir, "svm.cpp"),
            join(libsvm_dir, "svm.h"),
            newrand_header,
        ],
    )

    # liblinear module: link against libm on posix platforms only.
    posix_math_libs = ["m"] if os.name == "posix" else []

    # precompile liblinear to use C++11 flag
    config.add_library(
        "liblinear-skl",
        sources=[
            join(liblinear_dir, "linear.cpp"),
            join(liblinear_dir, "tron.cpp"),
        ],
        depends=[
            join(liblinear_dir, "linear.h"),
            join(liblinear_dir, "tron.h"),
            newrand_header,
        ],
        # Force C++ linking in case gcc is picked up instead
        # of g++ under windows with some versions of MinGW
        extra_link_args=["-lstdc++"],
        # Use C++11 to use the random number generator fix
        extra_compiler_args=["-std=c++11"],
    )

    config.add_extension(
        "_liblinear",
        sources=["_liblinear.pyx"],
        libraries=["liblinear-skl"] + posix_math_libs,
        include_dirs=[
            join(".", "src", "liblinear"),
            join(".", "src", "newrand"),
            join("..", "utils"),
            numpy_include,
        ],
        depends=[
            join(liblinear_dir, "*.h"),
            newrand_header,
            join(liblinear_dir, "liblinear_helper.c"),
        ],
    )
    # end liblinear module

    # this should go *after* libsvm-skl
    config.add_extension(
        "_libsvm_sparse",
        libraries=["libsvm-skl"],
        sources=["_libsvm_sparse.pyx"],
        include_dirs=[numpy_include, libsvm_dir, newrand_dir],
        depends=[
            join(libsvm_dir, "svm.h"),
            newrand_header,
            join(libsvm_dir, "libsvm_sparse_helper.c"),
        ],
    )

    return config
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Executed as a script: hand the svm configuration to numpy.distutils.
    from numpy.distutils.core import setup

    svm_config = configuration(top_path="")
    setup(**svm_config.todict())
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,144 @@
|
||||
import numpy as np
|
||||
from scipy import sparse as sp
|
||||
from scipy import stats
|
||||
|
||||
import pytest
|
||||
|
||||
from sklearn.svm._bounds import l1_min_c
|
||||
from sklearn.svm import LinearSVC
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.svm._newrand import set_seed_wrap, bounded_rand_int_wrap
|
||||
|
||||
|
||||
# Four two-feature samples, available both as nested lists and as a CSR matrix.
dense_X = [[-1, 0], [0, 1], [1, 1], [1, 1]]
sparse_X = sp.csr_matrix(dense_X)

# Targets with two distinct labels (Y1) and three distinct labels (Y2).
Y1, Y2 = [0, 1, 1, 1], [2, 1, 0, 0]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("loss", ["squared_hinge", "log"])
@pytest.mark.parametrize("X_label", ["sparse", "dense"])
@pytest.mark.parametrize("Y_label", ["two-classes", "multi-class"])
@pytest.mark.parametrize("intercept_label", ["no-intercept", "fit-intercept"])
def test_l1_min_c(loss, X_label, Y_label, intercept_label):
    """Run check_l1_min_c for every loss / input / target / intercept combination."""
    # Each parametrized label is resolved to the object it stands for.
    inputs_by_label = {"sparse": sparse_X, "dense": dense_X}
    targets_by_label = {"two-classes": Y1, "multi-class": Y2}
    intercept_kwargs_by_label = {
        "no-intercept": {"fit_intercept": False},
        "fit-intercept": {"fit_intercept": True, "intercept_scaling": 10},
    }

    check_l1_min_c(
        inputs_by_label[X_label],
        targets_by_label[Y_label],
        loss,
        **intercept_kwargs_by_label[intercept_label],
    )
|
||||
|
||||
|
||||
def test_l1_min_c_l2_loss():
    """loss='l2' should raise a ValueError whose message names the loss type."""
    with pytest.raises(ValueError, match="loss type not in"):
        l1_min_c(dense_X, Y1, loss="l2")
|
||||
|
||||
|
||||
def check_l1_min_c(X, y, loss, fit_intercept=True, intercept_scaling=None):
    """Check that l1_min_c is the boundary between empty and non-empty models.

    An l1-penalized estimator matching `loss` is fitted twice: with
    C equal to the bound every coefficient and intercept must be exactly
    zero, and with C 1% above the bound at least one of them must be
    non-zero.
    """
    min_c = l1_min_c(
        X,
        y,
        loss=loss,
        fit_intercept=fit_intercept,
        intercept_scaling=intercept_scaling,
    )

    # l1_min_c has already rejected any loss other than these two.
    if loss == "log":
        model = LogisticRegression(penalty="l1", solver="liblinear")
    else:
        model = LinearSVC(loss="squared_hinge", penalty="l1", dual=False)

    model.fit_intercept = fit_intercept
    model.intercept_scaling = intercept_scaling

    # At the bound itself: the fitted model must be empty.
    model.C = min_c
    model.fit(X, y)
    assert (np.asarray(model.coef_) == 0).all()
    assert (np.asarray(model.intercept_) == 0).all()

    # Slightly above the bound: something must become non-zero.
    model.C = min_c * 1.01
    model.fit(X, y)
    has_nonzero_coef = (np.asarray(model.coef_) != 0).any()
    assert has_nonzero_coef or (np.asarray(model.intercept_) != 0).any()
|
||||
|
||||
|
||||
def test_ill_posed_min_c():
    """l1_min_c must raise ValueError when every feature value is zero."""
    all_zero_X = [[0, 0], [0, 0]]
    labels = [0, 1]
    with pytest.raises(ValueError):
        l1_min_c(all_zero_X, labels)
|
||||
|
||||
|
||||
def test_unsupported_loss():
    """l1_min_c must raise ValueError for the unsupported loss name 'l1'."""
    unsupported = "l1"
    with pytest.raises(ValueError):
        l1_min_c(dense_X, Y1, loss=unsupported)
|
||||
|
||||
|
||||
_MAX_UNSIGNED_INT = 4294967295
|
||||
|
||||
|
||||
@pytest.mark.parametrize("seed, val", [(None, 81), (0, 54), (_MAX_UNSIGNED_INT, 9)])
def test_newrand_set_seed(seed, val):
    """Test that `set_seed` produces deterministic results"""
    # seed=None means: draw without seeding first.
    if seed is not None:
        set_seed_wrap(seed)
    drawn = bounded_rand_int_wrap(100)
    assert drawn == val, f"Expected {val} but got {drawn} instead"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("seed", [-1, _MAX_UNSIGNED_INT + 1])
def test_newrand_set_seed_overflow(seed):
    """Test that `set_seed_wrap` is defined for unsigned 32bits ints"""
    # Both parametrized seeds lie just outside [0, _MAX_UNSIGNED_INT].
    with pytest.raises(OverflowError):
        set_seed_wrap(seed)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("range_, n_pts", [(_MAX_UNSIGNED_INT, 10000), (100, 25)])
def test_newrand_bounded_rand_int(range_, n_pts):
    """Test that `bounded_rand_int` follows a uniform distribution"""
    n_rounds = 100
    expected_cdf = stats.uniform(loc=0, scale=range_).cdf

    # perform multiple samplings to make chance of outlier sampling negligible
    ks_pvals = []
    for _ in range(n_rounds):
        # Deterministic random sampling
        draws = [bounded_rand_int_wrap(range_) for _ in range(n_pts)]
        ks_pvals.append(stats.kstest(draws, expected_cdf).pvalue)

    # Null hypothesis = samples come from an uniform distribution.
    # Under the null hypothesis, p-values should be uniformly distributed
    # and not concentrated on low values
    # (this may seem counter-intuitive but is backed by multiple refs)
    # So we can do two checks:

    # (1) check uniformity of p-values
    unit_uniform_cdf = stats.uniform(loc=0, scale=1).cdf
    meta_pvalue = stats.kstest(ks_pvals, unit_uniform_cdf).pvalue
    assert meta_pvalue > 0.05, (
        "Null hypothesis rejected: generated random numbers are not uniform."
        " Details: the (meta) p-value of the test of uniform distribution"
        f" of p-values is {meta_pvalue} which is not > 0.05"
    )

    # (2) (safety belt) check that 90% of p-values are above 0.05
    tenth_percentile_pval = np.percentile(ks_pvals, q=10)
    # lower 10th quantile pvalue <= 0.05 means that the test rejects the
    # null hypothesis that the sample came from the uniform distribution
    assert tenth_percentile_pval > 0.05, (
        "Null hypothesis rejected: generated random numbers are not uniform. "
        f"Details: lower 10th quantile p-value of {tenth_percentile_pval} not > 0.05."
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("range_", [-1, _MAX_UNSIGNED_INT + 1])
def test_newrand_bounded_rand_int_limits(range_):
    """Test that `bounded_rand_int_wrap` is defined for unsigned 32bits ints"""
    # Both parametrized ranges lie just outside [0, _MAX_UNSIGNED_INT].
    with pytest.raises(OverflowError):
        bounded_rand_int_wrap(range_)
|
||||
@@ -0,0 +1,560 @@
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_array_almost_equal, assert_array_equal
|
||||
from scipy import sparse
|
||||
|
||||
from sklearn import datasets, svm, linear_model, base
|
||||
from sklearn.datasets import make_classification, load_digits, make_blobs
|
||||
from sklearn.svm.tests import test_svm
|
||||
from sklearn.exceptions import ConvergenceWarning
|
||||
from sklearn.utils.extmath import safe_sparse_dot
|
||||
from sklearn.utils._testing import ignore_warnings, skip_if_32bit
|
||||
|
||||
|
||||
# test sample 1: six 2-feature points with labels 1/2, plus a LIL sparse copy
X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])
X_sp = sparse.lil_matrix(X)
Y = [1, 1, 1, 2, 2, 2]
T = np.array([[-1, -1], [2, 2], [3, 2]])
true_result = [1, 2, 2]

# test sample 2: five 3-feature points with labels 1/2/3, plus a DOK sparse copy
X2 = np.array([[0, 0, 0], [1, 1, 1], [2, 0, 0], [0, 0, 2], [3, 3, 3]])
X2_sp = sparse.dok_matrix(X2)
Y2 = [1, 2, 2, 2, 3]
T2 = np.array([[-1, -1, -1], [1, 1, 1], [2, 2, 2]])
true_result2 = [1, 2, 3]
|
||||
|
||||
|
||||
# Iris fixture: rows are shuffled with a fixed seed and the features are
# stored as a CSR matrix, so tests densify them with ``.toarray()`` on demand.
iris = datasets.load_iris()
rng = np.random.RandomState(0)
perm = rng.permutation(iris.target.size)
# permute, then sparsify
iris.data = sparse.csr_matrix(iris.data[perm])
iris.target = iris.target[perm]
|
||||
|
||||
|
||||
def check_svm_model_equal(dense_svm, sparse_svm, X_train, y_train, X_test):
    """Fit one estimator on dense data, one on sparse data, and compare them.

    `dense_svm` is fitted on ``X_train.toarray()`` and `sparse_svm` on
    `X_train` itself. The fitted attributes, predictions and decision values
    of both models must agree, the sparse model must expose sparse
    attributes, and the dense model must refuse a sparse-matrix `X_test`.
    """
    dense_svm.fit(X_train.toarray(), y_train)
    test_is_spmatrix = sparse.isspmatrix(X_test)
    X_test_dense = X_test.toarray() if test_is_spmatrix else X_test
    sparse_svm.fit(X_train, y_train)

    # Fitted attributes: sparse on the sparse model, equal in value to the dense ones.
    assert sparse.issparse(sparse_svm.support_vectors_)
    assert sparse.issparse(sparse_svm.dual_coef_)
    assert_array_almost_equal(
        dense_svm.support_vectors_, sparse_svm.support_vectors_.toarray()
    )
    assert_array_almost_equal(dense_svm.dual_coef_, sparse_svm.dual_coef_.toarray())
    if dense_svm.kernel == "linear":
        assert sparse.issparse(sparse_svm.coef_)
        assert_array_almost_equal(dense_svm.coef_, sparse_svm.coef_.toarray())
    assert_array_almost_equal(dense_svm.support_, sparse_svm.support_)

    # Predictions and decision values, with the sparse model fed both formats.
    assert_array_almost_equal(
        dense_svm.predict(X_test_dense), sparse_svm.predict(X_test)
    )
    assert_array_almost_equal(
        dense_svm.decision_function(X_test_dense), sparse_svm.decision_function(X_test)
    )
    assert_array_almost_equal(
        dense_svm.decision_function(X_test_dense),
        sparse_svm.decision_function(X_test_dense),
    )

    # Probabilities are compared for every estimator except OneClassSVM.
    if isinstance(dense_svm, svm.OneClassSVM):
        msg = "cannot use sparse input in 'OneClassSVM' trained on dense data"
    else:
        assert_array_almost_equal(
            dense_svm.predict_proba(X_test_dense), sparse_svm.predict_proba(X_test), 4
        )
        msg = "cannot use sparse input in 'SVC' trained on dense data"

    if test_is_spmatrix:
        with pytest.raises(ValueError, match=msg):
            dense_svm.predict(X_test)
|
||||
|
||||
|
||||
@skip_if_32bit
def test_svc():
    """Check that sparse SVC gives the same result as SVC"""
    # many class dataset:
    X_blobs, y_blobs = make_blobs(n_samples=100, centers=10, random_state=0)
    X_blobs = sparse.csr_matrix(X_blobs)

    # Each case is (X_train, y_train, X_test).
    cases = [
        [X_sp, Y, T],
        [X2_sp, Y2, T2],
        [X_blobs[:80], y_blobs[:80], X_blobs[80:]],
        [iris.data, iris.target, iris.data],
    ]
    for case in cases:
        for kernel in ["linear", "poly", "rbf", "sigmoid"]:
            # Both estimators get identical hyper-parameters.
            svc_params = dict(
                gamma=1,
                kernel=kernel,
                probability=True,
                random_state=0,
                decision_function_shape="ovo",
            )
            dense_model = svm.SVC(**svc_params)
            sparse_model = svm.SVC(**svc_params)
            check_svm_model_equal(dense_model, sparse_model, *case)
|
||||
|
||||
|
||||
def test_unsorted_indices():
    """Check that CSR input with unsorted indices gives the same SVC results."""
    # we use a subset of digits as iris, blobs or make_classification didn't
    # show the problem
    digits_X, digits_y = load_digits(return_X_y=True)
    X_test = sparse.csr_matrix(digits_X[50:100])
    X_train, y_train = digits_X[:50], digits_y[:50]
    X_sparse = sparse.csr_matrix(X_train)

    def fit_linear_svc(data):
        # Every model in this test uses the same hyper-parameters and targets.
        model = svm.SVC(kernel="linear", probability=True, random_state=0)
        return model.fit(data, y_train)

    coef_dense = fit_linear_svc(X_train).coef_
    sparse_svc = fit_linear_svc(X_sparse)
    coef_sorted = sparse_svc.coef_
    # make sure dense and sparse SVM give the same result
    assert_array_almost_equal(coef_dense, coef_sorted.toarray())

    def scramble_indices(X):
        # reverse each row's indices (and the matching data entries)
        new_data = []
        new_indices = []
        for start, stop in zip(X.indptr[:-1], X.indptr[1:]):
            new_data.extend(X.data[start:stop][::-1])
            new_indices.extend(X.indices[start:stop][::-1])
        return sparse.csr_matrix((new_data, new_indices, X.indptr), shape=X.shape)

    X_sparse_unsorted = scramble_indices(X_sparse)
    X_test_unsorted = scramble_indices(X_test)

    assert not X_sparse_unsorted.has_sorted_indices
    assert not X_test_unsorted.has_sorted_indices

    coef_unsorted = fit_linear_svc(X_sparse_unsorted).coef_
    # make sure unsorted indices give same result
    assert_array_almost_equal(coef_unsorted.toarray(), coef_sorted.toarray())
    assert_array_almost_equal(
        sparse_svc.predict_proba(X_test_unsorted), sparse_svc.predict_proba(X_test)
    )
|
||||
|
||||
|
||||
def test_svc_with_custom_kernel():
    """A callable dot-product kernel must predict like the built-in linear kernel."""

    def kfunc(x, y):
        return safe_sparse_dot(x, y.T)

    builtin_linear = svm.SVC(kernel="linear").fit(X_sp, Y)
    callable_linear = svm.SVC(kernel=kfunc).fit(X_sp, Y)

    expected = builtin_linear.predict(X_sp)
    actual = callable_linear.predict(X_sp)
    assert_array_equal(expected, actual)
|
||||
|
||||
|
||||
@skip_if_32bit
def test_svc_iris():
    """Test the sparse SVC with the iris dataset"""
    iris_dense = iris.data.toarray()
    for kernel in ("linear", "poly", "rbf"):
        sparse_model = svm.SVC(kernel=kernel).fit(iris.data, iris.target)
        dense_model = svm.SVC(kernel=kernel).fit(iris_dense, iris.target)

        assert_array_almost_equal(
            dense_model.support_vectors_, sparse_model.support_vectors_.toarray()
        )
        assert_array_almost_equal(
            dense_model.dual_coef_, sparse_model.dual_coef_.toarray()
        )
        assert_array_almost_equal(
            dense_model.predict(iris_dense), sparse_model.predict(iris.data)
        )
        # coef_ is only compared for the linear kernel.
        if kernel == "linear":
            assert_array_almost_equal(dense_model.coef_, sparse_model.coef_.toarray())
|
||||
|
||||
|
||||
def test_sparse_decision_function():
    """Test decision_function against a manual computation from coef_ and intercept_."""
    # Sanity check, test that decision_function implemented in python
    # returns the same as the one in libsvm

    # multi class:
    model = svm.SVC(kernel="linear", C=0.1, decision_function_shape="ovo")
    model.fit(iris.data, iris.target)

    manual_scores = safe_sparse_dot(iris.data, model.coef_.T) + model.intercept_
    assert_array_almost_equal(manual_scores, model.decision_function(iris.data))

    # binary: the same estimator is refitted on the dense toy sample
    model.fit(X, Y)
    manual_scores = np.dot(X, model.coef_.T) + model.intercept_
    prediction = model.predict(X)
    assert_array_almost_equal(manual_scores.ravel(), model.decision_function(X))

    # A positive decision value selects classes_[1], anything else classes_[0].
    class_index = (model.decision_function(X) > 0).astype(int).ravel()
    assert_array_almost_equal(prediction, model.classes_[class_index])

    expected = np.array([-1.0, -0.66, -1.0, 0.66, 1.0, 1.0])
    assert_array_almost_equal(model.decision_function(X), expected, 2)
|
||||
|
||||
|
||||
def test_error():
    """Test that it gives proper exception on deficient input"""
    # impossible value of C
    with pytest.raises(ValueError):
        svm.SVC(C=-1).fit(X, Y)

    # impossible value of nu
    bad_nu_model = svm.NuSVC(nu=0.0)
    with pytest.raises(ValueError):
        bad_nu_model.fit(X_sp, Y)

    # wrong dimensions for labels: one label fewer than there are samples
    truncated_labels = Y[:-1]
    with pytest.raises(ValueError):
        bad_nu_model.fit(X_sp, truncated_labels)

    # A default SVC fitted on valid input still predicts the expected labels.
    valid_model = svm.SVC()
    valid_model.fit(X_sp, Y)
    assert_array_equal(valid_model.predict(T), true_result)
|
||||
|
||||
|
||||
def test_linearsvc():
    """Dense and sparse LinearSVC fits must agree (similar to test_svc)."""
    dense_model = svm.LinearSVC(random_state=0).fit(X, Y)
    sparse_model = svm.LinearSVC(random_state=0).fit(X_sp, Y)

    assert sparse_model.fit_intercept

    assert_array_almost_equal(dense_model.coef_, sparse_model.coef_, decimal=4)
    assert_array_almost_equal(
        dense_model.intercept_, sparse_model.intercept_, decimal=4
    )
    assert_array_almost_equal(dense_model.predict(X), sparse_model.predict(X_sp))

    # Refit the same two estimators on the second sample and compare again.
    dense_model.fit(X2, Y2)
    sparse_model.fit(X2_sp, Y2)

    assert_array_almost_equal(dense_model.coef_, sparse_model.coef_, decimal=4)
    assert_array_almost_equal(
        dense_model.intercept_, sparse_model.intercept_, decimal=4
    )
|
||||
|
||||
|
||||
def test_linearsvc_iris():
    """Test the sparse LinearSVC with the iris dataset"""
    sparse_model = svm.LinearSVC(random_state=0).fit(iris.data, iris.target)
    dense_model = svm.LinearSVC(random_state=0).fit(iris.data.toarray(), iris.target)

    assert dense_model.fit_intercept == sparse_model.fit_intercept

    assert_array_almost_equal(dense_model.coef_, sparse_model.coef_, decimal=1)
    assert_array_almost_equal(
        dense_model.intercept_, sparse_model.intercept_, decimal=1
    )
    assert_array_almost_equal(
        dense_model.predict(iris.data.toarray()), sparse_model.predict(iris.data)
    )

    # check decision_function
    pred = np.argmax(sparse_model.decision_function(iris.data), 1)
    assert_array_almost_equal(pred, dense_model.predict(iris.data.toarray()))

    # sparsify the coefficients on both models and check that they still
    # produce the same results
    dense_model.sparsify()
    assert_array_equal(pred, dense_model.predict(iris.data))
    sparse_model.sparsify()
    assert_array_equal(pred, sparse_model.predict(iris.data))
|
||||
|
||||
|
||||
def test_weight():
    """Test class weights"""
    features, labels = make_classification(
        n_samples=200, n_features=100, weights=[0.833, 0.167], random_state=0
    )
    features = sparse.csr_matrix(features)

    # The first 180 samples are used for fitting, the last 20 for scoring.
    X_fit, y_fit = features[:180], labels[:180]
    X_eval, y_eval = features[180:], labels[180:]

    estimators = (
        linear_model.LogisticRegression(),
        svm.LinearSVC(random_state=0),
        svm.SVC(),
    )
    for estimator in estimators:
        estimator.set_params(class_weight={0: 5})
        estimator.fit(X_fit, y_fit)
        y_pred = estimator.predict(X_eval)
        # At least 11 of the 20 held-out samples must be predicted correctly.
        assert np.sum(y_pred == y_eval) >= 11
|
||||
|
||||
|
||||
def test_sample_weights():
    """Test weights on individual samples"""
    probe = [X[2]]
    model = svm.SVC()

    # Fitted without weights, the probe sample is predicted as class 1.
    model.fit(X_sp, Y)
    assert_array_equal(model.predict(probe), [1.0])

    # Weighting the last three samples 100x more than the first three
    # changes the prediction for the probe to class 2.
    sample_weight = [0.1] * 3 + [10] * 3
    model.fit(X_sp, Y, sample_weight=sample_weight)
    assert_array_equal(model.predict(probe), [2.0])
|
||||
|
||||
|
||||
def test_sparse_liblinear_intercept_handling():
    """Test that sparse liblinear honours intercept_scaling param"""
    # Delegates to the check defined in test_svm, applied to LinearSVC.
    estimator_class = svm.LinearSVC
    test_svm.test_dense_liblinear_intercept_handling(estimator_class)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("datasets_index", range(4))
@pytest.mark.parametrize("kernel", ["linear", "poly", "rbf", "sigmoid"])
@skip_if_32bit
def test_sparse_oneclasssvm(datasets_index, kernel):
    """Check that sparse OneClassSVM gives the same result as dense OneClassSVM"""
    # many class dataset:
    X_blobs, _ = make_blobs(n_samples=100, centers=10, random_state=0)
    X_blobs = sparse.csr_matrix(X_blobs)

    # Each case is (X_train, y_train, X_test); y_train is always None here.
    cases = [
        [X_sp, None, T],
        [X2_sp, None, T2],
        [X_blobs[:80], None, X_blobs[80:]],
        [iris.data, None, iris.data],
    ]
    X_train, y_train, X_test = cases[datasets_index]

    dense_model = svm.OneClassSVM(gamma=1, kernel=kernel)
    sparse_model = svm.OneClassSVM(gamma=1, kernel=kernel)
    check_svm_model_equal(dense_model, sparse_model, X_train, y_train, X_test)
|
||||
|
||||
|
||||
def test_sparse_realdata():
    """Dense and sparse linear SVC must agree exactly on a very sparse sample."""
    # Test on a subset from the 20newsgroups dataset.
    # This catches some bugs if input is not correctly converted into
    # sparse format or weights are not correctly initialized.

    data = np.array([0.03771744, 0.1003567, 0.01174647, 0.027069])
    indices = np.array([6, 5, 35, 31])
    # 81 CSR row pointers (80 rows): rows 7 and 39 hold one stored value
    # each, row 77 holds two, and every other row is empty.
    indptr = np.array([0] * 8 + [1] * 32 + [2] * 38 + [4] * 3)
    X = sparse.csr_matrix((data, indices, indptr))
    # One label per row, ten labels per line.
    y = np.array(
        [
            1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 0.0,
            1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 3.0, 0.0, 3.0, 0.0,
            1.0, 1.0, 3.0, 2.0, 3.0, 2.0, 0.0, 3.0, 1.0, 0.0,
            2.0, 1.0, 2.0, 0.0, 1.0, 0.0, 2.0, 3.0, 1.0, 3.0,
            0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 2.0, 2.0, 2.0,
            3.0, 2.0, 0.0, 3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 2.0,
            0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 0.0, 2.0, 2.0,
            1.0, 3.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 3.0,
        ]
    )

    dense_model = svm.SVC(kernel="linear").fit(X.toarray(), y)
    sparse_model = svm.SVC(kernel="linear").fit(sparse.coo_matrix(X), y)

    assert_array_equal(
        dense_model.support_vectors_, sparse_model.support_vectors_.toarray()
    )
    assert_array_equal(dense_model.dual_coef_, sparse_model.dual_coef_.toarray())
|
||||
|
||||
|
||||
def test_sparse_svc_clone_with_callable_kernel():
    """A cloned SVC with a callable kernel must work on sparse input."""
    # Test that the "dense_fit" is called even though we use sparse input
    # meaning that everything works fine.

    def sparse_linear_kernel(x, y):
        return x * y.T

    def dense_linear_kernel(x, y):
        return np.dot(x, y.T)

    template = svm.SVC(
        C=1, kernel=sparse_linear_kernel, probability=True, random_state=0
    )
    cloned = base.clone(template)

    cloned.fit(X_sp, Y)
    pred = cloned.predict(X_sp)
    # Only checks that the call succeeds; the result is not inspected.
    cloned.predict_proba(X_sp)

    dense_svm = svm.SVC(
        C=1, kernel=dense_linear_kernel, probability=True, random_state=0
    )
    pred_dense = dense_svm.fit(X, Y).predict(X)
    assert_array_equal(pred_dense, pred)
    # XXX : calling decision_function(X_sp) on the clone should be supported
|
||||
|
||||
|
||||
def test_timeout():
    """Fitting with max_iter=1 must emit the early-termination ConvergenceWarning."""
    model = svm.SVC(
        C=1, kernel=lambda x, y: x * y.T, probability=True, random_state=0, max_iter=1
    )
    # Regular expression matched against the warning text.
    expected_warning = (
        r"Solver terminated early \(max_iter=1\). Consider pre-processing "
        r"your data with StandardScaler or MinMaxScaler."
    )
    with pytest.warns(ConvergenceWarning, match=expected_warning):
        model.fit(X_sp, Y)
|
||||
|
||||
|
||||
def test_consistent_proba():
    """Two identically configured SVC fits must give the same probabilities."""
    probas = []
    for _ in range(2):
        model = svm.SVC(probability=True, max_iter=1, random_state=0)
        # max_iter=1 stops the solver early; that warning is expected here.
        with ignore_warnings(category=ConvergenceWarning):
            probas.append(model.fit(X, Y).predict_proba(X))
    proba_1, proba_2 = probas
    assert_array_almost_equal(proba_1, proba_2)
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user