open source pkg v1

This commit is contained in:
Vijay Yadev
2020-08-04 19:12:31 -04:00
parent bef213dba9
commit c389fc2c47
3708 changed files with 1624220 additions and 1 deletions

View File

@@ -0,0 +1,66 @@
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_AVERAGE_PREcISION_Hh_
#define DLIB_AVERAGE_PREcISION_Hh_
#include "average_precision_abstract.h"
#include <vector>
namespace dlib
{
    namespace impl
    {
        // A plain bool already is its own relevance flag.
        inline bool get_bool_part (const bool& flag)
        {
            return flag;
        }

        // For (payload, flag) pairs only the flag is of interest; the payload
        // is never looked at.
        template <typename T>
        bool get_bool_part (const std::pair<T,bool>& entry)
        {
            return entry.second;
        }
    }

// ----------------------------------------------------------------------------------------

    /*
        Returns the interpolated average precision of a ranked result list.

        items                  - the ranking, best match first.  Each element is either
                                 a bool or a std::pair<T,bool>; the bool says whether the
                                 element is relevant.
        missing_relevant_items - number of relevant items that never showed up in the
                                 ranking.  Each one is averaged in with a precision of 0.

        If there are no relevant items at all (none in items and none missing) the
        result is defined to be 1.
    */
    template <typename T, typename alloc>
    double average_precision (
        const std::vector<T,alloc>& items,
        unsigned long missing_relevant_items = 0
    )
    {
        using impl::get_bool_part;

        // hit_precision[k] is the raw precision measured at the (k+1)-th relevant item.
        std::vector<double> hit_precision;
        double num_relevant = 0;
        for (unsigned long rank = 1; rank <= items.size(); ++rank)
        {
            if (!get_bool_part(items[rank-1]))
                continue;

            ++num_relevant;
            hit_precision.push_back(num_relevant / rank);
        }

        // Walk the hits from last to first while tracking the running maximum.  That
        // running maximum is the interpolated precision, i.e. the best precision seen
        // at this or any higher recall level.
        double interpolated_sum = 0;
        double best_so_far = 0;
        for (std::vector<double>::size_type k = hit_precision.size(); k-- > 0; )
        {
            if (best_so_far < hit_precision[k])
                best_so_far = hit_precision[k];
            interpolated_sum += best_so_far;
        }

        // Relevant items that were never retrieved only enlarge the denominator.
        num_relevant += missing_relevant_items;

        if (num_relevant == 0)
            return 1;
        return interpolated_sum/num_relevant;
    }

// ----------------------------------------------------------------------------------------
}
#endif // DLIB_AVERAGE_PREcISION_Hh_

View File

@@ -0,0 +1,67 @@
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_AVERAGE_PREcISION_ABSTRACT_Hh_
#ifdef DLIB_AVERAGE_PREcISION_ABSTRACT_Hh_
#include <vector>
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename alloc
>
double average_precision (
const std::vector<bool,alloc>& items,
unsigned long missing_relevant_items = 0
);
/*!
ensures
- Interprets items as a list of relevant and non-relevant items in a response
from an information retrieval system. In particular, items with a true value
are relevant and false items are non-relevant. This function then returns
the average precision of the ranking of the given items. For example, the
ranking [true, true, true, true, false] would have an average precision of 1.
On the other hand, the ranking of [true false false true] would have an
average precision of 0.75 (because the first true has a precision of 1 and
the second true has a precision of 0.5, giving an average of 0.75).
- As a special case, if items contains no true elements and
missing_relevant_items == 0 then the average precision is considered to be 1.
- Note that we use the interpolated precision. That is, the interpolated
precision at a recall value r is set to the maximum precision obtained at any
higher recall value. Or in other words, we interpolate the precision/recall
curve so that precision is monotonically decreasing. Therefore, the average
precision value returned by this function is the area under this interpolated
precision/recall curve.
- This function will add in missing_relevant_items number of items with a
precision of zero into the average value returned. For example, the average
precision of the ranking [true, true] if there are 2 missing relevant items
is 0.5.
!*/
// ----------------------------------------------------------------------------------------
template <
typename T,
typename alloc
>
double average_precision (
const std::vector<std::pair<T,bool>,alloc>& items,
unsigned long missing_relevant_items = 0
);
/*!
ensures
- this function is equivalent to copying the bool values from items into a
std::vector<bool> and then calling the above average_precision() routine on
it and returning the result.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_AVERAGE_PREcISION_ABSTRACT_Hh_

View File

@@ -0,0 +1,186 @@
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_CCA_hh_
#define DLIB_CCA_hh_
#include "cca_abstract.h"
#include "../algs.h"
#include "../matrix.h"
#include "../sparse_vector.h"
#include "random_subset_selector.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
// Returns, for every column i, the correlation between column i of L and column i
// of R, computed as dot(L_i, R_i) / (|L_i| * |R_i|).  No mean is subtracted here, so
// the inputs are expected to be centered already.
template <
    typename T
    >
matrix<typename T::type,0,1> compute_correlations (
    const matrix_exp<T>& L,
    const matrix_exp<T>& R
)
{
    DLIB_ASSERT( L.size() > 0 && R.size() > 0 && L.nr() == R.nr(),
        "\t matrix compute_correlations()"
        << "\n\t Invalid inputs were given to this function."
        << "\n\t L.size(): " << L.size()
        << "\n\t R.size(): " << R.size()
        << "\n\t L.nr(): " << L.nr()
        << "\n\t R.nr(): " << R.nr()
        );

    typedef typename T::type scalar;

    // Euclidean norm of every column of L and of R.
    const matrix<scalar> norms_of_l = sqrt(diag(trans(L)*L));
    const matrix<scalar> norms_of_r = sqrt(diag(trans(R)*R));

    // Dot product of each column of R with the matching column of L ...
    matrix<scalar> correlations = diag(trans(R)*L);

    // ... divided by the product of the two column norms.
    correlations = pointwise_multiply(correlations,
                                      reciprocal(pointwise_multiply(norms_of_l, norms_of_r)));
    return correlations;
}
// ----------------------------------------------------------------------------------------
// Shared implementation behind the cca() overloads in this file.
//
// L, R                    - the two paired data sets.  They are handed directly to
//                           svd_fast(); the callers in this file pass either dense
//                           matrix<T> objects or std::vectors of sparse vectors.
// Ltrans, Rtrans          - outputs, overwritten with the transforms computed for L and R.
// num_correlations,
// extra_rank              - svd_fast() is asked for num_correlations+extra_rank singular
//                           vectors of each input.
// q                       - forwarded unchanged to svd_fast().
// num_output_correlations - how many columns are kept in Ltrans/Rtrans and how many
//                           values are returned.  Must be >= 1: it is unsigned and
//                           range(0, num_output_correlations-1) is formed from it.
// regularization          - constant added to every singular value of L and R before
//                           they are thresholded and inverted.
//
// Returns D, the leading num_output_correlations singular values of the small coupling
// matrix built below (see the note just above the return statement).
template <
typename matrix_type,
typename T
>
matrix<T,0,1> impl_cca (
const matrix_type& L,
const matrix_type& R,
matrix<T>& Ltrans,
matrix<T>& Rtrans,
unsigned long num_correlations,
unsigned long extra_rank,
unsigned long q,
unsigned long num_output_correlations,
double regularization
)
{
matrix<T> Ul, Vl;
matrix<T> Ur, Vr;
matrix<T> U, V;
matrix<T,0,1> Dr, Dl, D;
// Note that we add a few more singular vectors in because it helps improve the
// final results if we run this part with a little higher rank than the final SVD.
svd_fast(L, Ul, Dl, Vl, num_correlations+extra_rank, q);
svd_fast(R, Ur, Dr, Vr, num_correlations+extra_rank, q);
// Zero out singular values that are essentially zero so they don't cause numerical
// difficulties in the code below.
// The threshold is relative: 100 machine epsilons times the largest singular value
// found in either input.
const double eps = std::numeric_limits<T>::epsilon()*std::max(max(Dr),max(Dl))*100;
Dl = round_zeros(Dl+regularization,eps);
Dr = round_zeros(Dr+regularization,eps);
// This matrix is really small so we can do a normal full SVD on it. Note that we
// also throw away the columns of Ul and Ur corresponding to zero singular values.
svd3(diagm(Dl>0)*tmp(trans(Ul)*Ur)*diagm(Dr>0), U, D, V);
// now throw away extra columns of the transformations. We do this in a way
// that keeps the directions that have the highest correlations.
// U is sorted against a scratch copy of D so that D itself is still in its original
// order when V is sorted against it (presumably rsort_columns() reorders its second
// argument as well).
matrix<T,0,1> temp = D;
rsort_columns(U, temp);
rsort_columns(V, D);
U = colm(U, range(0, num_output_correlations-1));
V = colm(V, range(0, num_output_correlations-1));
D = rowm(D, range(0, num_output_correlations-1));
// Map the kept directions back into the original input spaces of L and R.
Ltrans = Vl*inv(diagm(Dl))*U;
Rtrans = Vr*inv(diagm(Dr))*V;
// Note that the D matrix contains the correlation values for the transformed
// vectors. However, when the L and R matrices have rank higher than
// num_correlations+extra_rank then the values in D become only approximate.
return D;
}
// ----------------------------------------------------------------------------------------
// Canonical correlation analysis for dense inputs whose rows are the paired samples.
// Validates the arguments, works out how many output dimensions can actually be
// produced and hands the real work to impl_cca().
template <typename T>
matrix<T,0,1> cca (
    const matrix<T>& L,
    const matrix<T>& R,
    matrix<T>& Ltrans,
    matrix<T>& Rtrans,
    unsigned long num_correlations,
    unsigned long extra_rank = 5,
    unsigned long q = 2,
    double regularization = 0
)
{
    DLIB_ASSERT( num_correlations > 0 && L.size() > 0 && R.size() > 0 && L.nr() == R.nr() &&
                 regularization >= 0,
        "\t matrix cca()"
        << "\n\t Invalid inputs were given to this function."
        << "\n\t num_correlations: " << num_correlations
        << "\n\t regularization: " << regularization
        << "\n\t L.size(): " << L.size()
        << "\n\t R.size(): " << R.size()
        << "\n\t L.nr(): " << L.nr()
        << "\n\t R.nr(): " << R.nr()
        );

    // You can't get more output dimensions than there are samples (rows) or input
    // dimensions (columns of either matrix).
    const unsigned long available_dims =
        static_cast<unsigned long>(std::min(R.nr(), std::min(L.nc(), R.nc())));
    const unsigned long num_outputs = std::min(num_correlations, available_dims);

    return impl_cca(L, R, Ltrans, Rtrans,
                    num_correlations, extra_rank, q,
                    num_outputs, regularization);
}
// ----------------------------------------------------------------------------------------
// Canonical correlation analysis for sparse inputs: L and R are read as matrices
// with L.size() rows, each row given by one sparse vector.  Validates the arguments,
// caps the number of outputs and hands the real work to impl_cca().
template <typename sparse_vector_type, typename T>
matrix<T,0,1> cca (
    const std::vector<sparse_vector_type>& L,
    const std::vector<sparse_vector_type>& R,
    matrix<T>& Ltrans,
    matrix<T>& Rtrans,
    unsigned long num_correlations,
    unsigned long extra_rank = 5,
    unsigned long q = 2,
    double regularization = 0
)
{
    DLIB_ASSERT( num_correlations > 0 && L.size() == R.size() &&
                 max_index_plus_one(L) > 0 && max_index_plus_one(R) > 0 &&
                 regularization >= 0,
        "\t matrix cca()"
        << "\n\t Invalid inputs were given to this function."
        << "\n\t num_correlations: " << num_correlations
        << "\n\t regularization: " << regularization
        << "\n\t L.size(): " << L.size()
        << "\n\t R.size(): " << R.size()
        << "\n\t max_index_plus_one(L): " << max_index_plus_one(L)
        << "\n\t max_index_plus_one(R): " << max_index_plus_one(R)
        );

    // The smaller of the two input dimensionalities ...
    const unsigned long smallest_width = std::min(max_index_plus_one(L), max_index_plus_one(R));
    // ... and the number of samples both bound how many outputs can be produced.
    const unsigned long output_limit = std::min<unsigned long>(R.size(), smallest_width);
    const unsigned long num_outputs = std::min(num_correlations, output_limit);

    return impl_cca(L, R, Ltrans, Rtrans,
                    num_correlations, extra_rank, q,
                    num_outputs, regularization);
}
// ----------------------------------------------------------------------------------------
// Convenience overload of cca() for sparse vectors stored in random_subset_selector
// objects rather than in std::vectors.  Both selectors are converted with
// to_std_vector() and every argument, including regularization, is forwarded to the
// std::vector overload, which also performs the argument checking.
//
// regularization is a defaulted trailing parameter, so existing calls that stop at q
// keep compiling and keep their previous behavior (regularization == 0).
template <typename sparse_vector_type, typename Rand_type, typename T>
matrix<T,0,1> cca (
    const random_subset_selector<sparse_vector_type,Rand_type>& L,
    const random_subset_selector<sparse_vector_type,Rand_type>& R,
    matrix<T>& Ltrans,
    matrix<T>& Rtrans,
    unsigned long num_correlations,
    unsigned long extra_rank = 5,
    unsigned long q = 2,
    double regularization = 0
)
{
    return cca(L.to_std_vector(), R.to_std_vector(), Ltrans, Rtrans,
               num_correlations, extra_rank, q, regularization);
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_CCA_hh_

View File

@@ -0,0 +1,191 @@
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_CCA_AbSTRACT_Hh_
#ifdef DLIB_CCA_AbSTRACT_Hh_
#include "../matrix/matrix_la_abstract.h"
#include "random_subset_selector_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename T
>
matrix<typename T::type,0,1> compute_correlations (
const matrix_exp<T>& L,
const matrix_exp<T>& R
);
/*!
requires
- L.size() > 0
- R.size() > 0
- L.nr() == R.nr()
ensures
- This function treats L and R as sequences of paired row vectors. It
then computes the correlation values between the elements of these
row vectors. In particular, we return a vector COR such that:
- COR.size() == L.nc()
- for all valid i:
- COR(i) == the correlation coefficient between the following sequence
of paired numbers: (L(k,i), R(k,i)) for k: 0 <= k < L.nr().
Therefore, COR(i) is a value between -1 and 1 inclusive where 1
indicates perfect correlation and -1 perfect anti-correlation. Note
that this function assumes the input data vectors have been centered
(i.e. made to have zero mean). If this is not the case then it will
report inaccurate results.
!*/
// ----------------------------------------------------------------------------------------
template <
typename T
>
matrix<T,0,1> cca (
const matrix<T>& L,
const matrix<T>& R,
matrix<T>& Ltrans,
matrix<T>& Rtrans,
unsigned long num_correlations,
unsigned long extra_rank = 5,
unsigned long q = 2,
double regularization = 0
);
/*!
requires
- num_correlations > 0
- L.size() > 0
- R.size() > 0
- L.nr() == R.nr()
- regularization >= 0
ensures
- This function performs a canonical correlation analysis between the row
vectors in L and R. That is, it finds two transformation matrices, Ltrans
and Rtrans, such that row vectors in the transformed matrices L*Ltrans and
R*Rtrans are as correlated as possible. That is, we try to find two transforms
such that the correlation values returned by compute_correlations(L*Ltrans, R*Rtrans)
would be maximized.
- Let N == min(num_correlations, min(R.nr(),min(L.nc(),R.nc())))
(This is the actual number of elements in the transformed vectors.
Therefore, note that you can't get more outputs than there are rows or
columns in the input matrices.)
- #Ltrans.nr() == L.nc()
- #Ltrans.nc() == N
- #Rtrans.nr() == R.nc()
- #Rtrans.nc() == N
- This function assumes the data vectors in L and R have already been centered
(i.e. we assume the vectors have zero means). However, in many cases it is
fine to use uncentered data with cca(). But if it is important for your
problem then you should center your data before passing it to cca().
- This function works with reduced rank approximations of the L and R matrices.
This makes it fast when working with large matrices. In particular, we use
the svd_fast() routine to find reduced rank representations of the input
matrices by calling it as follows: svd_fast(L, U,D,V, num_correlations+extra_rank, q)
and similarly for R. This means that you can use the extra_rank and q
arguments to cca() to influence the accuracy of the reduced rank
approximation. However, the default values should work fine for most
problems.
- returns an estimate of compute_correlations(L*#Ltrans, R*#Rtrans). The
returned vector should exactly match the output of compute_correlations()
when the reduced rank approximation to L and R is accurate and regularization
is set to 0. However, if this is not the case then the return value of this
function will deviate from compute_correlations(L*#Ltrans, R*#Rtrans). This
deviation can be used to check if the reduced rank approximation is working
or you need to increase extra_rank.
- The dimensions of the output vectors produced by L*#Ltrans or R*#Rtrans are
ordered such that the dimensions with the highest correlations come first.
That is, after applying the transforms produced by cca() to a set of vectors
you will find that dimension 0 has the highest correlation, then dimension 1
has the next highest, and so on. This also means that the list of numbers
returned from cca() will always be listed in decreasing order.
- This function performs the ridge regression version of Canonical Correlation
Analysis when regularization is set to a value > 0. In particular, larger
values indicate the solution should be more heavily regularized. This can be
useful when the dimensionality of the data is larger than the number of
samples.
- A good discussion of CCA can be found in the paper "Canonical Correlation
Analysis" by David Weenink. In particular, this function is implemented
using equations 29 and 30 from his paper. We also use the idea of doing CCA
on a reduced rank approximation of L and R as suggested by Paramveer S.
Dhillon in his paper "Two Step CCA: A new spectral method for estimating
vector models of words".
!*/
// ----------------------------------------------------------------------------------------
template <
typename sparse_vector_type,
typename T
>
matrix<T,0,1> cca (
const std::vector<sparse_vector_type>& L,
const std::vector<sparse_vector_type>& R,
matrix<T>& Ltrans,
matrix<T>& Rtrans,
unsigned long num_correlations,
unsigned long extra_rank = 5,
unsigned long q = 2,
double regularization = 0
);
/*!
requires
- num_correlations > 0
- L.size() == R.size()
- max_index_plus_one(L) > 0 && max_index_plus_one(R) > 0
(i.e. L and R can't represent empty matrices)
- L and R must contain sparse vectors (see the top of dlib/svm/sparse_vector_abstract.h
for a definition of sparse vector)
- regularization >= 0
ensures
- This is just an overload of the cca() function defined above. Except in this
case we take a sparse representation of the input L and R matrices rather than
dense matrices. Therefore, in this case, we interpret L and R as matrices
with L.size() rows, where each row is defined by a sparse vector. So this
function does exactly the same thing as the above cca().
- Note that you can apply the output transforms to a sparse vector with the
following code:
sparse_matrix_vector_multiply(trans(Ltrans), your_sparse_vector)
!*/
// ----------------------------------------------------------------------------------------
template <
typename sparse_vector_type,
typename Rand_type,
typename T
>
matrix<T,0,1> cca (
const random_subset_selector<sparse_vector_type,Rand_type>& L,
const random_subset_selector<sparse_vector_type,Rand_type>& R,
matrix<T>& Ltrans,
matrix<T>& Rtrans,
unsigned long num_correlations,
unsigned long extra_rank = 5,
unsigned long q = 2,
double regularization = 0
);
/*!
requires
- num_correlations > 0
- L.size() == R.size()
- max_index_plus_one(L) > 0 && max_index_plus_one(R) > 0
(i.e. L and R can't represent empty matrices)
- L and R must contain sparse vectors (see the top of dlib/svm/sparse_vector_abstract.h
for a definition of sparse vector)
- regularization >= 0
ensures
- returns cca(L.to_std_vector(), R.to_std_vector(), Ltrans, Rtrans, num_correlations, extra_rank, q, regularization)
(i.e. this is just a convenience function for calling the cca() routine when
your sparse vectors are contained inside a random_subset_selector rather than
a std::vector)
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_CCA_AbSTRACT_Hh_

View File

@@ -0,0 +1,541 @@
// Copyright (C) 2009 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DPCA_h_
#define DLIB_DPCA_h_
#include "dpca_abstract.h"
#include <limits>
#include <cmath>
#include "../algs.h"
#include "../matrix.h"
#include <iostream>
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename matrix_type
>
class discriminant_pca
{
/*!
INITIAL VALUE
- vect_size == 0
- total_count == 0
- between_count == 0
- within_count == 0
- between_weight == 1
- within_weight == 1
CONVENTION
- vect_size == in_vector_size()
- total_count == the number of times add_to_total_variance() has been called.
- within_count == the number of times add_to_within_class_variance() has been called.
- between_count == the number of times add_to_between_class_variance() has been called.
- between_weight == between_class_weight()
- within_weight == within_class_weight()
- if (total_count != 0)
- total_sum == the sum of all vectors given to add_to_total_variance()
- the covariance of all the elements given to add_to_total_variance() is given
by:
- let avg == total_sum/total_count
- covariance == total_cov/total_count - avg*trans(avg)
- if (within_count != 0)
- within_cov/within_count == the normalized within class scatter matrix
- if (between_count != 0)
- between_cov/between_count == the normalized between class scatter matrix
!*/
public:
struct discriminant_pca_error : public error
{
discriminant_pca_error(const std::string& message): error(message) {}
};
typedef typename matrix_type::mem_manager_type mem_manager_type;
typedef typename matrix_type::type scalar_type;
typedef typename matrix_type::layout_type layout_type;
typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix;
typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix;
discriminant_pca (
)
{
clear();
}
void clear(
)
{
total_count = 0;
between_count = 0;
within_count = 0;
vect_size = 0;
between_weight = 1;
within_weight = 1;
total_sum.set_size(0);
between_cov.set_size(0,0);
total_cov.set_size(0,0);
within_cov.set_size(0,0);
}
long in_vector_size (
) const
{
return vect_size;
}
void set_within_class_weight (
scalar_type weight
)
{
// make sure requires clause is not broken
DLIB_ASSERT(weight >= 0,
"\t void discriminant_pca::set_within_class_weight()"
<< "\n\t You can't use negative weight values"
<< "\n\t weight: " << weight
<< "\n\t this: " << this
);
within_weight = weight;
}
scalar_type within_class_weight (
) const
{
return within_weight;
}
void set_between_class_weight (
scalar_type weight
)
{
// make sure requires clause is not broken
DLIB_ASSERT(weight >= 0,
"\t void discriminant_pca::set_between_class_weight()"
<< "\n\t You can't use negative weight values"
<< "\n\t weight: " << weight
<< "\n\t this: " << this
);
between_weight = weight;
}
scalar_type between_class_weight (
) const
{
return between_weight;
}
template <typename EXP1, typename EXP2>
void add_to_within_class_variance(
const matrix_exp<EXP1>& x,
const matrix_exp<EXP2>& y
)
{
// make sure requires clause is not broken
DLIB_ASSERT(is_col_vector(x) && is_col_vector(y) &&
x.size() == y.size() &&
(in_vector_size() == 0 || x.size() == in_vector_size()),
"\t void discriminant_pca::add_to_within_class_variance()"
<< "\n\t Invalid inputs were given to this function"
<< "\n\t is_col_vector(x): " << is_col_vector(x)
<< "\n\t is_col_vector(y): " << is_col_vector(y)
<< "\n\t x.size(): " << x.size()
<< "\n\t y.size(): " << y.size()
<< "\n\t in_vector_size(): " << in_vector_size()
<< "\n\t this: " << this
);
vect_size = x.size();
if (within_count == 0)
{
within_cov = (x-y)*trans(x-y);
}
else
{
within_cov += (x-y)*trans(x-y);
}
++within_count;
}
template <typename EXP1, typename EXP2>
void add_to_between_class_variance(
const matrix_exp<EXP1>& x,
const matrix_exp<EXP2>& y
)
{
// make sure requires clause is not broken
DLIB_ASSERT(is_col_vector(x) && is_col_vector(y) &&
x.size() == y.size() &&
(in_vector_size() == 0 || x.size() == in_vector_size()),
"\t void discriminant_pca::add_to_between_class_variance()"
<< "\n\t Invalid inputs were given to this function"
<< "\n\t is_col_vector(x): " << is_col_vector(x)
<< "\n\t is_col_vector(y): " << is_col_vector(y)
<< "\n\t x.size(): " << x.size()
<< "\n\t y.size(): " << y.size()
<< "\n\t in_vector_size(): " << in_vector_size()
<< "\n\t this: " << this
);
vect_size = x.size();
if (between_count == 0)
{
between_cov = (x-y)*trans(x-y);
}
else
{
between_cov += (x-y)*trans(x-y);
}
++between_count;
}
template <typename EXP>
void add_to_total_variance(
const matrix_exp<EXP>& x
)
{
// make sure requires clause is not broken
DLIB_ASSERT(is_col_vector(x) && (in_vector_size() == 0 || x.size() == in_vector_size()),
"\t void discriminant_pca::add_to_total_variance()"
<< "\n\t Invalid inputs were given to this function"
<< "\n\t is_col_vector(x): " << is_col_vector(x)
<< "\n\t in_vector_size(): " << in_vector_size()
<< "\n\t x.size(): " << x.size()
<< "\n\t this: " << this
);
vect_size = x.size();
if (total_count == 0)
{
total_cov = x*trans(x);
total_sum = x;
}
else
{
total_cov += x*trans(x);
total_sum += x;
}
++total_count;
}
const general_matrix dpca_matrix (
const double eps = 0.99
) const
{
general_matrix dpca_mat;
general_matrix eigenvalues;
dpca_matrix(dpca_mat, eigenvalues, eps);
return dpca_mat;
}
const general_matrix dpca_matrix_of_size (
const long num_rows
)
{
// make sure requires clause is not broken
DLIB_ASSERT(0 < num_rows && num_rows <= in_vector_size(),
"\t general_matrix discriminant_pca::dpca_matrix_of_size()"
<< "\n\t Invalid inputs were given to this function"
<< "\n\t num_rows: " << num_rows
<< "\n\t in_vector_size(): " << in_vector_size()
<< "\n\t this: " << this
);
general_matrix dpca_mat;
general_matrix eigenvalues;
dpca_matrix_of_size(dpca_mat, eigenvalues, num_rows);
return dpca_mat;
}
void dpca_matrix (
general_matrix& dpca_mat,
general_matrix& eigenvalues,
const double eps = 0.99
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(0 < eps && eps <= 1 && in_vector_size() != 0,
"\t void discriminant_pca::dpca_matrix()"
<< "\n\t Invalid inputs were given to this function"
<< "\n\t eps: " << eps
<< "\n\t in_vector_size(): " << in_vector_size()
<< "\n\t this: " << this
);
compute_dpca_matrix(dpca_mat, eigenvalues, eps, 0);
}
void dpca_matrix_of_size (
general_matrix& dpca_mat,
general_matrix& eigenvalues,
const long num_rows
)
{
// make sure requires clause is not broken
DLIB_ASSERT(0 < num_rows && num_rows <= in_vector_size(),
"\t general_matrix discriminant_pca::dpca_matrix_of_size()"
<< "\n\t Invalid inputs were given to this function"
<< "\n\t num_rows: " << num_rows
<< "\n\t in_vector_size(): " << in_vector_size()
<< "\n\t this: " << this
);
compute_dpca_matrix(dpca_mat, eigenvalues, 1, num_rows);
}
void swap (
discriminant_pca& item
)
{
using std::swap;
swap(total_cov, item.total_cov);
swap(total_sum, item.total_sum);
swap(total_count, item.total_count);
swap(vect_size, item.vect_size);
swap(between_cov, item.between_cov);
swap(between_count, item.between_count);
swap(between_weight, item.between_weight);
swap(within_cov, item.within_cov);
swap(within_count, item.within_count);
swap(within_weight, item.within_weight);
}
friend void deserialize (
discriminant_pca& item,
std::istream& in
)
{
deserialize( item.total_cov, in);
deserialize( item.total_sum, in);
deserialize( item.total_count, in);
deserialize( item.vect_size, in);
deserialize( item.between_cov, in);
deserialize( item.between_count, in);
deserialize( item.between_weight, in);
deserialize( item.within_cov, in);
deserialize( item.within_count, in);
deserialize( item.within_weight, in);
}
friend void serialize (
const discriminant_pca& item,
std::ostream& out
)
{
serialize( item.total_cov, out);
serialize( item.total_sum, out);
serialize( item.total_count, out);
serialize( item.vect_size, out);
serialize( item.between_cov, out);
serialize( item.between_count, out);
serialize( item.between_weight, out);
serialize( item.within_cov, out);
serialize( item.within_count, out);
serialize( item.within_weight, out);
}
discriminant_pca operator+ (
const discriminant_pca& item
) const
{
// make sure requires clause is not broken
DLIB_ASSERT((in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size()) &&
between_class_weight() == item.between_class_weight() &&
within_class_weight() == item.within_class_weight(),
"\t discriminant_pca discriminant_pca::operator+()"
<< "\n\t The two discriminant_pca objects being added must have compatible parameters"
<< "\n\t in_vector_size(): " << in_vector_size()
<< "\n\t item.in_vector_size(): " << item.in_vector_size()
<< "\n\t between_class_weight(): " << between_class_weight()
<< "\n\t item.between_class_weight(): " << item.between_class_weight()
<< "\n\t within_class_weight(): " << within_class_weight()
<< "\n\t item.within_class_weight(): " << item.within_class_weight()
<< "\n\t this: " << this
);
discriminant_pca temp(item);
// We need to make sure to ignore empty matrices. That's what these if statements
// are for.
if (total_count != 0 && temp.total_count != 0)
{
temp.total_cov += total_cov;
temp.total_sum += total_sum;
temp.total_count += total_count;
}
else if (total_count != 0)
{
temp.total_cov = total_cov;
temp.total_sum = total_sum;
temp.total_count = total_count;
}
if (between_count != 0 && temp.between_count != 0)
{
temp.between_cov += between_cov;
temp.between_count += between_count;
}
else if (between_count != 0)
{
temp.between_cov = between_cov;
temp.between_count = between_count;
}
if (within_count != 0 && temp.within_count != 0)
{
temp.within_cov += within_cov;
temp.within_count += within_count;
}
else if (within_count != 0)
{
temp.within_cov = within_cov;
temp.within_count = within_count;
}
return temp;
}
discriminant_pca& operator+= (
const discriminant_pca& rhs
)
{
(*this + rhs).swap(*this);
return *this;
}
private:
void compute_dpca_matrix (
general_matrix& dpca_mat,
general_matrix& eigenvalues,
const double eps,
long num_rows
) const
{
general_matrix cov;
// now combine the three measures of variance into a single matrix by using the
// within_weight and between_weight weights.
cov = get_total_covariance_matrix();
if (within_count != 0)
cov -= within_weight*within_cov/within_count;
if (between_count != 0)
cov += between_weight*between_cov/between_count;
eigenvalue_decomposition<general_matrix> eig(make_symmetric(cov));
eigenvalues = eig.get_real_eigenvalues();
dpca_mat = eig.get_pseudo_v();
// sort the eigenvalues and eigenvectors so that the biggest eigenvalues come first
rsort_columns(dpca_mat, eigenvalues);
long num_vectors = 0;
if (num_rows == 0)
{
// Some of the eigenvalues might be negative. So first lets zero those out
// so they won't get considered.
eigenvalues = pointwise_multiply(eigenvalues > 0, eigenvalues);
// figure out how many eigenvectors we want in our dpca matrix
const double thresh = sum(eigenvalues)*eps;
double total = 0;
for (long r = 0; r < eigenvalues.size() && total < thresh; ++r)
{
// Don't even think about looking at eigenvalues that are 0. If we go this
// far then we have all we need.
if (eigenvalues(r) == 0)
break;
++num_vectors;
total += eigenvalues(r);
}
if (num_vectors == 0)
throw discriminant_pca_error("While performing discriminant_pca, all eigenvalues were negative or 0");
}
else
{
num_vectors = num_rows;
}
// So now we know we want to use num_vectors of the first eigenvectors. So
// pull those out and discard the rest.
dpca_mat = trans(colm(dpca_mat,range(0,num_vectors-1)));
// also clip off the eigenvalues we aren't using
eigenvalues = rowm(eigenvalues, range(0,num_vectors-1));
}
general_matrix get_total_covariance_matrix (
) const
/*!
ensures
- returns the covariance matrix of all the data given to the add_to_total_variance()
!*/
{
// if we don't even know the dimensionality of the vectors we are dealing
// with then just return an empty matrix
if (vect_size == 0)
return general_matrix();
// we know the vector size but we have zero total covariance.
if (total_count == 0)
{
general_matrix temp(vect_size,vect_size);
temp = 0;
return temp;
}
// In this case we actually have something to make a total covariance matrix out of.
// So do that.
column_matrix avg = total_sum/total_count;
return total_cov/total_count - avg*trans(avg);
}
general_matrix total_cov;
column_matrix total_sum;
scalar_type total_count;
long vect_size;
general_matrix between_cov;
scalar_type between_count;
scalar_type between_weight;
general_matrix within_cov;
scalar_type within_count;
scalar_type within_weight;
};
// Non-member swap so that generic code using an unqualified swap(a,b) call picks up
// the member-wise discriminant_pca::swap().
template <typename matrix_type>
inline void swap (
    discriminant_pca<matrix_type>& a,
    discriminant_pca<matrix_type>& b
)
{
    a.swap(b);
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_DPCA_h_

View File

@@ -0,0 +1,365 @@
// Copyright (C) 2009 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_DPCA_ABSTRaCT_
#ifdef DLIB_DPCA_ABSTRaCT_
#include <limits>
#include <cmath>
#include "../matrix/matrix_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename matrix_type
>
class discriminant_pca
{
/*!
REQUIREMENTS ON matrix_type
Must be some type of dlib::matrix.
INITIAL VALUE
- in_vector_size() == 0
- between_class_weight() == 1
- within_class_weight() == 1
WHAT THIS OBJECT REPRESENTS
This object implements the Discriminant PCA technique described in the paper:
A New Discriminant Principal Component Analysis Method with Partial Supervision (2009)
by Dan Sun and Daoqiang Zhang
This algorithm is basically a straightforward generalization of the classical PCA
technique to handle partially labeled data. It is useful if you want to learn a linear
dimensionality reduction rule using a bunch of data that is partially labeled.
It functions by estimating three different scatter matrices. The first is the total scatter
matrix St (i.e. the total data covariance matrix), the second is the between class scatter
matrix Sb (basically a measure of the variance between data of different classes) and the
third is the within class scatter matrix Sw (a measure of the variance of data within the
same classes).
Once these three matrices are estimated they are combined according to the following equation:
S = St + a*Sb - b*Sw
Where a and b are user supplied weights (a == between_class_weight() and
b == within_class_weight()). Then the largest eigenvalues of the S matrix are
computed and their associated eigenvectors are returned as the output of this algorithm.
That is, the desired linear dimensionality reduction is given by the matrix with these
eigenvectors stored in its rows.
Note that if a and b are set to 0 (or no labeled data is provided) then the output transformation
matrix is the same as the one produced by the classical PCA algorithm.
!*/
public:
struct discriminant_pca_error : public error;
/*!
This exception is thrown if there is some error that prevents us from creating
a DPCA matrix.
!*/
typedef typename matrix_type::mem_manager_type mem_manager_type;
typedef typename matrix_type::type scalar_type;
typedef typename matrix_type::layout_type layout_type;
typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix;
typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix;
discriminant_pca (
);
/*!
ensures
- this object is properly initialized
!*/
void clear(
);
/*!
ensures
- #*this has its initial value
!*/
long in_vector_size (
) const;
/*!
ensures
- if (this object has been presented with any input vectors) then
- returns the dimension of the column vectors used with this object
- else
- returns 0
!*/
void set_within_class_weight (
scalar_type weight
);
/*!
requires
- weight >= 0
ensures
- #within_class_weight() == weight
!*/
scalar_type within_class_weight (
) const;
/*!
ensures
- returns the weight used when combining the within class scatter matrix with
the other scatter matrices.
!*/
void set_between_class_weight (
scalar_type weight
);
/*!
requires
- weight >= 0
ensures
- #between_class_weight() == weight
!*/
scalar_type between_class_weight (
) const;
/*!
ensures
- returns the weight used when combining the between class scatter matrix with
the other scatter matrices.
!*/
void add_to_within_class_variance(
const matrix_exp& x,
const matrix_exp& y
);
/*!
requires
- is_col_vector(x) == true
- is_col_vector(y) == true
- x.size() == y.size()
- if (in_vector_size() != 0) then
- x.size() == y.size() == in_vector_size()
ensures
- #in_vector_size() == x.size()
- Adds (x-y)*trans(x-y) to the within class scatter matrix.
(i.e. the direction given by (x-y) is recorded as being a direction associated
with within class variance and is therefore unimportant and will be weighted
less in the final dimensionality reduction)
!*/
void add_to_between_class_variance(
const matrix_exp& x,
const matrix_exp& y
);
/*!
requires
- is_col_vector(x) == true
- is_col_vector(y) == true
- x.size() == y.size()
- if (in_vector_size() != 0) then
- x.size() == y.size() == in_vector_size()
ensures
- #in_vector_size() == x.size()
- Adds (x-y)*trans(x-y) to the between class scatter matrix.
(i.e. the direction given by (x-y) is recorded as being a direction associated
with between class variance and is therefore important and will be weighted
higher in the final dimensionality reduction)
!*/
void add_to_total_variance(
const matrix_exp& x
);
/*!
requires
- is_col_vector(x) == true
- if (in_vector_size() != 0) then
- x.size() == in_vector_size()
ensures
- #in_vector_size() == x.size()
- let M denote the centroid (or mean) of all the data. Then this function
adds (x-M)*trans(x-M) to the total scatter matrix.
(i.e. the direction given by (x-M) is recorded as being a direction associated
with unlabeled variance and is therefore of default importance and will be weighted
as described in the discriminant_pca class description.)
!*/
const general_matrix dpca_matrix (
const double eps = 0.99
) const;
/*!
requires
- 0 < eps <= 1
- in_vector_size() != 0
(i.e. you have to have given this object some data)
ensures
- computes and returns the matrix MAT given by dpca_matrix(MAT,eigen,eps).
That is, this function returns the dpca_matrix computed by the function
defined below.
- Note that MAT is the desired linear transformation matrix. That is,
multiplying a vector by MAT performs the desired linear dimensionality reduction.
throws
- discriminant_pca_error
This exception is thrown if we are unable to create the dpca_matrix for some
reason. For example, if only within class examples have been given or
within_class_weight() is very large then all eigenvalues will be negative and
that prevents this algorithm from working properly.
!*/
void dpca_matrix (
general_matrix& dpca_mat,
general_matrix& eigenvalues,
const double eps = 0.99
) const;
/*!
requires
- 0 < eps <= 1
- in_vector_size() != 0
(i.e. you have to have given this object some data)
ensures
- is_col_vector(#eigenvalues) == true
- #dpca_mat.nr() == #eigenvalues.size()
- #dpca_mat.nc() == in_vector_size()
- rowm(#dpca_mat,i) represents the ith eigenvector of the S matrix described
in the class description and its eigenvalue is given by #eigenvalues(i).
- all values in #eigenvalues are > 0. Moreover, the eigenvalues are in
sorted order with the largest eigenvalue stored at #eigenvalues(0).
- (#dpca_mat)*trans(#dpca_mat) == identity_matrix.
(i.e. the rows of the dpca_matrix are all unit length vectors and are mutually
orthogonal)
- Note that #dpca_mat is the desired linear transformation matrix. That is,
multiplying a vector by #dpca_mat performs the desired linear dimensionality
reduction.
- sum(#eigenvalues) will be equal to about eps times the total sum of all
positive eigenvalues in the S matrix described in this class's description.
This means that eps is a number that controls how "lossy" the dimensionality
reduction will be. Large values of eps result in more output dimensions
while smaller values result in fewer.
throws
- discriminant_pca_error
This exception is thrown if we are unable to create the dpca_matrix for some
reason. For example, if only within class examples have been given or
within_class_weight() is very large then all eigenvalues will be negative and
that prevents this algorithm from working properly.
!*/
const general_matrix dpca_matrix_of_size (
const long num_rows
);
/*!
requires
- 0 < num_rows <= in_vector_size()
ensures
- computes and returns the matrix MAT given by dpca_matrix_of_size(MAT,eigen,num_rows).
That is, this function returns the dpca_matrix computed by the function
defined below.
- Note that MAT is the desired linear transformation matrix. That is,
multiplying a vector by MAT performs the desired linear dimensionality
reduction to num_rows dimensions.
!*/
void dpca_matrix_of_size (
general_matrix& dpca_mat,
general_matrix& eigenvalues,
const long num_rows
);
/*!
requires
- 0 < num_rows <= in_vector_size()
ensures
- is_col_vector(#eigenvalues) == true
- #dpca_mat.nr() == #eigenvalues.size()
- #dpca_mat.nr() == num_rows
- #dpca_mat.nc() == in_vector_size()
- rowm(#dpca_mat,i) represents the ith eigenvector of the S matrix described
in the class description and its eigenvalue is given by #eigenvalues(i).
- The values in #eigenvalues might be positive or negative. Additionally, the
eigenvalues are in sorted order with the largest eigenvalue stored at
#eigenvalues(0).
- (#dpca_mat)*trans(#dpca_mat) == identity_matrix.
(i.e. the rows of the dpca_matrix are all unit length vectors and are mutually
orthogonal)
- Note that #dpca_mat is the desired linear transformation matrix. That is,
multiplying a vector by #dpca_mat performs the desired linear dimensionality
reduction to num_rows dimensions.
!*/
discriminant_pca operator+ (
const discriminant_pca& item
) const;
/*!
requires
- in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size()
(i.e. the in_vector_size() of *this and item must match or one must be zero)
- between_class_weight() == item.between_class_weight()
- within_class_weight() == item.within_class_weight()
ensures
- returns a new discriminant_pca object that represents the combination of all
the measurements given to *this and item. That is, this function returns a
discriminant_pca object, R, that is equivalent to what you would obtain if all
modifying calls (e.g. the add_to_*() functions) to *this and item had instead
been done to R.
!*/
discriminant_pca& operator+= (
const discriminant_pca& rhs
);
/*!
requires
- in_vector_size() == 0 || rhs.in_vector_size() == 0 || in_vector_size() == rhs.in_vector_size()
(i.e. the in_vector_size() of *this and rhs must match or one must be zero)
- between_class_weight() == rhs.between_class_weight()
- within_class_weight() == rhs.within_class_weight()
ensures
- #*this == *this + rhs
- returns #*this
!*/
void swap (
discriminant_pca& item
);
/*!
ensures
- swaps *this and item
!*/
};
// ----------------------------------------------------------------------------------------
template <typename matrix_type>
inline void swap (
    discriminant_pca<matrix_type>& a,
    discriminant_pca<matrix_type>& b
)
{
    a.swap(b);
}
/*!
    ensures
        - provides a global swap function: exchanges a and b by forwarding
          to the member function a.swap(b)
!*/
template <
    typename matrix_type
    >
void deserialize (
    discriminant_pca<matrix_type>& item,
    std::istream& in
);
/*!
    provides deserialization support

    - item: the discriminant_pca object that receives the state read from in
    - in:   the stream to read the serialized state from
!*/
template <
    typename matrix_type
    >
void serialize (
    const discriminant_pca<matrix_type>& item,
    std::ostream& out
);
/*!
    provides serialization support

    - item: the discriminant_pca object whose state is written
    - out:  the stream the serialized state is written to
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_DPCA_ABSTRaCT_

View File

@@ -0,0 +1,82 @@
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_IMAGE_FEATURE_SaMPLING_Hh_
#define DLIB_IMAGE_FEATURE_SaMPLING_Hh_
#include "image_feature_sampling_abstract.h"
#include "../statistics.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
    typename image_array_type,
    typename feature_extractor_type,
    typename pyramid_type
    >
random_subset_selector<typename feature_extractor_type::descriptor_type> randomly_sample_image_features (
    const image_array_type& images,
    const pyramid_type& pyr,
    const feature_extractor_type& fe_,
    unsigned long num
)
{
    typedef typename feature_extractor_type::descriptor_type descriptor_type;

    // fe_ is const, so do the work with a private extractor carrying the same configuration.
    feature_extractor_type extractor;
    extractor.copy_configuration(fe_);

    random_subset_selector<descriptor_type> sampled;
    sampled.set_max_size(num);

    typedef typename image_array_type::type image_type;
    // level_img holds the current (downsampled) pyramid level, scratch_img the next one.
    image_type level_img, scratch_img;

    for (unsigned long img_idx = 0; img_idx < images.size(); ++img_idx)
    {
        // Level 0 of the pyramid is the input image itself.
        extractor.load(images[img_idx]);

        // Walk down the pyramid until a level is too small to produce any features.
        for (bool on_original = true; extractor.size() != 0; on_original = false)
        {
            // Offer every feature location of this level to the selector.  The descriptor
            // itself is only fetched when the selector has already decided to keep it.
            for (long row = 0; row < extractor.nr(); ++row)
            {
                for (long col = 0; col < extractor.nc(); ++col)
                {
                    if (!sampled.next_add_accepts())
                        sampled.add();
                    else
                        sampled.add(extractor(row,col));
                }
            }

            // Build the next pyramid level into level_img.
            if (on_original)
            {
                pyr(images[img_idx], level_img);
            }
            else
            {
                pyr(level_img, scratch_img);
                swap(scratch_img,level_img);
            }

            extractor.load(level_img);
        }
    }

    return sampled;
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_IMAGE_FEATURE_SaMPLING_Hh_

View File

@@ -0,0 +1,45 @@
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_IMAGE_FEATURE_SaMPLING_ABSTRACT_Hh_
#ifdef DLIB_IMAGE_FEATURE_SaMPLING_ABSTRACT_Hh_
#include "random_subset_selector_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename image_array_type,
typename feature_extractor_type,
typename pyramid_type
>
random_subset_selector<typename feature_extractor_type::descriptor_type> randomly_sample_image_features (
const image_array_type& images,
const pyramid_type& pyr,
const feature_extractor_type& fe,
unsigned long num
);
/*!
requires
- pyramid_type == a type compatible with the image pyramid objects defined
in dlib/image_transforms/image_pyramid_abstract.h
- feature_extractor_type == a local image feature extractor type such as the
dlib::hog_image.  It must be default constructible and provide
copy_configuration(), load(), size(), nr(), nc(), and operator()(row,col).
- image_array_type == an implementation of dlib/array/array_kernel_abstract.h
and it must contain image objects which can be passed to pyr() and fe.load()
and are swappable by global swap().
ensures
- creates an image pyramid for each image in images and performs feature
extraction on each pyramid level. Then selects a random subsample of at
most num local feature vectors and returns it.
- The pyramid for an image is descended until a level is reached for which
the feature extractor reports size() == 0.
- fe itself is not modified.  Feature extraction is performed by a separate
extractor object that is given fe's configuration via copy_configuration().
- The returned random_subset_selector, R, has R.max_size() == num.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_IMAGE_FEATURE_SaMPLING_ABSTRACT_Hh_

View File

@@ -0,0 +1,237 @@
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_LDA_Hh_
#define DLIB_LDA_Hh_
#include "lda_abstract.h"
#include "../algs.h"
#include <map>
#include "../matrix.h"
#include <vector>
namespace dlib
{
// ----------------------------------------------------------------------------------------
namespace impl
{
// Assigns each distinct value in row_labels a dense id (0, 1, 2, ...) in order of first
// appearance and returns the label -> id map.
inline std::map<unsigned long,unsigned long> make_class_labels(
    const std::vector<unsigned long>& row_labels
)
{
    typedef std::map<unsigned long,unsigned long> label_map;

    label_map dense_ids;
    for (std::vector<unsigned long>::const_iterator lbl = row_labels.begin(); lbl != row_labels.end(); ++lbl)
    {
        // The id a new label would receive is the number of labels seen so far.  insert()
        // leaves the map untouched when the label is already present.
        const unsigned long candidate_id = dense_ids.size();
        dense_ids.insert(label_map::value_type(*lbl, candidate_id));
    }
    return dense_ids;
}
// ------------------------------------------------------------------------------------
// Subtracts the mean row of X from every row of X (in place) and returns that mean as a
// column vector.
template <
    typename T
    >
matrix<T,0,1> center_matrix (
    matrix<T>& X
)
{
    // NOTE(review): row_mean starts out empty, so the first += relies on dlib's matrix
    // operator+= accepting a differently sized left-hand side -- confirm against matrix.h.
    matrix<T,1> row_mean;

    long i = 0;
    while (i < X.nr())
    {
        row_mean += rowm(X,i);
        ++i;
    }
    row_mean /= X.nr();

    for (i = 0; i < X.nr(); ++i)
        set_rowm(X,i) -= row_mean;

    return trans(row_mean);
}
}
// ----------------------------------------------------------------------------------------
// Computes the dimensionality reducing LDA transform for the labeled row vectors in X.
// Both X and mean are outputs: on return X has been overwritten with the transform matrix
// (called Z in lda_abstract.h) and mean holds the offset to subtract after applying it, so
// that a vector x is mapped to X*x - mean.
template <
typename T
>
void compute_lda_transform (
matrix<T>& X,
matrix<T,0,1>& mean,
const std::vector<unsigned long>& row_labels,
unsigned long lda_dims = 500,
unsigned long extra_pca_dims = 200
)
{
// Map every distinct label to a dense class index in [0, number of distinct labels).
std::map<unsigned long,unsigned long> class_labels = impl::make_class_labels(row_labels);
// LDA can only give out at most class_labels.size()-1 dimensions so don't try to
// compute more than that.
// (This clamp runs before the checks below.  With fewer than 2 distinct labels the
// first DLIB_CASSERT rejects the call, so the clamped value is never used in that case.)
lda_dims = std::min<unsigned long>(lda_dims, class_labels.size()-1);
// make sure requires clause is not broken
DLIB_CASSERT(class_labels.size() > 1,
"\t void compute_lda_transform()"
<< "\n\t You can't call this function if the number of distinct class labels is less than 2."
);
DLIB_CASSERT(X.size() != 0 && (long)row_labels.size() == X.nr() && lda_dims != 0,
"\t void compute_lda_transform()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t X.size(): " << X.size()
<< "\n\t row_labels.size(): " << row_labels.size()
<< "\n\t lda_dims: " << lda_dims
);
// Remove the overall mean from every row of X and remember it.
mean = impl::center_matrix(X);
// Do PCA to reduce dims
// NOTE(review): the trailing arguments of svd_fast presumably are the number of singular
// vectors to compute and an iteration/accuracy setting -- confirm against svd_fast's docs.
matrix<T> pu,pw,pv;
svd_fast(X, pu, pw, pv, lda_dims+extra_pca_dims, 4);
pu.set_size(0,0); // free RAM, we don't need pu.
// Project the centered data onto the columns of pv.  From here on X holds the reduced data.
X = X*pv;
matrix<T> class_means(class_labels.size(), X.nc());
class_means = 0;
matrix<T,0,1> class_counts(class_labels.size());
class_counts = 0;
// First compute the means of each class
for (unsigned long i = 0; i < row_labels.size(); ++i)
{
const unsigned long class_idx = class_labels[row_labels[i]];
set_rowm(class_means,class_idx) += rowm(X,i);
class_counts(class_idx)++;
}
// Turn the per-class sums into means: row c is scaled by 1/class_counts(c).
class_means = inv(diagm(class_counts))*class_means;
// subtract means from the data
for (unsigned long i = 0; i < row_labels.size(); ++i)
{
const unsigned long class_idx = class_labels[row_labels[i]];
set_rowm(X,i) -= rowm(class_means,class_idx);
}
// Note that we are using the formulas from the paper Using Discriminant
// Eigenfeatures for Image Retrieval by Swets and Weng.
// Sw: scatter of the samples around their own class mean (within class).
// Sb: scatter of the class means (between class).
matrix<T> Sw = trans(X)*X;
matrix<T> Sb = trans(class_means)*class_means;
matrix<T> A, H;
matrix<T,0,1> W;
svd3(Sw, A, W, H);
// Build the scaling 1/sqrt(W).  Values are bounded below by 1e-5 times the largest one
// before taking the reciprocal so that tiny entries do not blow up.
W = sqrt(W);
W = reciprocal(lowerbound(W,max(W)*1e-5));
// Express Sb in the coordinates scaled by H*diagm(W) and decompose it.
A = trans(H*diagm(W))*Sb*H*diagm(W);
matrix<T> v,s,u;
svd3(A, v, s, u);
matrix<T> tform = H*diagm(W)*u;
// pick out only the number of dimensions we are supposed to for the output, unless
// we should just keep them all, then don't do anything.
if ((long)lda_dims <= tform.nc())
{
rsort_columns(tform, s);
tform = colm(tform, range(0, lda_dims-1));
}
// Fold the PCA projection and the LDA projection into a single matrix and store it in X,
// one output dimension per row.
X = trans(pv*tform);
// Express the data mean in the output space so callers can compute X*x - mean.
mean = X*mean;
}
// ----------------------------------------------------------------------------------------
// Sweeps a threshold upwards through the pooled scores and stops as soon as the fraction
// of high_vals below it is no longer smaller than the fraction of low_vals above it.
//
// returns make_pair(ERR, T) where T is the selected threshold and ERR is the average of
// the two error fractions at that point.
//   - If both inputs are empty, returns make_pair(0,0).
//   - If exactly one input is empty, that (empty) side is treated as having an error
//     fraction of 0.  Previously this case divided 0 by 0 and returned NaN as ERR.
inline std::pair<double,double> equal_error_rate (
    const std::vector<double>& low_vals,
    const std::vector<double>& high_vals
)
{
    // Pool all the scores, tagging each one with the side it came from (-1 low, +1 high).
    std::vector<std::pair<double,int> > temp;
    temp.reserve(low_vals.size()+high_vals.size());
    for (unsigned long i = 0; i < low_vals.size(); ++i)
        temp.push_back(std::make_pair(low_vals[i], -1));
    for (unsigned long i = 0; i < high_vals.size(); ++i)
        temp.push_back(std::make_pair(high_vals[i], +1));

    std::sort(temp.begin(), temp.end());

    if (temp.size() == 0)
        return std::make_pair(0,0);

    // An empty side can't contain any misclassified values.  Its numerator is always 0, so
    // using 1 as the denominator gives an error fraction of 0 instead of 0/0 == NaN.
    const double low_total  = low_vals.empty()  ? 1.0 : static_cast<double>(low_vals.size());
    const double high_total = high_vals.empty() ? 1.0 : static_cast<double>(high_vals.size());

    double thresh = temp[0].first;

    // Start with the threshold below everything: every low value is on the wrong side and
    // no high value is.
    unsigned long num_low_wrong = low_vals.size();
    unsigned long num_high_wrong = 0;
    double low_error = num_low_wrong/low_total;
    double high_error = num_high_wrong/high_total;

    // Move the threshold up one score at a time until the two error rates meet or cross.
    for (unsigned long i = 0; i < temp.size() && high_error < low_error; ++i)
    {
        thresh = temp[i].first;
        if (temp[i].second > 0)
        {
            num_high_wrong++;
            high_error = num_high_wrong/high_total;
        }
        else
        {
            num_low_wrong--;
            low_error = num_low_wrong/low_total;
        }
    }

    return std::make_pair((low_error+high_error)/2, thresh);
}
// ----------------------------------------------------------------------------------------
// One point of a ROC curve as produced by compute_roc_curve().
struct roc_point
{
// Fraction of the true detections included at this point of the curve.
double true_positive_rate;
// Fraction of the false detections included at this point of the curve.
double false_positive_rate;
// The detection score at which this point was recorded.
double detection_threshold;
};
inline std::vector<roc_point> compute_roc_curve (
const std::vector<double>& true_detections,
const std::vector<double>& false_detections
)
{
DLIB_CASSERT(true_detections.size() != 0);
DLIB_CASSERT(false_detections.size() != 0);
std::vector<std::pair<double,int> > temp;
temp.reserve(true_detections.size()+false_detections.size());
for (unsigned long i = 0; i < true_detections.size(); ++i)
temp.push_back(std::make_pair(true_detections[i], +1));
for (unsigned long i = 0; i < false_detections.size(); ++i)
temp.push_back(std::make_pair(false_detections[i], -1));
std::sort(temp.rbegin(), temp.rend());
std::vector<roc_point> roc_curve;
roc_curve.reserve(temp.size());
double num_false_included = 0;
double num_true_included = 0;
for (unsigned long i = 0; i < temp.size(); ++i)
{
if (temp[i].second > 0)
num_true_included++;
else
num_false_included++;
roc_point p;
p.true_positive_rate = num_true_included/true_detections.size();
p.false_positive_rate = num_false_included/false_detections.size();
p.detection_threshold = temp[i].first;
roc_curve.push_back(p);
}
return roc_curve;
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_LDA_Hh_

View File

@@ -0,0 +1,118 @@
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_LDA_ABSTRACT_Hh_
#ifdef DLIB_LDA_ABSTRACT_Hh_
#include <map>
#include "../matrix.h"
#include <vector>
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename T
>
void compute_lda_transform (
matrix<T>& X,
matrix<T,0,1>& M,
const std::vector<unsigned long>& row_labels,
unsigned long lda_dims = 500,
unsigned long extra_pca_dims = 200
);
/*!
requires
- X.size() != 0
- row_labels.size() == X.nr()
- The number of distinct values in row_labels > 1
- lda_dims != 0
ensures
- We interpret X as a collection of X.nr() input vectors, where each row of X
is one of the vectors.
- We interpret row_labels[i] as the label of the vector rowm(X,i).
- This function performs the dimensionality reducing version of linear
discriminant analysis. That is, you give it a set of labeled vectors and it
returns a linear transform that maps the input vectors into a new space that
is good for distinguishing between the different classes. In particular,
this function finds matrices Z and M such that:
- Given an input vector x, Z*x-M, is the transformed version of x. That is,
Z*x-M maps x into a space where x vectors that share the same class label
are near each other.
- Z*x-M results in the transformed vectors having zero expected mean.
- Z.nr() <= lda_dims
(it might be less than lda_dims if there are not enough distinct class
labels to support lda_dims dimensions.  At most one fewer dimension than
the number of distinct class labels is produced).
- Z.nc() == X.nc()
- M.size() == Z.nr()
- We overwrite the input matrix X and store Z in it. Therefore, the
outputs of this function are in X and M.  The value M has on input is ignored.
- In order to deal with very high dimensional inputs, we perform PCA internally
to map the input vectors into a space of at most lda_dims+extra_pca_dims
prior to performing LDA.
!*/
// ----------------------------------------------------------------------------------------
std::pair<double,double> equal_error_rate (
const std::vector<double>& low_vals,
const std::vector<double>& high_vals
);
/*!
ensures
- This function finds a threshold T that best separates the elements of
low_vals from high_vals by selecting the threshold with equal error rate. In
particular, we try to pick a threshold T such that:
- for all valid i:
- high_vals[i] >= T
- for all valid i:
- low_vals[i] < T
Where the best T is determined such that the fraction of low_vals >= T is the
same as the fraction of high_vals < T.
- Let ERR == the equal error rate. I.e. the fraction of times low_vals >= T
and high_vals < T. Note that 0 <= ERR <= 1.
- returns make_pair(ERR,T)
- if (low_vals.size() == 0 && high_vals.size() == 0) then
- returns make_pair(0,0)
!*/
// ----------------------------------------------------------------------------------------
struct roc_point
{
/*!
WHAT THIS OBJECT REPRESENTS
This is a single point on a ROC curve, as output by compute_roc_curve().
It records the true positive rate and false positive rate obtained when
accepting all detections with a score >= detection_threshold.
!*/
double true_positive_rate;
double false_positive_rate;
double detection_threshold;
};
std::vector<roc_point> compute_roc_curve (
const std::vector<double>& true_detections,
const std::vector<double>& false_detections
);
/*!
requires
- true_detections.size() != 0
- false_detections.size() != 0
ensures
- This function computes the ROC (receiver operating characteristic)
curve of the given data. Therefore, we interpret true_detections as
containing detection scores for a bunch of true detections and
false_detections as detection scores from a bunch of false detections. A
perfect detector would always give higher scores to true detections than to
false detections, resulting in a true positive rate of 1 and a false positive
rate of 0, for some appropriate detection threshold.
- Returns an array, ROC, such that:
- ROC.size() == true_detections.size()+false_detections.size()
- for all valid i:
- If you were to accept all detections with a score >= ROC[i].detection_threshold
then you would obtain a true positive rate of ROC[i].true_positive_rate and a
false positive rate of ROC[i].false_positive_rate.
- ROC is ordered such that low detection rates come first. That is, the
curve is swept from a high detection threshold to a low threshold.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_LDA_ABSTRACT_Hh_

View File

@@ -0,0 +1,372 @@
// Copyright (C) 2010 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_RANDOM_SUBSeT_SELECTOR_H_
#define DLIB_RANDOM_SUBSeT_SELECTOR_H_
#include "random_subset_selector_abstract.h"
#include "../rand.h"
#include <vector>
#include "../algs.h"
#include "../string.h"
#include "../serialize.h"
#include "../matrix/matrix_mat.h"
#include <iostream>
namespace dlib
{
template <
    typename T,
    typename Rand_type = dlib::rand
    >
class random_subset_selector
{
    /*!
        INITIAL VALUE
            - _max_size == 0
            - items.size() == 0
            - count == 0
            - _next_add_accepts == false

        CONVENTION
            - count == the number of times add() has been called since the last
              time this object was empty.
            - items.size() == size()
            - max_size() == _max_size
            - next_add_accepts() == _next_add_accepts

        IMPLEMENTATION NOTES
            This is reservoir sampling with the accept/reject decision for the
            *next* item made ahead of time (so callers can skip loading items that
            would be rejected).  Once the reservoir is full, the i-th item offered
            must be accepted with probability size()/i.  Therefore count has to be
            incremented before update_next_add_accepts() is called from add().
    !*/
public:
    typedef T type;
    typedef T value_type;
    typedef default_memory_manager mem_manager_type;
    typedef Rand_type rand_type;

    typedef typename std::vector<T>::iterator iterator;
    typedef typename std::vector<T>::const_iterator const_iterator;

    // Creates an empty selector with max_size() == 0.
    random_subset_selector (
    )
    {
        _max_size = 0;
        make_empty();
    }

    // Seeds the embedded random number generator.
    void set_seed(const std::string& value)
    {
        rnd.set_seed(value);
    }

    // Discards all stored items and restarts the add() counter.  max_size() is unchanged.
    void make_empty (
    )
    {
        items.resize(0);
        count = 0;
        update_next_add_accepts();
    }

    // Read only access to the currently selected items.
    const std::vector<T>& to_std_vector(
    ) const { return items; }

    // Number of items currently held.
    size_t size (
    ) const
    {
        return items.size();
    }

    // Empties the selector and sets the maximum number of items it will hold.
    void set_max_size (
        unsigned long new_max_size
    )
    {
        items.reserve(new_max_size);
        make_empty();
        _max_size = new_max_size;
        // make_empty() evaluated the acceptance flag against the old _max_size, so
        // recompute it now that the new limit is in place.
        update_next_add_accepts();
    }

    unsigned long max_size (
    ) const
    {
        return _max_size;
    }

    T& operator[] (
        unsigned long idx
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(idx < size(),
            "\tvoid random_subset_selector::operator[]()"
            << "\n\t idx is out of range"
            << "\n\t idx: " << idx
            << "\n\t size(): " << size()
            << "\n\t this: " << this
            );

        return items[idx];
    }

    const T& operator[] (
        unsigned long idx
    ) const
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(idx < size(),
            "\tvoid random_subset_selector::operator[]()"
            << "\n\t idx is out of range"
            << "\n\t idx: " << idx
            << "\n\t size(): " << size()
            << "\n\t this: " << this
            );

        return items[idx];
    }

    iterator begin() { return items.begin(); }
    const_iterator begin() const { return items.begin(); }
    iterator end() { return items.end(); }
    const_iterator end() const { return items.end(); }

    // True if the next call to add(item) will store item in this object.
    bool next_add_accepts (
    ) const
    {
        return _next_add_accepts;
    }

    // Offers new_item to the selector.  It is stored if the selector isn't full yet or if
    // next_add_accepts() is true (in which case it replaces a randomly chosen stored item).
    void add (
        const T& new_item
    )
    {
        if (items.size() < _max_size)
        {
            items.push_back(new_item);
            // swap into a random place
            exchange(items[rnd.get_random_32bit_number()%items.size()], items.back());
        }
        else if (_next_add_accepts)
        {
            // pick a random element of items and replace it.
            items[rnd.get_random_32bit_number()%items.size()] = new_item;
        }

        // Count this call before deciding about the next item.  update_next_add_accepts()
        // needs count to include the item that was just offered, otherwise the next item
        // would be accepted with probability size()/count instead of size()/(count+1)
        // (e.g. with max_size()==1 the second item would always evict the first).
        ++count;
        update_next_add_accepts();
    }

    // Records that an item was offered without supplying it.  Only legal when the item
    // would have been rejected anyway.
    void add (
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(next_add_accepts() == false,
            "\tvoid random_subset_selector::add()"
            << "\n\t You should be calling the version of add() that takes an argument"
            << "\n\t this: " << this
            );

        // Same ordering requirement as in add(const T&): count first, then decide.
        ++count;
        update_next_add_accepts();
    }

    // Exchanges the complete state (items, limits, counters and random generator) with a.
    void swap (
        random_subset_selector& a
    )
    {
        items.swap(a.items);
        std::swap(_max_size, a._max_size);
        std::swap(count, a.count);
        rnd.swap(a.rnd);
        std::swap(_next_add_accepts, a._next_add_accepts);
    }

    template <typename T1, typename T2>
    friend void serialize (
        const random_subset_selector<T1,T2>& item,
        std::ostream& out
    );

    template <typename T1, typename T2>
    friend void deserialize (
        random_subset_selector<T1,T2>& item,
        std::istream& in
    );

private:

    // Decides whether the next item offered via add() will be stored and records the
    // decision in _next_add_accepts.  Expects count to already include every item that
    // has been offered so far.
    void update_next_add_accepts (
    )
    {
        if (items.size() < _max_size)
        {
            // There is still free room, so the next item is always taken.
            _next_add_accepts = true;
        }
        else if (_max_size == 0)
        {
            // A selector that may hold nothing never accepts anything.
            _next_add_accepts = false;
        }
        else
        {
            // At this point each element of items has had an equal chance of being in this object.
            // In particular, the probability that each arrived here is currently items.size()/count.
            // We need to be able to say that, after the next item has been offered, the probability of
            // any particular object ending up in items is items.size()/(count+1).  So this means that
            // we should decide to add the next item into items with this probability.  Also, if we do
            // so then we pick one of the current items and replace it at random with the new item.

            // Make me a random 64 bit number.  This might seem excessive but I want this object
            // to be able to handle an effectively infinite number of calls to add().  So count
            // might get very large and we need to deal with that properly.
            const unsigned long num1 = rnd.get_random_32bit_number();
            const unsigned long num2 = rnd.get_random_32bit_number();
            uint64 num = num1;
            num <<= 32;
            num |= num2;

            // num is now (approximately) uniform over [0, count], i.e. count+1 values, of which
            // items.size() lead to acceptance.
            num %= (count+1);

            _next_add_accepts = (num < items.size());
        }
    }

    std::vector<T> items;
    unsigned long _max_size;
    uint64 count;

    rand_type rnd;

    bool _next_add_accepts;
};
// Global swap for random_subset_selector objects; forwards to the member swap().
template <typename T, typename rand_type>
void swap (
    random_subset_selector<T,rand_type>& a,
    random_subset_selector<T,rand_type>& b
)
{
    a.swap(b);
}
// ----------------------------------------------------------------------------------------
// Writes the complete state of item to out: the stored items, the size limit, the add()
// counter, the random number generator and the pending acceptance decision.
// The field order here defines the serialized layout and must stay in sync with
// deserialize() below.
template <typename T1, typename T2>
void serialize (
const random_subset_selector<T1,T2>& item,
std::ostream& out
)
{
serialize(item.items, out);
serialize(item._max_size, out);
serialize(item.count, out);
serialize(item.rnd, out);
serialize(item._next_add_accepts, out);
}
// Restores the complete state of item from in.  The fields are read back in exactly the
// order in which serialize() wrote them: items, size limit, add() counter, random number
// generator, pending acceptance decision.
template <typename T1, typename T2>
void deserialize (
random_subset_selector<T1,T2>& item,
std::istream& in
)
{
deserialize(item.items, in);
deserialize(item._max_size, in);
deserialize(item.count, in);
deserialize(item.rnd, in);
deserialize(item._next_add_accepts, in);
}
// ----------------------------------------------------------------------------------------
// Returns a random subset of at most num elements of samples, using the selector's
// default seed.
template <
    typename T,
    typename alloc
    >
random_subset_selector<T> randomly_subsample (
    const std::vector<T,alloc>& samples,
    unsigned long num
)
{
    random_subset_selector<T> picked;
    picked.set_max_size(num);

    typedef typename std::vector<T,alloc>::const_iterator sample_iterator;
    for (sample_iterator s = samples.begin(); s != samples.end(); ++s)
    {
        picked.add(*s);
    }

    return picked;
}
// ----------------------------------------------------------------------------------------
// Returns a random subset of at most num elements of samples.  random_seed is converted
// to a string and used to seed the selector's random number generator.
template <
    typename T,
    typename alloc,
    typename U
    >
random_subset_selector<T> randomly_subsample (
    const std::vector<T,alloc>& samples,
    unsigned long num,
    const U& random_seed
)
{
    random_subset_selector<T> picked;
    picked.set_seed(cast_to_string(random_seed));
    picked.set_max_size(num);

    typedef typename std::vector<T,alloc>::const_iterator sample_iterator;
    for (sample_iterator s = samples.begin(); s != samples.end(); ++s)
    {
        picked.add(*s);
    }

    return picked;
}
// ----------------------------------------------------------------------------------------
// Returns a random subset of at most num of the items held by another selector, using
// the new selector's default seed.
template <
    typename T
    >
random_subset_selector<T> randomly_subsample (
    const random_subset_selector<T>& samples,
    unsigned long num
)
{
    random_subset_selector<T> picked;
    picked.set_max_size(num);

    typedef typename random_subset_selector<T>::const_iterator sample_iterator;
    for (sample_iterator s = samples.begin(); s != samples.end(); ++s)
    {
        picked.add(*s);
    }

    return picked;
}
// ----------------------------------------------------------------------------------------
// Returns a random subset of at most num of the items held by another selector.
// random_seed is converted to a string and used to seed the new selector.
template <
    typename T,
    typename U
    >
random_subset_selector<T> randomly_subsample (
    const random_subset_selector<T>& samples,
    unsigned long num,
    const U& random_seed
)
{
    random_subset_selector<T> picked;
    picked.set_seed(cast_to_string(random_seed));
    picked.set_max_size(num);

    typedef typename random_subset_selector<T>::const_iterator sample_iterator;
    for (sample_iterator s = samples.begin(); s != samples.end(); ++s)
    {
        picked.add(*s);
    }

    return picked;
}
// ----------------------------------------------------------------------------------------
// Wraps a random_subset_selector in a matrix expression (via op_array_to_mat) so that it
// can be used with dlib's matrix tools.
template <
    typename T
    >
const matrix_op<op_array_to_mat<random_subset_selector<T> > > mat (
    const random_subset_selector<T>& m
)
{
    return matrix_op<op_array_to_mat<random_subset_selector<T> > >(
        op_array_to_mat<random_subset_selector<T> >(m)
    );
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_RANDOM_SUBSeT_SELECTOR_H_

View File

@@ -0,0 +1,388 @@
// Copyright (C) 2010 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_RANDOM_SUBSeT_SELECTOR_ABSTRACT_H_
#ifdef DLIB_RANDOM_SUBSeT_SELECTOR_ABSTRACT_H_
#include <vector>
#include "../rand/rand_kernel_abstract.h"
#include "../algs.h"
#include "../string.h"
namespace dlib
{
template <
typename T,
typename Rand_type = dlib::rand
>
class random_subset_selector
{
/*!
REQUIREMENTS ON T
T must be a copyable type
REQUIREMENTS ON Rand_type
must be an implementation of dlib/rand/rand_kernel_abstract.h
INITIAL VALUE
- size() == 0
- max_size() == 0
- next_add_accepts() == false
WHAT THIS OBJECT REPRESENTS
This object is a tool to help you select a random subset of a large body of data.
In particular, it is useful when the body of data is too large to fit into memory.
So for example, suppose you have 1000000 data samples and you want to select a
random subset of size 1000. Then you could do that as follows:
random_subset_selector<sample_type> rand_subset;
rand_subset.set_max_size(1000)
for (int i = 0; i < 1000000; ++i)
rand_subset.add( get_next_data_sample());
At the end of the for loop you will have your random subset of 1000 samples. And by
random I mean that each of the 1000000 data samples has an equal chance of ending
up in the rand_subset object.
Note that the above example calls get_next_data_sample() for each data sample. This
may be inefficient since most of the data samples are just ignored. An alternative
method that doesn't require you to load each sample can also be used. Consider the
following:
random_subset_selector<sample_type> rand_subset;
rand_subset.set_max_size(1000)
for (int i = 0; i < 1000000; ++i)
if (rand_subset.next_add_accepts())
rand_subset.add(get_data_sample(i));
else
rand_subset.add()
In the above example we only actually fetch the data sample into memory if we
know that the rand_subset would include it into the random subset. Otherwise,
we can just call the empty add().
Finally, note that the random_subset_selector uses a deterministic pseudo-random
number generator under the hood. Moreover, the default constructor always seeds
the random number generator in the same way. So unless you call set_seed()
each instance of the random_subset_selector will function identically.
!*/
public:
typedef T type;
typedef T value_type;
typedef default_memory_manager mem_manager_type;
typedef Rand_type rand_type;
typedef typename std::vector<T>::iterator iterator;
typedef typename std::vector<T>::const_iterator const_iterator;
random_subset_selector (
);
/*!
ensures
- this object is properly initialized
!*/
void set_seed(
const std::string& value
);
/*!
ensures
- sets the seed of the random number generator that is embedded in
this object to the given value.
!*/
void make_empty (
);
/*!
ensures
- #size() == 0
!*/
size_t size (
) const;
/*!
ensures
- returns the number of items of type T currently contained in this object
!*/
void set_max_size (
unsigned long new_max_size
);
/*!
ensures
- #max_size() == new_max_size
- #size() == 0
!*/
unsigned long max_size (
) const;
/*!
ensures
- returns the maximum allowable size for this object
!*/
T& operator[] (
unsigned long idx
);
/*!
requires
- idx < size()
ensures
- returns a non-const reference to the idx'th element of this object
!*/
const T& operator[] (
unsigned long idx
) const;
/*!
requires
- idx < size()
ensures
- returns a const reference to the idx'th element of this object
!*/
bool next_add_accepts (
) const;
/*!
ensures
- if (the next call to add(item) will result in item being included
into *this) then
- returns true
- Note that the next item will always be accepted if size() < max_size().
- else
- returns false
- Note that the next item will never be accepted if max_size() == 0.
!*/
void add (
const T& new_item
);
/*!
ensures
- if (next_add_accepts()) then
- places new_item into *this object at a random location
- if (size() < max_size()) then
- #size() == size() + 1
- #next_add_accepts() == The updated information about the acceptance
of the next call to add()
!*/
void add (
);
/*!
requires
- next_add_accepts() == false
ensures
- This function does nothing but update the value of #next_add_accepts()
!*/
iterator begin(
);
/*!
ensures
- if (size() > 0) then
- returns an iterator referring to the first element in
this container.
- else
- returns end()
!*/
const_iterator begin(
) const;
/*!
ensures
- if (size() > 0) then
- returns a const_iterator referring to the first element in
this container.
- else
- returns end()
!*/
iterator end(
);
/*!
ensures
- returns an iterator that represents one past the end of
this container
!*/
const_iterator end(
) const;
/*!
ensures
- returns an iterator that represents one past the end of
this container
!*/
const std::vector<T>& to_std_vector(
) const;
/*!
ensures
- returns a const reference to the underlying std::vector<T> that contains
all elements in this object. That is, this function returns a vector, V,
which has the following properties:
- V.size() == this->size()
- V.begin() == this->begin()
- V.end() == this->end()
!*/
void swap (
random_subset_selector& item
);
/*!
ensures
- swaps *this and item
!*/
};
template <
typename T,
typename rand_type
>
void swap (
random_subset_selector<T,rand_type>& a,
random_subset_selector<T,rand_type>& b
) { a.swap(b); }
/*!
provides global swap support
!*/
template <
typename T,
typename rand_type
>
void serialize (
const random_subset_selector<T,rand_type>& item,
std::ostream& out
);
/*!
provides serialization support
!*/
template <
typename T,
typename rand_type
>
void deserialize (
random_subset_selector<T,rand_type>& item,
std::istream& in
);
/*!
provides deserialization support
!*/
// ----------------------------------------------------------------------------------------
template <
typename T,
typename alloc
>
random_subset_selector<T> randomly_subsample (
const std::vector<T,alloc>& samples,
unsigned long num
);
/*!
ensures
- returns a random subset R such that:
- R contains a random subset of the given samples
- R.size() == min(num, samples.size())
- R.max_size() == num
- The random number generator used by this function will always be
initialized in the same way. I.e. this function will always pick
the same random subsample if called multiple times.
!*/
// ----------------------------------------------------------------------------------------
template <
typename T,
typename alloc,
typename U
>
random_subset_selector<T> randomly_subsample (
const std::vector<T,alloc>& samples,
unsigned long num,
const U& random_seed
);
/*!
requires
- random_seed must be convertible to a string by dlib::cast_to_string()
ensures
- returns a random subset R such that:
- R contains a random subset of the given samples
- R.size() == min(num, samples.size())
- R.max_size() == num
- The given random_seed will be used to initialize the random number
generator used by this function.
!*/
// ----------------------------------------------------------------------------------------
template <
typename T
>
random_subset_selector<T> randomly_subsample (
const random_subset_selector<T>& samples,
unsigned long num
);
/*!
ensures
- returns a random subset R such that:
- R contains a random subset of the given samples
- R.size() == min(num, samples.size())
- R.max_size() == num
- The random number generator used by this function will always be
initialized in the same way. I.e. this function will always pick
the same random subsample if called multiple times.
!*/
// ----------------------------------------------------------------------------------------
template <
typename T,
typename U
>
random_subset_selector<T> randomly_subsample (
const random_subset_selector<T>& samples,
unsigned long num,
const U& random_seed
);
/*!
requires
- random_seed must be convertible to a string by dlib::cast_to_string()
ensures
- returns a random subset R such that:
- R contains a random subset of the given samples
- R.size() == min(num, samples.size())
- R.max_size() == num
- The given random_seed will be used to initialize the random number
generator used by this function.
!*/
// ----------------------------------------------------------------------------------------
template <
typename T
>
const matrix_exp mat (
const random_subset_selector<T>& m
);
/*!
ensures
- returns a matrix R such that:
- is_col_vector(R) == true
- R.size() == m.size()
- for all valid r:
R(r) == m[r]
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_RANDOM_SUBSeT_SELECTOR_ABSTRACT_H_

View File

@@ -0,0 +1,370 @@
// Copyright (C) 2016 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_RuNNING_GRADIENT_Hh_
#define DLIB_RuNNING_GRADIENT_Hh_
#include "running_gradient_abstract.h"
#include "../algs.h"
#include "../serialize.h"
#include <cmath>
#include "../matrix.h"
#include <algorithm>
namespace dlib
{
// Incrementally fits a straight line to the sequence of values given to add()
// (the comments in add() describe the update as recursive least squares) and
// exposes the fitted slope, the intercept, the standard error of the slope, and
// normal-CDF based probabilities that the slope lies below/above a threshold.
// The k-th value added (counting from 0) is paired with the abscissa k.
class running_gradient
{
public:
// Constructs an empty estimator; equivalent to calling clear().
running_gradient (
)
{
clear();
}
// Forgets every sample seen so far and restores the initial state:
// zero samples, zero weights, zero accumulated residual, and R set to a large
// multiple (1e6) of the 2x2 identity.
void clear(
)
{
n = 0;
R = identity_matrix<double>(2)*1e6;
w = 0;
residual_squared = 0;
}
// Returns the number of values passed to add() since the last clear().
// The count is stored (and returned) as a double.
double current_n (
) const
{
return n;
}
// Folds one more observation y into the line fit. The observation is paired
// with the regressor x = [n, 1], i.e. the current sample index plus a constant
// term, so w(0) tracks the slope and w(1) the intercept.
void add(
double y
)
{
matrix<double,2,1> x;
x = n, 1;
// Do recursive least squares computations
// temp is the scalar 1 + x'*R*x used to normalize the rank-one update of R.
const double temp = 1 + trans(x)*R*x;
matrix<double,2,1> tmp = R*x;
R = R - (tmp*trans(tmp))/temp;
// R should always be symmetric. This line improves numeric stability of this algorithm.
R = 0.5*(R + trans(R));
// Move the weights along R*x in proportion to the prediction error made
// with the weights from before this sample.
w = w + R*x*(y - trans(x)*w);
// Also, recursively keep track of the residual error between the given value
// and what our linear predictor outputs.
// Note that this uses the already-updated w, scaled by temp.
residual_squared = residual_squared + std::pow((y - trans(x)*w),2.0)*temp;
++n;
}
// Returns the slope of the fitted line, w(0).
// Requires current_n() > 1 (checked by DLIB_ASSERT).
double gradient (
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(current_n() > 1,
"\t double running_gradient::gradient()"
<< "\n\t You must add more values into this object before calling this function."
<< "\n\t this: " << this
);
return w(0);
}
// Returns the intercept of the fitted line, w(1).
// Requires current_n() > 0 (checked by DLIB_ASSERT).
double intercept (
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(current_n() > 0,
"\t double running_gradient::intercept()"
<< "\n\t You must add more values into this object before calling this function."
<< "\n\t this: " << this
);
return w(1);
}
// Returns the standard error of the slope estimate.
// Requires current_n() > 2 (checked by DLIB_ASSERT) so that n-2 is positive.
double standard_error (
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(current_n() > 2,
"\t double running_gradient::standard_error()"
<< "\n\t You must add more values into this object before calling this function."
<< "\n\t this: " << this
);
// s is the accumulated squared residual divided by n-2.
const double s = residual_squared/(n-2);
// For abscissas 0,1,...,n-1 the sum of squared deviations from their mean is
// (n^3 - n)/12, so adjust is the reciprocal of that sum.
const double adjust = 12.0/(std::pow(current_n(),3.0) - current_n());
return std::sqrt(s*adjust);
}
// Returns the value at thresh of the normal CDF whose mean is gradient() and
// whose standard deviation is standard_error().
// Requires current_n() > 2 (checked by DLIB_ASSERT).
double probability_gradient_less_than (
double thresh
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(current_n() > 2,
"\t double running_gradient::probability_gradient_less_than()"
<< "\n\t You must add more values into this object before calling this function."
<< "\n\t this: " << this
);
return normal_cdf(thresh, gradient(), standard_error());
}
// Returns 1 - probability_gradient_less_than(thresh).
// Requires current_n() > 2 (checked by DLIB_ASSERT).
double probability_gradient_greater_than (
double thresh
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(current_n() > 2,
"\t double running_gradient::probability_gradient_greater_than()"
<< "\n\t You must add more values into this object before calling this function."
<< "\n\t this: " << this
);
return 1-probability_gradient_less_than(thresh);
}
// Writes a format version number (1) followed by n, R, w and residual_squared.
friend void serialize (const running_gradient& item, std::ostream& out)
{
int version = 1;
serialize(version, out);
serialize(item.n, out);
serialize(item.R, out);
serialize(item.w, out);
serialize(item.residual_squared, out);
}
// Reads back the state written by serialize(). Throws serialization_error if
// the stored version number is not 1.
friend void deserialize (running_gradient& item, std::istream& in)
{
int version = 0;
deserialize(version, in);
if (version != 1)
throw serialization_error("Unexpected version found while deserializing dlib::running_gradient.");
deserialize(item.n, in);
deserialize(item.R, in);
deserialize(item.w, in);
deserialize(item.residual_squared, in);
}
private:
// Cumulative distribution function of a normal distribution with the given
// mean and standard deviation, evaluated at value.
static double normal_cdf(double value, double mean, double stddev)
{
// With zero spread the distribution is a point mass at mean, so the CDF
// degenerates to a step: 0 below the mean, 1 above it, and 0.5 exactly at it.
if (stddev == 0)
{
if (value < mean)
return 0;
else if (value > mean)
return 1;
else
return 0.5;
}
// Standardize, then evaluate the standard normal CDF via erfc.
value = (value-mean)/stddev;
return 0.5 * std::erfc(-value / std::sqrt(2.0));
}
// Number of samples added so far (kept as a double).
double n;
// 2x2 matrix maintained by the recursive least squares update in add().
matrix<double,2,2> R;
// Current line parameters: w(0) is the slope, w(1) the intercept.
matrix<double,2,1> w;
// Running accumulation of squared residuals, see add().
double residual_squared;
};
// ----------------------------------------------------------------------------------------
template <
    typename T
    >
double probability_gradient_less_than (
    const T& container,
    double thresh
)
{
    // Feed every element of the container, in iteration order, into a fresh
    // running_gradient and let it do the statistical test.
    running_gradient g;
    for (auto&& sample : container)
    {
        g.add(sample);
    }

    // make sure requires clause is not broken
    DLIB_ASSERT(g.current_n() > 2,
        "\t double probability_gradient_less_than()"
        << "\n\t You need more than 2 elements in the given container to call this function."
    );

    const double probability = g.probability_gradient_less_than(thresh);
    return probability;
}
template <
    typename T
    >
double probability_gradient_greater_than (
    const T& container,
    double thresh
)
{
    // Feed every element of the container, in iteration order, into a fresh
    // running_gradient and let it do the statistical test.
    running_gradient g;
    for (auto&& sample : container)
    {
        g.add(sample);
    }

    // make sure requires clause is not broken
    DLIB_ASSERT(g.current_n() > 2,
        "\t double probability_gradient_greater_than()"
        << "\n\t You need more than 2 elements in the given container to call this function."
    );

    const double probability = g.probability_gradient_greater_than(thresh);
    return probability;
}
// ----------------------------------------------------------------------------------------
template <
    typename T
    >
double find_upper_quantile (
    const T& container_,
    double quantile
)
{
    DLIB_CASSERT(0 <= quantile && quantile <= 1.0);

    // Work on a private copy because nth_element reorders its input.
    std::vector<double> container(container_.begin(), container_.end());
    DLIB_CASSERT(container.size() > 0);

    // Position, in ascending order, of the element that has roughly a fraction
    // `quantile` of the values above it.
    const size_t idx_upper = std::round((container.size()-1)*(1-quantile));
    const auto upper_pos = container.begin()+idx_upper;

    // Partial selection: only the element at upper_pos needs to end up in its
    // sorted position.
    std::nth_element(container.begin(), upper_pos, container.end());
    return *upper_pos;
}
// ----------------------------------------------------------------------------------------
template <
typename T
>
size_t count_steps_without_decrease (
const T& container,
double probability_of_decrease = 0.51
)
{
// make sure requires clause is not broken
DLIB_ASSERT(0.5 < probability_of_decrease && probability_of_decrease < 1,
"\t size_t count_steps_without_decrease()"
<< "\n\t probability_of_decrease: "<< probability_of_decrease
);
running_gradient g;
size_t count = 0;
size_t j = 0;
for (auto i = container.rbegin(); i != container.rend(); ++i)
{
++j;
g.add(*i);
if (g.current_n() > 2)
{
// Note that this only looks backwards because we are looping over the
// container backwards. So here we are really checking if the gradient isn't
// decreasing.
double prob_decreasing = g.probability_gradient_greater_than(0);
// If we aren't confident things are decreasing.
if (prob_decreasing < probability_of_decrease)
count = j;
}
}
return count;
}
// ----------------------------------------------------------------------------------------
template <
typename T
>
size_t count_steps_without_decrease_robust (
const T& container,
double probability_of_decrease = 0.51,
double quantile_discard = 0.10
)
{
// make sure requires clause is not broken
DLIB_ASSERT(0 <= quantile_discard && quantile_discard <= 1);
DLIB_ASSERT(0.5 < probability_of_decrease && probability_of_decrease < 1,
"\t size_t count_steps_without_decrease_robust()"
<< "\n\t probability_of_decrease: "<< probability_of_decrease
);
if (container.size() == 0)
return 0;
const auto quantile_thresh = find_upper_quantile(container, quantile_discard);
running_gradient g;
size_t count = 0;
size_t j = 0;
for (auto i = container.rbegin(); i != container.rend(); ++i)
{
++j;
// ignore values that are too large
if (*i <= quantile_thresh)
g.add(*i);
if (g.current_n() > 2)
{
// Note that this only looks backwards because we are looping over the
// container backwards. So here we are really checking if the gradient isn't
// decreasing.
double prob_decreasing = g.probability_gradient_greater_than(0);
// If we aren't confident things are decreasing.
if (prob_decreasing < probability_of_decrease)
count = j;
}
}
return count;
}
// ----------------------------------------------------------------------------------------
template <
typename T
>
size_t count_steps_without_increase (
const T& container,
double probability_of_increase = 0.51
)
{
// make sure requires clause is not broken
DLIB_ASSERT(0.5 < probability_of_increase && probability_of_increase < 1,
"\t size_t count_steps_without_increase()"
<< "\n\t probability_of_increase: "<< probability_of_increase
);
running_gradient g;
size_t count = 0;
size_t j = 0;
for (auto i = container.rbegin(); i != container.rend(); ++i)
{
++j;
g.add(*i);
if (g.current_n() > 2)
{
// Note that this only looks backwards because we are looping over the
// container backwards. So here we are really checking if the gradient isn't
// increasing.
double prob_increasing = g.probability_gradient_less_than(0);
// If we aren't confident things are increasing.
if (prob_increasing < probability_of_increase)
count = j;
}
}
return count;
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_RuNNING_GRADIENT_Hh_

View File

@@ -0,0 +1,276 @@
// Copyright (C) 2016 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_
#ifdef DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_
namespace dlib
{
class running_gradient
{
/*!
WHAT THIS OBJECT REPRESENTS
This object is a tool for estimating if a noisy sequence of numbers is
trending up or down and by how much. It does this by finding the least
squares fit of a line to the data and then allows you to perform a
statistical test on the slope of that line.
!*/
public:
running_gradient (
);
/*!
ensures
- #current_n() == 0
!*/
void clear(
);
/*!
ensures
- #current_n() == 0
- this object has its initial value
- clears all memory of any previous data points
!*/
double current_n (
) const;
/*!
ensures
- returns the number of values given to this object by add().
!*/
void add(
double y
);
/*!
ensures
- Updates the gradient() and standard_error() estimates in this object
based on the new y value.
- #current_n() == current_n() + 1
!*/
double gradient (
) const;
/*!
requires
- current_n() > 1
ensures
- If we consider the values given to add() as time series data, we can
estimate the rate-of-change of those values. That is, how much,
typically, do those values change from sample to sample? The gradient()
function returns the current estimate. It does this by finding the least
squares fit of a line to the data given to add() and returning the slope
of this line.
!*/
double intercept (
) const;
/*!
requires
- current_n() > 0
ensures
- This class fits a line to the time series data given to add(). This
function returns the intercept of that line while gradient() returns the
slope of that line. This means that, for example, the next point that
add() will see, as predicted by this best fit line, is the value
intercept() + current_n()*gradient().
!*/
double standard_error (
) const;
/*!
requires
- current_n() > 2
ensures
- returns the standard deviation of the estimate of gradient().
!*/
double probability_gradient_less_than (
double thresh
) const;
/*!
requires
- current_n() > 2
ensures
- If we can assume the values given to add() are linearly related to each
other and corrupted by Gaussian additive noise then our estimate of
gradient() is a random variable with a mean value of gradient() and a
standard deviation of standard_error(). This lets us compute the
probability that the true gradient of the data is less than thresh, which
is what this function returns.
!*/
double probability_gradient_greater_than (
double thresh
) const;
/*!
requires
- current_n() > 2
ensures
- returns 1-probability_gradient_less_than(thresh)
!*/
};
void serialize (
const running_gradient& item,
std::ostream& out
);
/*!
provides serialization support
!*/
void deserialize (
running_gradient& item,
std::istream& in
);
/*!
provides deserialization support
!*/
// ----------------------------------------------------------------------------------------
template <
typename T
>
double probability_gradient_less_than (
const T& container,
double thresh
);
/*!
requires
- container must be a container of double values that can be enumerated with a
range based for loop.
- The container must contain more than 2 elements.
ensures
- Puts all the elements of container into a running_gradient object, R, and
then returns R.probability_gradient_less_than(thresh).
!*/
template <
typename T
>
double probability_gradient_greater_than (
const T& container,
double thresh
);
/*!
requires
- container must be a container of double values that can be enumerated with a
range based for loop.
- The container must contain more than 2 elements.
ensures
- Puts all the elements of container into a running_gradient object, R, and
then returns R.probability_gradient_greater_than(thresh).
!*/
// ----------------------------------------------------------------------------------------
template <
typename T
>
size_t count_steps_without_decrease (
const T& container,
double probability_of_decrease = 0.51
);
/*!
requires
- container must be a container of double values that can be enumerated with
.rbegin() and .rend().
- 0.5 < probability_of_decrease < 1
ensures
- If you think of the contents of container as a potentially noisy time series,
then this function returns a count of how long the time series has gone
without noticeably decreasing in value. It does this by adding the
elements into a running_gradient object and counting how many elements,
starting with container.back(), that you need to examine before you are
confident that the series has been decreasing in value. Here, "confident of
decrease" means that the probability of decrease is >= probability_of_decrease.
- Setting probability_of_decrease to 0.51 means we count until we see even a
small hint of decrease, whereas a larger value of 0.99 would return a larger
count since it keeps going until it is nearly certain the time series is
decreasing.
- The max possible output from this function is container.size().
!*/
template <
typename T
>
size_t count_steps_without_decrease_robust (
const T& container,
double probability_of_decrease = 0.51,
double quantile_discard = 0.10
);
/*!
requires
- container must be a container of double values that can be enumerated with
.begin() and .end() as well as .rbegin() and .rend().
- 0.5 < probability_of_decrease < 1
- 0 <= quantile_discard <= 1
ensures
- This function behaves just like
count_steps_without_decrease(container,probability_of_decrease) except that
it ignores values in container that are in the upper quantile_discard
quantile. So for example, if the quantile discard is 0.1 then the 10%
largest values in container are ignored.
!*/
// ----------------------------------------------------------------------------------------
template <
typename T
>
size_t count_steps_without_increase (
const T& container,
double probability_of_increase = 0.51
);
/*!
requires
- container must be a container of double values that can be enumerated with
.rbegin() and .rend().
- 0.5 < probability_of_increase < 1
ensures
- If you think of the contents of container as a potentially noisy time series,
then this function returns a count of how long the time series has gone
without noticeably increasing in value. It does this by adding the
elements into a running_gradient object and counting how many elements,
starting with container.back(), that you need to examine before you are
confident that the series has been increasing in value. Here, "confident of
increase" means that the probability of increase is >= probability_of_increase.
- Setting probability_of_increase to 0.51 means we count until we see even a
small hint of increase, whereas a larger value of 0.99 would return a larger
count since it keeps going until it is nearly certain the time series is
increasing.
!*/
// ----------------------------------------------------------------------------------------
template <
typename T
>
double find_upper_quantile (
const T& container,
double quantile
);
/*!
requires
- container must be a container of double values that can be enumerated with
.begin() and .end().
- 0 <= quantile <= 1
- container.size() > 0
ensures
- Finds and returns the value such that roughly a fraction quantile of the
values in container are greater than it. For example, 0.5 would find the
median value in container while 0.1 would find the value that lower bounded
the 10% largest values in container.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_

View File

@@ -0,0 +1,269 @@
// Copyright (C) 2012 Emanuele Cesena (emanuele.cesena@gmail.com), Davis E. King
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SAMMoN_Hh_
#define DLIB_SAMMoN_Hh_
#include "sammon_abstract.h"
#include "../matrix.h"
#include "../algs.h"
#include "dpca.h"
#include <vector>
namespace dlib
{
// Function object that computes the Sammon projection of a set of column
// vectors into a num_dims dimensional space. Both public operator() overloads
// validate their arguments and then forward to do_sammon_projection().
class sammon_projection
{
public:
// ------------------------------------------------------------------------------------
// Convenience overload: returns the projected vectors and discards the final
// error value. Runs with do_sammon_projection()'s default iteration limit and
// error delta. Returns an empty vector when data is empty.
template <typename matrix_type>
std::vector<matrix<double,0,1> > operator() (
const std::vector<matrix_type>& data,
const long num_dims
)
{
// make sure requires clause is not broken
DLIB_ASSERT(num_dims > 0,
"\t std::vector<matrix<double,0,1> > sammon_projection::operator()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t num_dims: " << num_dims
);
std::vector<matrix<double,0,1> > result; // projections
if (data.size() == 0)
{
return result;
}
// The per-element checks below loop over all of data, so they are only
// compiled in when assertions are enabled.
#ifdef ENABLE_ASSERTS
DLIB_ASSERT(0 < num_dims && num_dims <= data[0].size(),
"\t std::vector<matrix<double,0,1> > sammon_projection::operator()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t data.size(): " << data.size()
<< "\n\t num_dims: " << num_dims
<< "\n\t data[0].size(): " << data[0].size()
);
for (unsigned long i = 0; i < data.size(); ++i)
{
DLIB_ASSERT(is_col_vector(data[i]) && data[i].size() == data[0].size(),
"\t std::vector<matrix<double,0,1> > sammon_projection::operator()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t data["<<i<<"].size(): " << data[i].size()
<< "\n\t data[0].size(): " << data[0].size()
<< "\n\t is_col_vector(data["<<i<<"]): " << is_col_vector(data[i])
);
}
#endif
double err; // error (discarded)
do_sammon_projection(data, num_dims, result, err);
return result;
}
// ------------------------------------------------------------------------------------
// Full overload: writes the projected vectors into result and the error value
// into err, with caller-supplied stopping parameters. When data is empty,
// result is cleared and err is set to 0.
template <typename matrix_type>
void operator() (
const std::vector<matrix_type>& data,
const long num_dims,
std::vector<matrix<double,0,1> >& result,
double &err,
const unsigned long num_iters = 1000,
const double err_delta = 1.0e-9
)
{
// make sure requires clause is not broken
DLIB_ASSERT(num_dims > 0 && num_iters > 0 && err_delta > 0.0,
"\t std::vector<matrix<double,0,1> > sammon_projection::operator()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t data.size(): " << data.size()
<< "\n\t num_dims: " << num_dims
<< "\n\t num_iters: " << num_iters
<< "\n\t err_delta: " << err_delta
);
if (data.size() == 0)
{
result.clear();
err = 0;
return;
}
// The per-element checks below loop over all of data, so they are only
// compiled in when assertions are enabled.
#ifdef ENABLE_ASSERTS
DLIB_ASSERT(0 < num_dims && num_dims <= data[0].size(),
"\t std::vector<matrix<double,0,1> > sammon_projection::operator()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t data.size(): " << data.size()
<< "\n\t num_dims: " << num_dims
<< "\n\t data[0].size(): " << data[0].size()
);
for (unsigned long i = 0; i < data.size(); ++i)
{
DLIB_ASSERT(is_col_vector(data[i]) && data[i].size() == data[0].size(),
"\t std::vector<matrix<double,0,1> > sammon_projection::operator()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t data["<<i<<"].size(): " << data[i].size()
<< "\n\t data[0].size(): " << data[0].size()
<< "\n\t is_col_vector(data["<<i<<"]): " << is_col_vector(data[i])
);
}
#endif
do_sammon_projection(data, num_dims, result, err, num_iters, err_delta);
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
private:
void compute_relative_distances(
matrix<double,0,1>& dist, // relative distances (output)
matrix<double,0,0>& data, // input data (matrix whose columns are the input vectors)
double eps_ratio = 1.0e-7 // to compute the minimum distance eps
)
/*!
requires
- dist.size() == comb( data.nc(), 2 ), preallocated
- eps_ratio > 0
ensures
- for all 0 <= j < i < data.nc(), with k = i(i-1)/2 + j:
dist(k) == length(colm(data,i) - colm(data,j)), lower bounded by eps,
where eps == eps_ratio * mean(the distances before lower bounding)
!*/
{
const long N = data.nc(); // num of points
double eps; // minimum distance, forced to avoid vectors collision
// computed at runtime as eps_ratio * mean(vectors distances)
// Fill the packed lower triangle: pairs are visited as (1,0), (2,0), (2,1), ...
for (int k = 0, i = 1; i < N; ++i)
for (int j = 0; j < i; ++j)
dist(k++) = length(colm(data, i) - colm(data, j));
eps = eps_ratio * mean(dist);
dist = lowerbound(dist, eps);
}
// ----------------------------------------------------------------------------------------
template <typename matrix_type>
void do_sammon_projection(
const std::vector<matrix_type>& data, // input data
unsigned long num_dims, // dimension of the reduced space
std::vector<matrix<double,0,1> >& result, // projections (output)
double &err, // error (output)
unsigned long num_iters = 1000, // max num of iterations: stop condition
const double err_delta = 1.0e-9 // delta error: stop condition
)
/*!
requires
- matrix_type should be a kind of dlib::matrix<double,N,1>
- data.size() > 0 (data[0] is accessed unconditionally)
- num_dims > 0
- num_iters > 0
- err_delta > 0
ensures
- result == a set of matrix<double,num_dims,1> objects that represent
the Sammon's projections of data vectors.
- err == the estimated error done in the projection, with the extra
property that err(at previous iteration) - err < err_delta
- NOTE(review): when the loop stops because the error improvement fell
below err_delta, err still holds the value from before the last update
of the projections, while result reflects that last update.
!*/
{
// other params
const double mf = 0.3; // magic factor
matrix<double> mdata; // input data as matrix
matrix<double> projs; // projected vectors, i.e. output data as matrix
// std::vector<matrix> -> matrix
mdata.set_size(data[0].size(), data.size());
for (unsigned int i = 0; i < data.size(); i++)
set_colm(mdata, i) = data[i];
const long N = mdata.nc(); // num of points
const long d = num_dims; // size of the reduced space
const long nd = N * (N - 1) / 2; // num of pairs of points = size of the distances vectors
matrix<double, 0, 1> dsij, inv_dsij; // d*_ij: pair-wise distances in the input space (and inverses)
dsij.set_size(nd, 1);
inv_dsij.set_size(nd, 1);
double ic; // 1.0 / sum of dsij
matrix<double, 0, 1> dij; // d_ij: pair-wise distances in the reduced space
dij.set_size(nd, 1);
matrix<double, 0, 0> dE, dE2, dtemp; // matrices representing error partial derivatives
dE.set_size(d, N);
dE2.set_size(d, N);
dtemp.set_size(d, N);
matrix<double, 0, 1> inv_dij, alpha; // utility vectors used to compute the partial derivatives
inv_dij.set_size(N, 1); // inv_dij is 1.0/dij, but we only need it column-wise
alpha.set_size(N, 1); // (slightly wasting a bit of computation)
// alpha = 1.0/dij - 1.0/dsij, again column-wise
// initialize projs with PCA
discriminant_pca<matrix<double> > dpca;
for (int i = 0; i < mdata.nc(); ++i)
{
dpca.add_to_total_variance(colm(mdata, i));
}
matrix<double> mat = dpca.dpca_matrix_of_size(num_dims);
projs = mat * mdata;
// compute dsij, inv_dsij and ic
compute_relative_distances(dsij, mdata);
inv_dsij = 1.0 / dsij;
ic = 1.0 / sum(dsij);
// compute dij and err
compute_relative_distances(dij, projs);
err = ic * sum(pointwise_multiply(squared(dij - dsij), inv_dsij));
// start iterating
while (num_iters--)
{
// compute dE, dE2 progressively column by column
for (int p = 0; p < N; ++p)
{
// compute
// - alpha_p, the column vector with 1/d_pj - 1/d*_pj
// - dtemp, the matrix with the p-th column repeated all along
//TODO: optimize constructions
for (int i = 0; i < N; ++i)
{
// Index of the unordered pair {i,p} in the packed distance vectors:
// larger*(larger-1)/2 + smaller. The value is unused when i == p.
int pos = (i < p) ? p * (p - 1) / 2 + i : i * (i - 1) / 2 + p;
inv_dij(i) = (i == p) ? 0.0 : 1.0 / dij(pos);
alpha(i) = (i == p) ? 0.0 : inv_dij(i) - inv_dsij(pos);
set_colm(dtemp, i) = colm(projs, p);
}
// dtemp now holds (projection p) - (projection i) in column i
dtemp -= projs;
set_colm(dE, p) = dtemp * alpha;
double sum_alpha = sum(alpha);
set_colm(dE2, p) = abs( sum_alpha + squared(dtemp) * cubed(inv_dij) );
}
// compute the update projections
// (element-wise step of dE/dE2, scaled by the magic factor mf)
projs += pointwise_multiply(dE, mf * reciprocal(dE2));
// compute new dij and error
compute_relative_distances(dij, projs);
double err_new = ic * sum( pointwise_multiply(squared(dij - dsij), inv_dsij) );
// Stop once the improvement drops below err_delta. Note that err is not
// updated to err_new on this path.
if (err - err_new < err_delta)
break;
err = err_new;
}
// matrix -> std::vector<matrix>
result.clear();
for (int i = 0; i < projs.nc(); ++i)
result.push_back(colm(projs, i));
}
};
} // namespace dlib
#endif // DLIB_SAMMoN_Hh_

View File

@@ -0,0 +1,117 @@
// Copyright (C) 2012 Emanuele Cesena (emanuele.cesena@gmail.com), Davis E. King
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SAMMoN_ABSTRACT_Hh_
#ifdef DLIB_SAMMoN_ABSTRACT_Hh_
#include "../matrix/matrix_abstract.h"
#include <vector>
namespace dlib
{
class sammon_projection
{
/*!
WHAT THIS OBJECT REPRESENTS
This is a function object that computes the Sammon projection of a set
of N points in an L-dimensional vector space onto a d-dimensional space
(d <= L), according to the paper:
A Nonlinear Mapping for Data Structure Analysis (1969) by J.W. Sammon
The current implementation is a vectorized version of the original algorithm.
!*/
public:
sammon_projection(
);
/*!
ensures
- this object is properly initialized
!*/
template <typename matrix_type>
std::vector<matrix<double,0,1> > operator() (
const std::vector<matrix_type>& data,
const long num_dims
);
/*!
requires
- num_dims > 0
- matrix_type should be a kind of dlib::matrix of doubles capable
of representing column vectors.
- for all valid i:
- is_col_vector(data[i]) == true
- data[0].size() == data[i].size()
(i.e. all the vectors in data must have the same dimensionality)
- if (data.size() != 0) then
- 0 < num_dims <= data[0].size()
(i.e. you can't project into a higher dimension than the input data,
only to the same or a lower dimension.)
ensures
- This routine computes Sammon's dimensionality reduction method based on the
given input data. It will attempt to project the contents of data into a
num_dims dimensional space that preserves relative distances between the
input data points.
- This function returns a std::vector, OUT, such that:
- OUT == a set of column vectors that represent the Sammon projection of
the input data vectors.
- OUT.size() == data.size()
- for all valid i:
- OUT[i].size() == num_dims
- OUT[i] == the Sammon projection of the input vector data[i]
!*/
template <typename matrix_type>
void operator() (
const std::vector<matrix_type>& data,
const long num_dims,
std::vector<matrix<double,0,1> >& result,
double &err,
const unsigned long num_iters = 1000,
const double err_delta = 1.0e-9
);
/*!
requires
- num_iters > 0
- err_delta > 0
- num_dims > 0
- matrix_type should be a kind of dlib::matrix of doubles capable
of representing column vectors.
- for all valid i:
- is_col_vector(data[i]) == true
- data[0].size() == data[i].size()
(i.e. all the vectors in data must have the same dimensionality)
- if (data.size() != 0) then
- 0 < num_dims <= data[0].size()
(i.e. you can't project into a higher dimension than the input data,
only to a lower dimension.)
ensures
- This routine computes Sammon's dimensionality reduction method based on the
given input data. It will attempt to project the contents of data into a
num_dims dimensional space that preserves relative distances between the
input data points.
- #err == the final error value at the end of the algorithm. The goal of Sammon's
algorithm is to find a lower dimensional projection of the input data that
preserves the relative distances between points. The value in #err is a measure
of the total error at the end of the algorithm. So smaller values indicate
a better projection was found than if a large value is returned via #err.
- Sammon's algorithm will run until either num_iters iterations have executed
or the change in error from one iteration to the next is less than err_delta.
- Upon completion, the output of Sammon's projection is stored into #result, in
particular, we will have:
- #result == a set of column vectors that represent the Sammon projection of
the input data vectors.
- #result.size() == data.size()
- for all valid i:
- #result[i].size() == num_dims
- #result[i] == the Sammon projection of the input vector data[i]
!*/
};
}
#endif // DLIB_SAMMoN_ABSTRACT_Hh_

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,618 @@
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_VECTOR_NORMALIZER_FRoBMETRIC_Hh_
#define DLIB_VECTOR_NORMALIZER_FRoBMETRIC_Hh_
#include "vector_normalizer_frobmetric_abstract.h"
#include "../matrix.h"
#include "../optimization.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
    typename matrix_type
    >
struct frobmetric_training_sample
{
    // One anchor vector together with the vectors that should end up near
    // to it and the vectors that should end up far from it.
    matrix_type anchor_vect;
    std::vector<matrix_type> near_vects;
    std::vector<matrix_type> far_vects;

    // Number of (anchor, near, far) triplets this sample defines: every
    // near vector is paired with every far vector.
    unsigned long num_triples (
    ) const
    {
        typedef typename std::vector<matrix_type>::size_type count_type;
        const count_type triple_count = near_vects.size() * far_vects.size();
        return triple_count;
    }

    // Drops all near/far vectors.  anchor_vect is left untouched.
    void clear()
    {
        near_vects.clear();
        far_vects.clear();
    }
};
template <
    typename matrix_type
    >
void serialize(const frobmetric_training_sample<matrix_type>& item, std::ostream& out)
{
    // A format version tag is written first so deserialize() can reject
    // streams it does not understand.
    const int version = 1;
    serialize(version, out);

    // Payload, in the same order deserialize() reads it back.
    serialize(item.anchor_vect, out);
    serialize(item.near_vects, out);
    serialize(item.far_vects, out);
}
template <
    typename matrix_type
    >
void deserialize(frobmetric_training_sample<matrix_type>& item, std::istream& in)
{
    // The stream starts with the format version written by serialize().
    int version = 0;
    deserialize(version, in);
    if (version != 1)
    {
        throw serialization_error("Unexpected version found while deserializing dlib::frobmetric_training_sample.");
    }

    // Payload, in the order serialize() wrote it.
    deserialize(item.anchor_vect, in);
    deserialize(item.near_vects, in);
    deserialize(item.far_vects, in);
}
// ----------------------------------------------------------------------------------------
template <
typename matrix_type
>
class vector_normalizer_frobmetric
{
public:
typedef typename matrix_type::mem_manager_type mem_manager_type;
typedef typename matrix_type::type scalar_type;
typedef matrix_type result_type;
private:
// Internal, anchor-free form of a training sample.  train() fills these with
// difference vectors: each entry is (anchor_vect - near_vects[j]) or
// (anchor_vect - far_vects[j]) of the corresponding user supplied sample, so
// the anchor itself no longer needs to be stored.
struct compact_frobmetric_training_sample
{
// anchor minus each near vector, in the user's original order
std::vector<matrix_type> near_vects;
// anchor minus each far vector, in the user's original order
std::vector<matrix_type> far_vects;
};
// Function object giving the value of the objective that train() maximizes
// with find_max_box_constrained().  It holds references only, so the samples,
// Aminus and bias objects passed to the constructor must outlive it.
//
// Side effect: every call overwrites the referenced Aminus matrix.  The
// derivative object reads that same matrix rather than recomputing it.
struct objective
{
objective (
const std::vector<compact_frobmetric_training_sample>& samples_,
matrix<double,0,0,mem_manager_type>& Aminus_,
const matrix<double,0,1,mem_manager_type>& bias_
) : samples(samples_), Aminus(Aminus_), bias(bias_) {}
// u holds one variable per (sample, near, far) triple, laid out sample by
// sample with the near index as the outer loop and the far index as the
// inner loop.  Returns dot(u,bias) - 0.5*sum(squared(Aminus)).
double operator()(const matrix<double,0,1,mem_manager_type>& u) const
{
long idx = 0;
// The dimensionality is read from the first far vector of the first sample,
// so samples[0] must contain at least one far vector.
const long dims = samples[0].far_vects[0].size();
// Here we compute \hat A from the paper, which we refer to as just A in
// the code.
matrix<double,0,0,mem_manager_type> A(dims,dims);
A = 0;
std::vector<double> ufar, unear;
for (unsigned long i = 0; i < samples.size(); ++i)
{
ufar.assign(samples[i].far_vects.size(),0);
unear.assign(samples[i].near_vects.size(),0);
// Fold the per-triple variables into one weight per vector: each u value
// is added to the weight of its near vector and subtracted from the
// weight of its far vector.
for (unsigned long j = 0; j < unear.size(); ++j)
{
for (unsigned long k = 0; k < ufar.size(); ++k)
{
const double val = u(idx++);
ufar[k] -= val;
unear[j] += val;
}
}
// A is the weighted sum of the outer products v*trans(v) of all the
// difference vectors.
for (unsigned long j = 0; j < unear.size(); ++j)
A += unear[j]*samples[i].near_vects[j]*trans(samples[i].near_vects[j]);
for (unsigned long j = 0; j < ufar.size(); ++j)
A += ufar[j]*samples[i].far_vects[j]*trans(samples[i].far_vects[j]);
}
eigenvalue_decomposition<matrix<double,0,0,mem_manager_type> > ed(make_symmetric(A));
// NOTE(review): upperbound(x,0) presumably clamps each eigenvalue to be <= 0,
// which would make Aminus the negative semidefinite part of A -- confirm
// against the dlib matrix documentation.
Aminus = ed.get_pseudo_v()*diagm(upperbound(ed.get_real_eigenvalues(),0))*trans(ed.get_pseudo_v());
// Do this to avoid numeric instability later on since the above
// computation can make Aminus slightly non-symmetric.
Aminus = make_symmetric(Aminus);
return dot(u,bias) - 0.5*sum(squared(Aminus));
}
private:
const std::vector<compact_frobmetric_training_sample>& samples;
// Output of operator(); shared with the derivative object.
matrix<double,0,0,mem_manager_type>& Aminus;
const matrix<double,0,1,mem_manager_type>& bias;
};
// Function object giving the gradient of the objective with respect to u.
// It does not look at u at all: it relies on the referenced Aminus matrix
// having been filled in by the objective object for the same u.  It holds
// references only, so samples, Aminus and bias must outlive it.
struct derivative
{
derivative (
unsigned long num_triples_,
const std::vector<compact_frobmetric_training_sample>& samples_,
matrix<double,0,0,mem_manager_type>& Aminus_,
const matrix<double,0,1,mem_manager_type>& bias_
) : num_triples(num_triples_), samples(samples_), Aminus(Aminus_), bias(bias_) {}
// Returns a column vector with num_triples elements, using the same
// (sample, near, far) element ordering that the objective uses for u.
matrix<double,0,1,mem_manager_type> operator()(const matrix<double,0,1,mem_manager_type>& ) const
{
// Note that Aminus is a function of u (the argument to this function), but
// since Aminus will have been computed already by the most recent call to
// the objective function we don't need to do anything with u. We can just
// use Aminus right away.
matrix<double,0,1,mem_manager_type> grad(num_triples);
long idx = 0;
// Unlike in objective, ufar/unear here hold one scalar per difference
// vector v: the sum of the element-wise product of Aminus with v*trans(v)
// (mathematically trans(v)*Aminus*v).
std::vector<double> ufar, unear;
for (unsigned long i = 0; i < samples.size(); ++i)
{
ufar.resize(samples[i].far_vects.size());
unear.resize(samples[i].near_vects.size());
for (unsigned long j = 0; j < unear.size(); ++j)
unear[j] = sum(pointwise_multiply(Aminus, samples[i].near_vects[j]*trans(samples[i].near_vects[j])));
for (unsigned long j = 0; j < ufar.size(); ++j)
ufar[j] = sum(pointwise_multiply(Aminus, samples[i].far_vects[j]*trans(samples[i].far_vects[j])));
// One gradient entry per (near j, far k) pair, near index outermost so
// idx walks the triples in the same order as u.
for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j)
{
for (unsigned long k = 0; k < samples[i].far_vects.size(); ++k)
{
grad(idx) = bias(idx) + ufar[k]-unear[j];
idx++;
}
}
}
return grad;
}
private:
// Total number of triples over all samples, i.e. the length of the gradient.
const unsigned long num_triples;
const std::vector<compact_frobmetric_training_sample>& samples;
// Written by the objective object, only read here.
matrix<double,0,0,mem_manager_type>& Aminus;
const matrix<double,0,1,mem_manager_type>& bias;
};
class custom_stop_strategy
{
    // Stop strategy given to the box constrained solver by train().  The
    // search ends once the largest gradient element that is not held in place
    // by a bound falls below eps_, or once more than max_iter_ calls have
    // been made.  The bounds assumed here are 0 and C_/grad.size().
public:
    custom_stop_strategy(
        double C_,
        double eps_,
        bool be_verbose_,
        unsigned long max_iter_
    ) :
        _verbose(be_verbose_),
        _max_iter(max_iter_),
        _cur_iter(0),
        _c(C_),
        _gradient_thresh(eps_)
    {
    }

    // u is the current solution and grad the gradient at u.  The unnamed
    // middle argument is ignored.  Returns true if the solver should keep
    // iterating.
    template <typename T>
    bool should_continue_search (
        const T& u,
        const double ,
        const T& grad
    )
    {
        ++_cur_iter;

        double largest_free_gradient = 0;
        for (long idx = 0; idx < grad.size(); ++idx)
        {
            // A variable sitting on a bound with the gradient pushing it
            // further out of the box can't move, so it is not counted.
            const bool pinned_low = (0 >= u(idx) && grad(idx) > 0);
            const bool pinned_high = (_c/grad.size() <= u(idx) && grad(idx) < 0);
            if (pinned_low || pinned_high)
                continue;

            largest_free_gradient = std::max(std::abs(grad(idx)), largest_free_gradient);
        }

        if (_verbose)
            std::cout << "iteration: " << _cur_iter << " max_gradient: "<< largest_free_gradient << std::endl;

        // Converged: every gradient element not blocked by a bound is small.
        const bool converged = largest_free_gradient < _gradient_thresh;
        // Gave up: the iteration budget has been exceeded.
        const bool out_of_iterations = _cur_iter > _max_iter;

        return !(converged || out_of_iterations);
    }

private:
    bool _verbose;
    unsigned long _max_iter;
    unsigned long _cur_iter;
    double _c;
    double _gradient_thresh;
};
public:
// Creates an untrained normalizer with the default solver settings.
vector_normalizer_frobmetric (
)
{
    // solver settings
    C = 1;
    eps = 0.1;
    max_iter = 5000;

    // behavior switches
    verbose = false;
    _use_identity_matrix_prior = false;
}
// Tells you if train() will use the identity matrix prior.
bool uses_identity_matrix_prior () const
{
    return _use_identity_matrix_prior;
}
// Turns the identity matrix prior on or off for later calls to train().
void set_uses_identity_matrix_prior (bool use_prior)
{
    _use_identity_matrix_prior = use_prior;
}
// Makes train() print one progress line per solver iteration to std::cout.
void be_verbose ()
{
    verbose = true;
}
// Sets the gradient threshold train() uses to decide it has converged.
// eps_ must be greater than 0.
void set_epsilon (double eps_)
{
    // make sure requires clause is not broken
    DLIB_ASSERT(eps_ > 0,
        "\t void vector_normalizer_frobmetric::set_epsilon(eps_)"
        << "\n\t invalid inputs were given to this function"
        << "\n\t eps: " << eps_
        );

    eps = eps_;
}
// Gives back the convergence threshold set by set_epsilon().
double get_epsilon () const
{
    return eps;
}
// Sets the regularization parameter.  train() uses C divided by the number
// of training triples as the upper bound of each solver variable.  C_ must
// be greater than 0.
void set_c (double C_)
{
    // make sure requires clause is not broken
    DLIB_ASSERT(C_ > 0,
        "\t void vector_normalizer_frobmetric::set_c()"
        << "\n\t C_ must be greater than 0"
        << "\n\t C_: " << C_
        << "\n\t this: " << this
        );

    C = C_;
}
// Sets the iteration budget that train() hands to its stop strategy.
void set_max_iterations (unsigned long max_iterations)
{
    max_iter = max_iterations;
}
// Gives back the iteration budget set by set_max_iterations().
unsigned long get_max_iterations () const
{
    return max_iter;
}
// Gives back the regularization parameter set by set_c().
double get_c () const
{
    return C;
}
// Stops train() from printing progress information.
void be_quiet ()
{
    verbose = false;
}
// Learns the transformation from the given training samples.
//
// Steps:
//   1. (assert builds only) check that every vector is a column vector of the
//      same non-zero size and that every sample defines at least one triple.
//   2. m = mean of all the anchor vectors.
//   3. Replace each sample by its anchor-minus-near / anchor-minus-far
//      difference vectors and set up the bias vector.
//   4. Maximize the objective over u subject to 0 <= u(i) <= C/num_triples.
//   5. Build tform from the eigen decomposition of -Aminus and pre-apply it
//      to m.
//
// On return tform holds the learned transform and m holds tform times the
// mean anchor vector.
void train (
const std::vector<frobmetric_training_sample<matrix_type> >& samples
)
{
// make sure requires clause is not broken
DLIB_ASSERT(samples.size() > 0,
"\tvoid vector_normalizer_frobmetric::train()"
<< "\n\t you have to give a nonempty set of samples to this function"
);
#ifdef ENABLE_ASSERTS
{
const long dims = samples[0].anchor_vect.size();
DLIB_ASSERT(dims != 0,
"\tvoid vector_normalizer_frobmetric::train()"
<< "\n\t The dimension of the input vectors can't be zero."
);
for (unsigned long i = 0; i < samples.size(); ++i)
{
DLIB_ASSERT(is_col_vector(samples[i].anchor_vect),
"\tvoid vector_normalizer_frobmetric::train()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t i: " << i
);
DLIB_ASSERT(samples[i].anchor_vect.size() == dims,
"\tvoid vector_normalizer_frobmetric::train()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t i: " << i
<< "\n\t dims: " << dims
<< "\n\t samples[i].anchor_vect.size(): " << samples[i].anchor_vect.size()
);
DLIB_ASSERT(samples[i].num_triples() != 0,
"\tvoid vector_normalizer_frobmetric::train()"
<< "\n\t It is illegal for a training sample to have no data in it"
<< "\n\t i: " << i
);
for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j)
{
DLIB_ASSERT(is_col_vector(samples[i].near_vects[j]),
"\tvoid vector_normalizer_frobmetric::train()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t i: " << i
<< "\n\t j: " << j
);
DLIB_ASSERT(samples[i].near_vects[j].size() == dims,
"\tvoid vector_normalizer_frobmetric::train()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t i: " << i
<< "\n\t j: " << j
<< "\n\t dims: " << dims
<< "\n\t samples[i].near_vects[j].size(): " << samples[i].near_vects[j].size()
);
}
for (unsigned long j = 0; j < samples[i].far_vects.size(); ++j)
{
DLIB_ASSERT(is_col_vector(samples[i].far_vects[j]),
"\tvoid vector_normalizer_frobmetric::train()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t i: " << i
<< "\n\t j: " << j
);
DLIB_ASSERT(samples[i].far_vects[j].size() == dims,
"\tvoid vector_normalizer_frobmetric::train()"
<< "\n\t Invalid inputs were given to this function."
<< "\n\t i: " << i
<< "\n\t j: " << j
<< "\n\t dims: " << dims
<< "\n\t samples[i].far_vects[j].size(): " << samples[i].far_vects[j].size()
);
}
}
}
#endif // end ENABLE_ASSERTS
// compute the mean sample
// NOTE(review): on a freshly constructed object m is still empty here, so
// this presumably relies on dlib's matrix operator+= sizing m from the first
// anchor vector -- confirm against the dlib matrix documentation.
m = 0;
for (unsigned long i = 0; i < samples.size(); ++i)
m += samples[i].anchor_vect;
m /= samples.size();
DLIB_ASSERT(is_finite(m), "Some of the input vectors to vector_normalizer_frobmetric::train() have infinite or NaN values");
// Now we need to find tform. So we setup the optimization problem and run it
// over the next few lines of code.
unsigned long num_triples = 0;
for (unsigned long i = 0; i < samples.size(); ++i)
num_triples += samples[i].near_vects.size()*samples[i].far_vects.size();
// One solver variable and one bias term per triple.  The bias stays at 1
// unless the identity matrix prior is enabled (see the loop below).
matrix<double,0,1,mem_manager_type> u(num_triples);
matrix<double,0,1,mem_manager_type> bias(num_triples);
u = 0;
bias = 1;
// precompute all the anchor_vect to far_vects/near_vects pairs
std::vector<compact_frobmetric_training_sample> data(samples.size());
// cnt indexes bias in the same (sample, near, far) order the objective and
// derivative use to index u.
unsigned long cnt = 0;
std::vector<double> far_norm, near_norm;
for (unsigned long i = 0; i < data.size(); ++i)
{
far_norm.clear();
near_norm.clear();
data[i].far_vects.reserve(samples[i].far_vects.size());
data[i].near_vects.reserve(samples[i].near_vects.size());
for (unsigned long j = 0; j < samples[i].far_vects.size(); ++j)
{
data[i].far_vects.push_back(samples[i].anchor_vect - samples[i].far_vects[j]);
if (_use_identity_matrix_prior)
far_norm.push_back(length_squared(data[i].far_vects.back()));
}
for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j)
{
data[i].near_vects.push_back(samples[i].anchor_vect - samples[i].near_vects[j]);
if (_use_identity_matrix_prior)
near_norm.push_back(length_squared(data[i].near_vects.back()));
}
// Note that this loop only executes if _use_identity_matrix_prior == true.
for (unsigned long j = 0; j < near_norm.size(); ++j)
{
for (unsigned long k = 0; k < far_norm.size(); ++k)
{
bias(cnt++) = 1 - (far_norm[k] - near_norm[j]);
}
}
}
// Now run the main part of the algorithm
// Aminus is shared by reference between the objective (which writes it) and
// the derivative (which reads it), so after the solver returns it holds the
// value from the last objective evaluation.
matrix<double,0,0,mem_manager_type> Aminus;
find_max_box_constrained(lbfgs_search_strategy(10),
custom_stop_strategy(C, eps, verbose, max_iter),
objective(data, Aminus, bias),
derivative(num_triples, data, Aminus, bias),
u, 0, C/num_triples);
// What we need is the optimal Aminus which is a function of u. So we already
// have what we need and just need to put it into tform.
eigenvalue_decomposition<matrix<double,0,0,mem_manager_type> > ed(make_symmetric(-Aminus));
matrix<double,0,1,mem_manager_type> eigs = ed.get_real_eigenvalues();
// But first, discard the components that are zero to within the machine epsilon.
const double tol = max(eigs)*std::numeric_limits<double>::epsilon();
for (long i = 0; i < eigs.size(); ++i)
{
if (eigs(i) < tol)
eigs(i) = 0;
}
// tform = diag(sqrt(eigs)) * trans(V), where V comes from the eigen
// decomposition of -Aminus, with the identity matrix added when the prior
// is enabled.
if (_use_identity_matrix_prior)
tform = matrix_cast<scalar_type>(identity_matrix(Aminus) + diagm(sqrt(eigs))*trans(ed.get_pseudo_v()));
else
tform = matrix_cast<scalar_type>(diagm(sqrt(eigs))*trans(ed.get_pseudo_v()));
// Pre-apply the transform to m so we don't have to do it inside operator()
// every time it's called.
m = tform*m;
}
// Number of rows an input vector must have.  This is the row count of the
// stored mean vector m.
long in_vector_size () const
{
    return m.nr();
}
// Number of rows in the vectors produced by operator().  Like
// in_vector_size(), this is the row count of the stored mean vector m.
long out_vector_size () const
{
    return m.nr();
}
// Gives access to m, which train() sets to tform times the mean of the
// anchor vectors.
//
// NOTE(review): m is declared as matrix_type while this function returns a
// reference to matrix<scalar_type,0,1,mem_manager_type>.  If matrix_type is
// any other matrix type (e.g. a statically sized column vector) the return
// statement presumably binds to a converted temporary -- confirm and consider
// aligning the two types.
const matrix<scalar_type,0,1,mem_manager_type>& transformed_means () const
{
    return m;
}
// Gives access to the transformation matrix computed by the last call to
// train().
const matrix<scalar_type,0,0,mem_manager_type>& transform () const
{
    return tform;
}
// Applies the learned transform to x and subtracts the transformed mean.
// The result lives in the mutable member temp_out, so the returned reference
// is overwritten by the next call on this object.
const result_type& operator() (const matrix_type& x) const
{
    // make sure requires clause is not broken
    DLIB_ASSERT(in_vector_size() != 0 && in_vector_size() == x.size() &&
                is_col_vector(x) == true,
        "\tmatrix vector_normalizer_frobmetric::operator()"
        << "\n\t you have given invalid arguments to this function"
        << "\n\t in_vector_size(): " << in_vector_size()
        << "\n\t x.size(): " << x.size()
        << "\n\t is_col_vector(x): " << is_col_vector(x)
        << "\n\t this: " << this
        );

    // m already has tform applied to it (see the end of train()).
    temp_out = tform*x-m;

    return temp_out;
}
template <typename mt>
friend void deserialize (
vector_normalizer_frobmetric<mt>& item,
std::istream& in
);
template <typename mt>
friend void serialize (
const vector_normalizer_frobmetric<mt>& item,
std::ostream& out
);
private:
// ------------------- private data members -------------------
matrix_type m;
matrix<scalar_type,0,0,mem_manager_type> tform;
bool verbose;
double eps;
double C;
unsigned long max_iter;
bool _use_identity_matrix_prior;
// This is just a temporary variable that doesn't contribute to the
// state of this object.
mutable matrix_type temp_out;
};
// ----------------------------------------------------------------------------------------
template <
    typename matrix_type
    >
void serialize (
    const vector_normalizer_frobmetric<matrix_type>& item,
    std::ostream& out
)
{
    // Version 2 of the format adds the identity matrix prior flag at the end.
    const int version = 2;
    serialize(version, out);

    // learned state
    serialize(item.m, out);
    serialize(item.tform, out);

    // settings
    serialize(item.verbose, out);
    serialize(item.eps, out);
    serialize(item.C, out);
    serialize(item.max_iter, out);
    serialize(item._use_identity_matrix_prior, out);
}
// ----------------------------------------------------------------------------------------
template <
    typename matrix_type
    >
void deserialize (
    vector_normalizer_frobmetric<matrix_type>& item,
    std::istream& in
)
{
    int version = 0;
    deserialize(version, in);

    // Both the original format (1) and the current one (2) are accepted.
    const bool known_version = (version == 1 || version == 2);
    if (!known_version)
        throw serialization_error("Unsupported version found while deserializing dlib::vector_normalizer_frobmetric.");

    // learned state
    deserialize(item.m, in);
    deserialize(item.tform, in);

    // settings
    deserialize(item.verbose, in);
    deserialize(item.eps, in);
    deserialize(item.C, in);
    deserialize(item.max_iter, in);

    // Version 1 streams predate the identity matrix prior flag, so objects
    // loaded from them get the default of false.
    if (version != 2)
    {
        item._use_identity_matrix_prior = false;
    }
    else
    {
        deserialize(item._use_identity_matrix_prior, in);
    }
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_VECTOR_NORMALIZER_FRoBMETRIC_Hh_

View File

@@ -0,0 +1,328 @@
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_VECTOR_NORMALIZER_FRoBMETRIC_ABSTRACT_Hh_
#ifdef DLIB_VECTOR_NORMALIZER_FRoBMETRIC_ABSTRACT_Hh_
#include "../matrix.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename matrix_type
>
struct frobmetric_training_sample
{
/*!
WHAT THIS OBJECT REPRESENTS
This object represents a training data sample for the
vector_normalizer_frobmetric object. It defines a set of training triplets
relative to a single anchor_vect vector. That is, it specifies that the
learned distance metric should satisfy num_triples() constraints which are,
for all valid i and j:
length(T*anchor_vect-T*near_vects[i]) + 1 < length(T*anchor_vect - T*far_vects[j])
for some appropriate linear transformation T which will be learned by
vector_normalizer_frobmetric.
!*/
matrix_type anchor_vect;
std::vector<matrix_type> near_vects;
std::vector<matrix_type> far_vects;
unsigned long num_triples (
) const { return near_vects.size() * far_vects.size(); }
/*!
ensures
- returns the number of training triplets defined by this object.
!*/
void clear()
/*!
ensures
- #near_vects.size() == 0
- #far_vects.size() == 0
!*/
};
template < typename matrix_type >
void serialize(const frobmetric_training_sample<matrix_type>& item, std::ostream& out)
template < typename matrix_type >
void deserialize(frobmetric_training_sample<matrix_type>& item, std::istream& in)
/*!
provides serialization support.
!*/
// ----------------------------------------------------------------------------------------
template <
typename matrix_type
>
class vector_normalizer_frobmetric
{
/*!
REQUIREMENTS ON matrix_type
- must be a dlib::matrix object capable of representing column
vectors
INITIAL VALUE
- in_vector_size() == 0
- out_vector_size() == 0
- get_epsilon() == 0.1
- get_c() == 1
- get_max_iterations() == 5000
- This object is not verbose
- uses_identity_matrix_prior() == false
WHAT THIS OBJECT REPRESENTS
This object is a tool for performing the FrobMetric distance metric
learning algorithm described in the following paper:
A Scalable Dual Approach to Semidefinite Metric Learning
By Chunhua Shen, Junae Kim, Lei Wang, in CVPR 2011
Therefore, this object is a tool that takes as input training triplets
(anchor_vect, near, far) of vectors and attempts to learn a linear
transformation T such that:
length(T*anchor_vect-T*near) + 1 < length(T*anchor_vect - T*far)
That is, you give a bunch of anchor_vect vectors and for each anchor_vect
you specify some vectors which should be near to it and some that should be
far from it. This object then tries to find a transformation matrix that
makes the "near" vectors close to their anchors while the "far" vectors are
farther away.
THREAD SAFETY
Note that this object contains a cached matrix object it uses
to store intermediate results for normalization. This avoids
needing to reallocate it every time this object performs normalization
but also makes it non-thread safe. So make sure you don't share
instances of this object between threads.
!*/
public:
typedef typename matrix_type::mem_manager_type mem_manager_type;
typedef typename matrix_type::type scalar_type;
typedef matrix_type result_type;
vector_normalizer_frobmetric (
);
/*!
ensures
- this object is properly initialized
!*/
bool uses_identity_matrix_prior (
) const;
/*!
ensures
- Normally this object will try and find a matrix transform() that
minimizes sum(squared(transform())) but also fits the training data.
However, if #uses_identity_matrix_prior() == true then it will instead
try to find the transformation matrix that minimizes
sum(squared(identity_matrix()-transform())). That is, it will try to
find the matrix most similar to the identity matrix that best fits the
training data.
!*/
void set_uses_identity_matrix_prior (
bool use_prior
);
/*!
ensures
- #uses_identity_matrix_prior() == use_prior
!*/
void be_verbose(
);
/*!
ensures
- This object will print status messages to standard out so the user can
observe the progress of the train() routine.
!*/
void be_quiet (
);
/*!
ensures
- this object will not print anything to standard out.
!*/
void set_epsilon (
double eps
);
/*!
requires
- eps > 0
ensures
- #get_epsilon() == eps
!*/
double get_epsilon (
) const;
/*!
ensures
- returns the error epsilon that determines when training should stop.
Smaller values may result in a more accurate solution but take longer to
execute.
!*/
void set_c (
double C
);
/*!
requires
- C > 0
ensures
- #get_c() == C
!*/
double get_c (
) const;
/*!
ensures
- returns the regularization parameter. It is the parameter that
determines the trade-off between trying to fit the training data exactly
or allowing more errors but hopefully improving the generalization of the
resulting distance metric. Larger values encourage exact fitting while
smaller values of C may encourage better generalization.
!*/
void set_max_iterations (
unsigned long max_iterations
);
/*!
ensures
- #get_max_iterations() == max_iterations
!*/
unsigned long get_max_iterations (
) const;
/*!
ensures
- The train() routine uses an iterative numerical solver to find the best
distance metric. This function returns the maximum allowable number of
iterations it will use before terminating. Note that typically the
solver terminates prior to the max iteration count limit due to the error
dropping below get_epsilon().
!*/
void train (
const std::vector<frobmetric_training_sample<matrix_type> >& samples
);
/*!
requires
- samples.size() != 0
- All matrices inside samples (i.e. anchors and elements of near_vects and far_vects)
are column vectors with the same non-zero dimension.
- All the vectors in samples contain finite values.
- All elements of samples contain data, specifically, for all valid i:
- samples[i].num_triples() != 0
ensures
- learns a distance metric from the given training samples. After train
finishes you can use this object's operator() to transform vectors
according to the learned distance metric. In particular, we will have:
- #transform() == The linear transformation learned by the FrobMetric
learning procedure.
- #in_vector_size() == samples[0].anchor_vect.size()
- You can call (*this)(x) to transform a vector according to the learned
distance metric. That is, it should generally be the case that:
- length((*this)(anchor_vect) - (*this)(near)) + 1 < length((*this)(anchor_vect) - (*this)(far))
for the anchor_vect, near, and far vectors in the training data.
- #transformed_means() == the mean of the input anchor_vect vectors
after being transformed by #transform()
!*/
long in_vector_size (
) const;
/*!
ensures
- returns the number of rows that input vectors are required to contain if
they are to be normalized by this object.
!*/
long out_vector_size (
) const;
/*!
ensures
- returns the number of rows in the normalized vectors that come out of
this object.
- The value returned is always in_vector_size(). So out_vector_size() is
just provided to maintain interface consistency with other vector
normalizer objects. That is, the transformations applied by this object
do not change the dimensionality of the vectors.
!*/
const matrix<scalar_type,0,1,mem_manager_type>& transformed_means (
) const;
/*!
ensures
- returns a column vector V such that:
- V.size() == in_vector_size()
- V is a vector such that subtracting it from transformed vectors
results in them having an expected value of 0. Therefore, it is
equal to transform() times the mean of the input anchor_vect vectors
given to train().
!*/
const matrix<scalar_type,0,0,mem_manager_type>& transform (
) const;
/*!
ensures
- returns the transformation matrix we learned during the last
call to train().
- The returned matrix is square and has in_vector_size() by in_vector_size()
dimensions.
!*/
const result_type& operator() (
const matrix_type& x
) const;
/*!
requires
- in_vector_size() != 0
- in_vector_size() == x.size()
- is_col_vector(x) == true
ensures
- returns a normalized version of x, call it Z, that has the following
properties:
- Z == The result of applying the linear transform we learned during
train() to the input vector x.
- Z == transform()*x-transformed_means()
- is_col_vector(Z) == true
- Z.size() == x.size()
- The expected value of each element of Z is 0.
!*/
};
// ----------------------------------------------------------------------------------------
template <
typename matrix_type
>
void serialize (
const vector_normalizer_frobmetric<matrix_type>& item,
std::ostream& out
);
/*!
provides serialization support
!*/
// ----------------------------------------------------------------------------------------
template <
typename matrix_type
>
void deserialize (
vector_normalizer_frobmetric<matrix_type>& item,
std::istream& in
);
/*!
provides deserialization support
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_VECTOR_NORMALIZER_FRoBMETRIC_ABSTRACT_Hh_