open source pkg v1
This commit is contained in:
66
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/average_precision.h
vendored
Normal file
66
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/average_precision.h
vendored
Normal file
@@ -0,0 +1,66 @@
|
||||
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_AVERAGE_PREcISION_Hh_
|
||||
#define DLIB_AVERAGE_PREcISION_Hh_
|
||||
|
||||
#include "average_precision_abstract.h"
|
||||
#include <vector>
|
||||
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
namespace impl
|
||||
{
|
||||
inline bool get_bool_part (
|
||||
const bool& b
|
||||
) { return b; }
|
||||
|
||||
template <typename T>
|
||||
bool get_bool_part(const std::pair<T,bool>& item) { return item.second; }
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename T, typename alloc>
|
||||
double average_precision (
|
||||
const std::vector<T,alloc>& items,
|
||||
unsigned long missing_relevant_items = 0
|
||||
)
|
||||
{
|
||||
using namespace dlib::impl;
|
||||
double relevant_count = 0;
|
||||
// find the precision values
|
||||
std::vector<double> precision;
|
||||
for (unsigned long i = 0; i < items.size(); ++i)
|
||||
{
|
||||
if (get_bool_part(items[i]))
|
||||
{
|
||||
++relevant_count;
|
||||
precision.push_back(relevant_count / (i+1));
|
||||
}
|
||||
}
|
||||
|
||||
double precision_sum = 0;
|
||||
double max_val = 0;
|
||||
// now sum over the interpolated precision values
|
||||
for (std::vector<double>::reverse_iterator i = precision.rbegin(); i != precision.rend(); ++i)
|
||||
{
|
||||
max_val = std::max(max_val, *i);
|
||||
precision_sum += max_val;
|
||||
}
|
||||
|
||||
|
||||
relevant_count += missing_relevant_items;
|
||||
|
||||
if (relevant_count != 0)
|
||||
return precision_sum/relevant_count;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_AVERAGE_PREcISION_Hh_
|
||||
|
||||
67
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/average_precision_abstract.h
vendored
Normal file
67
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/average_precision_abstract.h
vendored
Normal file
@@ -0,0 +1,67 @@
|
||||
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_AVERAGE_PREcISION_ABSTRACT_Hh_
|
||||
#ifdef DLIB_AVERAGE_PREcISION_ABSTRACT_Hh_
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename alloc
|
||||
>
|
||||
double average_precision (
|
||||
const std::vector<bool,alloc>& items,
|
||||
unsigned long missing_relevant_items = 0
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- Interprets items as a list of relevant and non-relevant items in a response
|
||||
from an information retrieval system. In particular, items with a true value
|
||||
are relevant and false items are non-relevant. This function then returns
|
||||
the average precision of the ranking of the given items. For, example, the
|
||||
ranking [true, true, true, true, false] would have an average precision of 1.
|
||||
On the other hand, the ranking of [true false false true] would have an
|
||||
average precision of 0.75 (because the first true has a precision of 1 and
|
||||
the second true has a precision of 0.5, giving an average of 0.75).
|
||||
- As a special case, if item contains no true elements then the average
|
||||
precision is considered to be 1.
|
||||
- Note that we use the interpolated precision. That is, the interpolated
|
||||
precision at a recall value r is set to the maximum precision obtained at any
|
||||
higher recall value. Or in other words, we interpolate the precision/recall
|
||||
curve so that precision is monotonically decreasing. Therefore, the average
|
||||
precision value returned by this function is the area under this interpolated
|
||||
precision/recall curve.
|
||||
- This function will add in missing_relevant_items number of items with a
|
||||
precision of zero into the average value returned. For example, the average
|
||||
precision of the ranking [true, true] if there are 2 missing relevant items
|
||||
is 0.5.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename alloc
|
||||
>
|
||||
double average_precision (
|
||||
const std::vector<std::pair<T,bool>,alloc>& items,
|
||||
unsigned long missing_relevant_items = 0
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- this function is equivalent to copying the bool values from items into a
|
||||
std::vector<bool> and then calling the above average_precision() routine on
|
||||
it and returning the result.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_AVERAGE_PREcISION_ABSTRACT_Hh_
|
||||
|
||||
|
||||
186
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/cca.h
vendored
Normal file
186
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/cca.h
vendored
Normal file
@@ -0,0 +1,186 @@
|
||||
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_CCA_hh_
|
||||
#define DLIB_CCA_hh_
|
||||
|
||||
#include "cca_abstract.h"
|
||||
#include "../algs.h"
|
||||
#include "../matrix.h"
|
||||
#include "../sparse_vector.h"
|
||||
#include "random_subset_selector.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
matrix<typename T::type,0,1> compute_correlations (
|
||||
const matrix_exp<T>& L,
|
||||
const matrix_exp<T>& R
|
||||
)
|
||||
{
|
||||
DLIB_ASSERT( L.size() > 0 && R.size() > 0 && L.nr() == R.nr(),
|
||||
"\t matrix compute_correlations()"
|
||||
<< "\n\t Invalid inputs were given to this function."
|
||||
<< "\n\t L.size(): " << L.size()
|
||||
<< "\n\t R.size(): " << R.size()
|
||||
<< "\n\t L.nr(): " << L.nr()
|
||||
<< "\n\t R.nr(): " << R.nr()
|
||||
);
|
||||
|
||||
typedef typename T::type type;
|
||||
matrix<type> A, B, C;
|
||||
A = diag(trans(R)*L);
|
||||
B = sqrt(diag(trans(L)*L));
|
||||
C = sqrt(diag(trans(R)*R));
|
||||
A = pointwise_multiply(A , reciprocal(pointwise_multiply(B,C)));
|
||||
return A;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename matrix_type,
|
||||
typename T
|
||||
>
|
||||
matrix<T,0,1> impl_cca (
|
||||
const matrix_type& L,
|
||||
const matrix_type& R,
|
||||
matrix<T>& Ltrans,
|
||||
matrix<T>& Rtrans,
|
||||
unsigned long num_correlations,
|
||||
unsigned long extra_rank,
|
||||
unsigned long q,
|
||||
unsigned long num_output_correlations,
|
||||
double regularization
|
||||
)
|
||||
{
|
||||
matrix<T> Ul, Vl;
|
||||
matrix<T> Ur, Vr;
|
||||
matrix<T> U, V;
|
||||
matrix<T,0,1> Dr, Dl, D;
|
||||
|
||||
|
||||
// Note that we add a few more singular vectors in because it helps improve the
|
||||
// final results if we run this part with a little higher rank than the final SVD.
|
||||
svd_fast(L, Ul, Dl, Vl, num_correlations+extra_rank, q);
|
||||
svd_fast(R, Ur, Dr, Vr, num_correlations+extra_rank, q);
|
||||
|
||||
// Zero out singular values that are essentially zero so they don't cause numerical
|
||||
// difficulties in the code below.
|
||||
const double eps = std::numeric_limits<T>::epsilon()*std::max(max(Dr),max(Dl))*100;
|
||||
Dl = round_zeros(Dl+regularization,eps);
|
||||
Dr = round_zeros(Dr+regularization,eps);
|
||||
|
||||
// This matrix is really small so we can do a normal full SVD on it. Note that we
|
||||
// also throw away the columns of Ul and Ur corresponding to zero singular values.
|
||||
svd3(diagm(Dl>0)*tmp(trans(Ul)*Ur)*diagm(Dr>0), U, D, V);
|
||||
|
||||
// now throw away extra columns of the transformations. We do this in a way
|
||||
// that keeps the directions that have the highest correlations.
|
||||
matrix<T,0,1> temp = D;
|
||||
rsort_columns(U, temp);
|
||||
rsort_columns(V, D);
|
||||
U = colm(U, range(0, num_output_correlations-1));
|
||||
V = colm(V, range(0, num_output_correlations-1));
|
||||
D = rowm(D, range(0, num_output_correlations-1));
|
||||
|
||||
Ltrans = Vl*inv(diagm(Dl))*U;
|
||||
Rtrans = Vr*inv(diagm(Dr))*V;
|
||||
|
||||
// Note that the D matrix contains the correlation values for the transformed
|
||||
// vectors. However, when the L and R matrices have rank higher than
|
||||
// num_correlations+extra_rank then the values in D become only approximate.
|
||||
return D;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename T>
|
||||
matrix<T,0,1> cca (
|
||||
const matrix<T>& L,
|
||||
const matrix<T>& R,
|
||||
matrix<T>& Ltrans,
|
||||
matrix<T>& Rtrans,
|
||||
unsigned long num_correlations,
|
||||
unsigned long extra_rank = 5,
|
||||
unsigned long q = 2,
|
||||
double regularization = 0
|
||||
)
|
||||
{
|
||||
DLIB_ASSERT( num_correlations > 0 && L.size() > 0 && R.size() > 0 && L.nr() == R.nr() &&
|
||||
regularization >= 0,
|
||||
"\t matrix cca()"
|
||||
<< "\n\t Invalid inputs were given to this function."
|
||||
<< "\n\t num_correlations: " << num_correlations
|
||||
<< "\n\t regularization: " << regularization
|
||||
<< "\n\t L.size(): " << L.size()
|
||||
<< "\n\t R.size(): " << R.size()
|
||||
<< "\n\t L.nr(): " << L.nr()
|
||||
<< "\n\t R.nr(): " << R.nr()
|
||||
);
|
||||
|
||||
using std::min;
|
||||
const unsigned long n = min(num_correlations, (unsigned long)min(R.nr(),min(L.nc(), R.nc())));
|
||||
return impl_cca(L,R,Ltrans, Rtrans, num_correlations, extra_rank, q, n, regularization);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename sparse_vector_type, typename T>
|
||||
matrix<T,0,1> cca (
|
||||
const std::vector<sparse_vector_type>& L,
|
||||
const std::vector<sparse_vector_type>& R,
|
||||
matrix<T>& Ltrans,
|
||||
matrix<T>& Rtrans,
|
||||
unsigned long num_correlations,
|
||||
unsigned long extra_rank = 5,
|
||||
unsigned long q = 2,
|
||||
double regularization = 0
|
||||
)
|
||||
{
|
||||
DLIB_ASSERT( num_correlations > 0 && L.size() == R.size() &&
|
||||
max_index_plus_one(L) > 0 && max_index_plus_one(R) > 0 &&
|
||||
regularization >= 0,
|
||||
"\t matrix cca()"
|
||||
<< "\n\t Invalid inputs were given to this function."
|
||||
<< "\n\t num_correlations: " << num_correlations
|
||||
<< "\n\t regularization: " << regularization
|
||||
<< "\n\t L.size(): " << L.size()
|
||||
<< "\n\t R.size(): " << R.size()
|
||||
<< "\n\t max_index_plus_one(L): " << max_index_plus_one(L)
|
||||
<< "\n\t max_index_plus_one(R): " << max_index_plus_one(R)
|
||||
);
|
||||
|
||||
using std::min;
|
||||
const unsigned long n = min(max_index_plus_one(L), max_index_plus_one(R));
|
||||
const unsigned long num_output_correlations = min(num_correlations, std::min<unsigned long>(R.size(),n));
|
||||
return impl_cca(L,R,Ltrans, Rtrans, num_correlations, extra_rank, q, num_output_correlations, regularization);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename sparse_vector_type, typename Rand_type, typename T>
|
||||
matrix<T,0,1> cca (
|
||||
const random_subset_selector<sparse_vector_type,Rand_type>& L,
|
||||
const random_subset_selector<sparse_vector_type,Rand_type>& R,
|
||||
matrix<T>& Ltrans,
|
||||
matrix<T>& Rtrans,
|
||||
unsigned long num_correlations,
|
||||
unsigned long extra_rank = 5,
|
||||
unsigned long q = 2
|
||||
)
|
||||
{
|
||||
return cca(L.to_std_vector(), R.to_std_vector(), Ltrans, Rtrans, num_correlations, extra_rank, q);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_CCA_hh_
|
||||
|
||||
|
||||
191
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/cca_abstract.h
vendored
Normal file
191
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/cca_abstract.h
vendored
Normal file
@@ -0,0 +1,191 @@
|
||||
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_CCA_AbSTRACT_Hh_
|
||||
#ifdef DLIB_CCA_AbSTRACT_Hh_
|
||||
|
||||
#include "../matrix/matrix_la_abstract.h"
|
||||
#include "random_subset_selector_abstract.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
matrix<typename T::type,0,1> compute_correlations (
|
||||
const matrix_exp<T>& L,
|
||||
const matrix_exp<T>& R
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- L.size() > 0
|
||||
- R.size() > 0
|
||||
- L.nr() == R.nr()
|
||||
ensures
|
||||
- This function treats L and R as sequences of paired row vectors. It
|
||||
then computes the correlation values between the elements of these
|
||||
row vectors. In particular, we return a vector COR such that:
|
||||
- COR.size() == L.nc()
|
||||
- for all valid i:
|
||||
- COR(i) == the correlation coefficient between the following sequence
|
||||
of paired numbers: (L(k,i), R(k,i)) for k: 0 <= k < L.nr().
|
||||
Therefore, COR(i) is a value between -1 and 1 inclusive where 1
|
||||
indicates perfect correlation and -1 perfect anti-correlation. Note
|
||||
that this function assumes the input data vectors have been centered
|
||||
(i.e. made to have zero mean). If this is not the case then it will
|
||||
report inaccurate results.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
matrix<T,0,1> cca (
|
||||
const matrix<T>& L,
|
||||
const matrix<T>& R,
|
||||
matrix<T>& Ltrans,
|
||||
matrix<T>& Rtrans,
|
||||
unsigned long num_correlations,
|
||||
unsigned long extra_rank = 5,
|
||||
unsigned long q = 2,
|
||||
double regularization = 0
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- num_correlations > 0
|
||||
- L.size() > 0
|
||||
- R.size() > 0
|
||||
- L.nr() == R.nr()
|
||||
- regularization >= 0
|
||||
ensures
|
||||
- This function performs a canonical correlation analysis between the row
|
||||
vectors in L and R. That is, it finds two transformation matrices, Ltrans
|
||||
and Rtrans, such that row vectors in the transformed matrices L*Ltrans and
|
||||
R*Rtrans are as correlated as possible. That is, we try to find two transforms
|
||||
such that the correlation values returned by compute_correlations(L*Ltrans, R*Rtrans)
|
||||
would be maximized.
|
||||
- Let N == min(num_correlations, min(R.nr(),min(L.nc(),R.nc())))
|
||||
(This is the actual number of elements in the transformed vectors.
|
||||
Therefore, note that you can't get more outputs than there are rows or
|
||||
columns in the input matrices.)
|
||||
- #Ltrans.nr() == L.nc()
|
||||
- #Ltrans.nc() == N
|
||||
- #Rtrans.nr() == R.nc()
|
||||
- #Rtrans.nc() == N
|
||||
- This function assumes the data vectors in L and R have already been centered
|
||||
(i.e. we assume the vectors have zero means). However, in many cases it is
|
||||
fine to use uncentered data with cca(). But if it is important for your
|
||||
problem then you should center your data before passing it to cca().
|
||||
- This function works with reduced rank approximations of the L and R matrices.
|
||||
This makes it fast when working with large matrices. In particular, we use
|
||||
the svd_fast() routine to find reduced rank representations of the input
|
||||
matrices by calling it as follows: svd_fast(L, U,D,V, num_correlations+extra_rank, q)
|
||||
and similarly for R. This means that you can use the extra_rank and q
|
||||
arguments to cca() to influence the accuracy of the reduced rank
|
||||
approximation. However, the default values should work fine for most
|
||||
problems.
|
||||
- returns an estimate of compute_correlations(L*#Ltrans, R*#Rtrans). The
|
||||
returned vector should exactly match the output of compute_correlations()
|
||||
when the reduced rank approximation to L and R is accurate and regularization
|
||||
is set to 0. However, if this is not the case then the return value of this
|
||||
function will deviate from compute_correlations(L*#Ltrans, R*#Rtrans). This
|
||||
deviation can be used to check if the reduced rank approximation is working
|
||||
or you need to increase extra_rank.
|
||||
- The dimensions of the output vectors produced by L*#Ltrans or R*#Rtrans are
|
||||
ordered such that the dimensions with the highest correlations come first.
|
||||
That is, after applying the transforms produced by cca() to a set of vectors
|
||||
you will find that dimension 0 has the highest correlation, then dimension 1
|
||||
has the next highest, and so on. This also means that the list of numbers
|
||||
returned from cca() will always be listed in decreasing order.
|
||||
- This function performs the ridge regression version of Canonical Correlation
|
||||
Analysis when regularization is set to a value > 0. In particular, larger
|
||||
values indicate the solution should be more heavily regularized. This can be
|
||||
useful when the dimensionality of the data is larger than the number of
|
||||
samples.
|
||||
- A good discussion of CCA can be found in the paper "Canonical Correlation
|
||||
Analysis" by David Weenink. In particular, this function is implemented
|
||||
using equations 29 and 30 from his paper. We also use the idea of doing CCA
|
||||
on a reduced rank approximation of L and R as suggested by Paramveer S.
|
||||
Dhillon in his paper "Two Step CCA: A new spectral method for estimating
|
||||
vector models of words".
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename sparse_vector_type,
|
||||
typename T
|
||||
>
|
||||
matrix<T,0,1> cca (
|
||||
const std::vector<sparse_vector_type>& L,
|
||||
const std::vector<sparse_vector_type>& R,
|
||||
matrix<T>& Ltrans,
|
||||
matrix<T>& Rtrans,
|
||||
unsigned long num_correlations,
|
||||
unsigned long extra_rank = 5,
|
||||
unsigned long q = 2,
|
||||
double regularization = 0
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- num_correlations > 0
|
||||
- L.size() == R.size()
|
||||
- max_index_plus_one(L) > 0 && max_index_plus_one(R) > 0
|
||||
(i.e. L and R can't represent empty matrices)
|
||||
- L and R must contain sparse vectors (see the top of dlib/svm/sparse_vector_abstract.h
|
||||
for a definition of sparse vector)
|
||||
- regularization >= 0
|
||||
ensures
|
||||
- This is just an overload of the cca() function defined above. Except in this
|
||||
case we take a sparse representation of the input L and R matrices rather than
|
||||
dense matrices. Therefore, in this case, we interpret L and R as matrices
|
||||
with L.size() rows, where each row is defined by a sparse vector. So this
|
||||
function does exactly the same thing as the above cca().
|
||||
- Note that you can apply the output transforms to a sparse vector with the
|
||||
following code:
|
||||
sparse_matrix_vector_multiply(trans(Ltrans), your_sparse_vector)
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename sparse_vector_type,
|
||||
typename Rand_type,
|
||||
typename T
|
||||
>
|
||||
matrix<T,0,1> cca (
|
||||
const random_subset_selector<sparse_vector_type,Rand_type>& L,
|
||||
const random_subset_selector<sparse_vector_type,Rand_type>& R,
|
||||
matrix<T>& Ltrans,
|
||||
matrix<T>& Rtrans,
|
||||
unsigned long num_correlations,
|
||||
unsigned long extra_rank = 5,
|
||||
unsigned long q = 2,
|
||||
double regularization = 0
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- num_correlations > 0
|
||||
- L.size() == R.size()
|
||||
- max_index_plus_one(L) > 0 && max_index_plus_one(R) > 0
|
||||
(i.e. L and R can't represent empty matrices)
|
||||
- L and R must contain sparse vectors (see the top of dlib/svm/sparse_vector_abstract.h
|
||||
for a definition of sparse vector)
|
||||
- regularization >= 0
|
||||
ensures
|
||||
- returns cca(L.to_std_vector(), R.to_std_vector(), Ltrans, Rtrans, num_correlations, extra_rank, q)
|
||||
(i.e. this is just a convenience function for calling the cca() routine when
|
||||
your sparse vectors are contained inside a random_subset_selector rather than
|
||||
a std::vector)
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_CCA_AbSTRACT_Hh_
|
||||
|
||||
|
||||
541
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/dpca.h
vendored
Normal file
541
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/dpca.h
vendored
Normal file
@@ -0,0 +1,541 @@
|
||||
// Copyright (C) 2009 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_DPCA_h_
|
||||
#define DLIB_DPCA_h_
|
||||
|
||||
#include "dpca_abstract.h"
|
||||
#include <limits>
|
||||
#include <cmath>
|
||||
#include "../algs.h"
|
||||
#include "../matrix.h"
|
||||
#include <iostream>
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename matrix_type
|
||||
>
|
||||
class discriminant_pca
|
||||
{
|
||||
/*!
|
||||
INITIAL VALUE
|
||||
- vect_size == 0
|
||||
- total_count == 0
|
||||
- between_count == 0
|
||||
- within_count == 0
|
||||
- between_weight == 1
|
||||
- within_weight == 1
|
||||
|
||||
CONVENTION
|
||||
- vect_size == in_vector_size()
|
||||
- total_count == the number of times add_to_total_variance() has been called.
|
||||
- within_count == the number of times add_to_within_class_variance() has been called.
|
||||
- between_count == the number of times add_to_between_class_variance() has been called.
|
||||
- between_weight == between_class_weight()
|
||||
- within_weight == within_class_weight()
|
||||
|
||||
- if (total_count != 0)
|
||||
- total_sum == the sum of all vectors given to add_to_total_variance()
|
||||
- the covariance of all the elements given to add_to_total_variance() is given
|
||||
by:
|
||||
- let avg == total_sum/total_count
|
||||
- covariance == total_cov/total_count - avg*trans(avg)
|
||||
- if (within_count != 0)
|
||||
- within_cov/within_count == the normalized within class scatter matrix
|
||||
- if (between_count != 0)
|
||||
- between_cov/between_count == the normalized between class scatter matrix
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
struct discriminant_pca_error : public error
|
||||
{
|
||||
discriminant_pca_error(const std::string& message): error(message) {}
|
||||
};
|
||||
|
||||
typedef typename matrix_type::mem_manager_type mem_manager_type;
|
||||
typedef typename matrix_type::type scalar_type;
|
||||
typedef typename matrix_type::layout_type layout_type;
|
||||
typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix;
|
||||
typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix;
|
||||
|
||||
discriminant_pca (
|
||||
)
|
||||
{
|
||||
clear();
|
||||
}
|
||||
|
||||
void clear(
|
||||
)
|
||||
{
|
||||
total_count = 0;
|
||||
between_count = 0;
|
||||
within_count = 0;
|
||||
|
||||
vect_size = 0;
|
||||
|
||||
|
||||
between_weight = 1;
|
||||
within_weight = 1;
|
||||
|
||||
|
||||
total_sum.set_size(0);
|
||||
between_cov.set_size(0,0);
|
||||
total_cov.set_size(0,0);
|
||||
within_cov.set_size(0,0);
|
||||
}
|
||||
|
||||
long in_vector_size (
|
||||
) const
|
||||
{
|
||||
return vect_size;
|
||||
}
|
||||
|
||||
void set_within_class_weight (
|
||||
scalar_type weight
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(weight >= 0,
|
||||
"\t void discriminant_pca::set_within_class_weight()"
|
||||
<< "\n\t You can't use negative weight values"
|
||||
<< "\n\t weight: " << weight
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
within_weight = weight;
|
||||
}
|
||||
|
||||
scalar_type within_class_weight (
|
||||
) const
|
||||
{
|
||||
return within_weight;
|
||||
}
|
||||
|
||||
void set_between_class_weight (
|
||||
scalar_type weight
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(weight >= 0,
|
||||
"\t void discriminant_pca::set_between_class_weight()"
|
||||
<< "\n\t You can't use negative weight values"
|
||||
<< "\n\t weight: " << weight
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
between_weight = weight;
|
||||
}
|
||||
|
||||
scalar_type between_class_weight (
|
||||
) const
|
||||
{
|
||||
return between_weight;
|
||||
}
|
||||
|
||||
template <typename EXP1, typename EXP2>
|
||||
void add_to_within_class_variance(
|
||||
const matrix_exp<EXP1>& x,
|
||||
const matrix_exp<EXP2>& y
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(is_col_vector(x) && is_col_vector(y) &&
|
||||
x.size() == y.size() &&
|
||||
(in_vector_size() == 0 || x.size() == in_vector_size()),
|
||||
"\t void discriminant_pca::add_to_within_class_variance()"
|
||||
<< "\n\t Invalid inputs were given to this function"
|
||||
<< "\n\t is_col_vector(x): " << is_col_vector(x)
|
||||
<< "\n\t is_col_vector(y): " << is_col_vector(y)
|
||||
<< "\n\t x.size(): " << x.size()
|
||||
<< "\n\t y.size(): " << y.size()
|
||||
<< "\n\t in_vector_size(): " << in_vector_size()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
vect_size = x.size();
|
||||
if (within_count == 0)
|
||||
{
|
||||
within_cov = (x-y)*trans(x-y);
|
||||
}
|
||||
else
|
||||
{
|
||||
within_cov += (x-y)*trans(x-y);
|
||||
}
|
||||
++within_count;
|
||||
}
|
||||
|
||||
template <typename EXP1, typename EXP2>
|
||||
void add_to_between_class_variance(
|
||||
const matrix_exp<EXP1>& x,
|
||||
const matrix_exp<EXP2>& y
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(is_col_vector(x) && is_col_vector(y) &&
|
||||
x.size() == y.size() &&
|
||||
(in_vector_size() == 0 || x.size() == in_vector_size()),
|
||||
"\t void discriminant_pca::add_to_between_class_variance()"
|
||||
<< "\n\t Invalid inputs were given to this function"
|
||||
<< "\n\t is_col_vector(x): " << is_col_vector(x)
|
||||
<< "\n\t is_col_vector(y): " << is_col_vector(y)
|
||||
<< "\n\t x.size(): " << x.size()
|
||||
<< "\n\t y.size(): " << y.size()
|
||||
<< "\n\t in_vector_size(): " << in_vector_size()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
vect_size = x.size();
|
||||
if (between_count == 0)
|
||||
{
|
||||
between_cov = (x-y)*trans(x-y);
|
||||
}
|
||||
else
|
||||
{
|
||||
between_cov += (x-y)*trans(x-y);
|
||||
}
|
||||
++between_count;
|
||||
}
|
||||
|
||||
template <typename EXP>
|
||||
void add_to_total_variance(
|
||||
const matrix_exp<EXP>& x
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(is_col_vector(x) && (in_vector_size() == 0 || x.size() == in_vector_size()),
|
||||
"\t void discriminant_pca::add_to_total_variance()"
|
||||
<< "\n\t Invalid inputs were given to this function"
|
||||
<< "\n\t is_col_vector(x): " << is_col_vector(x)
|
||||
<< "\n\t in_vector_size(): " << in_vector_size()
|
||||
<< "\n\t x.size(): " << x.size()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
vect_size = x.size();
|
||||
if (total_count == 0)
|
||||
{
|
||||
total_cov = x*trans(x);
|
||||
total_sum = x;
|
||||
}
|
||||
else
|
||||
{
|
||||
total_cov += x*trans(x);
|
||||
total_sum += x;
|
||||
}
|
||||
++total_count;
|
||||
}
|
||||
|
||||
const general_matrix dpca_matrix (
|
||||
const double eps = 0.99
|
||||
) const
|
||||
{
|
||||
general_matrix dpca_mat;
|
||||
general_matrix eigenvalues;
|
||||
dpca_matrix(dpca_mat, eigenvalues, eps);
|
||||
return dpca_mat;
|
||||
}
|
||||
|
||||
const general_matrix dpca_matrix_of_size (
|
||||
const long num_rows
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(0 < num_rows && num_rows <= in_vector_size(),
|
||||
"\t general_matrix discriminant_pca::dpca_matrix_of_size()"
|
||||
<< "\n\t Invalid inputs were given to this function"
|
||||
<< "\n\t num_rows: " << num_rows
|
||||
<< "\n\t in_vector_size(): " << in_vector_size()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
general_matrix dpca_mat;
|
||||
general_matrix eigenvalues;
|
||||
dpca_matrix_of_size(dpca_mat, eigenvalues, num_rows);
|
||||
return dpca_mat;
|
||||
}
|
||||
|
||||
void dpca_matrix (
|
||||
general_matrix& dpca_mat,
|
||||
general_matrix& eigenvalues,
|
||||
const double eps = 0.99
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(0 < eps && eps <= 1 && in_vector_size() != 0,
|
||||
"\t void discriminant_pca::dpca_matrix()"
|
||||
<< "\n\t Invalid inputs were given to this function"
|
||||
<< "\n\t eps: " << eps
|
||||
<< "\n\t in_vector_size(): " << in_vector_size()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
compute_dpca_matrix(dpca_mat, eigenvalues, eps, 0);
|
||||
}
|
||||
|
||||
void dpca_matrix_of_size (
|
||||
general_matrix& dpca_mat,
|
||||
general_matrix& eigenvalues,
|
||||
const long num_rows
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(0 < num_rows && num_rows <= in_vector_size(),
|
||||
"\t general_matrix discriminant_pca::dpca_matrix_of_size()"
|
||||
<< "\n\t Invalid inputs were given to this function"
|
||||
<< "\n\t num_rows: " << num_rows
|
||||
<< "\n\t in_vector_size(): " << in_vector_size()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
compute_dpca_matrix(dpca_mat, eigenvalues, 1, num_rows);
|
||||
}
|
||||
|
||||
void swap (
|
||||
discriminant_pca& item
|
||||
)
|
||||
{
|
||||
using std::swap;
|
||||
swap(total_cov, item.total_cov);
|
||||
swap(total_sum, item.total_sum);
|
||||
swap(total_count, item.total_count);
|
||||
swap(vect_size, item.vect_size);
|
||||
swap(between_cov, item.between_cov);
|
||||
|
||||
swap(between_count, item.between_count);
|
||||
swap(between_weight, item.between_weight);
|
||||
swap(within_cov, item.within_cov);
|
||||
swap(within_count, item.within_count);
|
||||
swap(within_weight, item.within_weight);
|
||||
}
|
||||
|
||||
friend void deserialize (
|
||||
discriminant_pca& item,
|
||||
std::istream& in
|
||||
)
|
||||
{
|
||||
deserialize( item.total_cov, in);
|
||||
deserialize( item.total_sum, in);
|
||||
deserialize( item.total_count, in);
|
||||
deserialize( item.vect_size, in);
|
||||
deserialize( item.between_cov, in);
|
||||
deserialize( item.between_count, in);
|
||||
deserialize( item.between_weight, in);
|
||||
deserialize( item.within_cov, in);
|
||||
deserialize( item.within_count, in);
|
||||
deserialize( item.within_weight, in);
|
||||
}
|
||||
|
||||
friend void serialize (
|
||||
const discriminant_pca& item,
|
||||
std::ostream& out
|
||||
)
|
||||
{
|
||||
serialize( item.total_cov, out);
|
||||
serialize( item.total_sum, out);
|
||||
serialize( item.total_count, out);
|
||||
serialize( item.vect_size, out);
|
||||
serialize( item.between_cov, out);
|
||||
serialize( item.between_count, out);
|
||||
serialize( item.between_weight, out);
|
||||
serialize( item.within_cov, out);
|
||||
serialize( item.within_count, out);
|
||||
serialize( item.within_weight, out);
|
||||
}
|
||||
|
||||
discriminant_pca operator+ (
|
||||
const discriminant_pca& item
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT((in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size()) &&
|
||||
between_class_weight() == item.between_class_weight() &&
|
||||
within_class_weight() == item.within_class_weight(),
|
||||
"\t discriminant_pca discriminant_pca::operator+()"
|
||||
<< "\n\t The two discriminant_pca objects being added must have compatible parameters"
|
||||
<< "\n\t in_vector_size(): " << in_vector_size()
|
||||
<< "\n\t item.in_vector_size(): " << item.in_vector_size()
|
||||
<< "\n\t between_class_weight(): " << between_class_weight()
|
||||
<< "\n\t item.between_class_weight(): " << item.between_class_weight()
|
||||
<< "\n\t within_class_weight(): " << within_class_weight()
|
||||
<< "\n\t item.within_class_weight(): " << item.within_class_weight()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
discriminant_pca temp(item);
|
||||
|
||||
// We need to make sure to ignore empty matrices. That's what these if statements
|
||||
// are for.
|
||||
|
||||
if (total_count != 0 && temp.total_count != 0)
|
||||
{
|
||||
temp.total_cov += total_cov;
|
||||
temp.total_sum += total_sum;
|
||||
temp.total_count += total_count;
|
||||
}
|
||||
else if (total_count != 0)
|
||||
{
|
||||
temp.total_cov = total_cov;
|
||||
temp.total_sum = total_sum;
|
||||
temp.total_count = total_count;
|
||||
}
|
||||
|
||||
if (between_count != 0 && temp.between_count != 0)
|
||||
{
|
||||
temp.between_cov += between_cov;
|
||||
temp.between_count += between_count;
|
||||
}
|
||||
else if (between_count != 0)
|
||||
{
|
||||
temp.between_cov = between_cov;
|
||||
temp.between_count = between_count;
|
||||
}
|
||||
|
||||
if (within_count != 0 && temp.within_count != 0)
|
||||
{
|
||||
temp.within_cov += within_cov;
|
||||
temp.within_count += within_count;
|
||||
}
|
||||
else if (within_count != 0)
|
||||
{
|
||||
temp.within_cov = within_cov;
|
||||
temp.within_count = within_count;
|
||||
}
|
||||
|
||||
return temp;
|
||||
}
|
||||
|
||||
discriminant_pca& operator+= (
|
||||
const discriminant_pca& rhs
|
||||
)
|
||||
{
|
||||
(*this + rhs).swap(*this);
|
||||
return *this;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void compute_dpca_matrix (
|
||||
general_matrix& dpca_mat,
|
||||
general_matrix& eigenvalues,
|
||||
const double eps,
|
||||
long num_rows
|
||||
) const
|
||||
{
|
||||
general_matrix cov;
|
||||
|
||||
// now combine the three measures of variance into a single matrix by using the
|
||||
// within_weight and between_weight weights.
|
||||
cov = get_total_covariance_matrix();
|
||||
if (within_count != 0)
|
||||
cov -= within_weight*within_cov/within_count;
|
||||
if (between_count != 0)
|
||||
cov += between_weight*between_cov/between_count;
|
||||
|
||||
|
||||
eigenvalue_decomposition<general_matrix> eig(make_symmetric(cov));
|
||||
|
||||
eigenvalues = eig.get_real_eigenvalues();
|
||||
dpca_mat = eig.get_pseudo_v();
|
||||
|
||||
// sort the eigenvalues and eigenvectors so that the biggest eigenvalues come first
|
||||
rsort_columns(dpca_mat, eigenvalues);
|
||||
|
||||
long num_vectors = 0;
|
||||
if (num_rows == 0)
|
||||
{
|
||||
// Some of the eigenvalues might be negative. So first lets zero those out
|
||||
// so they won't get considered.
|
||||
eigenvalues = pointwise_multiply(eigenvalues > 0, eigenvalues);
|
||||
// figure out how many eigenvectors we want in our dpca matrix
|
||||
const double thresh = sum(eigenvalues)*eps;
|
||||
double total = 0;
|
||||
for (long r = 0; r < eigenvalues.size() && total < thresh; ++r)
|
||||
{
|
||||
// Don't even think about looking at eigenvalues that are 0. If we go this
|
||||
// far then we have all we need.
|
||||
if (eigenvalues(r) == 0)
|
||||
break;
|
||||
|
||||
++num_vectors;
|
||||
total += eigenvalues(r);
|
||||
}
|
||||
|
||||
if (num_vectors == 0)
|
||||
throw discriminant_pca_error("While performing discriminant_pca, all eigenvalues were negative or 0");
|
||||
}
|
||||
else
|
||||
{
|
||||
num_vectors = num_rows;
|
||||
}
|
||||
|
||||
|
||||
// So now we know we want to use num_vectors of the first eigenvectors. So
|
||||
// pull those out and discard the rest.
|
||||
dpca_mat = trans(colm(dpca_mat,range(0,num_vectors-1)));
|
||||
|
||||
// also clip off the eigenvalues we aren't using
|
||||
eigenvalues = rowm(eigenvalues, range(0,num_vectors-1));
|
||||
|
||||
}
|
||||
|
||||
general_matrix get_total_covariance_matrix (
|
||||
) const
|
||||
/*!
|
||||
ensures
|
||||
- returns the covariance matrix of all the data given to the add_to_total_variance()
|
||||
!*/
|
||||
{
|
||||
// if we don't even know the dimensionality of the vectors we are dealing
|
||||
// with then just return an empty matrix
|
||||
if (vect_size == 0)
|
||||
return general_matrix();
|
||||
|
||||
// we know the vector size but we have zero total covariance.
|
||||
if (total_count == 0)
|
||||
{
|
||||
general_matrix temp(vect_size,vect_size);
|
||||
temp = 0;
|
||||
return temp;
|
||||
}
|
||||
|
||||
// In this case we actually have something to make a total covariance matrix out of.
|
||||
// So do that.
|
||||
column_matrix avg = total_sum/total_count;
|
||||
|
||||
return total_cov/total_count - avg*trans(avg);
|
||||
}
|
||||
|
||||
general_matrix total_cov;
|
||||
column_matrix total_sum;
|
||||
scalar_type total_count;
|
||||
|
||||
long vect_size;
|
||||
|
||||
general_matrix between_cov;
|
||||
scalar_type between_count;
|
||||
scalar_type between_weight;
|
||||
|
||||
general_matrix within_cov;
|
||||
scalar_type within_count;
|
||||
scalar_type within_weight;
|
||||
};
|
||||
|
||||
template <
|
||||
typename matrix_type
|
||||
>
|
||||
inline void swap (
|
||||
discriminant_pca<matrix_type>& a,
|
||||
discriminant_pca<matrix_type>& b
|
||||
) { a.swap(b); }
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_DPCA_h_
|
||||
|
||||
|
||||
365
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/dpca_abstract.h
vendored
Normal file
365
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/dpca_abstract.h
vendored
Normal file
@@ -0,0 +1,365 @@
|
||||
// Copyright (C) 2009 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_DPCA_ABSTRaCT_
|
||||
#ifdef DLIB_DPCA_ABSTRaCT_
|
||||
|
||||
#include <limits>
|
||||
#include <cmath>
|
||||
#include "../matrix/matrix_abstract.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename matrix_type
|
||||
>
|
||||
class discriminant_pca
|
||||
{
|
||||
/*!
|
||||
REQUIREMENTS ON matrix_type
|
||||
Must be some type of dlib::matrix.
|
||||
|
||||
INITIAL VALUE
|
||||
- in_vector_size() == 0
|
||||
- between_class_weight() == 1
|
||||
- within_class_weight() == 1
|
||||
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object implements the Discriminant PCA technique described in the paper:
|
||||
A New Discriminant Principal Component Analysis Method with Partial Supervision (2009)
|
||||
by Dan Sun and Daoqiang Zhang
|
||||
|
||||
This algorithm is basically a straightforward generalization of the classical PCA
|
||||
technique to handle partially labeled data. It is useful if you want to learn a linear
|
||||
dimensionality reduction rule using a bunch of data that is partially labeled.
|
||||
|
||||
It functions by estimating three different scatter matrices. The first is the total scatter
|
||||
matrix St (i.e. the total data covariance matrix), the second is the between class scatter
|
||||
matrix Sb (basically a measure of the variance between data of different classes) and the
|
||||
third is the within class scatter matrix Sw (a measure of the variance of data within the
|
||||
same classes).
|
||||
|
||||
Once these three matrices are estimated they are combined according to the following equation:
|
||||
S = St + a*Sb - b*Sw
|
||||
Where a and b are user supplied weights. Then the largest eigenvalues of the S matrix are
|
||||
computed and their associated eigenvectors are returned as the output of this algorithm.
|
||||
That is, the desired linear dimensionality reduction is given by the matrix with these
|
||||
eigenvectors stored in its rows.
|
||||
|
||||
Note that if a and b are set to 0 (or no labeled data is provided) then the output transformation
|
||||
matrix is the same as the one produced by the classical PCA algorithm.
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
struct discriminant_pca_error : public error;
|
||||
/*!
|
||||
This exception is thrown if there is some error that prevents us from creating
|
||||
a DPCA matrix.
|
||||
!*/
|
||||
|
||||
typedef typename matrix_type::mem_manager_type mem_manager_type;
|
||||
typedef typename matrix_type::type scalar_type;
|
||||
typedef typename matrix_type::layout_type layout_type;
|
||||
typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix;
|
||||
typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix;
|
||||
|
||||
discriminant_pca (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- this object is properly initialized
|
||||
!*/
|
||||
|
||||
void clear(
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #*this has its initial value
|
||||
!*/
|
||||
|
||||
long in_vector_size (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- if (this object has been presented with any input vectors) then
|
||||
- returns the dimension of the column vectors used with this object
|
||||
- else
|
||||
- returns 0
|
||||
!*/
|
||||
|
||||
void set_within_class_weight (
|
||||
scalar_type weight
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- weight >= 0
|
||||
ensures
|
||||
- #within_class_weight() == weight
|
||||
!*/
|
||||
|
||||
scalar_type within_class_weight (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the weight used when combining the within class scatter matrix with
|
||||
the other scatter matrices.
|
||||
!*/
|
||||
|
||||
void set_between_class_weight (
|
||||
scalar_type weight
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- weight >= 0
|
||||
ensures
|
||||
- #between_class_weight() == weight
|
||||
!*/
|
||||
|
||||
scalar_type between_class_weight (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the weight used when combining the between class scatter matrix with
|
||||
the other scatter matrices.
|
||||
!*/
|
||||
|
||||
void add_to_within_class_variance(
|
||||
const matrix_exp& x,
|
||||
const matrix_exp& y
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- is_col_vector(x) == true
|
||||
- is_col_vector(y) == true
|
||||
- x.size() == y.size()
|
||||
- if (in_vector_size() != 0) then
|
||||
- x.size() == y.size() == in_vector_size()
|
||||
ensures
|
||||
- #in_vector_size() == x.size()
|
||||
- Adds (x-y)*trans(x-y) to the within class scatter matrix.
|
||||
(i.e. the direction given by (x-y) is recorded as being a direction associated
|
||||
with within class variance and is therefore unimportant and will be weighted
|
||||
less in the final dimensionality reduction)
|
||||
!*/
|
||||
|
||||
void add_to_between_class_variance(
|
||||
const matrix_exp& x,
|
||||
const matrix_exp& y
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- is_col_vector(x) == true
|
||||
- is_col_vector(y) == true
|
||||
- x.size() == y.size()
|
||||
- if (in_vector_size() != 0) then
|
||||
- x.size() == y.size() == in_vector_size()
|
||||
ensures
|
||||
- #in_vector_size() == x.size()
|
||||
- Adds (x-y)*trans(x-y) to the between class scatter matrix.
|
||||
(i.e. the direction given by (x-y) is recorded as being a direction associated
|
||||
with between class variance and is therefore important and will be weighted
|
||||
higher in the final dimensionality reduction)
|
||||
!*/
|
||||
|
||||
void add_to_total_variance(
|
||||
const matrix_exp& x
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- is_col_vector(x) == true
|
||||
- if (in_vector_size() != 0) then
|
||||
- x.size() == in_vector_size()
|
||||
ensures
|
||||
- #in_vector_size() == x.size()
|
||||
- let M denote the centroid (or mean) of all the data. Then this function
|
||||
Adds (x-M)*trans(x-M) to the total scatter matrix.
|
||||
(i.e. the direction given by (x-M) is recorded as being a direction associated
|
||||
with unlabeled variance and is therefore of default importance and will be weighted
|
||||
as described in the discriminant_pca class description.)
|
||||
!*/
|
||||
|
||||
const general_matrix dpca_matrix (
|
||||
const double eps = 0.99
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- 0 < eps <= 1
|
||||
- in_vector_size() != 0
|
||||
(i.e. you have to have given this object some data)
|
||||
ensures
|
||||
- computes and returns the matrix MAT given by dpca_matrix(MAT,eigen,eps).
|
||||
That is, this function returns the dpca_matrix computed by the function
|
||||
defined below.
|
||||
- Note that MAT is the desired linear transformation matrix. That is,
|
||||
multiplying a vector by MAT performs the desired linear dimensionality reduction.
|
||||
throws
|
||||
- discriminant_pca_error
|
||||
This exception is thrown if we are unable to create the dpca_matrix for some
|
||||
reason. For example, if only within class examples have been given or
|
||||
within_class_weight() is very large then all eigenvalues will be negative and
|
||||
that prevents this algorithm from working properly.
|
||||
!*/
|
||||
|
||||
void dpca_matrix (
|
||||
general_matrix& dpca_mat,
|
||||
general_matrix& eigenvalues,
|
||||
const double eps = 0.99
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- 0 < eps <= 1
|
||||
- in_vector_size() != 0
|
||||
(i.e. you have to have given this object some data)
|
||||
ensures
|
||||
- is_col_vector(#eigenvalues) == true
|
||||
- #dpca_mat.nr() == eigenvalues.size()
|
||||
- #dpca_mat.nc() == in_vector_size()
|
||||
- rowm(#dpca_mat,i) represents the ith eigenvector of the S matrix described
|
||||
in the class description and its eigenvalue is given by eigenvalues(i).
|
||||
- all values in #eigenvalues are > 0. Moreover, the eigenvalues are in
|
||||
sorted order with the largest eigenvalue stored at eigenvalues(0).
|
||||
- (#dpca_mat)*trans(#dpca_mat) == identity_matrix.
|
||||
(i.e. the rows of the dpca_matrix are all unit length vectors and are mutually
|
||||
orthogonal)
|
||||
- Note that #dpca_mat is the desired linear transformation matrix. That is,
|
||||
multiplying a vector by #dpca_mat performs the desired linear dimensionality
|
||||
reduction.
|
||||
- sum(#eigenvalues) will be equal to about eps times the total sum of all
|
||||
positive eigenvalues in the S matrix described in this class's description.
|
||||
This means that eps is a number that controls how "lossy" the dimensionality
|
||||
reduction will be. Large values of eps result in more output dimensions
|
||||
while smaller values result in fewer.
|
||||
throws
|
||||
- discriminant_pca_error
|
||||
This exception is thrown if we are unable to create the dpca_matrix for some
|
||||
reason. For example, if only within class examples have been given or
|
||||
within_class_weight() is very large then all eigenvalues will be negative and
|
||||
that prevents this algorithm from working properly.
|
||||
!*/
|
||||
|
||||
const general_matrix dpca_matrix_of_size (
|
||||
const long num_rows
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- 0 < num_rows <= in_vector_size()
|
||||
ensures
|
||||
- computes and returns the matrix MAT given by dpca_matrix_of_size(MAT,eigen,num_rows).
|
||||
That is, this function returns the dpca_matrix computed by the function
|
||||
defined below.
|
||||
- Note that MAT is the desired linear transformation matrix. That is,
|
||||
multiplying a vector by MAT performs the desired linear dimensionality
|
||||
reduction to num_rows dimensions.
|
||||
!*/
|
||||
|
||||
void dpca_matrix_of_size (
|
||||
general_matrix& dpca_mat,
|
||||
general_matrix& eigenvalues,
|
||||
const long num_rows
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- 0 < num_rows <= in_vector_size()
|
||||
ensures
|
||||
- is_col_vector(#eigenvalues) == true
|
||||
- #dpca_mat.nr() == eigenvalues.size()
|
||||
- #dpca_mat.nr() == num_rows
|
||||
- #dpca_mat.nc() == in_vector_size()
|
||||
- rowm(#dpca_mat,i) represents the ith eigenvector of the S matrix described
|
||||
in the class description and its eigenvalue is given by eigenvalues(i).
|
||||
- The values in #eigenvalues might be positive or negative. Additionally, the
|
||||
eigenvalues are in sorted order with the largest eigenvalue stored at
|
||||
eigenvalues(0).
|
||||
- (#dpca_mat)*trans(#dpca_mat) == identity_matrix.
|
||||
(i.e. the rows of the dpca_matrix are all unit length vectors and are mutually
|
||||
orthogonal)
|
||||
- Note that #dpca_mat is the desired linear transformation matrix. That is,
|
||||
multiplying a vector by #dpca_mat performs the desired linear dimensionality
|
||||
reduction to num_rows dimensions.
|
||||
!*/
|
||||
|
||||
discriminant_pca operator+ (
|
||||
const discriminant_pca& item
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size()
|
||||
(i.e. the in_vector_size() of *this and item must match or one must be zero)
|
||||
- between_class_weight() == item.between_class_weight()
|
||||
- within_class_weight() == item.within_class_weight()
|
||||
ensures
|
||||
- returns a new discriminant_pca object that represents the combination of all
|
||||
the measurements given to *this and item. That is, this function returns a
|
||||
discriminant_pca object, R, that is equivalent to what you would obtain if all
|
||||
modifying calls (e.g. the add_to_*() functions) to *this and item had instead
|
||||
been done to R.
|
||||
!*/
|
||||
|
||||
discriminant_pca& operator+= (
|
||||
const discriminant_pca& rhs
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- in_vector_size() == 0 || rhs.in_vector_size() == 0 || in_vector_size() == rhs.in_vector_size()
|
||||
(i.e. the in_vector_size() of *this and rhs must match or one must be zero)
|
||||
- between_class_weight() == rhs.between_class_weight()
|
||||
- within_class_weight() == rhs.within_class_weight()
|
||||
ensures
|
||||
- #*this == *item + rhs
|
||||
- returns #*this
|
||||
!*/
|
||||
|
||||
void swap (
|
||||
discriminant_pca& item
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- swaps *this and item
|
||||
!*/
|
||||
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename matrix_type
|
||||
>
|
||||
inline void swap (
|
||||
discriminant_pca<matrix_type>& a,
|
||||
discriminant_pca<matrix_type>& b
|
||||
) { a.swap(b); }
|
||||
/*!
|
||||
provides a global swap function
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename matrix_type,
|
||||
>
|
||||
void deserialize (
|
||||
discriminant_pca<matrix_type>& item,
|
||||
std::istream& in
|
||||
);
|
||||
/*!
|
||||
provides deserialization support
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename matrix_type,
|
||||
>
|
||||
void serialize (
|
||||
const discriminant_pca<matrix_type>& item,
|
||||
std::ostream& out
|
||||
);
|
||||
/*!
|
||||
provides serialization support
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_DPCA_ABSTRaCT_
|
||||
|
||||
82
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/image_feature_sampling.h
vendored
Normal file
82
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/image_feature_sampling.h
vendored
Normal file
@@ -0,0 +1,82 @@
|
||||
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_IMAGE_FEATURE_SaMPLING_Hh_
|
||||
#define DLIB_IMAGE_FEATURE_SaMPLING_Hh_
|
||||
|
||||
#include "image_feature_sampling_abstract.h"
|
||||
#include "../statistics.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type,
|
||||
typename feature_extractor_type,
|
||||
typename pyramid_type
|
||||
>
|
||||
random_subset_selector<typename feature_extractor_type::descriptor_type> randomly_sample_image_features (
|
||||
const image_array_type& images,
|
||||
const pyramid_type& pyr,
|
||||
const feature_extractor_type& fe_,
|
||||
unsigned long num
|
||||
)
|
||||
{
|
||||
feature_extractor_type fe;
|
||||
fe.copy_configuration(fe_);
|
||||
random_subset_selector<typename feature_extractor_type::descriptor_type> basis;
|
||||
basis.set_max_size(num);
|
||||
|
||||
typedef typename image_array_type::type image_type;
|
||||
image_type temp_img, temp_img2;
|
||||
|
||||
for (unsigned long i = 0; i < images.size(); ++i)
|
||||
{
|
||||
bool at_pyramid_top = true;
|
||||
while (true)
|
||||
{
|
||||
if (at_pyramid_top)
|
||||
fe.load(images[i]);
|
||||
else
|
||||
fe.load(temp_img);
|
||||
|
||||
if (fe.size() == 0)
|
||||
break;
|
||||
|
||||
for (long r = 0; r < fe.nr(); ++r)
|
||||
{
|
||||
for (long c = 0; c < fe.nc(); ++c)
|
||||
{
|
||||
if (basis.next_add_accepts())
|
||||
{
|
||||
basis.add(fe(r,c));
|
||||
}
|
||||
else
|
||||
{
|
||||
basis.add();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (at_pyramid_top)
|
||||
{
|
||||
at_pyramid_top = false;
|
||||
pyr(images[i], temp_img);
|
||||
}
|
||||
else
|
||||
{
|
||||
pyr(temp_img, temp_img2);
|
||||
swap(temp_img2,temp_img);
|
||||
}
|
||||
}
|
||||
}
|
||||
return basis;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_IMAGE_FEATURE_SaMPLING_Hh_
|
||||
|
||||
45
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/image_feature_sampling_abstract.h
vendored
Normal file
45
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/image_feature_sampling_abstract.h
vendored
Normal file
@@ -0,0 +1,45 @@
|
||||
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_IMAGE_FEATURE_SaMPLING_ABSTRACT_Hh_
|
||||
#ifdef DLIB_IMAGE_FEATURE_SaMPLING_ABSTRACT_Hh_
|
||||
|
||||
#include "random_subset_selector_abstract.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type,
|
||||
typename feature_extractor_type,
|
||||
typename pyramid_type
|
||||
>
|
||||
random_subset_selector<typename feature_extractor_type::descriptor_type> randomly_sample_image_features (
|
||||
const image_array_type& images,
|
||||
const pyramid_type& pyr,
|
||||
const feature_extractor_type& fe,
|
||||
unsigned long num
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- pyramid_type == a type compatible with the image pyramid objects defined
|
||||
in dlib/image_transforms/image_pyramid_abstract.h
|
||||
- feature_extractor_type == a local image feature extractor type such as the
|
||||
dlib::hog_image
|
||||
- image_array_type == an implementation of dlib/array/array_kernel_abstract.h
|
||||
and it must contain image objects which can be passed to pyr() and fe.load()
|
||||
and are swappable by global swap().
|
||||
ensures
|
||||
- creates an image pyramid for each image in images and performs feature
|
||||
extraction on each pyramid level. Then selects a random subsample of at
|
||||
most num local feature vectors and returns it.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_IMAGE_FEATURE_SaMPLING_ABSTRACT_Hh_
|
||||
|
||||
|
||||
237
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/lda.h
vendored
Normal file
237
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/lda.h
vendored
Normal file
@@ -0,0 +1,237 @@
|
||||
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_LDA_Hh_
|
||||
#define DLIB_LDA_Hh_
|
||||
|
||||
#include "lda_abstract.h"
|
||||
#include "../algs.h"
|
||||
#include <map>
|
||||
#include "../matrix.h"
|
||||
#include <vector>
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
namespace impl
|
||||
{
|
||||
|
||||
inline std::map<unsigned long,unsigned long> make_class_labels(
|
||||
const std::vector<unsigned long>& row_labels
|
||||
)
|
||||
{
|
||||
std::map<unsigned long,unsigned long> class_labels;
|
||||
for (unsigned long i = 0; i < row_labels.size(); ++i)
|
||||
{
|
||||
const unsigned long next = class_labels.size();
|
||||
if (class_labels.count(row_labels[i]) == 0)
|
||||
class_labels[row_labels[i]] = next;
|
||||
}
|
||||
return class_labels;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
matrix<T,0,1> center_matrix (
|
||||
matrix<T>& X
|
||||
)
|
||||
{
|
||||
matrix<T,1> mean;
|
||||
for (long r = 0; r < X.nr(); ++r)
|
||||
mean += rowm(X,r);
|
||||
mean /= X.nr();
|
||||
|
||||
for (long r = 0; r < X.nr(); ++r)
|
||||
set_rowm(X,r) -= mean;
|
||||
|
||||
return trans(mean);
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
void compute_lda_transform (
|
||||
matrix<T>& X,
|
||||
matrix<T,0,1>& mean,
|
||||
const std::vector<unsigned long>& row_labels,
|
||||
unsigned long lda_dims = 500,
|
||||
unsigned long extra_pca_dims = 200
|
||||
)
|
||||
{
|
||||
std::map<unsigned long,unsigned long> class_labels = impl::make_class_labels(row_labels);
|
||||
// LDA can only give out at most class_labels.size()-1 dimensions so don't try to
|
||||
// compute more than that.
|
||||
lda_dims = std::min<unsigned long>(lda_dims, class_labels.size()-1);
|
||||
|
||||
// make sure requires clause is not broken
|
||||
DLIB_CASSERT(class_labels.size() > 1,
|
||||
"\t void compute_lda_transform()"
|
||||
<< "\n\t You can't call this function if the number of distinct class labels is less than 2."
|
||||
);
|
||||
DLIB_CASSERT(X.size() != 0 && (long)row_labels.size() == X.nr() && lda_dims != 0,
|
||||
"\t void compute_lda_transform()"
|
||||
<< "\n\t Invalid inputs were given to this function."
|
||||
<< "\n\t X.size(): " << X.size()
|
||||
<< "\n\t row_labels.size(): " << row_labels.size()
|
||||
<< "\n\t lda_dims: " << lda_dims
|
||||
);
|
||||
|
||||
|
||||
mean = impl::center_matrix(X);
|
||||
// Do PCA to reduce dims
|
||||
matrix<T> pu,pw,pv;
|
||||
svd_fast(X, pu, pw, pv, lda_dims+extra_pca_dims, 4);
|
||||
pu.set_size(0,0); // free RAM, we don't need pu.
|
||||
X = X*pv;
|
||||
|
||||
|
||||
matrix<T> class_means(class_labels.size(), X.nc());
|
||||
class_means = 0;
|
||||
matrix<T,0,1> class_counts(class_labels.size());
|
||||
class_counts = 0;
|
||||
|
||||
// First compute the means of each class
|
||||
for (unsigned long i = 0; i < row_labels.size(); ++i)
|
||||
{
|
||||
const unsigned long class_idx = class_labels[row_labels[i]];
|
||||
set_rowm(class_means,class_idx) += rowm(X,i);
|
||||
class_counts(class_idx)++;
|
||||
}
|
||||
class_means = inv(diagm(class_counts))*class_means;
|
||||
// subtract means from the data
|
||||
for (unsigned long i = 0; i < row_labels.size(); ++i)
|
||||
{
|
||||
const unsigned long class_idx = class_labels[row_labels[i]];
|
||||
set_rowm(X,i) -= rowm(class_means,class_idx);
|
||||
}
|
||||
|
||||
// Note that we are using the formulas from the paper Using Discriminant
|
||||
// Eigenfeatures for Image Retrieval by Swets and Weng.
|
||||
matrix<T> Sw = trans(X)*X;
|
||||
matrix<T> Sb = trans(class_means)*class_means;
|
||||
matrix<T> A, H;
|
||||
matrix<T,0,1> W;
|
||||
svd3(Sw, A, W, H);
|
||||
W = sqrt(W);
|
||||
W = reciprocal(lowerbound(W,max(W)*1e-5));
|
||||
A = trans(H*diagm(W))*Sb*H*diagm(W);
|
||||
matrix<T> v,s,u;
|
||||
svd3(A, v, s, u);
|
||||
matrix<T> tform = H*diagm(W)*u;
|
||||
// pick out only the number of dimensions we are supposed to for the output, unless
|
||||
// we should just keep them all, then don't do anything.
|
||||
if ((long)lda_dims <= tform.nc())
|
||||
{
|
||||
rsort_columns(tform, s);
|
||||
tform = colm(tform, range(0, lda_dims-1));
|
||||
}
|
||||
|
||||
X = trans(pv*tform);
|
||||
mean = X*mean;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
inline std::pair<double,double> equal_error_rate (
|
||||
const std::vector<double>& low_vals,
|
||||
const std::vector<double>& high_vals
|
||||
)
|
||||
{
|
||||
std::vector<std::pair<double,int> > temp;
|
||||
temp.reserve(low_vals.size()+high_vals.size());
|
||||
for (unsigned long i = 0; i < low_vals.size(); ++i)
|
||||
temp.push_back(std::make_pair(low_vals[i], -1));
|
||||
for (unsigned long i = 0; i < high_vals.size(); ++i)
|
||||
temp.push_back(std::make_pair(high_vals[i], +1));
|
||||
|
||||
std::sort(temp.begin(), temp.end());
|
||||
|
||||
if (temp.size() == 0)
|
||||
return std::make_pair(0,0);
|
||||
|
||||
double thresh = temp[0].first;
|
||||
|
||||
unsigned long num_low_wrong = low_vals.size();
|
||||
unsigned long num_high_wrong = 0;
|
||||
double low_error = num_low_wrong/(double)low_vals.size();
|
||||
double high_error = num_high_wrong/(double)high_vals.size();
|
||||
for (unsigned long i = 0; i < temp.size() && high_error < low_error; ++i)
|
||||
{
|
||||
thresh = temp[i].first;
|
||||
if (temp[i].second > 0)
|
||||
{
|
||||
num_high_wrong++;
|
||||
high_error = num_high_wrong/(double)high_vals.size();
|
||||
}
|
||||
else
|
||||
{
|
||||
num_low_wrong--;
|
||||
low_error = num_low_wrong/(double)low_vals.size();
|
||||
}
|
||||
}
|
||||
|
||||
return std::make_pair((low_error+high_error)/2, thresh);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
struct roc_point
|
||||
{
|
||||
double true_positive_rate;
|
||||
double false_positive_rate;
|
||||
double detection_threshold;
|
||||
};
|
||||
|
||||
inline std::vector<roc_point> compute_roc_curve (
|
||||
const std::vector<double>& true_detections,
|
||||
const std::vector<double>& false_detections
|
||||
)
|
||||
{
|
||||
DLIB_CASSERT(true_detections.size() != 0);
|
||||
DLIB_CASSERT(false_detections.size() != 0);
|
||||
|
||||
std::vector<std::pair<double,int> > temp;
|
||||
temp.reserve(true_detections.size()+false_detections.size());
|
||||
for (unsigned long i = 0; i < true_detections.size(); ++i)
|
||||
temp.push_back(std::make_pair(true_detections[i], +1));
|
||||
for (unsigned long i = 0; i < false_detections.size(); ++i)
|
||||
temp.push_back(std::make_pair(false_detections[i], -1));
|
||||
|
||||
std::sort(temp.rbegin(), temp.rend());
|
||||
|
||||
|
||||
std::vector<roc_point> roc_curve;
|
||||
roc_curve.reserve(temp.size());
|
||||
|
||||
double num_false_included = 0;
|
||||
double num_true_included = 0;
|
||||
for (unsigned long i = 0; i < temp.size(); ++i)
|
||||
{
|
||||
if (temp[i].second > 0)
|
||||
num_true_included++;
|
||||
else
|
||||
num_false_included++;
|
||||
|
||||
roc_point p;
|
||||
p.true_positive_rate = num_true_included/true_detections.size();
|
||||
p.false_positive_rate = num_false_included/false_detections.size();
|
||||
p.detection_threshold = temp[i].first;
|
||||
roc_curve.push_back(p);
|
||||
}
|
||||
|
||||
return roc_curve;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_LDA_Hh_
|
||||
|
||||
118
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/lda_abstract.h
vendored
Normal file
118
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/lda_abstract.h
vendored
Normal file
@@ -0,0 +1,118 @@
|
||||
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_LDA_ABSTRACT_Hh_
|
||||
#ifdef DLIB_LDA_ABSTRACT_Hh_
|
||||
|
||||
#include <map>
|
||||
#include "../matrix.h"
|
||||
#include <vector>
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
void compute_lda_transform (
|
||||
matrix<T>& X,
|
||||
matrix<T,0,1>& M,
|
||||
const std::vector<unsigned long>& row_labels,
|
||||
unsigned long lda_dims = 500,
|
||||
unsigned long extra_pca_dims = 200
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- X.size() != 0
|
||||
- row_labels.size() == X.nr()
|
||||
- The number of distinct values in row_labels > 1
|
||||
- lda_dims != 0
|
||||
ensures
|
||||
- We interpret X as a collection X.nr() of input vectors, where each row of X
|
||||
is one of the vectors.
|
||||
- We interpret row_labels[i] as the label of the vector rowm(X,i).
|
||||
- This function performs the dimensionality reducing version of linear
|
||||
discriminant analysis. That is, you give it a set of labeled vectors and it
|
||||
returns a linear transform that maps the input vectors into a new space that
|
||||
is good for distinguishing between the different classes. In particular,
|
||||
this function finds matrices Z and M such that:
|
||||
- Given an input vector x, Z*x-M, is the transformed version of x. That is,
|
||||
Z*x-M maps x into a space where x vectors that share the same class label
|
||||
are near each other.
|
||||
- Z*x-M results in the transformed vectors having zero expected mean.
|
||||
- Z.nr() <= lda_dims
|
||||
(it might be less than lda_dims if there are not enough distinct class
|
||||
labels to support lda_dims dimensions).
|
||||
- Z.nc() == X.nc()
|
||||
- We overwrite the input matrix X and store Z in it. Therefore, the
|
||||
outputs of this function are in X and M.
|
||||
- In order to deal with very high dimensional inputs, we perform PCA internally
|
||||
to map the input vectors into a space of at most lda_dims+extra_pca_dims
|
||||
prior to performing LDA.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
std::pair<double,double> equal_error_rate (
|
||||
const std::vector<double>& low_vals,
|
||||
const std::vector<double>& high_vals
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- This function finds a threshold T that best separates the elements of
|
||||
low_vals from high_vals by selecting the threshold with equal error rate. In
|
||||
particular, we try to pick a threshold T such that:
|
||||
- for all valid i:
|
||||
- high_vals[i] >= T
|
||||
- for all valid i:
|
||||
- low_vals[i] < T
|
||||
Where the best T is determined such that the fraction of low_vals >= T is the
|
||||
same as the fraction of high_vals < T.
|
||||
- Let ERR == the equal error rate. I.e. the fraction of times low_vals >= T
|
||||
and high_vals < T. Note that 0 <= ERR <= 1.
|
||||
- returns make_pair(ERR,T)
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
struct roc_point
|
||||
{
|
||||
double true_positive_rate;
|
||||
double false_positive_rate;
|
||||
double detection_threshold;
|
||||
};
|
||||
|
||||
std::vector<roc_point> compute_roc_curve (
|
||||
const std::vector<double>& true_detections,
|
||||
const std::vector<double>& false_detections
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- true_detections.size() != 0
|
||||
- false_detections.size() != 0
|
||||
ensures
|
||||
- This function computes the ROC curve (receiver operating characteristic)
|
||||
curve of the given data. Therefore, we interpret true_detections as
|
||||
containing detection scores for a bunch of true detections and
|
||||
false_detections as detection scores from a bunch of false detections. A
|
||||
perfect detector would always give higher scores to true detections than to
|
||||
false detections, resulting in a true positive rate of 1 and a false positive
|
||||
rate of 0, for some appropriate detection threshold.
|
||||
- Returns an array, ROC, such that:
|
||||
- ROC.size() == true_detections.size()+false_detections.size()
|
||||
- for all valid i:
|
||||
- If you were to accept all detections with a score >= ROC[i].detection_threshold
|
||||
then you would obtain a true positive rate of ROC[i].true_positive_rate and a
|
||||
false positive rate of ROC[i].false_positive_rate.
|
||||
- ROC is ordered such that low detection rates come first. That is, the
|
||||
curve is swept from a high detection threshold to a low threshold.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_LDA_ABSTRACT_Hh_
|
||||
|
||||
|
||||
372
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/random_subset_selector.h
vendored
Normal file
372
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/random_subset_selector.h
vendored
Normal file
@@ -0,0 +1,372 @@
|
||||
// Copyright (C) 2010 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_RANDOM_SUBSeT_SELECTOR_H_
|
||||
#define DLIB_RANDOM_SUBSeT_SELECTOR_H_
|
||||
|
||||
#include "random_subset_selector_abstract.h"
|
||||
#include "../rand.h"
|
||||
#include <vector>
|
||||
#include "../algs.h"
|
||||
#include "../string.h"
|
||||
#include "../serialize.h"
|
||||
#include "../matrix/matrix_mat.h"
|
||||
#include <iostream>
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
template <
|
||||
typename T,
|
||||
typename Rand_type = dlib::rand
|
||||
>
|
||||
class random_subset_selector
|
||||
{
|
||||
/*!
|
||||
INITIAL VALUE
|
||||
- _max_size == 0
|
||||
- items.size() == 0
|
||||
- count == 0
|
||||
- _next_add_accepts == false
|
||||
|
||||
CONVENTION
|
||||
- count == the number of times add() has been called since the last
|
||||
time this object was empty.
|
||||
- items.size() == size()
|
||||
- max_size() == _max_size
|
||||
- next_add_accepts() == _next_add_accepts
|
||||
!*/
|
||||
public:
|
||||
typedef T type;
|
||||
typedef T value_type;
|
||||
typedef default_memory_manager mem_manager_type;
|
||||
typedef Rand_type rand_type;
|
||||
|
||||
typedef typename std::vector<T>::iterator iterator;
|
||||
typedef typename std::vector<T>::const_iterator const_iterator;
|
||||
|
||||
|
||||
random_subset_selector (
|
||||
)
|
||||
{
|
||||
_max_size = 0;
|
||||
make_empty();
|
||||
}
|
||||
|
||||
void set_seed(const std::string& value)
|
||||
{
|
||||
rnd.set_seed(value);
|
||||
}
|
||||
|
||||
void make_empty (
|
||||
)
|
||||
{
|
||||
items.resize(0);
|
||||
count = 0;
|
||||
update_next_add_accepts();
|
||||
}
|
||||
|
||||
const std::vector<T>& to_std_vector(
|
||||
) const { return items; }
|
||||
|
||||
size_t size (
|
||||
) const
|
||||
{
|
||||
return items.size();
|
||||
}
|
||||
|
||||
void set_max_size (
|
||||
unsigned long new_max_size
|
||||
)
|
||||
{
|
||||
items.reserve(new_max_size);
|
||||
make_empty();
|
||||
_max_size = new_max_size;
|
||||
update_next_add_accepts();
|
||||
}
|
||||
|
||||
unsigned long max_size (
|
||||
) const
|
||||
{
|
||||
return _max_size;
|
||||
}
|
||||
|
||||
T& operator[] (
|
||||
unsigned long idx
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(idx < size(),
|
||||
"\tvoid random_subset_selector::operator[]()"
|
||||
<< "\n\t idx is out of range"
|
||||
<< "\n\t idx: " << idx
|
||||
<< "\n\t size(): " << size()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
return items[idx];
|
||||
}
|
||||
|
||||
const T& operator[] (
|
||||
unsigned long idx
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(idx < size(),
|
||||
"\tvoid random_subset_selector::operator[]()"
|
||||
<< "\n\t idx is out of range"
|
||||
<< "\n\t idx: " << idx
|
||||
<< "\n\t size(): " << size()
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
return items[idx];
|
||||
}
|
||||
|
||||
iterator begin() { return items.begin(); }
|
||||
const_iterator begin() const { return items.begin(); }
|
||||
iterator end() { return items.end(); }
|
||||
const_iterator end() const { return items.end(); }
|
||||
|
||||
bool next_add_accepts (
|
||||
) const
|
||||
{
|
||||
return _next_add_accepts;
|
||||
}
|
||||
|
||||
void add (
|
||||
const T& new_item
|
||||
)
|
||||
{
|
||||
if (items.size() < _max_size)
|
||||
{
|
||||
items.push_back(new_item);
|
||||
// swap into a random place
|
||||
exchange(items[rnd.get_random_32bit_number()%items.size()], items.back());
|
||||
}
|
||||
else if (_next_add_accepts)
|
||||
{
|
||||
// pick a random element of items and replace it.
|
||||
items[rnd.get_random_32bit_number()%items.size()] = new_item;
|
||||
}
|
||||
|
||||
update_next_add_accepts();
|
||||
++count;
|
||||
}
|
||||
|
||||
void add (
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(next_add_accepts() == false,
|
||||
"\tvoid random_subset_selector::add()"
|
||||
<< "\n\t You should be calling the version of add() that takes an argument"
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
update_next_add_accepts();
|
||||
++count;
|
||||
}
|
||||
|
||||
void swap (
|
||||
random_subset_selector& a
|
||||
)
|
||||
{
|
||||
items.swap(a.items);
|
||||
std::swap(_max_size, a._max_size);
|
||||
std::swap(count, a.count);
|
||||
rnd.swap(a.rnd);
|
||||
std::swap(_next_add_accepts, a._next_add_accepts);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2>
|
||||
friend void serialize (
|
||||
const random_subset_selector<T1,T2>& item,
|
||||
std::ostream& out
|
||||
);
|
||||
|
||||
template <typename T1, typename T2>
|
||||
friend void deserialize (
|
||||
random_subset_selector<T1,T2>& item,
|
||||
std::istream& in
|
||||
);
|
||||
|
||||
private:
|
||||
|
||||
void update_next_add_accepts (
|
||||
)
|
||||
{
|
||||
if (items.size() < _max_size)
|
||||
{
|
||||
_next_add_accepts = true;
|
||||
}
|
||||
else if (_max_size == 0)
|
||||
{
|
||||
_next_add_accepts = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
// At this point each element of items has had an equal chance of being in this object.
|
||||
// In particular, the probability that each arrived here is currently items.size()/count.
|
||||
// We need to be able to say that, after this function ends, the probability of any
|
||||
// particular object ending up in items is items.size()/(count+1). So this means that
|
||||
// we should decide to add a new item into items with this probability. Also, if we do
|
||||
// so then we pick one of the current items and replace it at random with the new item.
|
||||
|
||||
// Make me a random 64 bit number. This might seem excessive but I want this object
|
||||
// to be able to handle an effectively infinite number of calls to add(). So count
|
||||
// might get very large and we need to deal with that properly.
|
||||
const unsigned long num1 = rnd.get_random_32bit_number();
|
||||
const unsigned long num2 = rnd.get_random_32bit_number();
|
||||
uint64 num = num1;
|
||||
num <<= 32;
|
||||
num |= num2;
|
||||
|
||||
num %= (count+1);
|
||||
|
||||
_next_add_accepts = (num < items.size());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::vector<T> items;
|
||||
unsigned long _max_size;
|
||||
uint64 count;
|
||||
|
||||
rand_type rnd;
|
||||
|
||||
bool _next_add_accepts;
|
||||
|
||||
};
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename rand_type
|
||||
>
|
||||
void swap (
|
||||
random_subset_selector<T,rand_type>& a,
|
||||
random_subset_selector<T,rand_type>& b
|
||||
) { a.swap(b); }
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename T1, typename T2>
|
||||
void serialize (
|
||||
const random_subset_selector<T1,T2>& item,
|
||||
std::ostream& out
|
||||
)
|
||||
{
|
||||
serialize(item.items, out);
|
||||
serialize(item._max_size, out);
|
||||
serialize(item.count, out);
|
||||
serialize(item.rnd, out);
|
||||
serialize(item._next_add_accepts, out);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2>
|
||||
void deserialize (
|
||||
random_subset_selector<T1,T2>& item,
|
||||
std::istream& in
|
||||
)
|
||||
{
|
||||
deserialize(item.items, in);
|
||||
deserialize(item._max_size, in);
|
||||
deserialize(item.count, in);
|
||||
deserialize(item.rnd, in);
|
||||
deserialize(item._next_add_accepts, in);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename alloc
|
||||
>
|
||||
random_subset_selector<T> randomly_subsample (
|
||||
const std::vector<T,alloc>& samples,
|
||||
unsigned long num
|
||||
)
|
||||
{
|
||||
random_subset_selector<T> subset;
|
||||
subset.set_max_size(num);
|
||||
for (unsigned long i = 0; i < samples.size(); ++i)
|
||||
subset.add(samples[i]);
|
||||
return subset;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename alloc,
|
||||
typename U
|
||||
>
|
||||
random_subset_selector<T> randomly_subsample (
|
||||
const std::vector<T,alloc>& samples,
|
||||
unsigned long num,
|
||||
const U& random_seed
|
||||
)
|
||||
{
|
||||
random_subset_selector<T> subset;
|
||||
subset.set_seed(cast_to_string(random_seed));
|
||||
subset.set_max_size(num);
|
||||
for (unsigned long i = 0; i < samples.size(); ++i)
|
||||
subset.add(samples[i]);
|
||||
return subset;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
random_subset_selector<T> randomly_subsample (
|
||||
const random_subset_selector<T>& samples,
|
||||
unsigned long num
|
||||
)
|
||||
{
|
||||
random_subset_selector<T> subset;
|
||||
subset.set_max_size(num);
|
||||
for (unsigned long i = 0; i < samples.size(); ++i)
|
||||
subset.add(samples[i]);
|
||||
return subset;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename U
|
||||
>
|
||||
random_subset_selector<T> randomly_subsample (
|
||||
const random_subset_selector<T>& samples,
|
||||
unsigned long num,
|
||||
const U& random_seed
|
||||
)
|
||||
{
|
||||
random_subset_selector<T> subset;
|
||||
subset.set_seed(cast_to_string(random_seed));
|
||||
subset.set_max_size(num);
|
||||
for (unsigned long i = 0; i < samples.size(); ++i)
|
||||
subset.add(samples[i]);
|
||||
return subset;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
const matrix_op<op_array_to_mat<random_subset_selector<T> > > mat (
|
||||
const random_subset_selector<T>& m
|
||||
)
|
||||
{
|
||||
typedef op_array_to_mat<random_subset_selector<T> > op;
|
||||
return matrix_op<op>(op(m));
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_RANDOM_SUBSeT_SELECTOR_H_
|
||||
|
||||
|
||||
388
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/random_subset_selector_abstract.h
vendored
Normal file
388
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/random_subset_selector_abstract.h
vendored
Normal file
@@ -0,0 +1,388 @@
|
||||
// Copyright (C) 2010 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_RANDOM_SUBSeT_SELECTOR_ABSTRACT_H_
|
||||
#ifdef DLIB_RANDOM_SUBSeT_SELECTOR_ABSTRACT_H_
|
||||
|
||||
#include <vector>
|
||||
#include "../rand/rand_kernel_abstract.h"
|
||||
#include "../algs.h"
|
||||
#include "../string.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
template <
|
||||
typename T,
|
||||
typename Rand_type = dlib::rand
|
||||
>
|
||||
class random_subset_selector
|
||||
{
|
||||
/*!
|
||||
REQUIREMENTS ON T
|
||||
T must be a copyable type
|
||||
|
||||
REQUIREMENTS ON Rand_type
|
||||
must be an implementation of dlib/rand/rand_kernel_abstract.h
|
||||
|
||||
INITIAL VALUE
|
||||
- size() == 0
|
||||
- max_size() == 0
|
||||
- next_add_accepts() == false
|
||||
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is a tool to help you select a random subset of a large body of data.
|
||||
In particular, it is useful when the body of data is too large to fit into memory.
|
||||
|
||||
So for example, suppose you have 1000000 data samples and you want to select a
|
||||
random subset of size 1000. Then you could do that as follows:
|
||||
|
||||
random_subset_selector<sample_type> rand_subset;
|
||||
rand_subset.set_max_size(1000)
|
||||
for (int i = 0; i < 1000000; ++i)
|
||||
rand_subset.add( get_next_data_sample());
|
||||
|
||||
|
||||
At the end of the for loop you will have your random subset of 1000 samples. And by
|
||||
random I mean that each of the 1000000 data samples has an equal chance of ending
|
||||
up in the rand_subset object.
|
||||
|
||||
|
||||
Note that the above example calls get_next_data_sample() for each data sample. This
|
||||
may be inefficient since most of the data samples are just ignored. An alternative
|
||||
method that doesn't require you to load each sample can also be used. Consider the
|
||||
following:
|
||||
|
||||
random_subset_selector<sample_type> rand_subset;
|
||||
rand_subset.set_max_size(1000)
|
||||
for (int i = 0; i < 1000000; ++i)
|
||||
if (rand_subset.next_add_accepts())
|
||||
rand_subset.add(get_data_sample(i));
|
||||
else
|
||||
rand_subset.add()
|
||||
|
||||
In the above example we only actually fetch the data sample into memory if we
|
||||
know that the rand_subset would include it into the random subset. Otherwise,
|
||||
we can just call the empty add().
|
||||
|
||||
|
||||
Finally, note that the random_subset_selector uses a deterministic pseudo-random
|
||||
number generator under the hood. Moreover, the default constructor always seeds
|
||||
the random number generator in the same way. So unless you call set_seed()
|
||||
each instance of the random_subset_selector will function identically.
|
||||
!*/
|
||||
public:
|
||||
typedef T type;
|
||||
typedef T value_type;
|
||||
typedef default_memory_manager mem_manager_type;
|
||||
typedef Rand_type rand_type;
|
||||
|
||||
typedef typename std::vector<T>::iterator iterator;
|
||||
typedef typename std::vector<T>::const_iterator const_iterator;
|
||||
|
||||
random_subset_selector (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- this object is properly initialized
|
||||
!*/
|
||||
|
||||
void set_seed(
|
||||
const std::string& value
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- sets the seed of the random number generator that is embedded in
|
||||
this object to the given value.
|
||||
!*/
|
||||
|
||||
void make_empty (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #size() == 0
|
||||
!*/
|
||||
|
||||
size_t size (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of items of type T currently contained in this object
|
||||
!*/
|
||||
|
||||
void set_max_size (
|
||||
unsigned long new_max_size
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #max_size() == new_max_size
|
||||
- #size() == 0
|
||||
!*/
|
||||
|
||||
unsigned long max_size (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the maximum allowable size for this object
|
||||
!*/
|
||||
|
||||
T& operator[] (
|
||||
unsigned long idx
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- idx < size()
|
||||
ensures
|
||||
- returns a non-const reference to the idx'th element of this object
|
||||
!*/
|
||||
|
||||
const T& operator[] (
|
||||
unsigned long idx
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- idx < size()
|
||||
ensures
|
||||
- returns a const reference to the idx'th element of this object
|
||||
!*/
|
||||
|
||||
bool next_add_accepts (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- if (the next call to add(item) will result in item being included
|
||||
into *this) then
|
||||
- returns true
|
||||
- Note that the next item will always be accepted if size() < max_size().
|
||||
- else
|
||||
- returns false
|
||||
- Note that the next item will never be accepted if max_size() == 0.
|
||||
!*/
|
||||
|
||||
void add (
|
||||
const T& new_item
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- if (next_add_accepts()) then
|
||||
- places new_item into *this object at a random location
|
||||
- if (size() < max_size()) then
|
||||
- #size() == size() + 1
|
||||
- #next_add_accepts() == The updated information about the acceptance
|
||||
of the next call to add()
|
||||
!*/
|
||||
|
||||
void add (
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- next_add_accepts() == false
|
||||
ensures
|
||||
- This function does nothing but update the value of #next_add_accepts()
|
||||
!*/
|
||||
|
||||
iterator begin(
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- if (size() > 0) then
|
||||
- returns an iterator referring to the first element in
|
||||
this container.
|
||||
- else
|
||||
- returns end()
|
||||
!*/
|
||||
|
||||
const_iterator begin(
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- if (size() > 0) then
|
||||
- returns a const_iterator referring to the first element in
|
||||
this container.
|
||||
- else
|
||||
- returns end()
|
||||
!*/
|
||||
|
||||
iterator end(
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- returns an iterator that represents one past the end of
|
||||
this container
|
||||
!*/
|
||||
|
||||
const_iterator end(
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns an iterator that represents one past the end of
|
||||
this container
|
||||
!*/
|
||||
|
||||
const std::vector<T>& to_std_vector(
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns a const reference to the underlying std::vector<T> that contains
|
||||
all elements in this object. That is, this function returns a vector, V,
|
||||
which has the following properties:
|
||||
- V.size() == this->size()
|
||||
- V.begin() == this->begin()
|
||||
- V.end() == this->end()
|
||||
!*/
|
||||
|
||||
void swap (
|
||||
random_subset_selector& item
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- swaps *this and item
|
||||
!*/
|
||||
|
||||
};
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename rand_type
|
||||
>
|
||||
void swap (
|
||||
random_subset_selector<T,rand_type>& a,
|
||||
random_subset_selector<T,rand_type>& b
|
||||
) { a.swap(b); }
|
||||
/*!
|
||||
provides global swap support
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename rand_type
|
||||
>
|
||||
void serialize (
|
||||
const random_subset_selector<T,rand_type>& item,
|
||||
std::ostream& out
|
||||
);
|
||||
/*!
|
||||
provides serialization support
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename rand_type
|
||||
>
|
||||
void deserialize (
|
||||
random_subset_selector<T,rand_type>& item,
|
||||
std::istream& in
|
||||
);
|
||||
/*!
|
||||
provides deserialization support
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename alloc
|
||||
>
|
||||
random_subset_selector<T> randomly_subsample (
|
||||
const std::vector<T,alloc>& samples,
|
||||
unsigned long num
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- returns a random subset R such that:
|
||||
- R contains a random subset of the given samples
|
||||
- R.size() == min(num, samples.size())
|
||||
- R.max_size() == num
|
||||
- The random number generator used by this function will always be
|
||||
initialized in the same way. I.e. this function will always pick
|
||||
the same random subsample if called multiple times.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename alloc,
|
||||
typename U
|
||||
>
|
||||
random_subset_selector<T> randomly_subsample (
|
||||
const std::vector<T,alloc>& samples,
|
||||
unsigned long num,
|
||||
const U& random_seed
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- random_seed must be convertible to a string by dlib::cast_to_string()
|
||||
ensures
|
||||
- returns a random subset R such that:
|
||||
- R contains a random subset of the given samples
|
||||
- R.size() == min(num, samples.size())
|
||||
- R.max_size() == num
|
||||
- The given random_seed will be used to initialize the random number
|
||||
generator used by this function.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
random_subset_selector<T> randomly_subsample (
|
||||
const random_subset_selector<T>& samples,
|
||||
unsigned long num
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- returns a random subset R such that:
|
||||
- R contains a random subset of the given samples
|
||||
- R.size() == min(num, samples.size())
|
||||
- R.max_size() == num
|
||||
- The random number generator used by this function will always be
|
||||
initialized in the same way. I.e. this function will always pick
|
||||
the same random subsample if called multiple times.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T,
|
||||
typename U
|
||||
>
|
||||
random_subset_selector<T> randomly_subsample (
|
||||
const random_subset_selector<T>& samples,
|
||||
unsigned long num,
|
||||
const U& random_seed
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- random_seed must be convertible to a string by dlib::cast_to_string()
|
||||
ensures
|
||||
- returns a random subset R such that:
|
||||
- R contains a random subset of the given samples
|
||||
- R.size() == min(num, samples.size())
|
||||
- R.max_size() == num
|
||||
- The given random_seed will be used to initialize the random number
|
||||
generator used by this function.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
const matrix_exp mat (
|
||||
const random_subset_selector<T>& m
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- returns a matrix R such that:
|
||||
- is_col_vector(R) == true
|
||||
- R.size() == m.size()
|
||||
- for all valid r:
|
||||
R(r) == m[r]
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_RANDOM_SUBSeT_SELECTOR_ABSTRACT_H_
|
||||
|
||||
370
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/running_gradient.h
vendored
Normal file
370
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/running_gradient.h
vendored
Normal file
@@ -0,0 +1,370 @@
|
||||
// Copyright (C) 2016 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_RuNNING_GRADIENT_Hh_
|
||||
#define DLIB_RuNNING_GRADIENT_Hh_
|
||||
|
||||
#include "running_gradient_abstract.h"
|
||||
#include "../algs.h"
|
||||
#include "../serialize.h"
|
||||
#include <cmath>
|
||||
#include "../matrix.h"
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
class running_gradient
|
||||
{
|
||||
public:
|
||||
|
||||
running_gradient (
|
||||
)
|
||||
{
|
||||
clear();
|
||||
}
|
||||
|
||||
void clear(
|
||||
)
|
||||
{
|
||||
n = 0;
|
||||
R = identity_matrix<double>(2)*1e6;
|
||||
w = 0;
|
||||
residual_squared = 0;
|
||||
}
|
||||
|
||||
double current_n (
|
||||
) const
|
||||
{
|
||||
return n;
|
||||
}
|
||||
|
||||
void add(
|
||||
double y
|
||||
)
|
||||
{
|
||||
matrix<double,2,1> x;
|
||||
x = n, 1;
|
||||
|
||||
// Do recursive least squares computations
|
||||
const double temp = 1 + trans(x)*R*x;
|
||||
matrix<double,2,1> tmp = R*x;
|
||||
R = R - (tmp*trans(tmp))/temp;
|
||||
// R should always be symmetric. This line improves numeric stability of this algorithm.
|
||||
R = 0.5*(R + trans(R));
|
||||
w = w + R*x*(y - trans(x)*w);
|
||||
|
||||
// Also, recursively keep track of the residual error between the given value
|
||||
// and what our linear predictor outputs.
|
||||
residual_squared = residual_squared + std::pow((y - trans(x)*w),2.0)*temp;
|
||||
|
||||
++n;
|
||||
}
|
||||
|
||||
double gradient (
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(current_n() > 1,
|
||||
"\t double running_gradient::gradient()"
|
||||
<< "\n\t You must add more values into this object before calling this function."
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
return w(0);
|
||||
}
|
||||
|
||||
double intercept (
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(current_n() > 0,
|
||||
"\t double running_gradient::intercept()"
|
||||
<< "\n\t You must add more values into this object before calling this function."
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
return w(1);
|
||||
}
|
||||
double standard_error (
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(current_n() > 2,
|
||||
"\t double running_gradient::standard_error()"
|
||||
<< "\n\t You must add more values into this object before calling this function."
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
|
||||
const double s = residual_squared/(n-2);
|
||||
const double adjust = 12.0/(std::pow(current_n(),3.0) - current_n());
|
||||
return std::sqrt(s*adjust);
|
||||
}
|
||||
|
||||
double probability_gradient_less_than (
|
||||
double thresh
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(current_n() > 2,
|
||||
"\t double running_gradient::probability_gradient_less_than()"
|
||||
<< "\n\t You must add more values into this object before calling this function."
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
return normal_cdf(thresh, gradient(), standard_error());
|
||||
}
|
||||
|
||||
double probability_gradient_greater_than (
|
||||
double thresh
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(current_n() > 2,
|
||||
"\t double running_gradient::probability_gradient_greater_than()"
|
||||
<< "\n\t You must add more values into this object before calling this function."
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
return 1-probability_gradient_less_than(thresh);
|
||||
}
|
||||
|
||||
friend void serialize (const running_gradient& item, std::ostream& out)
|
||||
{
|
||||
int version = 1;
|
||||
serialize(version, out);
|
||||
serialize(item.n, out);
|
||||
serialize(item.R, out);
|
||||
serialize(item.w, out);
|
||||
serialize(item.residual_squared, out);
|
||||
}
|
||||
|
||||
friend void deserialize (running_gradient& item, std::istream& in)
|
||||
{
|
||||
int version = 0;
|
||||
deserialize(version, in);
|
||||
if (version != 1)
|
||||
throw serialization_error("Unexpected version found while deserializing dlib::running_gradient.");
|
||||
deserialize(item.n, in);
|
||||
deserialize(item.R, in);
|
||||
deserialize(item.w, in);
|
||||
deserialize(item.residual_squared, in);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
static double normal_cdf(double value, double mean, double stddev)
|
||||
{
|
||||
if (stddev == 0)
|
||||
{
|
||||
if (value < mean)
|
||||
return 0;
|
||||
else if (value > mean)
|
||||
return 1;
|
||||
else
|
||||
return 0.5;
|
||||
}
|
||||
value = (value-mean)/stddev;
|
||||
return 0.5 * std::erfc(-value / std::sqrt(2.0));
|
||||
}
|
||||
|
||||
double n;
|
||||
matrix<double,2,2> R;
|
||||
matrix<double,2,1> w;
|
||||
double residual_squared;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
double probability_gradient_less_than (
|
||||
const T& container,
|
||||
double thresh
|
||||
)
|
||||
{
|
||||
running_gradient g;
|
||||
for(auto&& v : container)
|
||||
g.add(v);
|
||||
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(g.current_n() > 2,
|
||||
"\t double probability_gradient_less_than()"
|
||||
<< "\n\t You need more than 2 elements in the given container to call this function."
|
||||
);
|
||||
return g.probability_gradient_less_than(thresh);
|
||||
}
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
double probability_gradient_greater_than (
|
||||
const T& container,
|
||||
double thresh
|
||||
)
|
||||
{
|
||||
running_gradient g;
|
||||
for(auto&& v : container)
|
||||
g.add(v);
|
||||
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(g.current_n() > 2,
|
||||
"\t double probability_gradient_greater_than()"
|
||||
<< "\n\t You need more than 2 elements in the given container to call this function."
|
||||
);
|
||||
return g.probability_gradient_greater_than(thresh);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
double find_upper_quantile (
|
||||
const T& container_,
|
||||
double quantile
|
||||
)
|
||||
{
|
||||
DLIB_CASSERT(0 <= quantile && quantile <= 1.0);
|
||||
|
||||
// copy container into a std::vector
|
||||
std::vector<double> container(container_.begin(), container_.end());
|
||||
|
||||
DLIB_CASSERT(container.size() > 0);
|
||||
|
||||
size_t idx_upper = std::round((container.size()-1)*(1-quantile));
|
||||
|
||||
std::nth_element(container.begin(), container.begin()+idx_upper, container.end());
|
||||
auto upper_q = *(container.begin()+idx_upper);
|
||||
return upper_q;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
size_t count_steps_without_decrease (
|
||||
const T& container,
|
||||
double probability_of_decrease = 0.51
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(0.5 < probability_of_decrease && probability_of_decrease < 1,
|
||||
"\t size_t count_steps_without_decrease()"
|
||||
<< "\n\t probability_of_decrease: "<< probability_of_decrease
|
||||
);
|
||||
|
||||
running_gradient g;
|
||||
size_t count = 0;
|
||||
size_t j = 0;
|
||||
for (auto i = container.rbegin(); i != container.rend(); ++i)
|
||||
{
|
||||
++j;
|
||||
g.add(*i);
|
||||
if (g.current_n() > 2)
|
||||
{
|
||||
// Note that this only looks backwards because we are looping over the
|
||||
// container backwards. So here we are really checking if the gradient isn't
|
||||
// decreasing.
|
||||
double prob_decreasing = g.probability_gradient_greater_than(0);
|
||||
// If we aren't confident things are decreasing.
|
||||
if (prob_decreasing < probability_of_decrease)
|
||||
count = j;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
size_t count_steps_without_decrease_robust (
|
||||
const T& container,
|
||||
double probability_of_decrease = 0.51,
|
||||
double quantile_discard = 0.10
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(0 <= quantile_discard && quantile_discard <= 1);
|
||||
DLIB_ASSERT(0.5 < probability_of_decrease && probability_of_decrease < 1,
|
||||
"\t size_t count_steps_without_decrease_robust()"
|
||||
<< "\n\t probability_of_decrease: "<< probability_of_decrease
|
||||
);
|
||||
|
||||
if (container.size() == 0)
|
||||
return 0;
|
||||
|
||||
const auto quantile_thresh = find_upper_quantile(container, quantile_discard);
|
||||
|
||||
running_gradient g;
|
||||
size_t count = 0;
|
||||
size_t j = 0;
|
||||
for (auto i = container.rbegin(); i != container.rend(); ++i)
|
||||
{
|
||||
++j;
|
||||
// ignore values that are too large
|
||||
if (*i <= quantile_thresh)
|
||||
g.add(*i);
|
||||
|
||||
if (g.current_n() > 2)
|
||||
{
|
||||
// Note that this only looks backwards because we are looping over the
|
||||
// container backwards. So here we are really checking if the gradient isn't
|
||||
// decreasing.
|
||||
double prob_decreasing = g.probability_gradient_greater_than(0);
|
||||
// If we aren't confident things are decreasing.
|
||||
if (prob_decreasing < probability_of_decrease)
|
||||
count = j;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
size_t count_steps_without_increase (
|
||||
const T& container,
|
||||
double probability_of_increase = 0.51
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(0.5 < probability_of_increase && probability_of_increase < 1,
|
||||
"\t size_t count_steps_without_increase()"
|
||||
<< "\n\t probability_of_increase: "<< probability_of_increase
|
||||
);
|
||||
|
||||
running_gradient g;
|
||||
size_t count = 0;
|
||||
size_t j = 0;
|
||||
for (auto i = container.rbegin(); i != container.rend(); ++i)
|
||||
{
|
||||
++j;
|
||||
g.add(*i);
|
||||
if (g.current_n() > 2)
|
||||
{
|
||||
// Note that this only looks backwards because we are looping over the
|
||||
// container backwards. So here we are really checking if the gradient isn't
|
||||
// increasing.
|
||||
double prob_increasing = g.probability_gradient_less_than(0);
|
||||
// If we aren't confident things are increasing.
|
||||
if (prob_increasing < probability_of_increase)
|
||||
count = j;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_RuNNING_GRADIENT_Hh_
|
||||
|
||||
|
||||
276
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/running_gradient_abstract.h
vendored
Normal file
276
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/running_gradient_abstract.h
vendored
Normal file
@@ -0,0 +1,276 @@
|
||||
// Copyright (C) 2016 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_
|
||||
#ifdef DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_
|
||||
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
class running_gradient
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is a tool for estimating if a noisy sequence of numbers is
|
||||
trending up or down and by how much. It does this by finding the least
|
||||
squares fit of a line to the data and then allows you to perform a
|
||||
statistical test on the slope of that line.
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
running_gradient (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #current_n() == 0
|
||||
!*/
|
||||
|
||||
void clear(
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #current_n() == 0
|
||||
- this object has its initial value
|
||||
- clears all memory of any previous data points
|
||||
!*/
|
||||
|
||||
double current_n (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of values given to this object by add().
|
||||
!*/
|
||||
|
||||
void add(
|
||||
double y
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- Updates the gradient() and standard_error() estimates in this object
|
||||
based on the new y value.
|
||||
- #current_n() == current_n() + 1
|
||||
!*/
|
||||
|
||||
double gradient (
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- current_n() > 1
|
||||
ensures
|
||||
- If we consider the values given to add() as time series data, we can
|
||||
estimate the rate-of-change of those values. That is, how much,
|
||||
typically, do those values change from sample to sample? The gradient()
|
||||
function returns the current estimate. It does this by finding the least
|
||||
squares fit of a line to the data given to add() and returning the slope
|
||||
of this line.
|
||||
!*/
|
||||
|
||||
double intercept (
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- current_n() > 0
|
||||
ensures
|
||||
- This class fits a line to the time series data given to add(). This
|
||||
function returns the intercept of that line while gradient() returns the
|
||||
slope of that line. This means that, for example, the next point that
|
||||
add() will see, as predicted by this best fit line, is the value
|
||||
intercept() + current_n()*gradient().
|
||||
!*/
|
||||
|
||||
double standard_error (
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- current_n() > 2
|
||||
ensures
|
||||
- returns the standard deviation of the estimate of gradient().
|
||||
!*/
|
||||
|
||||
double probability_gradient_less_than (
|
||||
double thresh
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- current_n() > 2
|
||||
ensures
|
||||
- If we can assume the values given to add() are linearly related to each
|
||||
other and corrupted by Gaussian additive noise then our estimate of
|
||||
gradient() is a random variable with a mean value of gradient() and a
|
||||
standard deviation of standard_error(). This lets us compute the
|
||||
probability that the true gradient of the data is less than thresh, which
|
||||
is what this function returns.
|
||||
!*/
|
||||
|
||||
double probability_gradient_greater_than (
|
||||
double thresh
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- current_n() > 2
|
||||
ensures
|
||||
- returns 1-probability_gradient_less_than(thresh)
|
||||
!*/
|
||||
|
||||
};
|
||||
|
||||
void serialize (
|
||||
const running_gradient& item,
|
||||
std::ostream& out
|
||||
);
|
||||
/*!
|
||||
provides serialization support
|
||||
!*/
|
||||
|
||||
void deserialize (
|
||||
running_gradient& item,
|
||||
std::istream& in
|
||||
);
|
||||
/*!
|
||||
provides serialization support
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
double probability_gradient_less_than (
|
||||
const T& container,
|
||||
double thresh
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- container must be a container of double values that can be enumerated with a
|
||||
range based for loop.
|
||||
- The container must contain more than 2 elements.
|
||||
ensures
|
||||
- Puts all the elements of container into a running_gradient object, R, and
|
||||
then returns R.probability_gradient_less_than(thresh).
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
double probability_gradient_greater_than (
|
||||
const T& container,
|
||||
double thresh
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- container must be a container of double values that can be enumerated with a
|
||||
range based for loop.
|
||||
- The container must contain more than 2 elements.
|
||||
ensures
|
||||
- Puts all the elements of container into a running_gradient object, R, and
|
||||
then returns R.probability_gradient_greater_than(thresh).
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
size_t count_steps_without_decrease (
|
||||
const T& container,
|
||||
double probability_of_decrease = 0.51
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- container must be a container of double values that can be enumerated with
|
||||
.rbegin() and .rend().
|
||||
- 0.5 < probability_of_decrease < 1
|
||||
ensures
|
||||
- If you think of the contents of container as a potentially noisy time series,
|
||||
then this function returns a count of how long the time series has gone
|
||||
without noticeably decreasing in value. It does this by adding the
|
||||
elements into a running_gradient object and counting how many elements,
|
||||
starting with container.back(), that you need to examine before you are
|
||||
confident that the series has been decreasing in value. Here, "confident of
|
||||
decrease" means that the probability of decrease is >= probability_of_decrease.
|
||||
- Setting probability_of_decrease to 0.51 means we count until we see even a
|
||||
small hint of decrease, whereas a larger value of 0.99 would return a larger
|
||||
count since it keeps going until it is nearly certain the time series is
|
||||
decreasing.
|
||||
- The max possible output from this function is container.size().
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
size_t count_steps_without_decrease_robust (
|
||||
const T& container,
|
||||
double probability_of_decrease = 0.51,
|
||||
double quantile_discard = 0.10
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- container must be a container of double values that can be enumerated with
|
||||
.begin() and .end() as well as .rbegin() and .rend().
|
||||
- 0.5 < probability_of_decrease < 1
|
||||
- 0 <= quantile_discard <= 1
|
||||
ensures
|
||||
- This function behaves just like
|
||||
count_steps_without_decrease(container,probability_of_decrease) except that
|
||||
it ignores values in container that are in the upper quantile_discard
|
||||
quantile. So for example, if the quantile discard is 0.1 then the 10%
|
||||
largest values in container are ignored.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
size_t count_steps_without_increase (
|
||||
const T& container,
|
||||
double probability_of_increase = 0.51
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- container must be a container of double values that can be enumerated with
|
||||
.rbegin() and .rend().
|
||||
- 0.5 < probability_of_increase < 1
|
||||
ensures
|
||||
- If you think of the contents of container as a potentially noisy time series,
|
||||
then this function returns a count of how long the time series has gone
|
||||
without noticeably increasing in value. It does this by adding the
|
||||
elements into a running_gradient object and counting how many elements,
|
||||
starting with container.back(), that you need to examine before you are
|
||||
confident that the series has been increasing in value. Here, "confident of
|
||||
increase" means that the probability of increase is >= probability_of_increase.
|
||||
- Setting probability_of_increase to 0.51 means we count until we see even a
|
||||
small hint of increase, whereas a larger value of 0.99 would return a larger
|
||||
count since it keeps going until it is nearly certain the time series is
|
||||
increasing.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
double find_upper_quantile (
|
||||
const T& container,
|
||||
double quantile
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- container must be a container of double values that can be enumerated with
|
||||
.begin() and .end().
|
||||
- 0 <= quantile <= 1
|
||||
- container.size() > 0
|
||||
ensures
|
||||
- Finds and returns the value such that quantile percent of the values in
|
||||
container are greater than it. For example, 0.5 would find the median value
|
||||
in container while 0.1 would find the value that lower bounded the 10%
|
||||
largest values in container.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_
|
||||
|
||||
|
||||
269
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/sammon.h
vendored
Normal file
269
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/sammon.h
vendored
Normal file
@@ -0,0 +1,269 @@
|
||||
// Copyright (C) 2012 Emanuele Cesena (emanuele.cesena@gmail.com), Davis E. King
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_SAMMoN_Hh_
|
||||
#define DLIB_SAMMoN_Hh_
|
||||
|
||||
#include "sammon_abstract.h"
|
||||
#include "../matrix.h"
|
||||
#include "../algs.h"
|
||||
#include "dpca.h"
|
||||
#include <vector>
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
class sammon_projection
|
||||
{
|
||||
|
||||
public:
|
||||
|
||||
// ------------------------------------------------------------------------------------
|
||||
|
||||
template <typename matrix_type>
|
||||
std::vector<matrix<double,0,1> > operator() (
|
||||
const std::vector<matrix_type>& data,
|
||||
const long num_dims
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(num_dims > 0,
|
||||
"\t std::vector<matrix<double,0,1> > sammon_projection::operator()"
|
||||
<< "\n\t Invalid inputs were given to this function."
|
||||
<< "\n\t num_dims: " << num_dims
|
||||
);
|
||||
std::vector<matrix<double,0,1> > result; // projections
|
||||
if (data.size() == 0)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_ASSERTS
|
||||
DLIB_ASSERT(0 < num_dims && num_dims <= data[0].size(),
|
||||
"\t std::vector<matrix<double,0,1> > sammon_projection::operator()"
|
||||
<< "\n\t Invalid inputs were given to this function."
|
||||
<< "\n\t data.size(): " << data.size()
|
||||
<< "\n\t num_dims: " << num_dims
|
||||
<< "\n\t data[0].size(): " << data[0].size()
|
||||
);
|
||||
for (unsigned long i = 0; i < data.size(); ++i)
|
||||
{
|
||||
DLIB_ASSERT(is_col_vector(data[i]) && data[i].size() == data[0].size(),
|
||||
"\t std::vector<matrix<double,0,1> > sammon_projection::operator()"
|
||||
<< "\n\t Invalid inputs were given to this function."
|
||||
<< "\n\t data["<<i<<"].size(): " << data[i].size()
|
||||
<< "\n\t data[0].size(): " << data[0].size()
|
||||
<< "\n\t is_col_vector(data["<<i<<"]): " << is_col_vector(data[i])
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
double err; // error (discarded)
|
||||
do_sammon_projection(data, num_dims, result, err);
|
||||
return result;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------
|
||||
|
||||
template <typename matrix_type>
|
||||
void operator() (
|
||||
const std::vector<matrix_type>& data,
|
||||
const long num_dims,
|
||||
std::vector<matrix<double,0,1> >& result,
|
||||
double &err,
|
||||
const unsigned long num_iters = 1000,
|
||||
const double err_delta = 1.0e-9
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(num_dims > 0 && num_iters > 0 && err_delta > 0.0,
|
||||
"\t std::vector<matrix<double,0,1> > sammon_projection::operator()"
|
||||
<< "\n\t Invalid inputs were given to this function."
|
||||
<< "\n\t data.size(): " << data.size()
|
||||
<< "\n\t num_dims: " << num_dims
|
||||
<< "\n\t num_iters: " << num_iters
|
||||
<< "\n\t err_delta: " << err_delta
|
||||
);
|
||||
if (data.size() == 0)
|
||||
{
|
||||
result.clear();
|
||||
err = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_ASSERTS
|
||||
DLIB_ASSERT(0 < num_dims && num_dims <= data[0].size(),
|
||||
"\t std::vector<matrix<double,0,1> > sammon_projection::operator()"
|
||||
<< "\n\t Invalid inputs were given to this function."
|
||||
<< "\n\t data.size(): " << data.size()
|
||||
<< "\n\t num_dims: " << num_dims
|
||||
<< "\n\t data[0].size(): " << data[0].size()
|
||||
);
|
||||
for (unsigned long i = 0; i < data.size(); ++i)
|
||||
{
|
||||
DLIB_ASSERT(is_col_vector(data[i]) && data[i].size() == data[0].size(),
|
||||
"\t std::vector<matrix<double,0,1> > sammon_projection::operator()"
|
||||
<< "\n\t Invalid inputs were given to this function."
|
||||
<< "\n\t data["<<i<<"].size(): " << data[i].size()
|
||||
<< "\n\t data[0].size(): " << data[0].size()
|
||||
<< "\n\t is_col_vector(data["<<i<<"]): " << is_col_vector(data[i])
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
do_sammon_projection(data, num_dims, result, err, num_iters, err_delta);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
private:
|
||||
|
||||
void compute_relative_distances(
|
||||
matrix<double,0,1>& dist, // relative distances (output)
|
||||
matrix<double,0,0>& data, // input data (matrix whose columns are the input vectors)
|
||||
double eps_ratio = 1.0e-7 // to compute the minimum distance eps
|
||||
)
|
||||
/*!
|
||||
requires
|
||||
- dist.nc() == comb( data.nc(), 2 ), preallocated
|
||||
- eps_ratio > 0
|
||||
ensures
|
||||
- dist[k] == lenght(data[i] - data[j]) for k = j(j-1)/2 + i
|
||||
!*/
|
||||
{
|
||||
const long N = data.nc(); // num of points
|
||||
double eps; // minimum distance, forced to avoid vectors collision
|
||||
// computed at runtime as eps_ration * mean(vectors distances)
|
||||
for (int k = 0, i = 1; i < N; ++i)
|
||||
for (int j = 0; j < i; ++j)
|
||||
dist(k++) = length(colm(data, i) - colm(data, j));
|
||||
|
||||
eps = eps_ratio * mean(dist);
|
||||
dist = lowerbound(dist, eps);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename matrix_type>
|
||||
void do_sammon_projection(
|
||||
const std::vector<matrix_type>& data, // input data
|
||||
unsigned long num_dims, // dimension of the reduced space
|
||||
std::vector<matrix<double,0,1> >& result, // projections (output)
|
||||
double &err, // error (output)
|
||||
unsigned long num_iters = 1000, // max num of iterations: stop condition
|
||||
const double err_delta = 1.0e-9 // delta error: stop condition
|
||||
)
|
||||
/*!
|
||||
requires
|
||||
- matrix_type should be a kind of dlib::matrix<double,N,1>
|
||||
- num_dims > 0
|
||||
- num_iters > 0
|
||||
- err_delta > 0
|
||||
ensures
|
||||
- result == a set of matrix<double,num_dims,1> objects that represent
|
||||
the Sammon's projections of data vectors.
|
||||
- err == the estimated error done in the projection, with the extra
|
||||
property that err(at previous iteration) - err < err_delta
|
||||
!*/
|
||||
{
|
||||
// other params
|
||||
const double mf = 0.3; // magic factor
|
||||
|
||||
matrix<double> mdata; // input data as matrix
|
||||
matrix<double> projs; // projected vectors, i.e. output data as matrix
|
||||
|
||||
// std::vector<matrix> -> matrix
|
||||
mdata.set_size(data[0].size(), data.size());
|
||||
for (unsigned int i = 0; i < data.size(); i++)
|
||||
set_colm(mdata, i) = data[i];
|
||||
|
||||
const long N = mdata.nc(); // num of points
|
||||
const long d = num_dims; // size of the reduced space
|
||||
const long nd = N * (N - 1) / 2; // num of pairs of points = size of the distances vectors
|
||||
|
||||
matrix<double, 0, 1> dsij, inv_dsij; // d*_ij: pair-wise distances in the input space (and inverses)
|
||||
dsij.set_size(nd, 1);
|
||||
inv_dsij.set_size(nd, 1);
|
||||
double ic; // 1.0 / sum of dsij
|
||||
|
||||
matrix<double, 0, 1> dij; // d_ij: pair-wise distances in the reduced space
|
||||
dij.set_size(nd, 1);
|
||||
|
||||
matrix<double, 0, 0> dE, dE2, dtemp; // matrices representing error partial derivatives
|
||||
dE.set_size(d, N);
|
||||
dE2.set_size(d, N);
|
||||
dtemp.set_size(d, N);
|
||||
|
||||
matrix<double, 0, 1> inv_dij, alpha; // utility vectors used to compute the partial derivatives
|
||||
inv_dij.set_size(N, 1); // inv_dij is 1.0/dij, but we only need it column-wise
|
||||
alpha.set_size(N, 1); // (slightly wasting a bit of computation)
|
||||
// alpha = 1.0/dij - 1.0/dsij, again column-wise
|
||||
|
||||
|
||||
// initialize projs with PCA
|
||||
discriminant_pca<matrix<double> > dpca;
|
||||
for (int i = 0; i < mdata.nc(); ++i)
|
||||
{
|
||||
dpca.add_to_total_variance(colm(mdata, i));
|
||||
}
|
||||
matrix<double> mat = dpca.dpca_matrix_of_size(num_dims);
|
||||
projs = mat * mdata;
|
||||
|
||||
// compute dsij, inv_dsij and ic
|
||||
compute_relative_distances(dsij, mdata);
|
||||
inv_dsij = 1.0 / dsij;
|
||||
ic = 1.0 / sum(dsij);
|
||||
|
||||
// compute dij and err
|
||||
compute_relative_distances(dij, projs);
|
||||
err = ic * sum(pointwise_multiply(squared(dij - dsij), inv_dsij));
|
||||
|
||||
// start iterating
|
||||
while (num_iters--)
|
||||
{
|
||||
// compute dE, dE2 progressively column by column
|
||||
for (int p = 0; p < N; ++p)
|
||||
{
|
||||
// compute
|
||||
// - alpha_p, the column vector with 1/d_pj - 1/d*_pj
|
||||
// - dtemp, the matrix with the p-th column repeated all along
|
||||
//TODO: optimize constructions
|
||||
for (int i = 0; i < N; ++i)
|
||||
{
|
||||
int pos = (i < p) ? p * (p - 1) / 2 + i : i * (i - 1) / 2 + p;
|
||||
inv_dij(i) = (i == p) ? 0.0 : 1.0 / dij(pos);
|
||||
alpha(i) = (i == p) ? 0.0 : inv_dij(i) - inv_dsij(pos);
|
||||
set_colm(dtemp, i) = colm(projs, p);
|
||||
}
|
||||
|
||||
dtemp -= projs;
|
||||
set_colm(dE, p) = dtemp * alpha;
|
||||
|
||||
double sum_alpha = sum(alpha);
|
||||
set_colm(dE2, p) = abs( sum_alpha + squared(dtemp) * cubed(inv_dij) );
|
||||
}
|
||||
|
||||
|
||||
// compute the update projections
|
||||
projs += pointwise_multiply(dE, mf * reciprocal(dE2));
|
||||
|
||||
// compute new dij and error
|
||||
compute_relative_distances(dij, projs);
|
||||
double err_new = ic * sum( pointwise_multiply(squared(dij - dsij), inv_dsij) );
|
||||
if (err - err_new < err_delta)
|
||||
break;
|
||||
err = err_new;
|
||||
}
|
||||
|
||||
// matrix -> std::vector<matrix>
|
||||
result.clear();
|
||||
for (int i = 0; i < projs.nc(); ++i)
|
||||
result.push_back(colm(projs, i));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
} // namespace dlib
|
||||
|
||||
#endif // DLIB_SAMMoN_Hh_
|
||||
|
||||
117
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/sammon_abstract.h
vendored
Normal file
117
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/sammon_abstract.h
vendored
Normal file
@@ -0,0 +1,117 @@
|
||||
// Copyright (C) 2012 Emanuele Cesena (emanuele.cesena@gmail.com), Davis E. King
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_SAMMoN_ABSTRACT_Hh_
|
||||
#ifdef DLIB_SAMMoN_ABSTRACT_Hh_
|
||||
|
||||
#include "../matrix/matrix_abstract.h"
|
||||
#include <vector>
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
class sammon_projection
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This is a function object that computes the Sammon projection of a set
|
||||
of N points in a L-dimensional vector space onto a d-dimensional space
|
||||
(d < L), according to the paper:
|
||||
A Nonlinear Mapping for Data Structure Analysis (1969) by J.W. Sammon
|
||||
|
||||
The current implementation is a vectorized version of the original algorithm.
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
sammon_projection(
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- this object is properly initialized
|
||||
!*/
|
||||
|
||||
template <typename matrix_type>
|
||||
std::vector<matrix<double,0,1> > operator() (
|
||||
const std::vector<matrix_type>& data,
|
||||
const long num_dims
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- num_dims > 0
|
||||
- matrix_type should be a kind of dlib::matrix of doubles capable
|
||||
of representing column vectors.
|
||||
- for all valid i:
|
||||
- is_col_vector(data[i]) == true
|
||||
- data[0].size() == data[i].size()
|
||||
(i.e. all the vectors in data must have the same dimensionality)
|
||||
- if (data.size() != 0) then
|
||||
- 0 < num_dims <= data[0].size()
|
||||
(i.e. you can't project into a higher dimension than the input data,
|
||||
only to a lower dimension.)
|
||||
ensures
|
||||
- This routine computes Sammon's dimensionality reduction method based on the
|
||||
given input data. It will attempt to project the contents of data into a
|
||||
num_dims dimensional space that preserves relative distances between the
|
||||
input data points.
|
||||
- This function returns a std::vector, OUT, such that:
|
||||
- OUT == a set of column vectors that represent the Sammon projection of
|
||||
the input data vectors.
|
||||
- OUT.size() == data.size()
|
||||
- for all valid i:
|
||||
- OUT[i].size() == num_dims
|
||||
- OUT[i] == the Sammon projection of the input vector data[i]
|
||||
!*/
|
||||
|
||||
template <typename matrix_type>
|
||||
void operator() (
|
||||
const std::vector<matrix_type>& data,
|
||||
const long num_dims,
|
||||
std::vector<matrix<double,0,1> >& result,
|
||||
double &err,
|
||||
const unsigned long num_iters = 1000,
|
||||
const double err_delta = 1.0e-9
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- num_iters > 0
|
||||
- err_delta > 0
|
||||
- num_dims > 0
|
||||
- matrix_type should be a kind of dlib::matrix of doubles capable
|
||||
of representing column vectors.
|
||||
- for all valid i:
|
||||
- is_col_vector(data[i]) == true
|
||||
- data[0].size() == data[i].size()
|
||||
(i.e. all the vectors in data must have the same dimensionality)
|
||||
- if (data.size() != 0) then
|
||||
- 0 < num_dims <= data[0].size()
|
||||
(i.e. you can't project into a higher dimension than the input data,
|
||||
only to a lower dimension.)
|
||||
ensures
|
||||
- This routine computes Sammon's dimensionality reduction method based on the
|
||||
given input data. It will attempt to project the contents of data into a
|
||||
num_dims dimensional space that preserves relative distances between the
|
||||
input data points.
|
||||
- #err == the final error value at the end of the algorithm. The goal of Sammon's
|
||||
algorithm is to find a lower dimensional projection of the input data that
|
||||
preserves the relative distances between points. The value in #err is a measure
|
||||
of the total error at the end of the algorithm. So smaller values indicate
|
||||
a better projection was found than if a large value is returned via #err.
|
||||
- Sammon's algorithm will run until either num_iters iterations has executed
|
||||
or the change in error from one iteration to the next is less than err_delta.
|
||||
- Upon completion, the output of Sammon's projection is stored into #result, in
|
||||
particular, we will have:
|
||||
- #result == a set of column vectors that represent the Sammon projection of
|
||||
the input data vectors.
|
||||
- #result.size() == data.size()
|
||||
- for all valid i:
|
||||
- #result[i].size() == num_dims
|
||||
- #result[i] == the Sammon projection of the input vector data[i]
|
||||
!*/
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_SAMMoN_ABSTRACT_Hh_
|
||||
|
||||
|
||||
1890
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/statistics.h
vendored
Normal file
1890
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/statistics.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1387
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/statistics_abstract.h
vendored
Normal file
1387
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/statistics_abstract.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
618
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/vector_normalizer_frobmetric.h
vendored
Normal file
618
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/vector_normalizer_frobmetric.h
vendored
Normal file
@@ -0,0 +1,618 @@
|
||||
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_VECTOR_NORMALIZER_FRoBMETRIC_Hh_
|
||||
#define DLIB_VECTOR_NORMALIZER_FRoBMETRIC_Hh_
|
||||
|
||||
#include "vector_normalizer_frobmetric_abstract.h"
|
||||
#include "../matrix.h"
|
||||
#include "../optimization.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename matrix_type
|
||||
>
|
||||
struct frobmetric_training_sample
|
||||
{
|
||||
matrix_type anchor_vect;
|
||||
std::vector<matrix_type> near_vects;
|
||||
std::vector<matrix_type> far_vects;
|
||||
|
||||
unsigned long num_triples (
|
||||
) const { return near_vects.size() * far_vects.size(); }
|
||||
|
||||
void clear()
|
||||
{
|
||||
near_vects.clear();
|
||||
far_vects.clear();
|
||||
}
|
||||
};
|
||||
|
||||
template <
|
||||
typename matrix_type
|
||||
>
|
||||
void serialize(const frobmetric_training_sample<matrix_type>& item, std::ostream& out)
|
||||
{
|
||||
int version = 1;
|
||||
serialize(version, out);
|
||||
serialize(item.anchor_vect, out);
|
||||
serialize(item.near_vects, out);
|
||||
serialize(item.far_vects, out);
|
||||
}
|
||||
|
||||
template <
|
||||
typename matrix_type
|
||||
>
|
||||
void deserialize(frobmetric_training_sample<matrix_type>& item, std::istream& in)
|
||||
{
|
||||
int version = 0;
|
||||
deserialize(version, in);
|
||||
if (version != 1)
|
||||
throw serialization_error("Unexpected version found while deserializing dlib::frobmetric_training_sample.");
|
||||
deserialize(item.anchor_vect, in);
|
||||
deserialize(item.near_vects, in);
|
||||
deserialize(item.far_vects, in);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename matrix_type
|
||||
>
|
||||
class vector_normalizer_frobmetric
|
||||
{
|
||||
|
||||
public:
|
||||
typedef typename matrix_type::mem_manager_type mem_manager_type;
|
||||
typedef typename matrix_type::type scalar_type;
|
||||
typedef matrix_type result_type;
|
||||
|
||||
private:
|
||||
struct compact_frobmetric_training_sample
|
||||
{
|
||||
std::vector<matrix_type> near_vects;
|
||||
std::vector<matrix_type> far_vects;
|
||||
};
|
||||
|
||||
struct objective
|
||||
{
|
||||
objective (
|
||||
const std::vector<compact_frobmetric_training_sample>& samples_,
|
||||
matrix<double,0,0,mem_manager_type>& Aminus_,
|
||||
const matrix<double,0,1,mem_manager_type>& bias_
|
||||
) : samples(samples_), Aminus(Aminus_), bias(bias_) {}
|
||||
|
||||
double operator()(const matrix<double,0,1,mem_manager_type>& u) const
|
||||
{
|
||||
long idx = 0;
|
||||
const long dims = samples[0].far_vects[0].size();
|
||||
// Here we compute \hat A from the paper, which we refer to as just A in
|
||||
// the code.
|
||||
matrix<double,0,0,mem_manager_type> A(dims,dims);
|
||||
A = 0;
|
||||
std::vector<double> ufar, unear;
|
||||
for (unsigned long i = 0; i < samples.size(); ++i)
|
||||
{
|
||||
ufar.assign(samples[i].far_vects.size(),0);
|
||||
unear.assign(samples[i].near_vects.size(),0);
|
||||
for (unsigned long j = 0; j < unear.size(); ++j)
|
||||
{
|
||||
for (unsigned long k = 0; k < ufar.size(); ++k)
|
||||
{
|
||||
const double val = u(idx++);
|
||||
ufar[k] -= val;
|
||||
unear[j] += val;
|
||||
}
|
||||
}
|
||||
for (unsigned long j = 0; j < unear.size(); ++j)
|
||||
A += unear[j]*samples[i].near_vects[j]*trans(samples[i].near_vects[j]);
|
||||
for (unsigned long j = 0; j < ufar.size(); ++j)
|
||||
A += ufar[j]*samples[i].far_vects[j]*trans(samples[i].far_vects[j]);
|
||||
}
|
||||
|
||||
eigenvalue_decomposition<matrix<double,0,0,mem_manager_type> > ed(make_symmetric(A));
|
||||
Aminus = ed.get_pseudo_v()*diagm(upperbound(ed.get_real_eigenvalues(),0))*trans(ed.get_pseudo_v());
|
||||
// Do this to avoid numeric instability later on since the above
|
||||
// computation can make Aminus slightly non-symmetric.
|
||||
Aminus = make_symmetric(Aminus);
|
||||
|
||||
return dot(u,bias) - 0.5*sum(squared(Aminus));
|
||||
}
|
||||
|
||||
private:
|
||||
const std::vector<compact_frobmetric_training_sample>& samples;
|
||||
matrix<double,0,0,mem_manager_type>& Aminus;
|
||||
const matrix<double,0,1,mem_manager_type>& bias;
|
||||
};
|
||||
|
||||
struct derivative
|
||||
{
|
||||
derivative (
|
||||
unsigned long num_triples_,
|
||||
const std::vector<compact_frobmetric_training_sample>& samples_,
|
||||
matrix<double,0,0,mem_manager_type>& Aminus_,
|
||||
const matrix<double,0,1,mem_manager_type>& bias_
|
||||
) : num_triples(num_triples_), samples(samples_), Aminus(Aminus_), bias(bias_) {}
|
||||
|
||||
matrix<double,0,1,mem_manager_type> operator()(const matrix<double,0,1,mem_manager_type>& ) const
|
||||
{
|
||||
// Note that Aminus is a function of u (the argument to this function), but
|
||||
// since Aminus will have been computed already by the most recent call to
|
||||
// the objective function we don't need to do anything with u. We can just
|
||||
// use Aminus right away.
|
||||
matrix<double,0,1,mem_manager_type> grad(num_triples);
|
||||
|
||||
long idx = 0;
|
||||
std::vector<double> ufar, unear;
|
||||
for (unsigned long i = 0; i < samples.size(); ++i)
|
||||
{
|
||||
ufar.resize(samples[i].far_vects.size());
|
||||
unear.resize(samples[i].near_vects.size());
|
||||
|
||||
for (unsigned long j = 0; j < unear.size(); ++j)
|
||||
unear[j] = sum(pointwise_multiply(Aminus, samples[i].near_vects[j]*trans(samples[i].near_vects[j])));
|
||||
for (unsigned long j = 0; j < ufar.size(); ++j)
|
||||
ufar[j] = sum(pointwise_multiply(Aminus, samples[i].far_vects[j]*trans(samples[i].far_vects[j])));
|
||||
|
||||
for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j)
|
||||
{
|
||||
for (unsigned long k = 0; k < samples[i].far_vects.size(); ++k)
|
||||
{
|
||||
grad(idx) = bias(idx) + ufar[k]-unear[j];
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return grad;
|
||||
}
|
||||
|
||||
private:
|
||||
const unsigned long num_triples;
|
||||
const std::vector<compact_frobmetric_training_sample>& samples;
|
||||
matrix<double,0,0,mem_manager_type>& Aminus;
|
||||
const matrix<double,0,1,mem_manager_type>& bias;
|
||||
};
|
||||
|
||||
|
||||
class custom_stop_strategy
|
||||
{
|
||||
public:
|
||||
custom_stop_strategy(
|
||||
double C_,
|
||||
double eps_,
|
||||
bool be_verbose_,
|
||||
unsigned long max_iter_
|
||||
)
|
||||
{
|
||||
_c = C_;
|
||||
|
||||
_cur_iter = 0;
|
||||
_gradient_thresh = eps_;
|
||||
_max_iter = max_iter_;
|
||||
_verbose = be_verbose_;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool should_continue_search (
|
||||
const T& u,
|
||||
const double ,
|
||||
const T& grad
|
||||
)
|
||||
{
|
||||
++_cur_iter;
|
||||
|
||||
double max_gradient = 0;
|
||||
for (long i = 0; i < grad.size(); ++i)
|
||||
{
|
||||
const bool at_lower_bound = (0 >= u(i) && grad(i) > 0);
|
||||
const bool at_upper_bound = (_c/grad.size() <= u(i) && grad(i) < 0);
|
||||
if (!at_lower_bound && !at_upper_bound)
|
||||
max_gradient = std::max(std::abs(grad(i)), max_gradient);
|
||||
}
|
||||
|
||||
if (_verbose)
|
||||
{
|
||||
std::cout << "iteration: " << _cur_iter << " max_gradient: "<< max_gradient << std::endl;
|
||||
}
|
||||
|
||||
// Only stop when the largest non-bound-constrained element of the gradient
|
||||
// is lower than the threshold.
|
||||
if (max_gradient < _gradient_thresh)
|
||||
return false;
|
||||
|
||||
// Check if we have hit the max allowable number of iterations.
|
||||
if (_cur_iter > _max_iter)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
bool _verbose;
|
||||
|
||||
unsigned long _max_iter;
|
||||
unsigned long _cur_iter;
|
||||
double _c;
|
||||
double _gradient_thresh;
|
||||
};
|
||||
|
||||
public:
|
||||
vector_normalizer_frobmetric (
|
||||
)
|
||||
{
|
||||
verbose = false;
|
||||
eps = 0.1;
|
||||
C = 1;
|
||||
max_iter = 5000;
|
||||
_use_identity_matrix_prior = false;
|
||||
}
|
||||
|
||||
bool uses_identity_matrix_prior (
|
||||
) const
|
||||
{
|
||||
return _use_identity_matrix_prior;
|
||||
}
|
||||
|
||||
void set_uses_identity_matrix_prior (
|
||||
bool use_prior
|
||||
)
|
||||
{
|
||||
_use_identity_matrix_prior = use_prior;
|
||||
}
|
||||
|
||||
void be_verbose(
|
||||
)
|
||||
{
|
||||
verbose = true;
|
||||
}
|
||||
|
||||
void set_epsilon (
|
||||
double eps_
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(eps_ > 0,
|
||||
"\t void vector_normalizer_frobmetric::set_epsilon(eps_)"
|
||||
<< "\n\t invalid inputs were given to this function"
|
||||
<< "\n\t eps: " << eps_
|
||||
);
|
||||
eps = eps_;
|
||||
}
|
||||
|
||||
double get_epsilon (
|
||||
) const
|
||||
{
|
||||
return eps;
|
||||
}
|
||||
|
||||
void set_c (
|
||||
double C_
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(C_ > 0,
|
||||
"\t void vector_normalizer_frobmetric::set_c()"
|
||||
<< "\n\t C_ must be greater than 0"
|
||||
<< "\n\t C_: " << C_
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
C = C_;
|
||||
}
|
||||
|
||||
void set_max_iterations (
|
||||
unsigned long max_iterations
|
||||
)
|
||||
{
|
||||
max_iter = max_iterations;
|
||||
}
|
||||
|
||||
unsigned long get_max_iterations (
|
||||
) const
|
||||
{
|
||||
return max_iter;
|
||||
}
|
||||
|
||||
double get_c (
|
||||
) const
|
||||
{
|
||||
return C;
|
||||
}
|
||||
|
||||
void be_quiet (
|
||||
)
|
||||
{
|
||||
verbose = false;
|
||||
}
|
||||
|
||||
void train (
|
||||
const std::vector<frobmetric_training_sample<matrix_type> >& samples
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(samples.size() > 0,
|
||||
"\tvoid vector_normalizer_frobmetric::train()"
|
||||
<< "\n\t you have to give a nonempty set of samples to this function"
|
||||
);
|
||||
#ifdef ENABLE_ASSERTS
|
||||
{
|
||||
const long dims = samples[0].anchor_vect.size();
|
||||
DLIB_ASSERT(dims != 0,
|
||||
"\tvoid vector_normalizer_frobmetric::train()"
|
||||
<< "\n\t The dimension of the input vectors can't be zero."
|
||||
);
|
||||
for (unsigned long i = 0; i < samples.size(); ++i)
|
||||
{
|
||||
DLIB_ASSERT(is_col_vector(samples[i].anchor_vect),
|
||||
"\tvoid vector_normalizer_frobmetric::train()"
|
||||
<< "\n\t Invalid inputs were given to this function."
|
||||
<< "\n\t i: " << i
|
||||
);
|
||||
DLIB_ASSERT(samples[i].anchor_vect.size() == dims,
|
||||
"\tvoid vector_normalizer_frobmetric::train()"
|
||||
<< "\n\t Invalid inputs were given to this function."
|
||||
<< "\n\t i: " << i
|
||||
<< "\n\t dims: " << dims
|
||||
<< "\n\t samples[i].anchor_vect.size(): " << samples[i].anchor_vect.size()
|
||||
);
|
||||
|
||||
DLIB_ASSERT(samples[i].num_triples() != 0,
|
||||
"\tvoid vector_normalizer_frobmetric::train()"
|
||||
<< "\n\t It is illegal for a training sample to have no data in it"
|
||||
<< "\n\t i: " << i
|
||||
);
|
||||
for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j)
|
||||
{
|
||||
DLIB_ASSERT(is_col_vector(samples[i].near_vects[j]),
|
||||
"\tvoid vector_normalizer_frobmetric::train()"
|
||||
<< "\n\t Invalid inputs were given to this function."
|
||||
<< "\n\t i: " << i
|
||||
<< "\n\t j: " << j
|
||||
);
|
||||
DLIB_ASSERT(samples[i].near_vects[j].size() == dims,
|
||||
"\tvoid vector_normalizer_frobmetric::train()"
|
||||
<< "\n\t Invalid inputs were given to this function."
|
||||
<< "\n\t i: " << i
|
||||
<< "\n\t j: " << j
|
||||
<< "\n\t dims: " << dims
|
||||
<< "\n\t samples[i].near_vects[j].size(): " << samples[i].near_vects[j].size()
|
||||
);
|
||||
}
|
||||
for (unsigned long j = 0; j < samples[i].far_vects.size(); ++j)
|
||||
{
|
||||
DLIB_ASSERT(is_col_vector(samples[i].far_vects[j]),
|
||||
"\tvoid vector_normalizer_frobmetric::train()"
|
||||
<< "\n\t Invalid inputs were given to this function."
|
||||
<< "\n\t i: " << i
|
||||
<< "\n\t j: " << j
|
||||
);
|
||||
DLIB_ASSERT(samples[i].far_vects[j].size() == dims,
|
||||
"\tvoid vector_normalizer_frobmetric::train()"
|
||||
<< "\n\t Invalid inputs were given to this function."
|
||||
<< "\n\t i: " << i
|
||||
<< "\n\t j: " << j
|
||||
<< "\n\t dims: " << dims
|
||||
<< "\n\t samples[i].far_vects[j].size(): " << samples[i].far_vects[j].size()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // end ENABLE_ASSERTS
|
||||
|
||||
|
||||
// compute the mean sample
|
||||
m = 0;
|
||||
for (unsigned long i = 0; i < samples.size(); ++i)
|
||||
m += samples[i].anchor_vect;
|
||||
m /= samples.size();
|
||||
|
||||
DLIB_ASSERT(is_finite(m), "Some of the input vectors to vector_normalizer_frobmetric::train() have infinite or NaN values");
|
||||
|
||||
// Now we need to find tform. So we setup the optimization problem and run it
|
||||
// over the next few lines of code.
|
||||
unsigned long num_triples = 0;
|
||||
for (unsigned long i = 0; i < samples.size(); ++i)
|
||||
num_triples += samples[i].near_vects.size()*samples[i].far_vects.size();
|
||||
|
||||
matrix<double,0,1,mem_manager_type> u(num_triples);
|
||||
matrix<double,0,1,mem_manager_type> bias(num_triples);
|
||||
u = 0;
|
||||
bias = 1;
|
||||
|
||||
|
||||
// precompute all the anchor_vect to far_vects/near_vects pairs
|
||||
std::vector<compact_frobmetric_training_sample> data(samples.size());
|
||||
unsigned long cnt = 0;
|
||||
std::vector<double> far_norm, near_norm;
|
||||
for (unsigned long i = 0; i < data.size(); ++i)
|
||||
{
|
||||
far_norm.clear();
|
||||
near_norm.clear();
|
||||
data[i].far_vects.reserve(samples[i].far_vects.size());
|
||||
data[i].near_vects.reserve(samples[i].near_vects.size());
|
||||
for (unsigned long j = 0; j < samples[i].far_vects.size(); ++j)
|
||||
{
|
||||
data[i].far_vects.push_back(samples[i].anchor_vect - samples[i].far_vects[j]);
|
||||
if (_use_identity_matrix_prior)
|
||||
far_norm.push_back(length_squared(data[i].far_vects.back()));
|
||||
}
|
||||
for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j)
|
||||
{
|
||||
data[i].near_vects.push_back(samples[i].anchor_vect - samples[i].near_vects[j]);
|
||||
if (_use_identity_matrix_prior)
|
||||
near_norm.push_back(length_squared(data[i].near_vects.back()));
|
||||
}
|
||||
|
||||
// Note that this loop only executes if _use_identity_matrix_prior == true.
|
||||
for (unsigned long j = 0; j < near_norm.size(); ++j)
|
||||
{
|
||||
for (unsigned long k = 0; k < far_norm.size(); ++k)
|
||||
{
|
||||
bias(cnt++) = 1 - (far_norm[k] - near_norm[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now run the main part of the algorithm
|
||||
matrix<double,0,0,mem_manager_type> Aminus;
|
||||
find_max_box_constrained(lbfgs_search_strategy(10),
|
||||
custom_stop_strategy(C, eps, verbose, max_iter),
|
||||
objective(data, Aminus, bias),
|
||||
derivative(num_triples, data, Aminus, bias),
|
||||
u, 0, C/num_triples);
|
||||
|
||||
|
||||
// What we need is the optimal Aminus which is a function of u. So we already
|
||||
// have what we need and just need to put it into tform.
|
||||
eigenvalue_decomposition<matrix<double,0,0,mem_manager_type> > ed(make_symmetric(-Aminus));
|
||||
matrix<double,0,1,mem_manager_type> eigs = ed.get_real_eigenvalues();
|
||||
// But first, discard the components that are zero to within the machine epsilon.
|
||||
const double tol = max(eigs)*std::numeric_limits<double>::epsilon();
|
||||
for (long i = 0; i < eigs.size(); ++i)
|
||||
{
|
||||
if (eigs(i) < tol)
|
||||
eigs(i) = 0;
|
||||
}
|
||||
if (_use_identity_matrix_prior)
|
||||
tform = matrix_cast<scalar_type>(identity_matrix(Aminus) + diagm(sqrt(eigs))*trans(ed.get_pseudo_v()));
|
||||
else
|
||||
tform = matrix_cast<scalar_type>(diagm(sqrt(eigs))*trans(ed.get_pseudo_v()));
|
||||
|
||||
// Pre-apply the transform to m so we don't have to do it inside operator()
|
||||
// every time it's called.
|
||||
m = tform*m;
|
||||
}
|
||||
|
||||
long in_vector_size (
|
||||
) const
|
||||
{
|
||||
return m.nr();
|
||||
}
|
||||
|
||||
long out_vector_size (
|
||||
) const
|
||||
{
|
||||
return m.nr();
|
||||
}
|
||||
|
||||
const matrix<scalar_type,0,1,mem_manager_type>& transformed_means (
|
||||
) const
|
||||
{
|
||||
return m;
|
||||
}
|
||||
|
||||
const matrix<scalar_type,0,0,mem_manager_type>& transform (
|
||||
) const
|
||||
{
|
||||
return tform;
|
||||
}
|
||||
|
||||
const result_type& operator() (
|
||||
const matrix_type& x
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(in_vector_size() != 0 && in_vector_size() == x.size() &&
|
||||
is_col_vector(x) == true,
|
||||
"\tmatrix vector_normalizer_frobmetric::operator()"
|
||||
<< "\n\t you have given invalid arguments to this function"
|
||||
<< "\n\t in_vector_size(): " << in_vector_size()
|
||||
<< "\n\t x.size(): " << x.size()
|
||||
<< "\n\t is_col_vector(x): " << is_col_vector(x)
|
||||
<< "\n\t this: " << this
|
||||
);
|
||||
|
||||
temp_out = tform*x-m;
|
||||
return temp_out;
|
||||
}
|
||||
|
||||
template <typename mt>
|
||||
friend void deserialize (
|
||||
vector_normalizer_frobmetric<mt>& item,
|
||||
std::istream& in
|
||||
);
|
||||
|
||||
template <typename mt>
|
||||
friend void serialize (
|
||||
const vector_normalizer_frobmetric<mt>& item,
|
||||
std::ostream& out
|
||||
);
|
||||
|
||||
private:
|
||||
|
||||
// ------------------- private data members -------------------
|
||||
|
||||
matrix_type m;
|
||||
matrix<scalar_type,0,0,mem_manager_type> tform;
|
||||
bool verbose;
|
||||
double eps;
|
||||
double C;
|
||||
unsigned long max_iter;
|
||||
bool _use_identity_matrix_prior;
|
||||
|
||||
// This is just a temporary variable that doesn't contribute to the
|
||||
// state of this object.
|
||||
mutable matrix_type temp_out;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename matrix_type
|
||||
>
|
||||
void serialize (
|
||||
const vector_normalizer_frobmetric<matrix_type>& item,
|
||||
std::ostream& out
|
||||
)
|
||||
{
|
||||
const int version = 2;
|
||||
serialize(version, out);
|
||||
|
||||
serialize(item.m, out);
|
||||
serialize(item.tform, out);
|
||||
serialize(item.verbose, out);
|
||||
serialize(item.eps, out);
|
||||
serialize(item.C, out);
|
||||
serialize(item.max_iter, out);
|
||||
serialize(item._use_identity_matrix_prior, out);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename matrix_type
|
||||
>
|
||||
void deserialize (
|
||||
vector_normalizer_frobmetric<matrix_type>& item,
|
||||
std::istream& in
|
||||
)
|
||||
{
|
||||
int version = 0;
|
||||
deserialize(version, in);
|
||||
if (version != 1 && version != 2)
|
||||
throw serialization_error("Unsupported version found while deserializing dlib::vector_normalizer_frobmetric.");
|
||||
|
||||
deserialize(item.m, in);
|
||||
deserialize(item.tform, in);
|
||||
deserialize(item.verbose, in);
|
||||
deserialize(item.eps, in);
|
||||
deserialize(item.C, in);
|
||||
deserialize(item.max_iter, in);
|
||||
if (version == 2)
|
||||
deserialize(item._use_identity_matrix_prior, in);
|
||||
else
|
||||
item._use_identity_matrix_prior = false;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_VECTOR_NORMALIZER_FRoBMETRIC_Hh_
|
||||
|
||||
328
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/vector_normalizer_frobmetric_abstract.h
vendored
Normal file
328
pkg/OpenFace/lib/3rdParty/dlib/include/dlib/statistics/vector_normalizer_frobmetric_abstract.h
vendored
Normal file
@@ -0,0 +1,328 @@
|
||||
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_VECTOR_NORMALIZER_FRoBMETRIC_ABSTRACT_Hh_
|
||||
#ifdef DLIB_VECTOR_NORMALIZER_FRoBMETRIC_ABSTRACT_Hh_
|
||||
|
||||
#include "../matrix.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename matrix_type
|
||||
>
|
||||
struct frobmetric_training_sample
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object represents a training data sample for the
|
||||
vector_normalizer_frobmetric object. It defines a set of training triplets
|
||||
relative to a single anchor_vect vector. That is, it specifies that the
|
||||
learned distance metric should satisfy num_triples() constraints which are,
|
||||
for all valid i and j:
|
||||
length(T*anchor_vect-T*near_vects[i]) + 1 < length(T*anchor_vect - T*far_vects[j])
|
||||
for some appropriate linear transformation T which will be learned by
|
||||
vector_normalizer_frobmetric.
|
||||
!*/
|
||||
|
||||
matrix_type anchor_vect;
|
||||
std::vector<matrix_type> near_vects;
|
||||
std::vector<matrix_type> far_vects;
|
||||
|
||||
unsigned long num_triples (
|
||||
) const { return near_vects.size() * far_vects.size(); }
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of training triplets defined by this object.
|
||||
!*/
|
||||
|
||||
void clear()
|
||||
/*!
|
||||
ensures
|
||||
- #near_vects.size() == 0
|
||||
- #far_vects.size() == 0
|
||||
!*/
|
||||
};
|
||||
|
||||
template < typename matrix_type >
|
||||
void serialize(const frobmetric_training_sample<matrix_type>& item, std::ostream& out)
|
||||
template < typename matrix_type >
|
||||
void deserialize(frobmetric_training_sample<matrix_type>& item, std::istream& in)
|
||||
/*!
|
||||
provides serialisation support.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename matrix_type
|
||||
>
|
||||
class vector_normalizer_frobmetric
|
||||
{
|
||||
/*!
|
||||
REQUIREMENTS ON matrix_type
|
||||
- must be a dlib::matrix object capable of representing column
|
||||
vectors
|
||||
|
||||
INITIAL VALUE
|
||||
- in_vector_size() == 0
|
||||
- out_vector_size() == 0
|
||||
- get_epsilon() == 0.1
|
||||
- get_c() == 1
|
||||
- get_max_iterations() == 5000
|
||||
- This object is not verbose
|
||||
- uses_identity_matrix_prior() == false
|
||||
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is a tool for performing the FrobMetric distance metric
|
||||
learning algorithm described in the following paper:
|
||||
A Scalable Dual Approach to Semidefinite Metric Learning
|
||||
By Chunhua Shen, Junae Kim, Lei Wang, in CVPR 2011
|
||||
|
||||
Therefore, this object is a tool that takes as input training triplets
|
||||
(anchor_vect, near, far) of vectors and attempts to learn a linear
|
||||
transformation T such that:
|
||||
length(T*anchor_vect-T*near) + 1 < length(T*anchor_vect - T*far)
|
||||
That is, you give a bunch of anchor_vect vectors and for each anchor_vect
|
||||
you specify some vectors which should be near to it and some that should be
|
||||
far form it. This object then tries to find a transformation matrix that
|
||||
makes the "near" vectors close to their anchors while the "far" vectors are
|
||||
farther away.
|
||||
|
||||
THREAD SAFETY
|
||||
Note that this object contains a cached matrix object it uses
|
||||
to store intermediate results for normalization. This avoids
|
||||
needing to reallocate it every time this object performs normalization
|
||||
but also makes it non-thread safe. So make sure you don't share
|
||||
instances of this object between threads.
|
||||
!*/
|
||||
|
||||
public:
|
||||
typedef typename matrix_type::mem_manager_type mem_manager_type;
|
||||
typedef typename matrix_type::type scalar_type;
|
||||
typedef matrix_type result_type;
|
||||
|
||||
vector_normalizer_frobmetric (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- this object is properly initialized
|
||||
!*/
|
||||
|
||||
bool uses_identity_matrix_prior (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- Normally this object will try and find a matrix transform() that
|
||||
minimizes sum(squared(transform())) but also fits the training data.
|
||||
However, if #uses_identity_matrix_prior() == true then it will instead
|
||||
try to find the transformation matrix that minimizes
|
||||
sum(squared(identity_matrix()-transform())). That is, it will try to
|
||||
find the matrix most similar to the identity matrix that best fits the
|
||||
training data.
|
||||
!*/
|
||||
|
||||
void set_uses_identity_matrix_prior (
|
||||
bool use_prior
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #uses_identity_matrix_prior() == use_prior
|
||||
!*/
|
||||
|
||||
void be_verbose(
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- This object will print status messages to standard out so the user can
|
||||
observe the progress of the train() routine.
|
||||
!*/
|
||||
|
||||
void be_quiet (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- this object will not print anything to standard out.
|
||||
!*/
|
||||
|
||||
void set_epsilon (
|
||||
double eps
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- eps > 0
|
||||
ensures
|
||||
- #get_epsilon() == eps
|
||||
!*/
|
||||
|
||||
double get_epsilon (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the error epsilon that determines when training should stop.
|
||||
Smaller values may result in a more accurate solution but take longer to
|
||||
execute.
|
||||
!*/
|
||||
|
||||
void set_c (
|
||||
double C
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- C > 0
|
||||
ensures
|
||||
- #set_c() == C
|
||||
!*/
|
||||
|
||||
double get_c (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the regularization parameter. It is the parameter that
|
||||
determines the trade-off between trying to fit the training data exactly
|
||||
or allowing more errors but hopefully improving the generalization of the
|
||||
resulting distance metric. Larger values encourage exact fitting while
|
||||
smaller values of C may encourage better generalization.
|
||||
!*/
|
||||
|
||||
void set_max_iterations (
|
||||
unsigned long max_iterations
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_max_iterations() == max_iterations
|
||||
!*/
|
||||
|
||||
unsigned long get_max_iterations (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- The train() routine uses an iterative numerical solver to find the best
|
||||
distance metric. This function returns the maximum allowable number of
|
||||
iterations it will use before terminating. Note that typically the
|
||||
solver terminates prior to the max iteration count limit due to the error
|
||||
dropping below get_epsilon().
|
||||
!*/
|
||||
|
||||
void train (
|
||||
const std::vector<frobmetric_training_sample<matrix_type> >& samples
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- samples.size() != 0
|
||||
- All matrices inside samples (i.e. anchors and elements of near_vects and far_vects)
|
||||
are column vectors with the same non-zero dimension.
|
||||
- All the vectors in samples contain finite values.
|
||||
- All elements of samples contain data, specifically, for all valid i:
|
||||
- samples[i].num_triples() != 0
|
||||
ensures
|
||||
- learns a distance metric from the given training samples. After train
|
||||
finishes you can use this object's operator() to transform vectors
|
||||
according to the learned distance metric. In particular, we will have:
|
||||
- #transform() == The linear transformation learned by the FrobMetric
|
||||
learning procedure.
|
||||
- #in_vector_size() == samples[0].anchor_vect.size()
|
||||
- You can call (*this)(x) to transform a vector according to the learned
|
||||
distance metric. That is, it should generally be the case that:
|
||||
- length((*this)(anchor_vect) - (*this)(near)) + 1 < length((*this)(anchor_vect) - (*this)(far))
|
||||
for the anchor_vect, near, and far vectors in the training data.
|
||||
- #transformed_means() == the mean of the input anchor_vect vectors
|
||||
after being transformed by #transform()
|
||||
!*/
|
||||
|
||||
long in_vector_size (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of rows that input vectors are required to contain if
|
||||
they are to be normalized by this object.
|
||||
!*/
|
||||
|
||||
long out_vector_size (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of rows in the normalized vectors that come out of
|
||||
this object.
|
||||
- The value returned is always in_vector_size(). So out_vector_size() is
|
||||
just provided to maintain interface consistency with other vector
|
||||
normalizer objects. That is, the transformations applied by this object
|
||||
do not change the dimensionality of the vectors.
|
||||
!*/
|
||||
|
||||
const matrix<scalar_type,0,1,mem_manager_type>& transformed_means (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns a column vector V such that:
|
||||
- V.size() == in_vector_size()
|
||||
- V is a vector such that subtracting it from transformed vectors
|
||||
results in them having an expected value of 0. Therefore, it is
|
||||
equal to transform() times the mean of the input anchor_vect vectors
|
||||
given to train().
|
||||
!*/
|
||||
|
||||
const matrix<scalar_type,0,0,mem_manager_type>& transform (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns a copy of the transformation matrix we learned during the last
|
||||
call to train().
|
||||
- The returned matrix is square and has in_vector_size() by in_vector_size()
|
||||
dimensions.
|
||||
!*/
|
||||
|
||||
const result_type& operator() (
|
||||
const matrix_type& x
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- in_vector_size() != 0
|
||||
- in_vector_size() == x.size()
|
||||
- is_col_vector(x) == true
|
||||
ensures
|
||||
- returns a normalized version of x, call it Z, that has the following
|
||||
properties:
|
||||
- Z == The result of applying the linear transform we learned during
|
||||
train() to the input vector x.
|
||||
- Z == transform()*x-transformed_means()
|
||||
- is_col_vector(Z) == true
|
||||
- Z.size() == x.size()
|
||||
- The expected value of each element of Z is 0.
|
||||
!*/
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename matrix_type
|
||||
>
|
||||
void serialize (
|
||||
const vector_normalizer_frobmetric<matrix_type>& item,
|
||||
std::ostream& out
|
||||
);
|
||||
/*!
|
||||
provides serialization support
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename matrix_type
|
||||
>
|
||||
void deserialize (
|
||||
vector_normalizer_frobmetric<matrix_type>& item,
|
||||
std::istream& in
|
||||
);
|
||||
/*!
|
||||
provides deserialization support
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_VECTOR_NORMALIZER_FRoBMETRIC_ABSTRACT_Hh_
|
||||
|
||||
Reference in New Issue
Block a user