open source pkg v1

This commit is contained in:
Vijay Yadev
2020-08-04 19:12:31 -04:00
parent bef213dba9
commit c389fc2c47
3708 changed files with 1624220 additions and 1 deletions

View File

@@ -0,0 +1,128 @@
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_APPROXIMATE_LINEAR_MODELS_Hh_
#define DLIB_APPROXIMATE_LINEAR_MODELS_Hh_
#include "approximate_linear_models_abstract.h"
#include "../matrix.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
/*!
    One training sample for a reinforcement learning algorithm: the process was
    in `state`, took `action`, received `reward`, and ended up in `next_state`.

    feature_extractor must provide the state_type and action_type typedefs.
!*/
template <
    typename feature_extractor
    >
struct process_sample
{
    typedef feature_extractor feature_extractor_type;
    typedef typename feature_extractor::state_type state_type;
    typedef typename feature_extractor::action_type action_type;

    // Value-initialize every member.  In particular this makes reward 0 rather
    // than leaving a built-in double indeterminate, so a default constructed
    // sample can be safely read or serialized.
    process_sample() : state(), action(), next_state(), reward(0) {}

    // Builds a sample from its four parts:
    //   s - state the process was in
    //   a - action taken in s
    //   n - state the process moved to
    //   r - reward received for the transition
    process_sample(
        const state_type& s,
        const action_type& a,
        const state_type& n,
        const double& r
    ) : state(s), action(a), next_state(n), reward(r) {}

    state_type state;
    action_type action;
    state_type next_state;
    double reward;
};
// Writes a process_sample to out, one field at a time, in the order
// state, action, next_state, reward.  The matching deserialize() reads the
// fields back in exactly this order.
template <typename feature_extractor>
void serialize (
    const process_sample<feature_extractor>& item,
    std::ostream& out
)
{
    serialize(item.state,      out);
    serialize(item.action,     out);
    serialize(item.next_state, out);
    serialize(item.reward,     out);
}
// Reads a process_sample from in, filling the fields in the order
// state, action, next_state, reward (the order serialize() wrote them).
template <typename feature_extractor>
void deserialize (
    process_sample<feature_extractor>& item,
    std::istream& in
)
{
    deserialize(item.state,      in);
    deserialize(item.action,     in);
    deserialize(item.next_state, in);
    deserialize(item.reward,     in);
}
// ----------------------------------------------------------------------------------------
/*!
    A policy backed by a feature_extractor and a weight vector w.  Calling the
    policy on a state returns fe.find_best_action(state, w).
!*/
template <typename feature_extractor>
class policy
{
public:
    typedef feature_extractor feature_extractor_type;
    typedef typename feature_extractor::state_type state_type;
    typedef typename feature_extractor::action_type action_type;

    // Default policy: a default constructed feature extractor and an all-zero
    // weight vector with one entry per feature.
    policy ()
    {
        // w is sized here, in the body, because it needs fe.num_features() and
        // fe is declared (and therefore constructed) after w.
        w.set_size(fe.num_features());
        w = 0;
    }

    // Policy that uses the given weights together with the given feature
    // extractor.
    policy (
        const matrix<double,0,1>& weights_,
        const feature_extractor& fe_
    ) :
        w(weights_),
        fe(fe_)
    {
    }

    // Returns the action the feature extractor considers best for state under
    // the current weights.
    action_type operator() (const state_type& state) const
    {
        return fe.find_best_action(state, w);
    }

    const feature_extractor& get_feature_extractor () const
    {
        return fe;
    }

    const matrix<double,0,1>& get_weights () const
    {
        return w;
    }

private:
    matrix<double,0,1> w;
    feature_extractor fe;
};
// Writes a policy to out as: format version (1), feature extractor, weights.
template <typename feature_extractor>
inline void serialize (
    const policy<feature_extractor>& item,
    std::ostream& out
)
{
    // Format version tag; deserialize() rejects anything other than 1.
    int version = 1;
    serialize(version, out);

    serialize(item.get_feature_extractor(), out);
    serialize(item.get_weights(), out);
}
// Reads a policy from in.  Expects the layout written by serialize(): a
// version number (which must be 1), then the feature extractor, then the
// weight vector.  Throws serialization_error on any other version.
template <typename feature_extractor>
inline void deserialize (
    policy<feature_extractor>& item,
    std::istream& in
)
{
    int version = 0;
    deserialize(version, in);
    if (version != 1)
    {
        throw serialization_error("Unexpected version found while deserializing dlib::policy object.");
    }

    // policy has no setters, so load the parts into temporaries and rebuild
    // the whole object from them.
    feature_extractor loaded_fe;
    matrix<double,0,1> loaded_w;
    deserialize(loaded_fe, in);
    deserialize(loaded_w, in);

    item = policy<feature_extractor>(loaded_w, loaded_fe);
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_APPROXIMATE_LINEAR_MODELS_Hh_

View File

@@ -0,0 +1,213 @@
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_APPROXIMATE_LINEAR_MODELS_ABSTRACT_Hh_
#ifdef DLIB_APPROXIMATE_LINEAR_MODELS_ABSTRACT_Hh_
#include "../matrix.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
struct example_feature_extractor
{
/*!
WHAT THIS OBJECT REPRESENTS
This object defines the interface a feature extractor must implement if it
is to be used with the process_sample and policy objects defined at the
bottom of this file. Moreover, it is meant to represent the core part
of a model used in a reinforcement learning algorithm.
In particular, this object models a Q(state,action) function where
Q(state,action) == dot(w, PSI(state,action))
where PSI(state,action) is a feature vector and w is a parameter
vector.
Therefore, a feature extractor defines how the PSI(state,action) feature
vector is calculated. It also defines the types used to represent the state
and action objects.
THREAD SAFETY
Instances of this object are required to be threadsafe, that is, it should
be safe for multiple threads to make concurrent calls to the member
functions of this object.
!*/
// The state and actions can be any types so long as you provide typedefs for them.
// (T and U below are placeholders for the types you choose.)
typedef T state_type;
typedef U action_type;
// Set this to true to require that the last element of the weight vector w is
// always 1. This can be useful for including a prior into your model.
const static bool force_last_weight_to_1 = false;
example_feature_extractor(
);
/*!
ensures
- this object is properly initialized.
!*/
unsigned long num_features(
) const;
/*!
ensures
- returns the dimensionality of the PSI() feature vector.
!*/
action_type find_best_action (
const state_type& state,
const matrix<double,0,1>& w
) const;
/*!
ensures
- returns the action A that maximizes Q(state,A) = dot(w,PSI(state,A)).
That is, this function finds the best action to take in the given state
when our model is parameterized by the given weight vector w.
!*/
void get_features (
const state_type& state,
const action_type& action,
matrix<double,0,1>& feats
) const;
/*!
ensures
- #feats.size() == num_features()
- #feats == PSI(state,action)
!*/
};
// ----------------------------------------------------------------------------------------
template <
typename feature_extractor
>
struct process_sample
{
/*!
REQUIREMENTS ON feature_extractor
feature_extractor should implement the example_feature_extractor interface
defined at the top of this file.
WHAT THIS OBJECT REPRESENTS
This object holds a training sample for a reinforcement learning algorithm.
In particular, it should be a sample from some process where the process
was in state this->state, then took this->action action which resulted in
receiving this->reward and ending up in the state this->next_state.
!*/
typedef feature_extractor feature_extractor_type;
typedef typename feature_extractor::state_type state_type;
typedef typename feature_extractor::action_type action_type;
process_sample(){}
process_sample(
const state_type& s,
const action_type& a,
const state_type& n,
const double& r
) : state(s), action(a), next_state(n), reward(r) {}
/*!
ensures
- #state == s
- #action == a
- #next_state == n
- #reward == r
!*/
// The state the process was in when the action was taken.
state_type state;
// The action taken while in state.
action_type action;
// The state the process ended up in after taking action.
state_type next_state;
// The reward received for this transition.
double reward;
};
template < typename feature_extractor >
void serialize (const process_sample<feature_extractor>& item, std::ostream& out);
template < typename feature_extractor >
void deserialize (process_sample<feature_extractor>& item, std::istream& in);
/*!
provides serialization support for process_sample objects.
!*/
// ----------------------------------------------------------------------------------------
template <
typename feature_extractor
>
class policy
{
/*!
REQUIREMENTS ON feature_extractor
feature_extractor should implement the example_feature_extractor interface
defined at the top of this file.
WHAT THIS OBJECT REPRESENTS
This is a policy based on the supplied feature_extractor model. In
particular, it maps from feature_extractor::state_type to the best action
to take in that state.
!*/
public:
typedef feature_extractor feature_extractor_type;
typedef typename feature_extractor::state_type state_type;
typedef typename feature_extractor::action_type action_type;
policy (
);
/*!
ensures
- #get_feature_extractor() == feature_extractor()
(i.e. it will have its default value)
- #get_weights().size() == #get_feature_extractor().num_features()
- #get_weights() == 0
!*/
policy (
const matrix<double,0,1>& weights,
const feature_extractor& fe
);
/*!
requires
- fe.num_features() == weights.size()
ensures
- #get_feature_extractor() == fe
- #get_weights() == weights
!*/
action_type operator() (
const state_type& state
) const;
/*!
ensures
- returns get_feature_extractor().find_best_action(state, get_weights())
!*/
const feature_extractor& get_feature_extractor (
) const;
/*!
ensures
- returns the feature extractor used by this object
!*/
const matrix<double,0,1>& get_weights (
) const;
/*!
ensures
- returns the parameter vector (w) associated with this object. The length
of the vector is get_feature_extractor().num_features().
!*/
};
template < typename feature_extractor >
void serialize(const policy<feature_extractor>& item, std::ostream& out);
template < typename feature_extractor >
void deserialize(policy<feature_extractor>& item, std::istream& in);
/*!
provides serialization support for policy objects.
!*/
// ----------------------------------------------------------------------------------------
#endif // DLIB_APPROXIMATE_LINEAR_MODELS_ABSTRACT_Hh_

View File

@@ -0,0 +1,188 @@
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_LSPI_Hh_
#define DLIB_LSPI_Hh_
#include "lspi_abstract.h"
#include "approximate_linear_models.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
/*!
    Trainer that iteratively fits the weight vector of a linear
    Q(state,action) == dot(w, PSI(state,action)) model to a set of
    process_sample objects and returns the resulting policy.

    feature_extractor must provide state_type, action_type,
    force_last_weight_to_1, num_features(), get_features() and
    find_best_action().

    Defaults (set by init()): lambda 0.01, discount 0.8, epsilon 0.01,
    not verbose, max_iterations 100.
!*/
template <
    typename feature_extractor
    >
class lspi
{
public:
    typedef feature_extractor feature_extractor_type;
    typedef typename feature_extractor::state_type state_type;
    typedef typename feature_extractor::action_type action_type;

    // Uses a copy of fe_ as the feature extractor; all other settings take
    // their default values.
    explicit lspi(
        const feature_extractor& fe_
    ) : fe(fe_)
    {
        init();
    }

    // Uses a default constructed feature extractor and default settings.
    lspi(
    )
    {
        init();
    }

    double get_discount (
    ) const { return discount; }

    // Sets the discount factor used in train().  Requires 0 < value <= 1.
    void set_discount (
        double value
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(0 < value && value <= 1,
            "\t void lspi::set_discount(value)"
            << "\n\t invalid inputs were given to this function"
            << "\n\t value: " << value
            );
        discount = value;
    }

    const feature_extractor& get_feature_extractor (
    ) const { return fe; }

    // Makes train() print the iteration number and weight change to std::cout.
    void be_verbose (
    )
    {
        verbose = true;
    }

    void be_quiet (
    )
    {
        verbose = false;
    }

    // Sets the stopping tolerance: train() stops once the weight vector moves
    // by no more than eps_ between iterations.  Requires eps_ > 0.
    void set_epsilon (
        double eps_
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(eps_ > 0,
            "\t void lspi::set_epsilon(eps_)"
            << "\n\t invalid inputs were given to this function"
            << "\n\t eps_: " << eps_
            );
        eps = eps_;
    }

    double get_epsilon (
    ) const
    {
        return eps;
    }

    // Sets the regularization strength; train() starts each A matrix at
    // lambda_ times the identity.  Requires lambda_ >= 0.
    void set_lambda (
        double lambda_
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(lambda_ >= 0,
            "\t void lspi::set_lambda(lambda_)"
            << "\n\t invalid inputs were given to this function"
            << "\n\t lambda_: " << lambda_
            );
        lambda = lambda_;
    }

    double get_lambda (
    ) const
    {
        return lambda;
    }

    void set_max_iterations (
        unsigned long max_iter
    ) { max_iterations = max_iter; }

    // const like every other getter, so it can be called on a const lspi.
    unsigned long get_max_iterations (
    ) const { return max_iterations; }

    /*!
        Trains a policy from samples and returns it.

        requires
            - samples.size() > 0
            - samples[i] exposes state, action, next_state and reward
        ensures
            - runs at least one iteration and stops when the weight change is
              <= get_epsilon() or get_max_iterations() iterations have run
            - the returned policy holds the final weights and a copy of the
              feature extractor
    !*/
    template <typename vector_type>
    policy<feature_extractor> train (
        const vector_type& samples
    ) const
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(samples.size() > 0,
            "\t policy lspi::train(samples)"
            << "\n\t invalid inputs were given to this function"
            );

        matrix<double,0,1> w(fe.num_features());
        w = 0;
        matrix<double,0,1> prev_w, b, f1, f2;
        matrix<double> A;
        double change;
        unsigned long iter = 0;
        do
        {
            // Rebuild the linear system A*w == b from scratch using the best
            // actions implied by the current weights.
            A = identity_matrix<double>(fe.num_features())*lambda;
            // NOTE(review): b is still empty on the first pass, so this relies
            // on the += below to give it its size -- confirm against dlib's
            // matrix operator+= semantics.
            b = 0;
            for (unsigned long i = 0; i < samples.size(); ++i)
            {
                // f1: features of the observed (state, action) pair.
                fe.get_features(samples[i].state, samples[i].action, f1);
                // f2: features of the next state paired with the action the
                // current weights consider best there.
                fe.get_features(samples[i].next_state,
                                fe.find_best_action(samples[i].next_state,w),
                                f2);
                A += f1*trans(f1 - discount*f2);
                b += f1*samples[i].reward;
            }

            prev_w = w;
            if (feature_extractor::force_last_weight_to_1)
                // Last weight is fixed at 1: move the last column of A to the
                // right hand side, solve for the remaining weights, append 1.
                w = join_cols(pinv(colm(A,range(0,A.nc()-2)))*(b-colm(A,A.nc()-1)),mat(1.0));
            else
                w = pinv(A)*b;

            change = length(w-prev_w);
            ++iter;

            if (verbose)
                std::cout << "iteration: " << iter << "\tchange: " << change << std::endl;

        } while(change > eps && iter < max_iterations);

        return policy<feature_extractor>(w,fe);
    }

private:

    // Sets every tunable parameter to its default value.
    void init()
    {
        lambda = 0.01;
        discount = 0.8;
        eps = 0.01;
        verbose = false;
        max_iterations = 100;
    }

    double lambda;
    double discount;
    double eps;
    bool verbose;
    unsigned long max_iterations;
    feature_extractor fe;
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_LSPI_Hh_

View File

@@ -0,0 +1,193 @@
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_LSPI_ABSTRACT_Hh_
#ifdef DLIB_LSPI_ABSTRACT_Hh_
#include "approximate_linear_models_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
typename feature_extractor
>
class lspi
{
/*!
REQUIREMENTS ON feature_extractor
feature_extractor should implement the example_feature_extractor interface
defined at the top of dlib/control/approximate_linear_models_abstract.h
WHAT THIS OBJECT REPRESENTS
This object is an implementation of the reinforcement learning algorithm
described in the following paper:
Lagoudakis, Michail G., and Ronald Parr. "Least-squares policy
iteration." The Journal of Machine Learning Research 4 (2003):
1107-1149.
This means that it takes a bunch of training data in the form of
process_samples and outputs a policy that hopefully performs well when run
on the process that generated those samples.
!*/
public:
typedef feature_extractor feature_extractor_type;
typedef typename feature_extractor::state_type state_type;
typedef typename feature_extractor::action_type action_type;
explicit lspi(
const feature_extractor& fe_
);
/*!
ensures
- #get_feature_extractor() == fe_
- #get_lambda() == 0.01
- #get_discount() == 0.8
- #get_epsilon() == 0.01
- is not verbose
- #get_max_iterations() == 100
!*/
lspi(
);
/*!
ensures
- #get_feature_extractor() == feature_extractor()
(i.e. it will have its default value)
- #get_lambda() == 0.01
- #get_discount() == 0.8
- #get_epsilon() == 0.01
- is not verbose
- #get_max_iterations() == 100
!*/
double get_discount (
) const;
/*!
ensures
- returns the discount applied to the sum of rewards in the Bellman
equation.
!*/
void set_discount (
double value
);
/*!
requires
- 0 < value <= 1
ensures
- #get_discount() == value
!*/
const feature_extractor& get_feature_extractor (
) const;
/*!
ensures
- returns the feature extractor used by this object
!*/
void be_verbose (
);
/*!
ensures
- This object will print status messages to standard out so that a
user can observe the progress of the algorithm.
!*/
void be_quiet (
);
/*!
ensures
- this object will not print anything to standard out
!*/
void set_epsilon (
double eps
);
/*!
requires
- eps > 0
ensures
- #get_epsilon() == eps
!*/
double get_epsilon (
) const;
/*!
ensures
- returns the error epsilon that determines when training should stop.
Smaller values may result in a more accurate solution but take longer to
train.
!*/
void set_lambda (
double lambda_
);
/*!
requires
- lambda_ >= 0
ensures
- #get_lambda() == lambda_
!*/
double get_lambda (
) const;
/*!
ensures
- returns the regularization parameter. It is the parameter that
determines the trade off between trying to fit the training data
exactly or allowing more errors but hopefully improving the
generalization ability of the resulting function. Smaller values
encourage exact fitting while larger values of lambda may encourage
better generalization.
!*/
void set_max_iterations (
unsigned long max_iter
);
/*!
ensures
- #get_max_iterations() == max_iter
!*/
unsigned long get_max_iterations (
);
/*!
ensures
- returns the maximum number of iterations the LSPI training loop in
train() is allowed to run before it is required to stop and return a
result.
!*/
template <
typename vector_type
>
policy<feature_extractor> train (
const vector_type& samples
) const;
/*!
requires
- samples.size() > 0
- samples is something with an interface that looks like
std::vector<process_sample<feature_extractor>>. That is, it should
be some kind of array of process_sample objects.
ensures
- Trains a policy based on the given data and returns the results. The
idea is to find a policy that will obtain the largest possible reward
when run on the process that generated the samples. In particular,
if the returned policy is P then:
- P(S) == the best action to take when in state S.
- if (feature_extractor::force_last_weight_to_1) then
- The last element of P.get_weights() is 1.
!*/
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_LSPI_ABSTRACT_Hh_

View File

@@ -0,0 +1,370 @@
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_MPC_Hh_
#define DLIB_MPC_Hh_
#include "mpc_abstract.h"
#include "../matrix.h"
#include "../algs.h"
namespace dlib
{
template <
long S_,
long I_,
unsigned long horizon_
>
class mpc
{
public:
const static long S = S_;
const static long I = I_;
const static unsigned long horizon = horizon_;
mpc(
)
{
A = 0;
B = 0;
C = 0;
Q = 0;
R = 0;
lower = 0;
upper = 0;
max_iterations = 0;
eps = 0.01;
for (unsigned long i = 0; i < horizon; ++i)
{
target[i].set_size(A.nr());
target[i] = 0;
controls[i].set_size(B.nc());
controls[i] = 0;
}
lambda = 0;
}
mpc (
const matrix<double,S,S>& A_,
const matrix<double,S,I>& B_,
const matrix<double,S,1>& C_,
const matrix<double,S,1>& Q_,
const matrix<double,I,1>& R_,
const matrix<double,I,1>& lower_,
const matrix<double,I,1>& upper_
) : A(A_), B(B_), C(C_), Q(Q_), R(R_), lower(lower_), upper(upper_)
{
// make sure requires clause is not broken
DLIB_ASSERT(A.nr() > 0 && B.nc() > 0,
"\t mpc::mpc()"
<< "\n\t invalid inputs were given to this function"
<< "\n\t A.nr(): " << A.nr()
<< "\n\t B.nc(): " << B.nc()
);
DLIB_ASSERT(A.nr() == A.nc() &&
A.nr() == B.nr() &&
A.nr() == C.nr() &&
A.nr() == Q.nr(),
"\t mpc::mpc()"
<< "\n\t invalid inputs were given to this function"
<< "\n\t A.nr(): " << A.nr()
<< "\n\t A.nc(): " << A.nc()
<< "\n\t B.nr(): " << B.nr()
<< "\n\t C.nr(): " << C.nr()
<< "\n\t Q.nr(): " << Q.nr()
);
DLIB_ASSERT(
B.nc() == R.nr() &&
B.nc() == lower.nr() &&
B.nc() == upper.nr() ,
"\t mpc::mpc()"
<< "\n\t invalid inputs were given to this function"
<< "\n\t B.nr(): " << B.nr()
<< "\n\t B.nc(): " << B.nc()
<< "\n\t lower.nr(): " << lower.nr()
<< "\n\t upper.nr(): " << upper.nr()
);
DLIB_ASSERT(min(Q) >= 0 &&
min(R) > 0 &&
min(upper-lower) >= 0,
"\t mpc::mpc()"
<< "\n\t invalid inputs were given to this function"
<< "\n\t min(Q): " << min(Q)
<< "\n\t min(R): " << min(R)
<< "\n\t min(upper-lower): " << min(upper-lower)
);
max_iterations = 10000;
eps = 0.01;
for (unsigned long i = 0; i < horizon; ++i)
{
target[i].set_size(A.nr());
target[i] = 0;
controls[i].set_size(B.nc());
controls[i] = 0;
}
// Bound the maximum eigenvalue of the hessian by computing the trace of the
// hessian matrix.
lambda = sum(R)*horizon;
matrix<double,S,S> temp = diagm(Q);
for (unsigned long c = 0; c < horizon; ++c)
{
lambda += trace(trans(B)*temp*B);
Q_diag[horizon-c-1] = diag(trans(B)*temp*B);
temp = trans(A)*temp*A + diagm(Q);
}
}
const matrix<double,S,S>& get_A (
) const { return A; }
const matrix<double,S,I>& get_B (
) const { return B; }
const matrix<double,S,1>& get_C (
) const { return C; }
const matrix<double,S,1>& get_Q (
) const { return Q; }
const matrix<double,I,1>& get_R (
) const { return R; }
const matrix<double,I,1>& get_lower_constraints (
) const { return lower; }
const matrix<double,I,1>& get_upper_constraints (
) const { return upper; }
void set_target (
const matrix<double,S,1>& val,
const unsigned long time
)
{
DLIB_ASSERT(time < horizon,
"\t void mpc::set_target(eps_)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t time: " << time
<< "\n\t horizon: " << horizon
);
target[time] = val;
}
void set_target (
const matrix<double,S,1>& val
)
{
for (unsigned long i = 0; i < horizon; ++i)
target[i] = val;
}
void set_last_target (
const matrix<double,S,1>& val
)
{
set_target(val, horizon-1);
}
const matrix<double,S,1>& get_target (
const unsigned long time
) const
{
// make sure requires clause is not broken
DLIB_ASSERT(time < horizon,
"\t matrix mpc::get_target(eps_)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t time: " << time
<< "\n\t horizon: " << horizon
);
return target[time];
}
unsigned long get_max_iterations (
) const { return max_iterations; }
void set_max_iterations (
unsigned long max_iter
)
{
max_iterations = max_iter;
}
void set_epsilon (
double eps_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(eps_ > 0,
"\t void mpc::set_epsilon(eps_)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t eps_: " << eps_
);
eps = eps_;
}
double get_epsilon (
) const
{
return eps;
}
matrix<double,I,1> operator() (
const matrix<double,S,1>& current_state
)
{
// make sure requires clause is not broken
DLIB_ASSERT(min(R) > 0 && A.nr() == current_state.size(),
"\t matrix mpc::operator(current_state)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t min(R): " << min(R)
<< "\n\t A.nr(): " << A.nr()
<< "\n\t current_state.size(): " << current_state.size()
);
// Shift the inputs over by one time step so we can use them to warm start the
// optimizer.
for (unsigned long i = 1; i < horizon; ++i)
controls[i-1] = controls[i];
solve_linear_mpc(current_state);
for (unsigned long i = 1; i < horizon; ++i)
target[i-1] = target[i];
return controls[0];
}
private:
// These temporary variables here just to avoid reallocating them on each call to
// operator().
matrix<double,S,1> M[horizon];
matrix<double,I,1> MM[horizon];
matrix<double,I,1> df[horizon];
matrix<double,I,1> v[horizon];
matrix<double,I,1> v_old[horizon];
void solve_linear_mpc (
const matrix<double,S,1>& initial_state
)
{
// make it so MM == trans(K)*Q*(M-target)
M[0] = A*initial_state + C;
for (unsigned long i = 1; i < horizon; ++i)
M[i] = A*M[i-1] + C;
for (unsigned long i = 0; i < horizon; ++i)
M[i] = diagm(Q)*(M[i]-target[i]);
for (long i = (long)horizon-2; i >= 0; --i)
M[i] += trans(A)*M[i+1];
for (unsigned long i = 0; i < horizon; ++i)
MM[i] = trans(B)*M[i];
unsigned long iter = 0;
for (; iter < max_iterations; ++iter)
{
// compute current gradient and put it into df.
// df == H*controls + MM;
M[0] = B*controls[0];
for (unsigned long i = 1; i < horizon; ++i)
M[i] = A*M[i-1] + B*controls[i];
for (unsigned long i = 0; i < horizon; ++i)
M[i] = diagm(Q)*M[i];
for (long i = (long)horizon-2; i >= 0; --i)
M[i] += trans(A)*M[i+1];
for (unsigned long i = 0; i < horizon; ++i)
df[i] = MM[i] + trans(B)*M[i] + diagm(R)*controls[i];
// Check the stopping condition, which is the magnitude of the largest element
// of the gradient.
double max_df = 0;
unsigned long max_t = 0;
long max_v = 0;
for (unsigned long i = 0; i < horizon; ++i)
{
for (long j = 0; j < controls[i].size(); ++j)
{
// if this variable isn't an active constraint then we care about it's
// derivative.
if (!((controls[i](j) <= lower(j) && df[i](j) > 0) ||
(controls[i](j) >= upper(j) && df[i](j) < 0)))
{
if (std::abs(df[i](j)) > max_df)
{
max_df = std::abs(df[i](j));
max_t = i;
max_v = j;
}
}
}
}
if (max_df < eps)
break;
// We will start out by doing a little bit of coordinate descent because it
// allows us to optimize individual variables exactly. Since we are warm
// starting each iteration with a really good solution this helps speed
// things up a lot.
const unsigned long smo_iters = 50;
if (iter < smo_iters)
{
if (Q_diag[max_t](max_v) == 0) continue;
// Take the optimal step but just for one variable.
controls[max_t](max_v) = -(df[max_t](max_v)-Q_diag[max_t](max_v)*controls[max_t](max_v))/Q_diag[max_t](max_v);
controls[max_t](max_v) = put_in_range(lower(max_v), upper(max_v), controls[max_t](max_v));
// If this is the last SMO iteration then don't forget to initialize v
// for the gradient steps.
if (iter+1 == smo_iters)
{
for (unsigned long i = 0; i < horizon; ++i)
v[i] = controls[i];
}
}
else
{
// Take a projected gradient step.
for (unsigned long i = 0; i < horizon; ++i)
{
v_old[i] = v[i];
v[i] = dlib::clamp(controls[i] - 1.0/lambda * df[i], lower, upper);
controls[i] = dlib::clamp(v[i] + (std::sqrt(lambda)-1)/(std::sqrt(lambda)+1)*(v[i]-v_old[i]), lower, upper);
}
}
}
}
unsigned long max_iterations;
double eps;
matrix<double,S,S> A;
matrix<double,S,I> B;
matrix<double,S,1> C;
matrix<double,S,1> Q;
matrix<double,I,1> R;
matrix<double,I,1> lower;
matrix<double,I,1> upper;
matrix<double,S,1> target[horizon];
double lambda; // abound on the largest eigenvalue of the hessian matrix.
matrix<double,I,1> Q_diag[horizon];
matrix<double,I,1> controls[horizon];
};
}
#endif // DLIB_MPC_Hh_

View File

@@ -0,0 +1,276 @@
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_MPC_ABSTRACT_Hh_
#ifdef DLIB_MPC_ABSTRACT_Hh_
#include "../matrix.h"
namespace dlib
{
template <
long S_,
long I_,
unsigned long horizon_
>
class mpc
{
/*!
REQUIREMENTS ON horizon_
horizon_ > 0
REQUIREMENTS ON S_
S_ >= 0
REQUIREMENTS ON I_
I_ >= 0
WHAT THIS OBJECT REPRESENTS
This object implements a linear model predictive controller. To explain
what that means, suppose you have some process you want to control and the
process dynamics are described by the linear equation:
x_{i+1} = A*x_i + B*u_i + C
That is, the next state the system goes into is a linear function of its
current state (x_i) and the current control (u_i) plus some constant bias
or disturbance.
A model predictive controller can find the control (u) you should apply to
drive the state (x) to some reference value, or alternatively to make the
state track some reference time-varying sequence. It does this by
simulating the process for horizon_ time steps and selecting the control
that leads to the best performance over the next horizon_ steps.
To be precise, each time you ask this object for a control, it solves the
following quadratic program:
min sum_i trans(x_i-target_i)*Q*(x_i-target_i) + trans(u_i)*R*u_i
x_i,u_i
such that: x_0 == current_state
x_{i+1} == A*x_i + B*u_i + C
lower <= u_i <= upper
0 <= i < horizon_
and reports u_0 as the control you should take given that you are currently
in current_state. Q and R are user supplied matrices that define how we
penalize variations away from the target state as well as how much we want
to avoid generating large control signals.
Finally, the algorithm we use to solve this quadratic program is based
largely on the method described in:
A Fast Gradient method for embedded linear predictive control (2011)
by Markus Kogel and Rolf Findeisen
!*/
public:
const static long S = S_;
const static long I = I_;
const static unsigned long horizon = horizon_;
mpc(
);
/*!
ensures
- #get_max_iterations() == 0
- #get_epsilon() == 0.01
- The A,B,C,Q,R,lower, and upper parameter matrices are filled with zeros.
Therefore, to use this object you must initialize it via the constructor
that supplies these parameters.
- for all valid i:
- get_target(i) == a vector of all zeros
!*/
mpc (
const matrix<double,S,S>& A,
const matrix<double,S,I>& B,
const matrix<double,S,1>& C,
const matrix<double,S,1>& Q,
const matrix<double,I,1>& R,
const matrix<double,I,1>& lower,
const matrix<double,I,1>& upper
);
/*!
requires
- A.nr() > 0
- B.nc() > 0
- A.nr() == A.nc() == B.nr() == C.nr() == Q.nr()
- B.nc() == R.nr() == lower.nr() == upper.nr()
- min(Q) >= 0
- min(R) > 0
- min(upper-lower) >= 0
ensures
- #get_A() == A
- #get_B() == B
- #get_C() == C
- #get_Q() == Q
- #get_R() == R
- #get_lower_constraints() == lower
- #get_upper_constraints() == upper
- for all valid i:
- get_target(i) == a vector of all zeros
- get_target(i).size() == A.nr()
- #get_max_iterations() == 10000
- #get_epsilon() == 0.01
!*/
const matrix<double,S,S>& get_A (
) const;
/*!
ensures
- returns the A matrix from the quadratic program defined above.
!*/
const matrix<double,S,I>& get_B (
) const;
/*!
ensures
- returns the B matrix from the quadratic program defined above.
!*/
const matrix<double,S,1>& get_C (
) const;
/*!
ensures
- returns the C matrix from the quadratic program defined above.
!*/
const matrix<double,S,1>& get_Q (
) const;
/*!
ensures
- returns the diagonal of the Q matrix from the quadratic program defined
above.
!*/
const matrix<double,I,1>& get_R (
) const;
/*!
ensures
- returns the diagonal of the R matrix from the quadratic program defined
above.
!*/
const matrix<double,I,1>& get_lower_constraints (
) const;
/*!
ensures
- returns the lower matrix from the quadratic program defined above. All
controls generated by this object will have values no less than this
lower bound. That is, any control u will satisfy min(u-lower) >= 0.
!*/
const matrix<double,I,1>& get_upper_constraints (
) const;
/*!
ensures
- returns the upper matrix from the quadratic program defined above. All
controls generated by this object will have values no larger than this
upper bound. That is, any control u will satisfy min(upper-u) >= 0.
!*/
const matrix<double,S,1>& get_target (
const unsigned long time
) const;
/*!
requires
- time < horizon
ensures
- This object will try to find the control sequence that results in the
process obtaining get_target(time) state at the indicated time. Note
that the next time instant after "right now" is time 0.
!*/
void set_target (
const matrix<double,S,1>& val,
const unsigned long time
);
/*!
requires
- time < horizon
ensures
- #get_target(time) == val
!*/
void set_target (
const matrix<double,S,1>& val
);
/*!
ensures
- for all valid t:
- #get_target(t) == val
!*/
void set_last_target (
const matrix<double,S,1>& val
);
/*!
ensures
- performs: set_target(val, horizon-1)
!*/
unsigned long get_max_iterations (
) const;
/*!
ensures
- When operator() is called it solves an optimization problem to
get_epsilon() precision to determine the next control action. In
particular, we run the optimizer until the magnitude of each element of
the gradient vector is less than get_epsilon() or until
get_max_iterations() solver iterations have been executed.
!*/
void set_max_iterations (
unsigned long max_iter
);
/*!
ensures
- #get_max_iterations() == max_iter
!*/
void set_epsilon (
double eps
);
/*!
requires
- eps > 0
ensures
- #get_epsilon() == eps
!*/
double get_epsilon (
) const;
/*!
ensures
- When operator() is called it solves an optimization problem to
get_epsilon() precision to determine the next control action. In
particular, we run the optimizer until the magnitude of each element of
the gradient vector is less than get_epsilon() or until
get_max_iterations() solver iterations have been executed. This means
that smaller epsilon values will give more accurate outputs but may take
longer to compute.
!*/
matrix<double,I,1> operator() (
const matrix<double,S,1>& current_state
);
/*!
requires
- min(R) > 0
- A.nr() == current_state.size()
ensures
- Solves the model predictive control problem defined by the arguments to
this object's constructor, assuming that the starting state is given by
current_state. Then we return the control that should be taken in the
current state that best optimizes the quadratic objective function
defined above.
- We also shift over the target states so that you only need to update the
last one (if you are using non-zero target states) via a call to
set_last_target(). In particular, it will be the case that:
- for all t such that 0 <= t < horizon-1:
- #get_target(t) == get_target(t+1)
- #get_target(horizon-1) == get_target(horizon-1)
(i.e. the last target is left unchanged)
!*/
};
}
#endif // DLIB_MPC_ABSTRACT_Hh_