open source pkg v1

2020-08-04 19:12:31 -04:00
parent bef213dba9
commit c389fc2c47
3708 changed files with 1624220 additions and 1 deletions
--- a/pkg/OpenFace/lib/local/LandmarkDetector/src/CCNF_patch_expert.cpp
+++ b/pkg/OpenFace/lib/local/LandmarkDetector/src/CCNF_patch_expert.cpp
@@ -0,0 +1,514 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
+// all rights reserved.
+//
+// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
+//
+// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.  
+// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
+//
+// License can be found in OpenFace-license.txt
+//
+//     * Any publications arising from the use of this software, including but
+//       not limited to academic journal and conference publications, technical
+//       reports and manuals, must cite at least one of the following works:
+//
+//       OpenFace 2.0: Facial Behavior Analysis Toolkit
+//       Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
+//       in IEEE International Conference on Automatic Face and Gesture Recognition, 2018  
+//
+//       Convolutional experts constrained local model for facial landmark detection.
+//       A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
+//       in Computer Vision and Pattern Recognition Workshops, 2017.    
+//
+//       Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
+//       Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling 
+//       in IEEE International. Conference on Computer Vision (ICCV),  2015 
+//
+//       Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
+//       Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson 
+//       in Facial Expression Recognition and Analysis Challenge, 
+//       IEEE International Conference on Automatic Face and Gesture Recognition, 2015 
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stdafx.h"
+
+#include "CCNF_patch_expert.h"
+
+// OpenCV includes
+#include <opencv2/core/core.hpp>
+#include <opencv2/imgproc.hpp>
+
+// Local includes
+#include "LandmarkDetectorUtils.h"
+
+using namespace LandmarkDetector;
+
+// Copy constructor of the neuron, the weights and all of the cached weight DFTs are cloned rather than shared
+CCNF_neuron::CCNF_neuron(const CCNF_neuron& other) : weights(other.weights.clone())
+{
+	neuron_type = other.neuron_type;
+	norm_weights = other.norm_weights;
+	bias = other.bias;
+	alpha = other.alpha;
+	// Every cached DFT is cloned before it is stored, so it does not refer to the matrix held by other
+	typedef std::map<int, cv::Mat_<double> >::const_iterator DftIterator;
+	for (DftIterator entry = other.weights_dfts.begin(); entry != other.weights_dfts.end(); ++entry)
+	{
+		weights_dfts.insert(std::pair<int, cv::Mat>(entry->first, entry->second.clone()));
+	}
+}
+
+// Copy constructor: neurons, window sizes and betas are copied by the initialiser list, the matrices are cloned in the body
+CCNF_patch_expert::CCNF_patch_expert(const CCNF_patch_expert& other) : neurons(other.neurons), window_sizes(other.window_sizes), betas(other.betas)
+{
+	// Plain value members
+	width = other.width;
+	height = other.height;
+	patch_confidence = other.patch_confidence;
+
+	// The combined weight matrix gets a buffer of its own
+	weight_matrix = other.weight_matrix.clone();
+
+	// The same goes for each of the Sigmas
+	for (size_t s = 0; s < other.Sigmas.size(); ++s)
+	{
+		Sigmas.push_back(other.Sigmas[s].clone());
+	}
+}
+
+// Compute the Sigma for a particular window size and cache it (does nothing if that window size is already cached)
+void CCNF_patch_expert::ComputeSigmas(std::vector<cv::Mat_<float> > sigma_components, int window_size)
+{
+	// Bail out if this window size has been dealt with before
+	for (size_t w = 0; w < window_sizes.size(); ++w)
+	{
+		if (window_size == window_sizes[w]) return;
+	}
+	// Each of the landmarks will have the same connections, hence constant number of sigma components
+	const int num_betas = sigma_components.size();
+	const int num_alphas = neurons.size();
+	const int num_pixels = window_size * window_size;
+
+	// Add up the alphas of all of the neurons
+	float alpha_total = 0;
+	for (int n = 0; n < num_alphas; ++n)
+	{
+		alpha_total = alpha_total + neurons[n].alpha;
+	}
+
+	// First term: the alpha total on the diagonal
+	cv::Mat_<float> alpha_term = alpha_total * cv::Mat_<float>::eye(num_pixels, num_pixels);
+
+	// Second term: the sigma components weighted by their betas
+	cv::Mat_<float> beta_term = cv::Mat_<float>::zeros(num_pixels, num_pixels);
+	for (int k = 0; k < num_betas; ++k)
+	{
+		beta_term = beta_term + ((float)betas[k]) * sigma_components[k];
+	}
+
+	// Sigma is the inverse (requested with the Cholesky method) of twice the sum of the two terms
+	cv::Mat_<float> sigma_inverse = 2 * (alpha_term + beta_term);
+	cv::Mat sigma;
+	cv::invert(sigma_inverse, sigma, cv::DECOMP_CHOLESKY);
+
+	// Cache the result along with the window size it was computed for
+	window_sizes.push_back(window_size);
+	Sigmas.push_back(sigma);
+}
+
+//===========================================================================
+void CCNF_neuron::Read(std::ifstream &stream)
+{
+	// A 4 byte type tag comes first, for a neuron it is expected to be 2
+	int type_tag;
+	stream.read(reinterpret_cast<char*>(&type_tag), 4);
+	assert(type_tag == 2);
+
+	// Followed by the neuron type (4 bytes), and by norm_weights, bias and alpha (8 bytes each)
+	stream.read(reinterpret_cast<char*>(&neuron_type), 4);
+	stream.read(reinterpret_cast<char*>(&norm_weights), 8);
+	stream.read(reinterpret_cast<char*>(&bias), 8);
+	stream.read(reinterpret_cast<char*>(&alpha), 8);
+	// And finally the weight matrix
+	LandmarkDetector::ReadMatBin(stream, weights);
+}
+
+// Perform im2col, while at the same time doing contrast normalization and adding a bias term.
+void im2colContrastNormBias(const cv::Mat_<float>& input, const unsigned int width, const unsigned int height, cv::Mat_<float>& output)
+{
+	const unsigned int m = input.rows;
+	const unsigned int n = input.cols;
+	// Each row of output describes one width x height block of input: columns 1 onwards hold its pixels (mean removed, scaled to unit norm), column 0 is never written here and is 1 when the buffer gets allocated below
+	// determine how many blocks there will be with a sliding window of width x height in the input
+	const unsigned int yB = m - height + 1;
+	const unsigned int xB = n - width + 1;
+
+	// Allocate the output size. A mismatch in either of the dimensions needs a new buffer, as writing into one of the wrong shape would go out of bounds
+	if (output.rows != xB*yB || output.cols != width * height + 1)
+	{
+		output = cv::Mat::ones(xB*yB, width * height + 1, CV_32F);
+	}
+
+	// Iterate over the blocks, going down each column of block positions before moving on to the next one
+	unsigned int rowIdx = 0;
+	for (unsigned int j = 0; j< xB; j++)
+	{
+		for (unsigned int i = 0; i< yB; i++)
+		{
+
+			float* Mo = output.ptr<float>(rowIdx);
+
+			float sum = 0;
+
+			for (unsigned int yy = 0; yy < height; ++yy)
+			{
+				const float* Mi = input.ptr<float>(i + yy);
+				for (unsigned int xx = 0; xx < width; ++xx)
+				{
+					unsigned int colIdx = xx*height + yy; // the pixels of a block are laid out column by column
+					float in = Mi[j + xx];
+					sum += in;
+
+					Mo[colIdx + 1] = in;
+				}
+			}
+
+			// Working out the mean
+			float mean = sum / (float)(width * height);
+
+			float sum_sq = 0;
+			const unsigned int num_items = width*height + 1;
+			// Working out the sum squared and subtracting the mean (index 0 is the bias column, so it is left out)
+			for (unsigned int x = 1; x < num_items; ++x)
+			{
+				float in = Mo[x] - mean;
+				Mo[x] = in;
+				sum_sq += in * in;
+			}
+
+			float norm = sqrt(sum_sq);
+
+			// Avoiding division by 0
+			if (norm == 0)
+			{
+				norm = 1;
+			}
+
+			// Take the reciprocal once, so that the loop below multiplies rather than divides
+			norm = 1.0 / norm;
+
+			for (unsigned int x = 1; x < num_items; ++x)
+			{
+				Mo[x] *= norm;
+			}
+
+			rowIdx++;
+		}
+	}
+}
+
+//===========================================================================
+void CCNF_neuron::Response(const cv::Mat_<float> &im, cv::Mat_<double> &im_dft, cv::Mat &integral_img, cv::Mat &integral_img_sq, cv::Mat_<float> &resp)
+{
+	// Size of the response map: one value for every placement of the weights inside of im
+	const int resp_rows = im.rows - weights.rows + 1;
+	const int resp_cols = im.cols - weights.cols + 1;
+
+	// The (possibly normalised) patch area that the weights get matched against
+	cv::Mat_<float> area;
+
+	if (neuron_type == 3)
+	{
+		// Normalise across the whole patch, values <= 0 indicate invalid pixels and are ignored
+		cv::Mat_<uchar> valid = im > 0;
+
+		cv::Scalar area_mean;
+		cv::Scalar area_std;
+		cv::meanStdDev(im, area_mean, area_std, valid);
+
+		// If all of the values are the same there is no spread, so only the mean is removed to avoid dividing by 0
+		if (area_std[0] == 0)
+		{
+			area = (im - area_mean[0]);
+		}
+		else
+		{
+			area = (im - area_mean[0]) / area_std[0];
+		}
+
+		// Invalid pixels are zeroed out after the normalisation
+		area.setTo(0, valid == 0);
+	}
+	else if (neuron_type == 0)
+	{
+		// This type of patch is used as it is
+		area = im;
+	}
+	else
+	{
+		// Any other neuron type can not be handled
+		// (the message below embeds __LINE__, hence it is kept on the line it has always been on)
+		printf("ERROR(%s,%d): Unsupported patch type %d!\n", __FILE__,__LINE__,neuron_type);
+		abort();
+	}
+
+	// Allocate the response only if the caller has passed in an empty one
+	if (resp.empty())
+	{
+		resp.create(resp_rows, resp_cols);
+	}
+
+	// The response from neuron before activation (the linear multiplication, efficient calc of response)
+	if (neuron_type != 3)
+	{
+		matchTemplate_m(area, im_dft, integral_img, integral_img_sq, weights, weights_dfts, resp, cv::TM_CCOEFF_NORMED);
+	}
+	else
+	{
+		// In case of depth we use per area, rather than per patch normalisation
+		matchTemplate_m(area, im_dft, integral_img, integral_img_sq, weights, weights_dfts, resp, cv::TM_CCOEFF);
+	}
+
+	// The logistic function (sigmoid) is applied to the response of each pixel in place
+	const cv::MatIterator_<float> resp_end = resp.end();
+	cv::MatIterator_<float> value = resp.begin();
+
+	while (value != resp_end)
+	{
+		// 2 * alpha scales the sigmoid, norm_weights and bias are applied to the raw response first
+		*value = (2 * alpha) * 1.0 / (1.0 + exp(-(*value * norm_weights + bias)));
+		++value;
+	}
+
+}
+
+//===========================================================================
+void CCNF_patch_expert::Read(std::ifstream &stream, std::vector<int> window_sizes, std::vector<std::vector<cv::Mat_<float> > > sigma_components)
+{
+	// A 4 byte type tag comes first, for a CCNF patch expert it is expected to be 5
+	int type_tag;
+	stream.read(reinterpret_cast<char*>(&type_tag), 4);
+	assert(type_tag == 5);
+
+	// Then the width and height of the patch and the number of neurons it has (4 bytes each)
+	int neuron_count;
+	stream.read(reinterpret_cast<char*>(&width), 4);
+	stream.read(reinterpret_cast<char*>(&height), 4);
+	stream.read(reinterpret_cast<char*>(&neuron_count), 4);
+
+	// No neurons means an empty patch, due to landmark being invisible at that orientation
+	if (neuron_count == 0)
+	{
+		// skip over an empty int (due to the way things were written out)
+		int unused;
+		stream.read(reinterpret_cast<char*>(&unused), 4);
+		return;
+	}
+
+	// The neurons are stored one after the other
+	neurons.resize(neuron_count);
+	for (int n = 0; n < neuron_count; ++n)
+	{
+		neurons[n].Read(stream);
+	}
+
+	// Combine the neuron weights to one weight matrix for more efficient computation:
+	// one row per neuron, with the bias in column 0 and the flattened, scaled weights after it
+	weight_matrix = cv::Mat_<float>(neurons.size(), 1 + neurons[0].weights.rows * neurons[0].weights.cols);
+	for (size_t n = 0; n < neurons.size(); ++n)
+	{
+		const int flat_len = neurons[n].weights.rows * neurons[n].weights.cols;
+
+		// Flatten the transposed weights into a single row
+		cv::Mat_<float> transposed = neurons[n].weights.t();
+		cv::Mat_<float> flat_row = transposed.reshape(1, flat_len);
+		flat_row = flat_row.t();
+
+		// Incorporate neuron weights directly
+		flat_row = flat_row * neurons[n].norm_weights;
+		flat_row.copyTo(weight_matrix(cv::Rect(1, n, flat_len, 1)));
+
+		// Incorporate bias as well
+		weight_matrix.at<float>(n, 0) = neurons[n].bias;
+	}
+
+	// In case we are using OpenBLAS, make sure it is not multi-threading as we are multi-threading outside of it
+	openblas_set_num_threads(1);
+
+	// The betas (8 bytes each) are only read when window sizes were passed in (the parameter, not the member of the same name)
+	if (!window_sizes.empty())
+	{
+		const int beta_count = sigma_components[0].size();
+		betas.resize(beta_count);
+		for (int b = 0; b < beta_count; ++b)
+		{
+			stream.read(reinterpret_cast<char*>(&betas[b]), 8);
+		}
+	}
+
+	// Read the patch confidence
+	stream.read(reinterpret_cast<char*>(&patch_confidence), 8);
+}
+
+//===========================================================================
+// Computes the patch expert response over area_of_interest: the neuron responses are summed up, multiplied by the Sigma cached for this response size and shifted to be non-negative.
+// A Sigma for a window size equal to the response height has to be cached already (see ComputeSigmas), otherwise CV_Assert raises a cv::Exception.
+void CCNF_patch_expert::Response(const cv::Mat_<float> &area_of_interest, cv::Mat_<float> &response)
+{
+	int response_height = area_of_interest.rows - height + 1;
+	int response_width = area_of_interest.cols - width + 1;
+
+	if(response.rows != response_height || response.cols != response_width)
+	{
+		response.create(response_height, response_width);
+	}
+	response.setTo(0);
+
+	// A patch expert without neurons has nothing to contribute, the response stays at 0 (same as in ResponseOpenBlas)
+	if(neurons.empty())
+	{
+		return;
+	}
+
+	// the placeholder for the DFT of the image, the integral image, and squared integral image so they don't get recalculated for every response
+	cv::Mat_<double> area_of_interest_dft;
+	cv::Mat integral_image, integral_image_sq;
+	cv::Mat_<float> neuron_response;
+
+	// responses from the neural layers
+	for(size_t i = 0; i < neurons.size(); i++)
+	{
+		// Do not bother with neuron response if the alpha is tiny and will not contribute much to overall result
+		if(neurons[i].alpha > 1e-4)
+		{
+			neurons[i].Response(area_of_interest, area_of_interest_dft, integral_image, integral_image_sq, neuron_response);
+			response = response + neuron_response;
+		}
+	}
+
+	// Find the matching sigma, -1 stands for none found
+	int s_to_use = -1;
+	for(size_t i=0; i < window_sizes.size(); ++i)
+	{
+		if(window_sizes[i] == response_height)
+		{
+			s_to_use = i;
+			break;
+		}
+	}
+
+	// Without this check a missing Sigma would lead to Sigmas[-1] being accessed below
+	CV_Assert(s_to_use >= 0);
+	cv::Mat_<float> resp_vec_f = response.reshape(1, response_height * response_width);
+	cv::Mat out = Sigmas[s_to_use] * resp_vec_f;
+	response = out.reshape(1, response_height);
+
+	// Making sure the response does not have negative numbers
+	double min;
+	minMaxIdx(response, &min, 0);
+	if(min < 0)
+	{
+		response = response - min;
+	}
+}
+
+void CCNF_patch_expert::ResponseOpenBlas(const cv::Mat_<float> &area_of_interest, cv::Mat_<float> &response, cv::Mat_<float>& im2col_prealloc)
+{
+	// Computes the patch expert response like Response does, but through the combined weight_matrix and two OpenBLAS sgemm calls.
+	// im2col_prealloc is the buffer handed to im2colContrastNormBias, which only reallocates it when its size does not fit.
+	// A Sigma for a window size equal to the response height has to be cached already (see ComputeSigmas), otherwise CV_Assert raises a cv::Exception.
+
+	int response_height = area_of_interest.rows - height + 1;
+	int response_width = area_of_interest.cols - width + 1;
+
+	if (response.rows != response_height || response.cols != response_width)
+	{
+		response.create(response_height, response_width);
+	}
+
+	response.setTo(0);
+	if (neurons.size() == 0)
+	{
+		return;
+	}
+
+	// the placeholder for the column normalized representation of the image, don't get recalculated for every response
+	im2colContrastNormBias(area_of_interest, neurons[0].weights.cols, neurons[0].weights.rows, im2col_prealloc);
+	cv::Mat_<float> normalized_input = im2col_prealloc.t();
+
+	// One row of responses (before the sigmoid) per neuron
+	cv::Mat_<float> neuron_resp_full(weight_matrix.rows, normalized_input.cols, 0.0f);
+
+	// Perform matrix multiplication in OpenBLAS (fortran call)
+	float alpha1 = 1.0;
+	float beta1 = 0.0;
+	// 'N' requests no transposition, both elements are initialised so that no indeterminate char is handed over
+	char N[2] = { 'N', '\0' };
+	sgemm_(N, N, &normalized_input.cols, &weight_matrix.rows, &weight_matrix.cols, &alpha1, (float*)normalized_input.data, &normalized_input.cols, (float*)weight_matrix.data, &weight_matrix.cols, &beta1, (float*)neuron_resp_full.data, &normalized_input.cols);
+
+	// Above is a faster version of this
+	//cv::Mat_<float> neuron_resp_full = this->weight_matrix * normalized_input;
+
+	// Sum up the activations of the neurons, skipping the ones with a tiny alpha
+	for (size_t i = 0; i < neurons.size(); i++)
+	{
+		if (neurons[i].alpha > 1e-4)
+		{
+			cv::MatIterator_<float> p = response.begin();
+
+			cv::Mat_<float> rel_row = neuron_resp_full.row(i);
+			cv::MatIterator_<float> q1 = rel_row.begin(); // respone for each pixel
+			cv::MatIterator_<float> q2 = rel_row.end();
+
+			// the logistic function (sigmoid) applied to the response
+			while (q1 != q2)
+			{
+				*p++ += (2.0 * neurons[i].alpha) / (1.0 + exp(-*q1++));
+			}
+		}
+	}
+
+	// NOTE(review): the values above follow the row order of im2colContrastNormBias (one column of block positions after the other), presumably the reason for this transpose
+	response = response.t();
+
+	int s_to_use = -1;
+
+	// Find the matching sigma
+	for (size_t i = 0; i < window_sizes.size(); ++i)
+	{
+		if (window_sizes[i] == response_height)
+		{
+			// Found the correct sigma
+			s_to_use = i;
+			break;
+		}
+	}
+
+	// Without this check a missing Sigma would lead to Sigmas[-1] being accessed below
+	CV_Assert(s_to_use >= 0);
+
+	cv::Mat_<float> resp_vec_f = response.reshape(1, response_height * response_width);
+
+	cv::Mat_<float> out(Sigmas[s_to_use].rows, resp_vec_f.cols, 0.0f);
+
+	// Perform matrix multiplication in OpenBLAS (fortran call)
+	alpha1 = 1.0;
+	beta1 = 0.0;
+	sgemm_(N, N, &resp_vec_f.cols, &Sigmas[s_to_use].rows, &Sigmas[s_to_use].cols, &alpha1, (float*)resp_vec_f.data, &resp_vec_f.cols, (float*)Sigmas[s_to_use].data, &Sigmas[s_to_use].cols, &beta1, (float*)out.data, &resp_vec_f.cols);
+
+	// Above is a faster version of this
+	//cv::Mat out = Sigmas[s_to_use] * resp_vec_f;
+
+	response = out.reshape(1, response_height);
+
+	// Making sure the response does not have negative numbers
+	double min;
+
+	minMaxIdx(response, &min, 0);
+	if (min < 0)
+	{
+		response = response - min;
+	}
+}
--- a/pkg/OpenFace/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp
+++ b/pkg/OpenFace/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp
@@ -0,0 +1,617 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
+// all rights reserved.
+//
+// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
+//
+// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.  
+// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
+//
+// License can be found in OpenFace-license.txt
+//
+//     * Any publications arising from the use of this software, including but
+//       not limited to academic journal and conference publications, technical
+//       reports and manuals, must cite at least one of the following works:
+//
+//       OpenFace 2.0: Facial Behavior Analysis Toolkit
+//       Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
+//       in IEEE International Conference on Automatic Face and Gesture Recognition, 2018  
+//
+//       Convolutional experts constrained local model for facial landmark detection.
+//       A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
+//       in Computer Vision and Pattern Recognition Workshops, 2017.    
+//
+//       Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
+//       Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling 
+//       in IEEE International. Conference on Computer Vision (ICCV),  2015 
+//
+//       Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
+//       Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson 
+//       in Facial Expression Recognition and Analysis Challenge, 
+//       IEEE International Conference on Automatic Face and Gesture Recognition, 2015 
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stdafx.h"
+
+#include "CEN_patch_expert.h"
+
+// OpenCV includes
+#include <opencv2/core/core.hpp>
+#include <opencv2/imgproc.hpp>
+
+// Local includes
+#include "LandmarkDetectorUtils.h"
+
+// For exponential
+#include <math.h> 
+
+using namespace LandmarkDetector;
+
+// Copy constructor (no clone() is called on the layer data, as it is very large and there is no real need to store the copies)
+CEN_patch_expert::CEN_patch_expert(const CEN_patch_expert& other) : confidence(other.confidence), width_support(other.width_support), height_support(other.height_support)
+{
+	// Append the weights, bias and activation function of every layer of other, using plain element copies
+	const size_t num_layers = other.weights.size();
+	for (size_t layer = 0; layer < num_layers; ++layer)
+	{
+		weights.push_back(other.weights[layer]);
+		biases.push_back(other.biases[layer]);
+		activation_function.push_back(other.activation_function[layer]);
+	}
+
+}
+
+//===========================================================================
+void CEN_patch_expert::Read(std::ifstream &stream)
+{
+	// Setting up OpenBLAS
+	openblas_set_num_threads(1);
+
+	// A 4 byte type tag comes first, for a CEN patch expert it is expected to be 6
+	int type_tag;
+	stream.read(reinterpret_cast<char*>(&type_tag), 4);
+	assert(type_tag == 6);
+
+	// Then the size of the support region and the number of layers (4 bytes each)
+	int layer_count;
+	stream.read(reinterpret_cast<char*>(&width_support), 4);
+	stream.read(reinterpret_cast<char*>(&height_support), 4);
+	stream.read(reinterpret_cast<char*>(&layer_count), 4);
+
+	// A layer count of 0 is an empty patch due to landmark being invisible at that orientation
+	// (or visible through mirroring), in that case only the confidence is left to be read
+	if (layer_count != 0)
+	{
+		activation_function.resize(layer_count);
+		weights.resize(layer_count);
+		biases.resize(layer_count);
+
+		for (int layer = 0; layer < layer_count; ++layer)
+		{
+			// Every layer starts with the id of its activation function (4 bytes)
+			int activation_id;
+			stream.read(reinterpret_cast<char*>(&activation_id), 4);
+			activation_function[layer] = activation_id;
+
+			// The bias matrix comes before the weight matrix
+			cv::Mat_<double> layer_bias;
+			LandmarkDetector::ReadMatBin(stream, layer_bias);
+
+			// Followed by the weight matrix
+			cv::Mat_<double> layer_weight;
+			LandmarkDetector::ReadMatBin(stream, layer_weight);
+
+			// Hand both matrices over to the members
+			weights[layer] = layer_weight;
+			biases[layer] = layer_bias;
+		}
+	}
+
+	// Read the patch confidence (8 bytes)
+	stream.read(reinterpret_cast<char*>(&confidence), 8);
+
+}
+
+// Contrast normalize the input for response map computation: in every row the elements from column 1 onwards have their mean removed and are divided by their norm
+void contrastNorm(const cv::Mat_<float>& input, cv::Mat_<float>& output)
+{
+	const unsigned int num_cols = input.cols;
+	const unsigned int num_rows = input.rows;
+
+	// The output starts off as a copy of the input, so column 0 is carried over unchanged
+	output = input.clone();
+
+	// Compute row wise
+	for (unsigned int row = 0; row < num_rows; ++row)
+	{
+		// Mean of the row without its first element
+		const float mean = (float)cv::mean(input(cv::Rect(1, row, num_cols - 1, 1)))[0];
+
+		const float* in_row = input.ptr<float>(row);
+		float* out_row = output.ptr<float>(row);
+
+		// Sum of the squared differences from the mean
+		float sum_sq = 0;
+		for (unsigned int col = 1; col < num_cols; ++col)
+		{
+			const float value = in_row[col];
+			sum_sq += (value - mean) * (value - mean);
+		}
+
+		// A norm of 0 is replaced by 1 to avoid dividing by 0
+		float norm = sqrt(sum_sq);
+		if (norm == 0)
+		{
+			norm = 1;
+		}
+
+		// Normalise everything but the first element of the row
+		for (unsigned int col = 1; col < num_cols; ++col)
+		{
+			out_row[col] = (out_row[col] - mean) / norm;
+		}
+	}
+
+}
+
+void im2colBias(const cv::Mat_<float>& input, const unsigned int width, const unsigned int height, cv::Mat_<float>& output)
+{
+	// im2col with room for a bias term: row (i + j*yB) of output holds the width x height block whose top left corner is at row i, column j of input. Column 0 is never written here and is 1 when the buffer gets allocated below
+	const unsigned int m = input.rows;
+	const unsigned int n = input.cols;
+
+	// determine how many blocks there will be with a sliding window of width x height in the input
+	const unsigned int yB = m - height + 1;
+	const unsigned int xB = n - width + 1;
+
+	// Allocate the output size. A mismatch in either of the dimensions needs a new buffer, as writing into one of the wrong shape would go out of bounds
+	if(output.rows != xB*yB || output.cols != width * height + 1)
+	{
+		output = cv::Mat::ones(xB*yB, width * height + 1, CV_32F);
+	}
+
+	// Iterate over the blocks
+	for (unsigned int j = 0; j< xB; j++)
+	{
+		for (unsigned int i = 0; i< yB; i++)
+		{
+			unsigned int rowIdx = i + j*yB;
+
+			for (unsigned int yy = 0; yy < height; ++yy)
+				for (unsigned int xx = 0; xx < width; ++xx)
+				{
+					unsigned int colIdx = xx*height + yy; // the pixels of a block are laid out column by column
+					output.at<float>(rowIdx, colIdx + 1) = input.at<float>(i + yy, j + xx);
+				}
+		}
+	}
+}
+
+//===========================================================================
+void CEN_patch_expert::Response(const cv::Mat_<float> &area_of_interest, cv::Mat_<float> &response)
+{
+	// Number of rows of the final response map
+	const int response_height = area_of_interest.rows - height_support + 1;
+
+	// One row per support region of the area of interest, with a leading bias column
+	cv::Mat_<float> input_col;
+	im2colBias(area_of_interest, width_support, height_support, input_col);
+
+	// Mean and standard deviation normalization
+	contrastNorm(input_col, response);
+	response = response.t();
+
+	for (size_t layer = 0; layer < activation_function.size(); ++layer)
+	{
+		// We are performing response = weights[layers] * response(t), but in OpenBLAS as that is significantly quicker than OpenCV
+		cv::Mat_<float> layer_input = response;
+		cv::Mat_<float> layer_weights = weights[layer];
+		cv::Mat_<float> layer_output(layer_weights.rows, layer_input.cols);
+
+		float* m1 = (float*)layer_input.data;
+		float* m2 = (float*)layer_weights.data;
+		float* m3 = (float*)layer_output.data;
+
+		// Perform matrix multiplication in OpenBLAS (fortran call)
+		float alpha1 = 1.0;
+		float beta1 = 0.0;
+		char N[2]; N[0] = 'N';
+		sgemm_(N, N, &layer_input.cols, &layer_weights.rows, &layer_weights.cols, &alpha1, m1, &layer_input.cols, m2, &layer_weights.cols, &beta1, m3, &layer_input.cols);
+
+		// The above is a faster version of this, by calling the fortran version directly
+		//cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, resp.cols, weight.rows, weight.cols, 1, m1, resp.cols, m2, weight.cols, 0.0, m3, resp.cols);
+
+		response = layer_output;
+
+		// Adding the bias, every row of the layer output has a bias value of its own
+		float* values = (float*)response.data;
+		const float* layer_bias = (float*)biases[layer].data;
+		const size_t num_rows = response.rows;
+		const size_t num_cols = response.cols;
+
+		for (size_t r = 0; r < num_rows; ++r)
+		{
+			const float row_bias = layer_bias[r];
+			float* row_values = values + r * num_cols;
+
+			for (size_t c = 0; c < num_cols; ++c)
+			{
+				row_values[c] = row_values[c] + row_bias;
+			}
+		}
+
+		// Perform the activation, the function to use is selected by the id stored for the layer
+		if (activation_function[layer] == 0) // Sigmoid
+		{
+			// Iterate over the data directly
+			const size_t num_values = num_rows * num_cols;
+
+			for (size_t idx = 0; idx < num_values; ++idx)
+			{
+				values[idx] = 1.0 / (1.0 + exp(-(values[idx])));
+			}
+		}
+		else if (activation_function[layer] == 2) // ReLU
+		{
+			// Values that are not above 0 become 0
+			cv::threshold(response, response, 0, 0, cv::THRESH_TOZERO);
+		}
+
+	}
+
+	// Rearrange the output of the last layer into the response map:
+	// it is transposed, reshaped to response_height rows and transposed once more
+	// NOTE(review): presumably needed as im2colBias enumerates the blocks column by column - confirm
+	response = response.t();
+	response = response.reshape(1, response_height);
+	response = response.t();
+
+}
+
+// Perform im2col, while at the same time doing contrast normalization and adding a bias term (also skip every other region)
+void im2colBiasSparseContrastNorm(const cv::Mat_<float>& input, const unsigned int width, const unsigned int height, cv::Mat_<float>& output)
+{
+	const unsigned int m = input.rows;
+	const unsigned int n = input.cols;
+	// Each row of output describes one of the blocks that are kept: columns 1 onwards hold its pixels (mean removed, scaled to unit norm), column 0 is never written here and is 1 when the buffer gets allocated below
+	// determine how many blocks there will be with a sliding window of width x height in the input
+	const unsigned int yB = m - height + 1;
+	const unsigned int xB = n - width + 1;
+
+	// As we will be skipping half of the outputs. NOTE(review): the loop below keeps yB*xB / 2 blocks, which only equals out_size when yB*xB is odd - confirm that callers never pass an even number of blocks
+	const unsigned int out_size = (yB*xB - 1) / 2;
+
+	// Allocate the output size. A mismatch in either of the dimensions needs a new buffer, as writing into one of the wrong shape would go out of bounds
+	if (output.rows != out_size || output.cols != width * height + 1)
+	{
+		output = cv::Mat::ones(out_size, width * height + 1, CV_32F);
+	}
+
+	// Iterate over the blocks, skipping every second block
+	unsigned int rowIdx = 0;
+	unsigned int skipCounter = 0;
+	for (unsigned int j = 0; j< xB; j++)
+	{
+		for (unsigned int i = 0; i< yB; i++)
+		{
+			// The 1st, 3rd, 5th, ... block in visiting order is skipped
+			skipCounter++;
+			if ((skipCounter + 1) % 2 == 0)
+			{
+				continue;
+			}
+
+			float* Mo = output.ptr<float>(rowIdx);
+
+			float sum = 0;
+
+			for (unsigned int yy = 0; yy < height; ++yy)
+			{
+				const float* Mi = input.ptr<float>(i + yy);
+				for (unsigned int xx = 0; xx < width; ++xx)
+				{
+					int colIdx = xx*height + yy; // the pixels of a block are laid out column by column
+					float in = Mi[j + xx];
+					sum += in;
+
+					Mo[colIdx+1] = in;
+				}
+			}
+
+			// Working out the mean
+			float mean = sum / (float)(width * height);
+
+			float sum_sq = 0;
+			const unsigned int num_items = width*height + 1;
+			// Working out the sum squared and subtracting the mean (index 0 is the bias column, so it is left out)
+			for (unsigned int x = 1; x < num_items; ++x)
+			{
+				float in = Mo[x] - mean;
+				Mo[x] = in;
+				sum_sq += in * in;
+			}
+
+			float norm = sqrt(sum_sq);
+
+			// Avoiding division by 0
+			if (norm == 0)
+			{
+				norm = 1;
+			}
+
+			// Take the reciprocal once, so that the loop below multiplies rather than divides
+			norm = 1.0 / norm;
+
+			for (unsigned int x = 1; x < num_items; ++x)
+			{
+				Mo[x] *= norm;
+			}
+
+			rowIdx++;
+		}
+	}
+}
+
+void im2colBiasSparse(const cv::Mat_<float>& input, const unsigned int width, const unsigned int height, cv::Mat_<float>& output)
+{
+	// im2col with room for a bias term that only keeps every second width x height block of input. Column 0 of output is never written here and is 1 when the buffer gets allocated below
+	const unsigned int m = input.rows;
+	const unsigned int n = input.cols;
+
+	// determine how many blocks there will be with a sliding window of width x height in the input
+	const unsigned int yB = m - height + 1;
+	const unsigned int xB = n - width + 1;
+
+	// As we will be skipping half of the outputs. NOTE(review): the loop below keeps yB*xB / 2 blocks, which only equals out_size when yB*xB is odd - confirm that callers never pass an even number of blocks
+	const unsigned int out_size = (yB*xB - 1) / 2;
+
+	// Allocate the output size. A mismatch in either of the dimensions needs a new buffer, as writing into one of the wrong shape would go out of bounds
+	if (output.rows != out_size || output.cols != width * height + 1)
+	{
+		output = cv::Mat::ones(out_size, width * height + 1, CV_32F);
+	}
+
+	// Iterate over the blocks, skipping every second block
+	unsigned int rowIdx = 0;
+	unsigned int skipCounter = 0;
+	for (unsigned int j = 0; j< xB; j++)
+	{
+		for (unsigned int i = 0; i< yB; i++)
+		{
+			// The 1st, 3rd, 5th, ... block in visiting order is skipped
+			skipCounter++;
+			if ((skipCounter + 1) % 2 == 0)
+			{
+				continue;
+			}
+
+			for (unsigned int yy = 0; yy < height; ++yy)
+			{
+				for (unsigned int xx = 0; xx < width; ++xx)
+				{
+					unsigned int colIdx = xx*height + yy; // the pixels of a block are laid out column by column
+					output.at<float>(rowIdx, colIdx + 1) = input.at<float>(i + yy, j + xx);
+				}
+			}
+			rowIdx++;
+		}
+	}
+}
+
+// As the sparse patch expert output needs interpolating, this function creates the interpolation matrix from sparse to full response
+void LandmarkDetector::interpolationMatrix(cv::Mat_<float>& mapMatrix, int response_height, int response_width, 
+	int input_width, int input_height)
+{
+	// How many blocks there will be with a sliding window in the input, the window is fixed to 11 x 11 here
+	const int blocks_y = input_height - 11 + 1;
+	const int blocks_x = input_width - 11 + 1;
+
+	// As we will be skipping half of the outputs
+	const int sparse_count = (blocks_y * blocks_x - 1) / 2;
+
+	// One row per value of the sparse response, one column per value of the full response
+	mapMatrix.create(sparse_count, response_height * response_width);
+	mapMatrix.setTo(0.0f);
+
+	// Find a mapping from indices in the computed sparse response and the original full response:
+	// every odd linear position is given the next free index, the even ones are left at 0
+	cv::Mat_<int> sparse_ids(response_width, response_height, 0);
+	const int num_positions = sparse_ids.rows * sparse_ids.cols;
+	int next_id = 0;
+	for (int pos = 1; pos < num_positions; pos += 2)
+	{
+		sparse_ids.at<int>(pos) = next_id++;
+	}
+
+	// After the transpose the ids are addressed as (y, x)
+	sparse_ids = sparse_ids.t();
+
+	// Offsets of the neighbours, in the order they are looked at: x - 1, y - 1, x + 1, y + 1
+	const int offset_x[4] = { -1, 0, 1, 0 };
+	const int offset_y[4] = { 0, -1, 0, 1 };
+
+	// The positions are visited one column after the other, every second visit maps straight to a sparse value
+	int visited = 0;
+	for (int x = 0; x < response_width; ++x)
+	{
+		for (int y = 0; y < response_height; ++y)
+		{
+			// Column of mapMatrix that belongs to this position
+			const int full_col = x * response_height + y;
+			++visited;
+
+			if (visited % 2 == 0)
+			{
+				// A single weight of 1 on the sparse index stored for this position
+				mapMatrix.at<float>(sparse_ids.at<int>(y, x), full_col) = 1;
+			}
+			else
+			{
+				// Otherwise the weight is split evenly between the neighbours that lie inside of the response
+				float num_neigh = 0.0;
+				std::vector<int> neighbour_ids;
+				for (int d = 0; d < 4; ++d)
+				{
+					const int nx = x + offset_x[d];
+					const int ny = y + offset_y[d];
+
+					// Neighbours outside of the response are left out
+					if (nx >= 0 && nx < response_width && ny >= 0 && ny < response_height)
+					{
+						num_neigh++;
+						neighbour_ids.push_back(sparse_ids.at<int>(ny, nx));
+					}
+				}
+
+				// Each of the neighbours found gets the same share
+				for (size_t k = 0; k < neighbour_ids.size(); ++k)
+				{
+					mapMatrix.at<float>(neighbour_ids[k], full_col) = 1.0 / num_neigh;
+				}
+			}
+		}
+	}
+
+}
+
+void CEN_patch_expert::ResponseInternal(cv::Mat_<float>& response)
+{
+	// Passes response through all of the layers in turn, on return it refers to the output of the last layer
+	const size_t num_layers = activation_function.size();
+
+	for (size_t layer = 0; layer < num_layers; ++layer)
+	{
+		// We are performing response = weights[layers] * response, but in OpenBLAS as that is significantly quicker than OpenCV
+		cv::Mat_<float> layer_input = response;
+		cv::Mat_<float> layer_output(weights[layer].rows, layer_input.cols);
+
+		float* m1 = (float*)layer_input.data;
+		float* m2 = (float*)weights[layer].data;
+		float* m3 = (float*)layer_output.data;
+
+		// Perform matrix multiplication in OpenBLAS (fortran call)
+		float alpha1 = 1.0;
+		float beta1 = 0.0;
+		char N[2]; N[0] = 'N';
+		sgemm_(N, N, &layer_input.cols, &weights[layer].rows, &weights[layer].cols, &alpha1, m1, &layer_input.cols, m2, &weights[layer].cols, &beta1, m3, &layer_input.cols);
+
+		// The above is a faster version of this, by calling the fortran version directly
+		//cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, resp.cols, weight.rows, weight.cols, 1, m1, resp.cols, m2, weight.cols, 0.0, m3, resp.cols);
+
+		response = layer_output;
+
+		// Alternative is to multiply the responses directly using OpenCV (much slower)
+		//response = weights[layer] * response;
+
+		// Adding the bias, every row of the layer output has a bias value of its own, TODO can this bias be incorporated in the above?
+		float* values = (float*)response.data;
+		const float* layer_bias = (float*)biases[layer].data;
+		const unsigned int num_rows = response.rows;
+		const unsigned int num_cols = response.cols;
+
+		for (unsigned int r = 0; r < num_rows; ++r)
+		{
+			const float row_bias = layer_bias[r];
+			float* row_values = values + r * num_cols;
+
+			for (unsigned int c = 0; c < num_cols; ++c)
+			{
+				row_values[c] = row_values[c] + row_bias;
+			}
+		}
+
+		// Perform the activation, the function to use is selected by the id stored for the layer
+		if (activation_function[layer] == 0) // Sigmoid
+		{
+			// Iterate over the data directly
+			const unsigned int num_values = num_rows * num_cols;
+
+			for (unsigned int idx = 0; idx < num_values; ++idx)
+			{
+				values[idx] = 1.0f / (1.0f + exp(-(values[idx])));
+			}
+		}
+		else if (activation_function[layer] == 2) // ReLU
+		{
+			// Values that are not above 0 become 0
+			cv::threshold(response, response, 0, 0, cv::THRESH_TOZERO);
+		}
+
+	}
+
+}
+
+//===========================================================================
+void CEN_patch_expert::ResponseSparse(const cv::Mat_<float> &area_of_interest_left, const cv::Mat_<float> &area_of_interest_right, cv::Mat_<float> &response_left, cv::Mat_<float> &response_right, cv::Mat_<float>& mapMatrix, cv::Mat_<float>& im2col_prealloc_left, cv::Mat_<float>& im2col_prealloc_right)
+{
+	// Computes the responses for up to two areas of interest in one pass through the layers, an empty area of interest is left out.
+	// The right one is mirrored before it is evaluated and its response is mirrored back at the end.
+	const bool left_provided = !area_of_interest_left.empty();
+	const bool right_provided = !area_of_interest_right.empty();
+	const bool both_provided = left_provided && right_provided;
+
+	// Height of the response maps, taken from the left area of interest if there is one and from the right one otherwise
+	unsigned int response_height = 0;
+
+	// NOTE(review): cv::flip is given area_of_interest_right as its destination as well, so the matrix of the caller
+	// appears to end up mirrored although it is passed in by const reference - confirm that no caller relies on it afterwards
+	if (right_provided)
+	{
+		cv::flip(area_of_interest_right, area_of_interest_right, 1);
+		response_height = area_of_interest_right.rows - height_support + 1;
+		im2colBiasSparseContrastNorm(area_of_interest_right, width_support, height_support, im2col_prealloc_right);
+	}
+
+	// Extract im2col but in a sparse way and contrast normalize
+	if (left_provided)
+	{
+		response_height = area_of_interest_left.rows - height_support + 1;
+		im2colBiasSparseContrastNorm(area_of_interest_left, width_support, height_support, im2col_prealloc_left);
+	}
+
+	// Input of the first layer: the im2col rows of what was provided (left ones first), transposed
+	cv::Mat_<float> response;
+	if (both_provided)
+	{
+		cv::vconcat(im2col_prealloc_left, im2col_prealloc_right, response);
+		response = response.t();
+	}
+	else if (left_provided) response = im2col_prealloc_left.t();
+	else if (right_provided) response = im2col_prealloc_right.t();
+
+	ResponseInternal(response);
+
+	// With both sides present, the first half of the columns belongs to the left side and the second half to the right one
+	if (both_provided)
+	{
+		const int half = response.cols / 2;
+		response_left = response(cv::Rect(0, 0, half, 1));
+		response_right = response(cv::Rect(half, 0, half, 1));
+	}
+	else if (left_provided) response_left = response;
+	else if (right_provided) response_right = response;
+
+	// Expand the sparse responses with mapMatrix and rearrange them to have response_height columns
+	if (left_provided)
+	{
+		// TODO This could and should be gemm'ed
+		response_left = response_left * mapMatrix;
+		response_left = response_left.t();
+		response_left = response_left.reshape(1, response_height);
+		response_left = response_left.t();
+	}
+
+	if (right_provided)
+	{
+		// TODO This could and should be gemm'ed
+		response_right = response_right * mapMatrix;
+		response_right = response_right.t();
+		response_right = response_right.reshape(1, response_height);
+		response_right = response_right.t();
+
+		// Mirror the response back
+		cv::flip(response_right, response_right, 1);
+	}
+
+}
--- a/pkg/OpenFace/lib/local/LandmarkDetector/src/CNN_utils.cpp
+++ b/pkg/OpenFace/lib/local/LandmarkDetector/src/CNN_utils.cpp
@@ -0,0 +1,543 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2017, Tadas Baltrusaitis, all rights reserved.
+//
+// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
+//
+// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.  
+// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
+//
+// License can be found in OpenFace-license.txt
+//
+//     * Any publications arising from the use of this software, including but
+//       not limited to academic journal and conference publications, technical
+//       reports and manuals, must cite at least one of the following works:
+//
+//       OpenFace 2.0: Facial Behavior Analysis Toolkit
+//       Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
+//       in IEEE International Conference on Automatic Face and Gesture Recognition, 2018  
+//
+//       Convolutional experts constrained local model for facial landmark detection.
+//       A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
+//       in Computer Vision and Pattern Recognition Workshops, 2017.    
+//
+//       Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
+//       Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling 
+//       in IEEE International. Conference on Computer Vision (ICCV),  2015 
+//
+//       Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
+//       Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson 
+//       in Facial Expression Recognition and Analysis Challenge, 
+//       IEEE International Conference on Automatic Face and Gesture Recognition, 2015 
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stdafx.h"
+
+#include "CNN_utils.h"
+
+namespace LandmarkDetector
+{
+
+	// Parametric ReLU with leaky weights (separate ones per channel), applied in place.
+	//
+	// input_output_maps: activations to rectify, overwritten with the result
+	// prelu_weights:     one multiplier for negative values per channel, read with a single linear index
+	//
+	// Two layouts are handled:
+	//  - several maps: map k is one channel and uses weight k
+	//  - a single map: row k of that map is one channel and uses weight k
+	void PReLU(std::vector<cv::Mat_<float> >& input_output_maps, cv::Mat_<float> prelu_weights)
+	{
+		// Nothing to rectify, this also avoids indexing element 0 of an empty vector below
+		if (input_output_maps.empty())
+		{
+			return;
+		}
+
+		if (input_output_maps.size() > 1)
+		{
+			for (int k = 0; k < (int)input_output_maps.size(); ++k)
+			{
+				cv::Mat_<float>& curr_map = input_output_maps[k];
+				const float neg_mult = prelu_weights.at<float>(k);
+
+				// Each map is walked using its own dimensions, row by row through raw pointers
+				for (int r = 0; r < curr_map.rows; ++r)
+				{
+					float* row_ptr = curr_map.ptr<float>(r);
+					for (int c = 0; c < curr_map.cols; ++c)
+					{
+						const float in_val = row_ptr[c];
+
+						// The prelu step
+						row_ptr[c] = in_val >= 0 ? in_val : in_val * neg_mult;
+					}
+				}
+			}
+		}
+		else
+		{
+			cv::Mat_<float>& single_map = input_output_maps[0];
+			const int w = single_map.cols;
+
+			for (int k = 0; k < prelu_weights.rows; ++k)
+			{
+				// A row pointer is used instead of row(k).begin(): the iterator would keep a pointer to the
+				// temporary matrix header returned by row(k), which is destroyed at the end of that statement
+				float* row_ptr = single_map.ptr<float>(k);
+				const float neg_mult = prelu_weights.at<float>(k);
+
+				for (int i = 0; i < w; ++i)
+				{
+					const float in_val = row_ptr[i];
+
+					// Apply the PReLU
+					row_ptr[i] = in_val >= 0 ? in_val : in_val * neg_mult;
+				}
+			}
+		}
+	}
+
+	// Fully connected layer: multiplies the input by the weights and adds the biases.
+	//
+	// outputs is cleared and refilled. Three cases are distinguished:
+	//  - a single input map: the result of weights * input + biases is returned transposed
+	//  - several maps whose count equals weights.cols: every map is one input feature and the
+	//    result is returned as biases.rows maps with the height of the first input map
+	//  - several maps otherwise: all the data is flattened into one column vector first
+	void fully_connected(std::vector<cv::Mat_<float> >& outputs, const std::vector<cv::Mat_<float> >& input_maps, cv::Mat_<float> weights, cv::Mat_<float> biases)
+	{
+		outputs.clear();
+
+		// Single map, no rearranging of the data is needed
+		if (input_maps.size() <= 1)
+		{
+			cv::Mat product = weights * input_maps[0] + biases;
+			outputs.push_back(product.t());
+			return;
+		}
+
+		const int num_maps = (int)input_maps.size();
+		const cv::Size first_map_size = input_maps[0].size();
+
+		// Decides between the per-map and the flattened treatment
+		const bool treat_as_feature_maps = (num_maps == weights.cols);
+
+		// Stack all the maps, one flattened map per row
+		cv::Mat_<float> stacked(num_maps, input_maps[0].cols * input_maps[0].rows);
+		for (int map_id = 0; map_id < num_maps; ++map_id)
+		{
+			cv::Mat_<float> flat = input_maps[map_id];
+
+			// Transpose first if all of the data will be flattened
+			if (!treat_as_feature_maps)
+			{
+				flat = flat.t();
+			}
+
+			flat = flat.reshape(0, 1);
+			flat.copyTo(stacked.row(map_id));
+		}
+
+		if (treat_as_feature_maps)
+		{
+			stacked = weights * stacked;
+
+			for (int k = 0; k < biases.rows; ++k)
+			{
+				// Add the bias of this output
+				stacked.row(k) = stacked.row(k) + biases.at<float>(k);
+
+				// Bring the row back to the shape of the input maps and store it
+				cv::Mat_<float> as_map = stacked.row(k).clone();
+				as_map = as_map.reshape(1, first_map_size.height);
+				outputs.push_back(as_map);
+			}
+		}
+		else
+		{
+			// Flatten the input into a single column
+			stacked = stacked.reshape(0, stacked.rows * stacked.cols);
+
+			stacked = weights * stacked + biases;
+
+			outputs.push_back(stacked);
+		}
+	}
+
+
+	// Max pooling of every input map, the pooled maps replace the content of outputs.
+	//
+	// stride_x / stride_y:           step between neighbouring pooling windows
+	// kernel_size_x / kernel_size_y: size of the pooling window, windows are clipped at the map border
+	void max_pooling(std::vector<cv::Mat_<float> >& outputs, const std::vector<cv::Mat_<float> >& input_maps, int stride_x, int stride_y, int kernel_size_x, int kernel_size_y)
+	{
+		std::vector<cv::Mat_<float> > pooled_maps;
+
+		for (size_t map_id = 0; map_id < input_maps.size(); ++map_id)
+		{
+			const cv::Mat_<float>& src = input_maps[map_id];
+
+			// The division is rounded to the nearest integer, to match caffe style output
+			const int out_x = (int)round((float)(src.cols - kernel_size_x) / (float)stride_x) + 1;
+			const int out_y = (int)round((float)(src.rows - kernel_size_y) / (float)stride_y) + 1;
+
+			cv::Mat_<float> pooled(out_y, out_x, 0.0);
+
+			// Walk over the window origins, stopping once either the input or the output is exhausted
+			for (int out_col = 0, x = 0; x < src.cols && out_col < out_x; ++out_col, x += stride_x)
+			{
+				const int x_end = cv::min(src.cols, x + kernel_size_x);
+
+				for (int out_row = 0, y = 0; y < src.rows && out_row < out_y; ++out_row, y += stride_y)
+				{
+					const int y_end = cv::min(src.rows, y + kernel_size_y);
+
+					// Largest value inside the current window
+					float window_max = -FLT_MAX;
+					for (int x_in = x; x_in < x_end; ++x_in)
+					{
+						for (int y_in = y; y_in < y_end; ++y_in)
+						{
+							const float value = src.at<float>(y_in, x_in);
+							if (value > window_max)
+							{
+								window_max = value;
+							}
+						}
+					}
+
+					pooled.at<float>(out_row, out_col) = window_max;
+				}
+			}
+
+			pooled_maps.push_back(pooled);
+		}
+
+		outputs = pooled_maps;
+	}
+	// Correlates every input map with its matching template in the frequency domain and writes the sum over all maps into result; img_dfts and _templ_dfts hold forward DFTs that are reused between calls
+	void convolution_single_kern_fft(const std::vector<cv::Mat_<float> >& input_imgs, std::vector<cv::Mat_<double> >& img_dfts, 
+		const std::vector<cv::Mat_<float> >&  _templs, std::map<int, std::vector<cv::Mat_<double> > >& _templ_dfts, cv::Mat_<float>& result)
+	{
+		// An empty result is created with the size of the valid response, otherwise it is assumed to be defined properly
+		if (result.empty())
+		{
+			cv::Size corrSize(input_imgs[0].cols - _templs[0].cols + 1, input_imgs[0].rows - _templs[0].rows + 1);
+			result.create(corrSize);
+		}
+
+		// Our model will always be under min block size so can ignore this
+		//const double blockScale = 4.5;
+		//const int minBlockSize = 256;
+
+		int maxDepth = CV_64F; // not referenced again in this function
+
+		cv::Size dftsize;
+		// The DFT has to hold the response plus the template overlap, rounded up to a size the DFT handles efficiently
+		dftsize.width = cv::getOptimalDFTSize(result.cols + _templs[0].cols - 1);
+		dftsize.height = cv::getOptimalDFTSize(result.rows + _templs[0].rows - 1);
+
+		// Compute block size
+		cv::Size blocksize;
+		blocksize.width = dftsize.width - _templs[0].cols + 1;
+		blocksize.width = MIN(blocksize.width, result.cols);
+		blocksize.height = dftsize.height - _templs[0].rows + 1;
+		blocksize.height = MIN(blocksize.height, result.rows);
+
+		std::vector<cv::Mat_<double>> dftTempl;
+		// NOTE(review): the cache is keyed by the DFT width only, presumably the DFT height is always the same for a given width - confirm
+		// if this has not been precomputed, precompute it, otherwise use it
+		if (_templ_dfts.find(dftsize.width) == _templ_dfts.end())
+		{
+			dftTempl.resize(_templs.size());
+			for (size_t k = 0; k < _templs.size(); ++k)
+			{
+				dftTempl[k].create(dftsize.height, dftsize.width);
+
+				cv::Mat_<float> src = _templs[k];
+
+				cv::Mat_<double> dst(dftTempl[k], cv::Rect(0, 0, dftsize.width, dftsize.height));
+				// dst1 is the top left corner of the DFT buffer that receives the template values
+				cv::Mat_<double> dst1(dftTempl[k], cv::Rect(0, 0, _templs[k].cols, _templs[k].rows));
+
+				if (dst1.data != src.data)
+					src.convertTo(dst1, dst1.depth());
+
+				if (dst.cols > _templs[k].cols)
+				{
+					cv::Mat_<double> part(dst, cv::Range(0, _templs[k].rows), cv::Range(_templs[k].cols, dst.cols));
+					part.setTo(0);
+				}
+
+				// Perform DFT of the template
+				dft(dst, dst, 0, _templs[k].rows);
+
+			}
+			_templ_dfts[dftsize.width] = dftTempl;
+
+		}
+		else
+		{
+			dftTempl = _templ_dfts[dftsize.width];
+		}
+
+		cv::Size bsz(std::min(blocksize.width, result.cols), std::min(blocksize.height, result.rows));
+		cv::Mat src;
+		// cdst is a view onto result, writing to it fills in the output
+		cv::Mat cdst(result, cv::Rect(0, 0, bsz.width, bsz.height));
+
+		std::vector<cv::Mat_<double> > dftImgs;
+		dftImgs.resize(input_imgs.size());
+		// The forward DFTs of the inputs are only computed when the caller has not supplied them in img_dfts
+		if (img_dfts.empty())
+		{
+			for (size_t k = 0; k < input_imgs.size(); ++k)
+			{
+				dftImgs[k].create(dftsize);
+				dftImgs[k].setTo(0.0);
+
+				cv::Size dsz(bsz.width + _templs[k].cols - 1, bsz.height + _templs[k].rows - 1);
+
+				int x2 = std::min(input_imgs[k].cols, dsz.width);
+				int y2 = std::min(input_imgs[k].rows, dsz.height);
+
+				cv::Mat src0(input_imgs[k], cv::Range(0, y2), cv::Range(0, x2));
+				cv::Mat dst(dftImgs[k], cv::Rect(0, 0, dsz.width, dsz.height));
+				cv::Mat dst1(dftImgs[k], cv::Rect(0, 0, x2, y2));
+
+				src = src0;
+
+				if (dst1.data != src.data)
+					src.convertTo(dst1, dst1.depth());
+
+				dft(dftImgs[k], dftImgs[k], 0, dsz.height);
+				img_dfts.push_back(dftImgs[k].clone());
+			}
+		}
+		// Multiply every input spectrum with the conjugated template spectrum and accumulate over the maps
+		cv::Mat_<double> dft_img(img_dfts[0].rows, img_dfts[0].cols, 0.0);
+		for (size_t k = 0; k < input_imgs.size(); ++k)
+		{
+			cv::Mat dftTempl1(dftTempl[k], cv::Rect(0, 0, dftsize.width, dftsize.height));
+			if (k == 0)
+			{
+				cv::mulSpectrums(img_dfts[k], dftTempl1, dft_img, 0, true);
+			}
+			else
+			{
+				cv::mulSpectrums(img_dfts[k], dftTempl1, dftImgs[k], 0, true);
+				dft_img = dft_img + dftImgs[k];
+			}
+		}
+		// Scaled inverse transform of the accumulated spectrum, bsz.height is passed as the nonzeroRows argument
+		cv::dft(dft_img, dft_img, cv::DFT_INVERSE + cv::DFT_SCALE, bsz.height);
+
+		src = dft_img(cv::Rect(0, 0, bsz.width, bsz.height));
+		// Convert the top left bsz region to 32 bit floats straight into result (through cdst)
+		src.convertTo(cdst, CV_32F);
+
+	}
+
+	// Runs the FFT based routine once per kernel set and stores the response with the kernel's bias added.
+	// The forward DFTs of the input maps are computed during the first call and reused for the remaining
+	// kernels, the DFTs of the kernels themselves are kept in precomp_dfts (one cache per kernel set).
+	void convolution_fft2(std::vector<cv::Mat_<float> >& outputs, const std::vector<cv::Mat_<float> >& input_maps,
+		const std::vector<std::vector<cv::Mat_<float> > >& kernels, const std::vector<float >& biases,
+		std::vector<std::map<int, std::vector<cv::Mat_<double> > > >& precomp_dfts)
+	{
+		outputs.clear();
+
+		// Starts empty, filled in by the first call below and shared by the following ones
+		std::vector<cv::Mat_<double> > input_dfts;
+
+		const size_t num_kernels = kernels.size();
+		for (size_t kern = 0; kern < num_kernels; ++kern)
+		{
+			// An empty response lets the routine allocate the right size
+			cv::Mat_<float> response;
+			convolution_single_kern_fft(input_maps, input_dfts, kernels[kern], precomp_dfts[kern], response);
+
+			// Add the bias of this kernel and keep the map
+			outputs.push_back(response + biases[kern]);
+		}
+	}
+	// Transposed im2col: every window of the input becomes one column of output, which ends up with width * height rows and one column per window
+	void im2col_t(const cv::Mat_<float>& input, const unsigned int width, const unsigned int height, cv::Mat_<float>& output)
+	{
+		// m holds the number of input columns and n the number of input rows (the opposite of im2col)
+		const unsigned int m = input.cols;
+		const unsigned int n = input.rows;
+
+		// determine how many blocks there will be with a sliding window of width x height in the input
+		const unsigned int yB = m - height + 1;
+		const unsigned int xB = n - width + 1;
+		// NOTE(review): rows j + yy and columns i + xx are read below, with these bounds both stay in range only when width == height - confirm that is always the case
+		// Allocate the output size
+		if (output.rows != width * height || output.cols != xB*yB)
+		{
+			output = cv::Mat::ones(width * height, xB*yB, CV_32F);
+		}
+
+		// Iterate over the whole image
+		for (unsigned int i = 0; i< yB; i++)
+		{
+			unsigned int rowIdx = i;
+			for (unsigned int j = 0; j< xB; j++)
+			{
+				//int rowIdx = i; +j*yB;
+				// iterate over the blocks within the image
+				for (unsigned int yy = 0; yy < height; ++yy)
+				{
+					// Faster iteration over the image
+					const float* Mi = input.ptr<float>(j + yy);
+					for (unsigned int xx = 0; xx < width; ++xx)
+					{
+						unsigned int colIdx = xx*height + yy;
+						// colIdx picks the element inside the window (output row), rowIdx picks the window (output column)
+						output.at<float>(colIdx, rowIdx) = Mi[i + xx];
+					}
+				}
+				rowIdx += yB;
+
+			}
+		}
+	}
+
+	// Rearranges every width x height window of the input into one row of the output.
+	// Windows are visited row by row over the image, the elements of a window are stored
+	// at index xx * height + yy. The output is only (re)allocated when its size does not match.
+	void im2col(const cv::Mat_<float>& input, const unsigned int width, const unsigned int height, cv::Mat_<float>& output)
+	{
+		// Number of window positions vertically and horizontally
+		const unsigned int blocks_y = input.rows - height + 1;
+		const unsigned int blocks_x = input.cols - width + 1;
+
+		// Number of elements in a single window
+		const unsigned int window_len = width * height;
+
+		if (output.cols != window_len || output.rows != blocks_x * blocks_y)
+		{
+			output = cv::Mat::ones(blocks_x * blocks_y, window_len, CV_32F);
+		}
+
+		// Running index of the output row that receives the current window
+		unsigned int out_row = 0;
+
+		for (unsigned int top = 0; top < blocks_y; ++top)
+		{
+			for (unsigned int left = 0; left < blocks_x; ++left, ++out_row)
+			{
+				float* dst = output.ptr<float>(out_row);
+
+				for (unsigned int yy = 0; yy < height; ++yy)
+				{
+					// Direct row access is faster than per element lookups
+					const float* src = input.ptr<float>(top + yy);
+
+					for (unsigned int xx = 0; xx < width; ++xx)
+					{
+						dst[xx * height + yy] = src[left + xx];
+					}
+				}
+			}
+		}
+	}
+
+	// im2col over several input maps at once: every window position gives one output row that holds the
+	// window of each map one after another, followed by one extra column that is left at its initial value of one.
+	// The output is only re-allocated if its column count is wrong or it has too few rows, so a larger
+	// buffer from an earlier call is reused and only its first rows are overwritten.
+	void im2col_multimap(const std::vector<cv::Mat_<float> >& inputs, const unsigned int width, const unsigned int height, 
+		cv::Mat_<float>& output)
+	{
+		// Number of window positions vertically and horizontally, taken from the first map
+		const unsigned int blocks_y = inputs[0].rows - height + 1;
+		const unsigned int blocks_x = inputs[0].cols - width + 1;
+
+		// Number of elements contributed by a single map to an output row
+		const unsigned int window_len = height * width;
+
+		const unsigned int map_count = (unsigned int)inputs.size();
+
+		if (output.cols != width * height * inputs.size() + 1 || (unsigned int) output.rows < blocks_x * blocks_y)
+		{
+			output = cv::Mat::ones(blocks_x * blocks_y, width * height * map_count + 1, CV_32F);
+		}
+
+		// Running index of the output row that receives the current window
+		unsigned int out_row = 0;
+
+		for (unsigned int top = 0; top < blocks_y; ++top)
+		{
+			for (unsigned int left = 0; left < blocks_x; ++left, ++out_row)
+			{
+				float* dst = output.ptr<float>(out_row);
+
+				// TODO, this should be rearranged and done through mem-copy
+				for (unsigned int map_id = 0; map_id < map_count; ++map_id)
+				{
+					// Start of the section of the output row that belongs to this map
+					float* dst_map = dst + map_id * window_len;
+
+					for (unsigned int yy = 0; yy < height; ++yy)
+					{
+						// Direct row access is faster than per element lookups
+						const float* src = inputs[map_id].ptr<float>(top + yy);
+
+						for (unsigned int xx = 0; xx < width; ++xx)
+						{
+							dst_map[xx * height + yy] = src[left + xx];
+						}
+					}
+				}
+			}
+		}
+	}
+
+	// A fast convolution implementation expressed as one matrix product: the input maps are unrolled with
+	// im2col_multimap (one row per window position) and multiplied with weight_matrix.
+	// pre_alloc_im2col is the buffer for the unrolled input, if it is empty or unsuitable it is created.
+	// Reusing that buffer between calls is what makes this non thread safe, as the same memory would be used.
+	// outputs receives one map per row of the transposed product, each with as many rows as there are vertical window positions.
+	void convolution_direct_blas(std::vector<cv::Mat_<float> >& outputs, const std::vector<cv::Mat_<float> >& input_maps, const cv::Mat_<float>& weight_matrix, int height_k, int width_k, cv::Mat_<float>& pre_alloc_im2col)
+	{
+		outputs.clear();
+
+		// Number of positions a height_k x width_k window can take inside the input
+		int positions_y = input_maps[0].rows - height_k + 1;
+		int positions_x = input_maps[0].cols - width_k + 1;
+		int num_positions = positions_y * positions_x;
+
+		// Unroll the input, only the first num_positions rows of the buffer are relevant afterwards
+		im2col_multimap(input_maps, width_k, height_k, pre_alloc_im2col);
+
+		float* unrolled_data = (float*)pre_alloc_im2col.data;
+		float* weight_data = (float*)weight_matrix.data;
+		int num_outputs = weight_matrix.cols;
+
+		cv::Mat_<float> product(num_positions, weight_matrix.cols, 1.0);
+		float* product_data = (float*)product.data;
+
+		// Call fortran directly (faster), this is equivalent to product = pre_alloc_im2col * weight_matrix
+		//cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, weight_t.cols, yB * xB, pre_alloc_im2col.cols, 1, m2, weight_t.cols, m1, pre_alloc_im2col.cols, 0.0, m3, weight_t.cols);
+		float alpha = 1.0f;
+		float beta = 0.0f;
+		char N[2]; N[0] = 'N';
+		sgemm_(N, N, &num_outputs, &num_positions, &pre_alloc_im2col.cols, &alpha, weight_data, &num_outputs, unrolled_data, &pre_alloc_im2col.cols, &beta, product_data, &num_outputs);
+
+		// After the transpose every row holds the response of one output map
+		product = product.t();
+
+		// Move back to vectors and reshape accordingly
+		for (int map_id = 0; map_id < product.rows; ++map_id)
+		{
+			outputs.push_back(product.row(map_id).reshape(1, positions_y));
+		}
+	}
+
+
+}
--- a/pkg/OpenFace/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp
+++ b/pkg/OpenFace/lib/local/LandmarkDetector/src/FaceDetectorMTCNN.cpp
@@ -0,0 +1,893 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
+// all rights reserved.
+//
+// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
+//
+// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.  
+// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
+//
+// License can be found in OpenFace-license.txt
+//
+//     * Any publications arising from the use of this software, including but
+//       not limited to academic journal and conference publications, technical
+//       reports and manuals, must cite at least one of the following works:
+//
+//       OpenFace 2.0: Facial Behavior Analysis Toolkit
+//       Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
+//       in IEEE International Conference on Automatic Face and Gesture Recognition, 2018  
+//
+//       Convolutional experts constrained local model for facial landmark detection.
+//       A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
+//       in Computer Vision and Pattern Recognition Workshops, 2017.    
+//
+//       Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
+//       Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling 
+//       in IEEE International. Conference on Computer Vision (ICCV),  2015 
+//
+//       Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
+//       Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson 
+//       in Facial Expression Recognition and Analysis Challenge, 
+//       IEEE International Conference on Automatic Face and Gesture Recognition, 2015 
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stdafx.h"
+
+#include "FaceDetectorMTCNN.h"
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+#include "LandmarkDetectorUtils.h"
+
+// CNN includes
+#include "CNN_utils.h"
+
+// Instead of including cblas.h (the definitions from OpenBLAS and other BLAS libraries differ), declare the required OpenBLAS functionality here
+#ifdef __cplusplus
+extern "C" {
+	/* Assume C declarations for C++ */
+#endif  /* __cplusplus */
+
+	/*Set the number of threads on runtime.*/
+	void openblas_set_num_threads(int num_threads);
+}
+
+using namespace LandmarkDetector;
+
+// Constructs the detector by reading the model found at the given location
+FaceDetectorMTCNN::FaceDetectorMTCNN(const std::string& location)
+{
+	Read(location);
+}
+// Copy constructor, every one of the three networks is copied through the CNN copy constructor
+FaceDetectorMTCNN::FaceDetectorMTCNN(const FaceDetectorMTCNN& other) :
+	PNet(other.PNet),
+	RNet(other.RNet),
+	ONet(other.ONet)
+{
+}
+
+// Copy constructor.
+//
+// Layer types, pooling parameters and convolution biases are plain values and are copied directly.
+// Every matrix of weights, kernels and biases is cloned, as copying a cv::Mat would otherwise share its data.
+// The per object working data is not shared with the original either:
+//  - the im2col scratch buffers start out empty, they are allocated on first use during direct convolution
+//  - the DFT caches are recreated as empty placeholders, they are filled during FFT based convolution
+CNN::CNN(const CNN& other) : cnn_layer_types(other.cnn_layer_types), cnn_max_pooling_layers(other.cnn_max_pooling_layers), cnn_convolutional_layers_bias(other.cnn_convolutional_layers_bias)
+{
+
+	// One empty scratch buffer per convolutional layer. Copying the buffers of the other object would make
+	// both objects write into the same memory whenever the buffer is reused in place.
+	this->conv_layer_pre_alloc_im2col.resize(other.conv_layer_pre_alloc_im2col.size());
+
+	// One empty DFT cache per kernel of every convolutional layer. Without these placeholders the FFT based
+	// convolution in Inference would index into an empty vector on a copied network.
+	this->cnn_convolutional_layers_dft.resize(other.cnn_convolutional_layers_dft.size());
+	for (size_t l = 0; l < other.cnn_convolutional_layers_dft.size(); ++l)
+	{
+		this->cnn_convolutional_layers_dft[l].resize(other.cnn_convolutional_layers_dft[l].size());
+	}
+
+	this->cnn_convolutional_layers_weights.resize(other.cnn_convolutional_layers_weights.size());
+	for (size_t l = 0; l < other.cnn_convolutional_layers_weights.size(); ++l)
+	{
+		// Make sure the matrix is copied.
+		this->cnn_convolutional_layers_weights[l] = other.cnn_convolutional_layers_weights[l].clone();
+	}
+
+	this->cnn_convolutional_layers.resize(other.cnn_convolutional_layers.size());
+	for (size_t l = 0; l < other.cnn_convolutional_layers.size(); ++l)
+	{
+		this->cnn_convolutional_layers[l].resize(other.cnn_convolutional_layers[l].size());
+
+		for (size_t i = 0; i < other.cnn_convolutional_layers[l].size(); ++i)
+		{
+			this->cnn_convolutional_layers[l][i].resize(other.cnn_convolutional_layers[l][i].size());
+
+			for (size_t k = 0; k < other.cnn_convolutional_layers[l][i].size(); ++k)
+			{
+				// Make sure the matrix is copied.
+				this->cnn_convolutional_layers[l][i][k] = other.cnn_convolutional_layers[l][i][k].clone();
+			}
+		}
+	}
+
+	this->cnn_fully_connected_layers_weights.resize(other.cnn_fully_connected_layers_weights.size());
+	for (size_t l = 0; l < other.cnn_fully_connected_layers_weights.size(); ++l)
+	{
+		// Make sure the matrix is copied.
+		this->cnn_fully_connected_layers_weights[l] = other.cnn_fully_connected_layers_weights[l].clone();
+	}
+
+	this->cnn_fully_connected_layers_biases.resize(other.cnn_fully_connected_layers_biases.size());
+	for (size_t l = 0; l < other.cnn_fully_connected_layers_biases.size(); ++l)
+	{
+		// Make sure the matrix is copied.
+		this->cnn_fully_connected_layers_biases[l] = other.cnn_fully_connected_layers_biases[l].clone();
+	}
+
+	this->cnn_prelu_layer_weights.resize(other.cnn_prelu_layer_weights.size());
+	for (size_t l = 0; l < other.cnn_prelu_layer_weights.size(); ++l)
+	{
+		// Make sure the matrix is copied.
+		this->cnn_prelu_layer_weights[l] = other.cnn_prelu_layer_weights[l].clone();
+	}
+}
+
+// Runs the network on an image and returns the maps produced by the last layer.
+//
+// input_img:   image to process, it is not modified. A single channel image is expanded to three channels
+//              first. The three channels are then fed to the network in reversed order (BGR becomes RGB).
+// direct:      if true convolutions are computed with im2col and a matrix product, otherwise with the FFT version
+// thread_safe: if true the direct convolution uses a scratch buffer local to this call instead of the one
+//              kept in the object. NOTE(review): the FFT version always uses the DFT caches kept in the object.
+//
+// Layer types: 0 convolution, 1 max pooling, 2 fully connected, 3 PReLU, 4 sigmoid. For any other type the
+// maps are passed on unchanged.
+std::vector<cv::Mat_<float>> CNN::Inference(const cv::Mat& input_img, bool direct, bool thread_safe)
+{
+	// The converted image has to go into a matrix of its own: input_img is const, and asking cvtColor to write
+	// a three channel result into it fails as the type of a const output matrix can't be changed
+	cv::Mat img;
+	if (input_img.channels() == 1)
+	{
+		cv::cvtColor(input_img, img, cv::COLOR_GRAY2BGR);
+	}
+	else
+	{
+		img = input_img;
+	}
+
+	// Position within each of the per layer type parameter lists
+	int cnn_layer = 0;
+	int fully_connected_layer = 0;
+	int prelu_layer = 0;
+	int max_pool_layer = 0;
+
+	// Split a BGR image into three channels
+	cv::Mat channels[3];
+	cv::split(img, channels);
+
+	// Flip the BGR order to RGB
+	std::vector<cv::Mat_<float> > input_maps;
+	input_maps.push_back(channels[2]);
+	input_maps.push_back(channels[1]);
+	input_maps.push_back(channels[0]);
+
+	std::vector<cv::Mat_<float> > outputs;
+
+	for (size_t layer = 0; layer < cnn_layer_types.size(); ++layer)
+	{
+
+		// Determine layer type
+		const int layer_type = cnn_layer_types[layer];
+
+		if (layer_type == 0) // Convolutional layer
+		{
+			// Either perform direct convolution through matrix multiplication or use an FFT optimized version, which one is optimal depends on the kernel and input sizes
+			if (direct)
+			{
+				// The kernel size is taken from the first kernel of the layer
+				const cv::Mat_<float>& first_kernel = cnn_convolutional_layers[cnn_layer][0][0];
+
+				if (thread_safe)
+				{
+					// A buffer local to the call, so nothing is shared between concurrent calls
+					cv::Mat_<float> pre_alloc;
+					convolution_direct_blas(outputs, input_maps, cnn_convolutional_layers_weights[cnn_layer], first_kernel.rows, first_kernel.cols, pre_alloc);
+				}
+				else
+				{
+					convolution_direct_blas(outputs, input_maps, cnn_convolutional_layers_weights[cnn_layer], first_kernel.rows, first_kernel.cols, conv_layer_pre_alloc_im2col[cnn_layer]);
+				}
+			}
+			else
+			{
+				convolution_fft2(outputs, input_maps, cnn_convolutional_layers[cnn_layer], cnn_convolutional_layers_bias[cnn_layer], cnn_convolutional_layers_dft[cnn_layer]);
+			}
+
+			cnn_layer++;
+		}
+		else if (layer_type == 1) // Max pooling
+		{
+			// The tuple holds kernel width, kernel height, horizontal stride, vertical stride
+			const int kernel_size_x = std::get<0>(cnn_max_pooling_layers[max_pool_layer]);
+			const int kernel_size_y = std::get<1>(cnn_max_pooling_layers[max_pool_layer]);
+			const int stride_x = std::get<2>(cnn_max_pooling_layers[max_pool_layer]);
+			const int stride_y = std::get<3>(cnn_max_pooling_layers[max_pool_layer]);
+
+			max_pooling(outputs, input_maps, stride_x, stride_y, kernel_size_x, kernel_size_y);
+			max_pool_layer++;
+		}
+		else if (layer_type == 2) // Fully connected
+		{
+			fully_connected(outputs, input_maps, cnn_fully_connected_layers_weights[fully_connected_layer], cnn_fully_connected_layers_biases[fully_connected_layer]);
+			fully_connected_layer++;
+		}
+		else if (layer_type == 3) // PReLU
+		{
+			// In place prelu computation
+			PReLU(input_maps, cnn_prelu_layer_weights[prelu_layer]);
+			outputs = input_maps;
+			prelu_layer++;
+		}
+		else if (layer_type == 4) // Sigmoid
+		{
+			outputs.clear();
+			for (size_t k = 0; k < input_maps.size(); ++k)
+			{
+				// Apply the sigmoid
+				cv::exp(-input_maps[k], input_maps[k]);
+				input_maps[k] = 1.0 / (1.0 + input_maps[k]);
+
+				outputs.push_back(input_maps[k]);
+			}
+		}
+
+		// Set the outputs of this layer to inputs of the next one
+		input_maps = outputs;
+	}
+
+	return outputs;
+
+}
+
+// Reads a matrix stored as three 4 byte integers (rows, columns, OpenCV type) followed by the raw element data.
+//
+// stream:     binary stream positioned at the start of the matrix
+// output_mat: receives the matrix. If the header can't be read or contains a negative dimension it is left
+//             empty, instead of being created from uninitialised values.
+void ReadMatBin(std::ifstream& stream, cv::Mat &output_mat)
+{
+	// Read in the number of rows, columns and the data type
+	int row = 0;
+	int col = 0;
+	int type = 0;
+
+	stream.read((char*)&row, 4);
+	stream.read((char*)&col, 4);
+	stream.read((char*)&type, 4);
+
+	// Do not build a matrix from a header that was not read completely or makes no sense
+	if (!stream || row < 0 || col < 0)
+	{
+		output_mat.release();
+		return;
+	}
+
+	output_mat = cv::Mat(row, col, type);
+
+	// The byte count is computed in size_t, so that a large matrix can't overflow an int
+	const size_t size = output_mat.total() * output_mat.elemSize();
+	if (size > 0)
+	{
+		stream.read((char *)output_mat.data, (std::streamsize)size);
+	}
+
+}
+
+// Empties the precomputed DFT cache of every kernel in every layer, the caches themselves stay in place
+void CNN::ClearPrecomp()
+{
+	for (auto& layer_caches : cnn_convolutional_layers_dft)
+	{
+		for (auto& kernel_cache : layer_caches)
+		{
+			kernel_cache.clear();
+		}
+	}
+}
+// Reads a network from a binary file: the number of layers, then for every layer a type (0 convolution, 1 max pooling, 2 fully connected, 3 PReLU) followed by the parameters of that layer
+void CNN::Read(const std::string& location)
+{
+	// Limit OpenBLAS to a single thread
+	openblas_set_num_threads(1);
+
+	std::ifstream cnn_stream(location, std::ios::in | std::ios::binary);
+	if (cnn_stream.is_open())
+	{
+		cnn_stream.seekg(0, std::ios::beg);
+
+		// Reading in CNNs
+		// NOTE(review): none of the reads below is checked for failure
+		int network_depth;
+		cnn_stream.read((char*)&network_depth, 4);
+		// NOTE(review): the layer parameters are appended with push_back, so a second Read on the same object would presumably accumulate layers - confirm it is only called once per object
+		cnn_layer_types.resize(network_depth);
+
+		for (int layer = 0; layer < network_depth; ++layer)
+		{
+
+			int layer_type;
+			cnn_stream.read((char*)&layer_type, 4);
+			cnn_layer_types[layer] = layer_type;
+
+			// convolutional
+			if (layer_type == 0)
+			{
+
+				// Read the number of input maps
+				int num_in_maps;
+				cnn_stream.read((char*)&num_in_maps, 4);
+
+				// Read the number of kernels for each input map
+				int num_kernels;
+				cnn_stream.read((char*)&num_kernels, 4);
+
+				std::vector<std::vector<cv::Mat_<float> > > kernels;
+
+				kernels.resize(num_in_maps);
+				// One bias per kernel is stored before the kernels themselves
+				std::vector<float> biases;
+				for (int k = 0; k < num_kernels; ++k)
+				{
+					float bias;
+					cnn_stream.read((char*)&bias, 4);
+					biases.push_back(bias);
+				}
+
+				cnn_convolutional_layers_bias.push_back(biases);
+
+				// For every input map
+				for (int in = 0; in < num_in_maps; ++in)
+				{
+					kernels[in].resize(num_kernels);
+
+					// For every kernel on that input map
+					for (int k = 0; k < num_kernels; ++k)
+					{
+						ReadMatBin(cnn_stream, kernels[in][k]);
+
+					}
+				}
+
+				// Rearrange the kernels for faster inference with FFT
+				std::vector<std::vector<cv::Mat_<float> > > kernels_rearr;
+				kernels_rearr.resize(num_kernels);
+
+				// Fill up the rearranged layer
+				for (int k = 0; k < num_kernels; ++k)
+				{
+					for (int in = 0; in < num_in_maps; ++in)
+					{
+						kernels_rearr[k].push_back(kernels[in][k]);
+					}
+				}
+
+				cnn_convolutional_layers.push_back(kernels_rearr);
+
+				// Place-holders for DFT precomputation
+				std::vector<std::map<int, std::vector<cv::Mat_<double> > > > cnn_convolutional_layers_dft_curr_layer;
+				cnn_convolutional_layers_dft_curr_layer.resize(num_kernels);
+				cnn_convolutional_layers_dft.push_back(cnn_convolutional_layers_dft_curr_layer);
+
+				// Rearrange the flattened kernels into weight matrices for direct convolution computation
+				cv::Mat_<float> weight_matrix(num_in_maps * kernels_rearr[0][0].rows * kernels_rearr[0][0].cols, num_kernels);
+				for (int k = 0; k < num_kernels; ++k)
+				{
+					for (int i = 0; i < num_in_maps; ++i)
+					{
+						// Flatten the kernel
+						cv::Mat_<float> k_flat = kernels_rearr[k][i].t();
+						k_flat = k_flat.reshape(0, 1).t();
+						k_flat.copyTo(weight_matrix(cv::Rect(k, i * kernels_rearr[0][0].rows * kernels_rearr[0][0].cols, 1, kernels_rearr[0][0].rows * kernels_rearr[0][0].cols)));
+					}
+				}
+
+				// Transpose the weight matrix for more convenient computation
+				weight_matrix = weight_matrix.t();
+
+				// Add a bias term to the weight matrix for efficiency
+				cv::Mat_<float> W(weight_matrix.rows, weight_matrix.cols + 1, 1.0);
+				for (int k = 0; k < weight_matrix.rows; ++k)
+				{
+					W.at<float>(k, weight_matrix.cols) = biases[k];
+				}
+				weight_matrix.copyTo(W(cv::Rect(0, 0, weight_matrix.cols, weight_matrix.rows)));
+				// The weights are stored transposed, so the biases form the last row; an empty im2col buffer is added for the layer
+				cnn_convolutional_layers_weights.push_back(W.t());
+				conv_layer_pre_alloc_im2col.push_back(cv::Mat_<float>());
+
+			}
+			else if (layer_type == 1) // max pooling: kernel size followed by stride
+			{
+				int kernel_x, kernel_y, stride_x, stride_y;
+				cnn_stream.read((char*)&kernel_x, 4);
+				cnn_stream.read((char*)&kernel_y, 4);
+				cnn_stream.read((char*)&stride_x, 4);
+				cnn_stream.read((char*)&stride_y, 4);
+				cnn_max_pooling_layers.push_back(std::tuple<int, int, int, int>(kernel_x, kernel_y, stride_x, stride_y));
+			}
+			else if (layer_type == 2) // biases are stored before the weights
+			{
+				cv::Mat_<float> biases;
+				ReadMatBin(cnn_stream, biases);
+				cnn_fully_connected_layers_biases.push_back(biases);
+
+				// Fully connected layer
+				cv::Mat_<float> weights;
+				ReadMatBin(cnn_stream, weights);
+				cnn_fully_connected_layers_weights.push_back(weights.t());
+			}
+			// PReLU weights, a single matrix
+			else if (layer_type == 3)
+			{
+				cv::Mat_<float> weights;
+				ReadMatBin(cnn_stream, weights);
+				cnn_prelu_layer_weights.push_back(weights);
+			}
+		}
+	}
+	else
+	{
+		std::cout << "WARNING: Can't find the CNN location" << std::endl;
+	}
+}
+
+//===========================================================================
+// Read in the MTCNN detector
+//
+// location is a text file in which every line holds the name of a module (PNet, RNet or ONet) followed by
+// the location of the file of that module, relative to the directory of the text file itself.
+// Lines that name any other module are ignored. If the text file can't be opened a message is printed
+// and nothing is read.
+void FaceDetectorMTCNN::Read(const std::string& location)
+{
+
+	std::cout << "Reading the MTCNN face detector from: " << location << std::endl;
+
+	std::ifstream locations(location.c_str(), std::ios_base::in);
+	if (!locations.is_open())
+	{
+		std::cout << "MTCNN model file not found or can't be opened" << std::endl;
+		return;
+	}
+
+	// The other module locations should be defined as relative paths from the main model
+	const fs::path root = fs::path(location).parent_path();
+
+	// The main file contains the references to other files.
+	// The loop is driven by the result of getline rather than by eof(), so that it also ends when reading
+	// fails without reaching the end of the file (which would otherwise loop forever)
+	std::string line;
+	while (std::getline(locations, line))
+	{
+		std::stringstream lineStream(line);
+
+		// figure out which module is to be read from which file
+		std::string module;
+		std::string module_location;
+
+		lineStream >> module;
+		lineStream >> module_location;
+
+		// remove carriage return at the end for compatibility with unix systems
+		if (!module_location.empty() && module_location.back() == '\r')
+		{
+			module_location.pop_back();
+		}
+
+		// append to root
+		module_location = (root / module_location).string();
+
+		if (module == "PNet")
+		{
+			std::cout << "Reading the PNet module from: " << module_location << std::endl;
+			PNet.Read(module_location);
+		}
+		else if (module == "RNet")
+		{
+			std::cout << "Reading the RNet module from: " << module_location << std::endl;
+			RNet.Read(module_location);
+		}
+		else if (module == "ONet")
+		{
+			std::cout << "Reading the ONet module from: " << module_location << std::endl;
+			ONet.Read(module_location);
+		}
+	}
+}
+
+// Greedy non maximum suppression on proposal bounding boxes, prioritizing boxes with high score/confidence.
+//
+// original_bb: the proposals, scores holds the confidence of each of them
+// thresh:      a box is discarded when its overlap with an already selected box is above this value
+// minimum:     if true the overlap is the intersection divided by the smaller of the two areas,
+//              otherwise the intersection divided by the union
+//
+// Returns the indices of the boxes that are kept, in order of decreasing score.
+// Note that the boxes are converted to integer rectangles before any area is computed.
+std::vector<int> non_maximum_supression(const std::vector<cv::Rect_<float> >& original_bb, const std::vector<float>& scores, float thresh, bool minimum)
+{
+
+	// A multimap is always sorted by key internally, so the best remaining candidate is its last entry
+	std::multimap<float, size_t> candidates;
+	for (size_t i = 0; i < original_bb.size(); ++i)
+	{
+		candidates.insert(std::pair<float, size_t>(scores[i], i));
+	}
+
+	std::vector<int> kept_ids;
+
+	// Every pass selects one box and removes the candidates it suppresses
+	while (!candidates.empty())
+	{
+		// Take the candidate with the highest score out of the list
+		auto best = candidates.end();
+		--best;
+		size_t best_id = best->second;
+		candidates.erase(best);
+
+		const cv::Rect best_box = original_bb[best_id];
+
+		// Compare against every remaining candidate
+		auto it = candidates.begin();
+		while (it != candidates.end())
+		{
+			const cv::Rect other_box = original_bb[it->second];
+
+			float intersection = (best_box & other_box).area();
+
+			float denominator;
+			if (minimum)
+			{
+				denominator = cv::min(best_box.area(), other_box.area());
+			}
+			else
+			{
+				denominator = best_box.area() + other_box.area() - intersection;
+			}
+
+			float overlap = intersection / denominator;
+
+			// Less confident boxes that overlap the selected one significantly are dropped
+			if (overlap > thresh)
+			{
+				it = candidates.erase(it);
+			}
+			else
+			{
+				++it;
+			}
+		}
+
+		kept_ids.push_back(best_id);
+	}
+
+	return kept_ids;
+
+}
+
+// Helper function for selecting a subset of bounding boxes based on indices: the three parallel vectors are replaced by
+// their elements at the positions listed in to_keep, in that order. Every index must be valid for all three vectors.
+void select_subset(const std::vector<int>& to_keep, std::vector<cv::Rect_<float> >& bounding_boxes, std::vector<float>& scores,
+	std::vector<cv::Rect_<float> >& corrections)
+{
+	std::vector<cv::Rect_<float> > bounding_boxes_tmp(to_keep.size());
+	std::vector<float> scores_tmp(to_keep.size());
+	std::vector<cv::Rect_<float> > corrections_tmp(to_keep.size());
+	for (size_t i = 0; i < to_keep.size(); ++i)
+	{
+		bounding_boxes_tmp[i] = bounding_boxes[to_keep[i]];
+		scores_tmp[i] = scores[to_keep[i]];
+		corrections_tmp[i] = corrections[to_keep[i]];
+	}
+	// The temporaries were allocated once at their final size; swapping hands them over without a second copy
+	bounding_boxes.swap(bounding_boxes_tmp);
+	scores.swap(scores_tmp);
+	corrections.swap(corrections_tmp);
+}
+
+// Use the heatmap generated by PNet to generate bounding boxes in the original image space, also generate the correction values and scores of the bounding boxes as well.
+// Every heatmap cell with a value of at least `threshold` produces one entry in each output; `corrections` must hold four maps (x, y, width, height offsets) indexed like the heatmap.
+void generate_bounding_boxes(std::vector<cv::Rect_<float> >& o_bounding_boxes, std::vector<float>& o_scores, 
+	std::vector<cv::Rect_<float> >& o_corrections, const cv::Mat_<float>& heatmap, const std::vector<cv::Mat_<float> >& corrections,
+	float scale, float threshold, int face_support)
+{
+
+	// Correction for the pooling
+	const int stride = 2;
+
+	o_bounding_boxes.clear();
+	o_scores.clear();
+	o_corrections.clear();
+
+	// Cells are visited column by column, which fixes the order of the generated boxes
+	for (int x = 0; x < heatmap.cols; ++x)
+	{
+		for(int y = 0; y < heatmap.rows; ++y)
+		{
+			const float score = heatmap.at<float>(y, x);
+			if (score >= threshold)
+			{
+				// Corners of the face_support sized window behind this cell, divided by the pyramid scale and truncated to whole pixels
+				float min_x = int((stride * x + 1) / scale);
+				float max_x = int((stride * x + face_support) / scale);
+				float min_y = int((stride * y + 1) / scale);
+				float max_y = int((stride * y + face_support) / scale);
+
+				o_bounding_boxes.push_back(cv::Rect_<float>(min_x, min_y, max_x - min_x, max_y - min_y));
+				o_scores.push_back(score);
+
+				float corr_x = corrections[0].at<float>(y, x);
+				float corr_y = corrections[1].at<float>(y, x);
+				float corr_width = corrections[2].at<float>(y, x);
+				float corr_height = corrections[3].at<float>(y, x);
+				o_corrections.push_back(cv::Rect_<float>(corr_x, corr_y, corr_width, corr_height));
+			}
+		}
+	}
+}
+
+// Converting the bounding boxes to squares: every box is replaced by a square whose side is the longer of its two
+// sides, centred on the original box, after which position and size are truncated to whole numbers
+void rectify(std::vector<cv::Rect_<float> >& total_bboxes)
+{
+
+	for (cv::Rect_<float>& box : total_bboxes)
+	{
+		const float side = std::max(box.width, box.height);
+
+		// Moving the origin by half of the growth along each axis keeps the centre where it was
+		const float shifted_x = box.x + 0.5 * (box.width - side);
+		const float shifted_y = box.y + 0.5 * (box.height - side);
+
+		// Drop the fractional parts of the position and of the side length
+		const int whole_side = (int)side;
+		box.x = (int)shifted_x;
+		box.y = (int)shifted_y;
+		box.width = whole_side;
+		box.height = whole_side;
+	}
+
+}
+
+// Refine each bounding box with the offsets predicted for it (corrections[i] belongs to total_bboxes[i]). The box is
+// first converted to an integer rectangle, grown by one in width and height when add1 is set. The x/y offsets then
+// move its left/top edges and the width/height offsets its right/bottom edges, each scaled by the box width/height.
+void apply_correction(std::vector<cv::Rect_<float> >& total_bboxes, const std::vector<cv::Rect_<float> > corrections, bool add1)
+{
+
+	const int grow = add1 ? 1 : 0;
+
+	for (size_t box = 0; box < total_bboxes.size(); ++box)
+	{
+		const cv::Rect base = total_bboxes[box];
+		const cv::Rect_<float>& offset = corrections[box];
+		const int w = base.width + grow;
+		const int h = base.height + grow;
+
+		const float left = base.x + offset.x * w;
+		const float top = base.y + offset.y * h;
+		const float right = base.x + w + w * offset.width;
+		const float bottom = base.y + h + h * offset.height;
+
+		total_bboxes[box] = cv::Rect_<float>(left, top, right - left, bottom - top);
+	}
+}
+
+
+// The actual MTCNN face detection step: a cascade of three networks. PNet proposes boxes on every level of an image
+// pyramid, RNet and then ONet re-score and refine the surviving boxes, with non-maximum suppression in between.
+//   o_regions, o_confidences - receive the detected boxes and their ONet scores; results are appended, nothing is cleared
+//   img_in                   - image to search; a single channel image is expanded to three channels first
+//   min_face_size            - face size in pixels that the first pyramid level scales down to PNet's 12 pixel support
+//   t1, t2, t3               - lowest face probability that survives the PNet, RNet and ONet stage respectively
+// Returns true if o_regions is non-empty on exit.
+bool FaceDetectorMTCNN::DetectFaces(std::vector<cv::Rect_<float> >& o_regions, const cv::Mat& img_in, 
+	std::vector<float>& o_confidences, int min_face_size, float t1, float t2, float t3)
+{
+	int height_orig = img_in.size().height;
+	int width_orig = img_in.size().width;
+
+	// Size ratio of image pyramids
+	double pyramid_factor = 0.709;
+
+	// Face support region is 12x12 px, so from that can work out the largest
+	// scale(which is 12 / min), and work down from there to smallest scale(no smaller than 12x12px)
+	int min_dim = std::min(height_orig, width_orig);
+
+	int face_support = 12;
+
+	// An empty image, a non-positive minimum face size, or an image smaller than the minimum face size leaves no
+	// pyramid level to search. The scale count would then be non-finite or negative, which must never reach the
+	// vector constructors below, so stop here with the outputs exactly as they were passed in.
+	int num_scales = 0;
+	if (min_dim > 0 && min_face_size > 0)
+	{
+		num_scales = floor(log((double)min_face_size / (double)min_dim) / log(pyramid_factor)) + 1;
+	}
+	if (num_scales <= 0)
+	{
+		return o_regions.size() > 0;
+	}
+
+	cv::Mat input_img;
+
+	// Force the image to three channels
+	if (img_in.channels() == 1)
+	{
+		cv::cvtColor(img_in, input_img, cv::COLOR_GRAY2RGB);
+	}
+	else
+	{
+		input_img = img_in;
+	}
+
+	cv::Mat img_float;
+	input_img.convertTo(img_float, CV_32FC3);
+
+	std::vector<cv::Rect_<float> > proposal_boxes_all;
+	std::vector<float> scores_all;
+	std::vector<cv::Rect_<float> > proposal_corrections_all;
+
+	// Every pyramid scale gets its own containers, which are merged once all scales have been processed
+	std::vector<std::vector<cv::Rect_<float> > > proposal_boxes_cross_scale(num_scales);
+	std::vector<std::vector<float> > scores_cross_scale(num_scales);
+	std::vector<std::vector<cv::Rect_<float> > > proposal_corrections_cross_scale(num_scales);
+
+	// PNet stage: run the proposal network on every level of the image pyramid
+	for (int i = 0; i < num_scales; ++i)
+	{
+		double scale = ((double)face_support / (double)min_face_size)*cv::pow(pyramid_factor, i);
+
+		int h_pyr = ceil(height_orig * scale);
+		int w_pyr = ceil(width_orig * scale);
+
+		cv::Mat normalised_img;
+		cv::resize(img_float, normalised_img, cv::Size(w_pyr, h_pyr));
+		
+		// Normalize the image
+		normalised_img = (normalised_img - 127.5) * 0.0078125;
+
+		// Actual PNet CNN step
+		std::vector<cv::Mat_<float> > pnet_out = PNet.Inference(normalised_img, true, false);
+
+		// Clear the precomputations, as the image sizes will be different
+		PNet.ClearPrecomp();
+
+		// Extract the probabilities from PNet response
+		// (1 / (1 + exp(a - b)) is the two-class softmax of outputs a and b, evaluated for b)
+		cv::Mat_<float> prob_heatmap;
+		cv::exp(pnet_out[0]- pnet_out[1], prob_heatmap);
+		prob_heatmap = 1.0 / (1.0 + prob_heatmap);
+
+		// The remaining PNet outputs are the bounding box correction maps
+		std::vector<cv::Mat_<float>> corrections_heatmap(pnet_out.begin() + 2, pnet_out.end());
+
+		// Grab the detections
+		std::vector<cv::Rect_<float> > proposal_boxes;
+		std::vector<float> scores;
+		std::vector<cv::Rect_<float> > proposal_corrections;
+		generate_bounding_boxes(proposal_boxes, scores, proposal_corrections, prob_heatmap, corrections_heatmap, scale, t1, face_support);
+
+		proposal_boxes_cross_scale[i] = proposal_boxes;
+		scores_cross_scale[i] = scores;
+		proposal_corrections_cross_scale[i] = proposal_corrections;
+	}
+
+	// Perform non-maximum suppression on proposals within each scale and combine them
+	for (int i = 0; i < num_scales; ++i)
+	{
+		std::vector<int> to_keep = non_maximum_supression(proposal_boxes_cross_scale[i], scores_cross_scale[i], 0.5, false);
+		select_subset(to_keep, proposal_boxes_cross_scale[i], scores_cross_scale[i], proposal_corrections_cross_scale[i]);
+
+		proposal_boxes_all.insert(proposal_boxes_all.end(), proposal_boxes_cross_scale[i].begin(), proposal_boxes_cross_scale[i].end());
+		scores_all.insert(scores_all.end(), scores_cross_scale[i].begin(), scores_cross_scale[i].end());
+		proposal_corrections_all.insert(proposal_corrections_all.end(), proposal_corrections_cross_scale[i].begin(), proposal_corrections_cross_scale[i].end());
+	}
+
+	// Preparation for RNet step
+
+	// Non maximum suppression across bounding boxes, and their offset correction
+	std::vector<int> to_keep = non_maximum_supression(proposal_boxes_all, scores_all, 0.7, false);
+	select_subset(to_keep, proposal_boxes_all, scores_all, proposal_corrections_all);
+
+	apply_correction(proposal_boxes_all, proposal_corrections_all, false);
+
+	// Convert to squares and round
+	rectify(proposal_boxes_all);
+
+	// Creating proposal images from previous step detections
+	std::vector<bool> above_thresh;
+	above_thresh.resize(proposal_boxes_all.size(), false);
+
+	// RNet stage: every surviving proposal is cropped, resized to 24x24 and re-scored
+	for (size_t k = 0; k < proposal_boxes_all.size(); ++k) 
+	{
+		float width_target = proposal_boxes_all[k].width + 1;
+		float height_target = proposal_boxes_all[k].height + 1;
+
+		// Work out the start and end indices in the original image
+		int start_x_in = cv::max((int)(proposal_boxes_all[k].x - 1), 0);
+		int start_y_in = cv::max((int)(proposal_boxes_all[k].y - 1), 0);
+		int end_x_in = cv::min((int)(proposal_boxes_all[k].x + width_target - 1), width_orig);
+		int end_y_in = cv::min((int)(proposal_boxes_all[k].y + height_target - 1), height_orig);
+
+		// Work out the start and end indices in the target image
+		int	start_x_out = cv::max((int)(-proposal_boxes_all[k].x + 1), 0);
+		int start_y_out = cv::max((int)(-proposal_boxes_all[k].y + 1), 0);
+		int end_x_out = cv::min(width_target - (proposal_boxes_all[k].x + proposal_boxes_all[k].width - width_orig), width_target);
+		int end_y_out = cv::min(height_target - (proposal_boxes_all[k].y + proposal_boxes_all[k].height - height_orig), height_target);
+
+		// Paste the part of the proposal lying inside the image into a zero filled patch of the proposal's size
+		cv::Mat tmp(height_target, width_target, CV_32FC3, cv::Scalar(0.0f,0.0f,0.0f));
+
+		img_float(cv::Rect(start_x_in, start_y_in, end_x_in - start_x_in, end_y_in - start_y_in)).copyTo(
+			tmp(cv::Rect(start_x_out, start_y_out, end_x_out - start_x_out, end_y_out - start_y_out)));
+		
+		cv::Mat prop_img;
+		cv::resize(tmp, prop_img, cv::Size(24, 24));
+			
+		prop_img = (prop_img - 127.5) * 0.0078125;
+		
+		// Perform RNet on the proposal image
+		std::vector<cv::Mat_<float> > rnet_out = RNet.Inference(prop_img, true, false);
+
+		// Outputs 0 and 1 give the face probability, outputs 2 to 5 the new corrections for this box
+		float prob = 1.0 / (1.0 + cv::exp(rnet_out[0].at<float>(0) - rnet_out[0].at<float>(1)));
+		scores_all[k] = prob;
+		proposal_corrections_all[k].x = rnet_out[0].at<float>(2);
+		proposal_corrections_all[k].y = rnet_out[0].at<float>(3);
+		proposal_corrections_all[k].width = rnet_out[0].at<float>(4);
+		proposal_corrections_all[k].height = rnet_out[0].at<float>(5);
+		above_thresh[k] = (prob >= t2);
+	}
+
+	to_keep.clear();
+	for (size_t i = 0; i < above_thresh.size(); ++i)
+	{
+		if (above_thresh[i])
+		{
+			to_keep.push_back(i);
+		}
+	}
+
+	// Pick only the bounding boxes above the threshold
+	select_subset(to_keep, proposal_boxes_all, scores_all, proposal_corrections_all);
+
+	// Non maximum suppression across bounding boxes, and their offset correction
+	to_keep = non_maximum_supression(proposal_boxes_all, scores_all, 0.7, false);
+	select_subset(to_keep, proposal_boxes_all, scores_all, proposal_corrections_all);
+
+	apply_correction(proposal_boxes_all, proposal_corrections_all, false);
+
+	// Convert to squares and round
+	rectify(proposal_boxes_all);
+
+	// Preparing for the ONet stage
+	above_thresh.clear();
+	above_thresh.resize(proposal_boxes_all.size());
+
+	// ONet stage: every surviving proposal is cropped, resized to 48x48 and re-scored
+	for (size_t k = 0; k < proposal_boxes_all.size(); ++k)
+	{
+		float width_target = proposal_boxes_all[k].width + 1;
+		float height_target = proposal_boxes_all[k].height + 1;
+
+		// Work out the start and end indices in the original image
+		int start_x_in = cv::max((int)(proposal_boxes_all[k].x - 1), 0);
+		int start_y_in = cv::max((int)(proposal_boxes_all[k].y - 1), 0);
+		int end_x_in = cv::min((int)(proposal_boxes_all[k].x + width_target - 1), width_orig);
+		int end_y_in = cv::min((int)(proposal_boxes_all[k].y + height_target - 1), height_orig);
+
+		// Work out the start and end indices in the target image
+		int	start_x_out = cv::max((int)(-proposal_boxes_all[k].x + 1), 0);
+		int start_y_out = cv::max((int)(-proposal_boxes_all[k].y + 1), 0);
+		int end_x_out = cv::min(width_target - (proposal_boxes_all[k].x + proposal_boxes_all[k].width - width_orig), width_target);
+		int end_y_out = cv::min(height_target - (proposal_boxes_all[k].y + proposal_boxes_all[k].height - height_orig), height_target);
+
+		// Paste the part of the proposal lying inside the image into a zero filled patch of the proposal's size
+		cv::Mat tmp(height_target, width_target, CV_32FC3, cv::Scalar(0.0f, 0.0f, 0.0f));
+
+		img_float(cv::Rect(start_x_in, start_y_in, end_x_in - start_x_in, end_y_in - start_y_in)).copyTo(
+			tmp(cv::Rect(start_x_out, start_y_out, end_x_out - start_x_out, end_y_out - start_y_out)));
+
+		cv::Mat prop_img;
+		cv::resize(tmp, prop_img, cv::Size(48, 48));
+
+		prop_img = (prop_img - 127.5) * 0.0078125;
+
+		// Perform ONet on the proposal image
+		std::vector<cv::Mat_<float> > onet_out = ONet.Inference(prop_img, true, false);
+
+		// Outputs 0 and 1 give the face probability, outputs 2 to 5 the new corrections for this box
+		float prob = 1.0 / (1.0 + cv::exp(onet_out[0].at<float>(0) - onet_out[0].at<float>(1)));
+		scores_all[k] = prob;
+		proposal_corrections_all[k].x = onet_out[0].at<float>(2);
+		proposal_corrections_all[k].y = onet_out[0].at<float>(3);
+		proposal_corrections_all[k].width = onet_out[0].at<float>(4);
+		proposal_corrections_all[k].height = onet_out[0].at<float>(5);
+		above_thresh[k] = (prob >= t3);
+	}
+
+	to_keep.clear();
+	for (size_t i = 0; i < above_thresh.size(); ++i)
+	{
+		if (above_thresh[i])
+		{
+			to_keep.push_back(i);
+		}
+	}
+
+	// Pick only the bounding boxes above the threshold
+	select_subset(to_keep, proposal_boxes_all, scores_all, proposal_corrections_all);
+	apply_correction(proposal_boxes_all, proposal_corrections_all, true);
+
+	// Non maximum suppression across bounding boxes, and their offset correction
+	// (the final `true` measures overlap against the smaller of the two boxes instead of their union)
+	to_keep = non_maximum_supression(proposal_boxes_all, scores_all, 0.7, true);
+	select_subset(to_keep, proposal_boxes_all, scores_all, proposal_corrections_all);
+
+	// Correct the box to expectation to be tight around facial landmarks
+	for (size_t k = 0; k < proposal_boxes_all.size(); ++k)
+	{
+		proposal_boxes_all[k].x = proposal_boxes_all[k].width * -0.0075 + proposal_boxes_all[k].x;
+		proposal_boxes_all[k].y = proposal_boxes_all[k].height * 0.2459 + proposal_boxes_all[k].y;
+		proposal_boxes_all[k].width = 1.0323 * proposal_boxes_all[k].width;
+		proposal_boxes_all[k].height = 0.7751 * proposal_boxes_all[k].height;
+
+		o_regions.push_back(cv::Rect_<float>(proposal_boxes_all[k].x, proposal_boxes_all[k].y, proposal_boxes_all[k].width, proposal_boxes_all[k].height));
+		o_confidences.push_back(scores_all[k]);
+	}
+
+	return o_regions.size() > 0;
+}
+
--- a/pkg/OpenFace/lib/local/LandmarkDetector/src/LandmarkDetectionValidator.cpp
+++ b/pkg/OpenFace/lib/local/LandmarkDetector/src/LandmarkDetectionValidator.cpp
@@ -0,0 +1,550 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
+// all rights reserved.
+//
+// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
+//
+// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.  
+// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
+//
+// License can be found in OpenFace-license.txt
+//
+//     * Any publications arising from the use of this software, including but
+//       not limited to academic journal and conference publications, technical
+//       reports and manuals, must cite at least one of the following works:
+//
+//       OpenFace 2.0: Facial Behavior Analysis Toolkit
+//       Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
+//       in IEEE International Conference on Automatic Face and Gesture Recognition, 2018  
+//
+//       Convolutional experts constrained local model for facial landmark detection.
+//       A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
+//       in Computer Vision and Pattern Recognition Workshops, 2017.    
+//
+//       Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
+//       Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling 
+//       in IEEE International. Conference on Computer Vision (ICCV),  2015 
+//
+//       Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
+//       Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson 
+//       in Facial Expression Recognition and Analysis Challenge, 
+//       IEEE International Conference on Automatic Face and Gesture Recognition, 2015 
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stdafx.h"
+
+#include "LandmarkDetectionValidator.h"
+
+// OpenCV includes
+#include <opencv2/core/core.hpp>
+#include <opencv2/imgproc.hpp>
+
+// System includes
+#include <fstream>
+
+// Math includes
+#define _USE_MATH_DEFINES
+#include <cmath>
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+// Local includes
+#include "LandmarkDetectorUtils.h"
+#include "CNN_utils.h"
+
+using namespace LandmarkDetector;
+
+// Copy constructor. The convolutional kernels, fully connected weights and biases, mean images and standard
+// deviations are duplicated matrix by matrix with clone(); the members in the initialiser list are copy constructed.
+// NOTE(review): copying a cv::Mat header shares its data, so the initialiser-list matrices presumably alias `other` - confirm this is intended.
+DetectionValidator::DetectionValidator(const DetectionValidator& other) : orientations(other.orientations), paws(other.paws),
+cnn_subsampling_layers(other.cnn_subsampling_layers), cnn_layer_types(other.cnn_layer_types), cnn_convolutional_layers_im2col_precomp(other.cnn_convolutional_layers_im2col_precomp),
+cnn_convolutional_layers_weights(other.cnn_convolutional_layers_weights)
+{
+	// Convolutional kernels: rebuild the nested containers level by level, cloning the matrices at the innermost one
+	const auto& src_kernels = other.cnn_convolutional_layers;
+	auto& dst_kernels = this->cnn_convolutional_layers;
+	dst_kernels.resize(src_kernels.size());
+	for (size_t v = 0; v < src_kernels.size(); ++v)
+	{
+		dst_kernels[v].resize(src_kernels[v].size());
+		for (size_t l = 0; l < src_kernels[v].size(); ++l)
+		{
+			dst_kernels[v][l].resize(src_kernels[v][l].size());
+			for (size_t i = 0; i < src_kernels[v][l].size(); ++i)
+			{
+				const auto& src_row = src_kernels[v][l][i];
+				auto& dst_row = dst_kernels[v][l][i];
+				dst_row.resize(src_row.size());
+				for (size_t k = 0; k < src_row.size(); ++k)
+				{
+					dst_row[k] = src_row[k].clone();
+				}
+			}
+		}
+	}
+
+	// Fully connected weights
+	const auto& src_weights = other.cnn_fully_connected_layers_weights;
+	auto& dst_weights = this->cnn_fully_connected_layers_weights;
+	dst_weights.resize(src_weights.size());
+	for (size_t v = 0; v < src_weights.size(); ++v)
+	{
+		dst_weights[v].resize(src_weights[v].size());
+		for (size_t l = 0; l < src_weights[v].size(); ++l)
+		{
+			dst_weights[v][l] = src_weights[v][l].clone();
+		}
+	}
+
+	// Fully connected biases
+	const auto& src_biases = other.cnn_fully_connected_layers_biases;
+	auto& dst_biases = this->cnn_fully_connected_layers_biases;
+	dst_biases.resize(src_biases.size());
+	for (size_t v = 0; v < src_biases.size(); ++v)
+	{
+		dst_biases[v].resize(src_biases[v].size());
+		for (size_t l = 0; l < src_biases[v].size(); ++l)
+		{
+			dst_biases[v][l] = src_biases[v][l].clone();
+		}
+	}
+
+	// Normalisation terms
+	this->mean_images.resize(other.mean_images.size());
+	for (size_t i = 0; i < other.mean_images.size(); ++i)
+	{
+		this->mean_images[i] = other.mean_images[i].clone();
+	}
+	this->standard_deviations.resize(other.standard_deviations.size());
+	for (size_t i = 0; i < other.standard_deviations.size(); ++i)
+	{
+		this->standard_deviations[i] = other.standard_deviations[i].clone();
+	}
+}
+
+//===========================================================================
+// Read in the landmark detection validation module from the binary file at `location`; the reads below consume the file sequentially, so their order must not change
+void DetectionValidator::Read(std::string location)
+{
+	// If the file cannot be opened, only the warning at the bottom of this function is printed
+	std::ifstream detection_validator_stream (location, std::ios::in | std::ios::binary);
+	if (detection_validator_stream.is_open())	
+	{				
+		detection_validator_stream.seekg (0, std::ios::beg);
+
+		// Read validator type
+		int validator_type;
+		detection_validator_stream.read ((char*)&validator_type, 4);
+		// Any type other than 3 is reported, but reading carries on regardless (only the per-view network section is skipped)
+		if (validator_type != 3)
+		{
+			std::cout << "ERROR: Using old face validator, no longer supported" << std::endl;
+		}
+
+		// Read the number of views (orientations) within the validator
+		int n;
+		detection_validator_stream.read ((char*)&n, 4);
+	
+		orientations.resize(n);
+		// Each orientation is read as a matrix whose first three elements are taken as the view's angles in degrees
+		for(int i = 0; i < n; i++)
+		{
+			cv::Mat_<double> orientation_tmp;
+			LandmarkDetector::ReadMatBin(detection_validator_stream, orientation_tmp);		
+		
+			orientations[i] = cv::Vec3d(orientation_tmp.at<double>(0), orientation_tmp.at<double>(1), orientation_tmp.at<double>(2));
+
+			// Convert from degrees to radians
+			orientations[i] = orientations[i] * M_PI / 180.0;
+		}
+
+		// Initialise the piece-wise affine warps, biases and weights
+		paws.resize(n);
+
+		cnn_convolutional_layers_weights.resize(n);
+		cnn_convolutional_layers_im2col_precomp.resize(n);
+		cnn_convolutional_layers.resize(n);
+		cnn_fully_connected_layers_weights.resize(n);
+		cnn_layer_types.resize(n);
+		cnn_fully_connected_layers_biases.resize(n);
+
+		// Initialise the normalisation terms
+		mean_images.resize(n);
+		standard_deviations.resize(n);
+
+		// Read in the validators for each of the views
+		for(int i = 0; i < n; i++)
+		{
+			// Per view the file holds: mean image, standard deviations, the network description (type 3 only) and the warp
+			// Read in the mean images
+			cv::Mat_<double> mean_img;
+			LandmarkDetector::ReadMatBin(detection_validator_stream, mean_img);
+			mean_img.convertTo(mean_images[i], CV_32F);
+			mean_images[i] = mean_images[i].t();
+
+			cv::Mat_<double> std_dev;
+			LandmarkDetector::ReadMatBin(detection_validator_stream, std_dev);
+			std_dev.convertTo(standard_deviations[i], CV_32F);
+			// Like the mean image, the deviations are converted to float and stored transposed
+			standard_deviations[i] = standard_deviations[i].t();
+
+			// Model specifics
+			if (validator_type == 3)
+			{
+				int network_depth;
+				detection_validator_stream.read((char*)&network_depth, 4);
+
+				cnn_layer_types[i].resize(network_depth);
+
+				for (int layer = 0; layer < network_depth; ++layer)
+				{
+					// Only layer types 0 (convolutional) and 2 (fully connected) carry data in the file; for any other type just the code is recorded
+					int layer_type;
+					detection_validator_stream.read((char*)&layer_type, 4);
+					cnn_layer_types[i][layer] = layer_type;
+
+					// convolutional
+					if (layer_type == 0)
+					{
+
+						// Read the number of input maps
+						int num_in_maps;
+						detection_validator_stream.read((char*)&num_in_maps, 4);
+
+						// Read the number of kernels for each input map
+						int num_kernels;
+						detection_validator_stream.read((char*)&num_kernels, 4);
+
+						std::vector<std::vector<cv::Mat_<float> > > kernels;
+
+						kernels.resize(num_in_maps);
+						// One bias per kernel, stored ahead of the kernel matrices
+						std::vector<float> biases;
+						for (int k = 0; k < num_kernels; ++k)
+						{
+							float bias;
+							detection_validator_stream.read((char*)&bias, 4);
+							biases.push_back(bias);
+						}
+
+						// For every input map
+						for (int in = 0; in < num_in_maps; ++in)
+						{
+							kernels[in].resize(num_kernels);
+
+							// For every kernel on that input map
+							for (int k = 0; k < num_kernels; ++k)
+							{
+								ReadMatBin(detection_validator_stream, kernels[in][k]);
+
+							}
+						}
+
+						cnn_convolutional_layers[i].push_back(kernels);
+
+						// Rearrange the kernels for faster inference with FFT
+						std::vector<std::vector<cv::Mat_<float> > > kernels_rearr;
+						kernels_rearr.resize(num_kernels);
+
+						// Fill up the rearranged layer
+						for (int k = 0; k < num_kernels; ++k)
+						{
+							for (int in = 0; in < num_in_maps; ++in)
+							{
+								kernels_rearr[k].push_back(kernels[in][k]);
+							}
+						}
+
+						// Rearrange the flattened kernels into weight matrices for direct convolution computation
+						cv::Mat_<float> weight_matrix(num_in_maps * kernels_rearr[0][0].rows * kernels_rearr[0][0].cols, num_kernels);
+						for (int k = 0; k < num_kernels; ++k)
+						{
+							for (int i = 0; i < num_in_maps; ++i) // NOTE: this `i` shadows the view index `i` of the enclosing loop
+							{
+								// Flatten the kernel
+								cv::Mat_<float> k_flat = kernels_rearr[k][i].t();
+								k_flat = k_flat.reshape(0, 1).t();
+								k_flat.copyTo(weight_matrix(cv::Rect(k, i * kernels_rearr[0][0].rows * kernels_rearr[0][0].cols, 1, kernels_rearr[0][0].rows * kernels_rearr[0][0].cols)));
+							}
+						}
+
+						// Transpose the weight matrix for more convenient computation
+						weight_matrix = weight_matrix.t();
+
+						// Add a bias term to the weight matrix for efficiency
+						cv::Mat_<float> W(weight_matrix.rows, weight_matrix.cols + 1, 1.0);
+						for (int k = 0; k < weight_matrix.rows; ++k)
+						{
+							W.at<float>(k, weight_matrix.cols) = biases[k];
+						}
+						weight_matrix.copyTo(W(cv::Rect(0, 0, weight_matrix.cols, weight_matrix.rows)));
+						// The combined matrix is stored transposed, alongside an empty im2col buffer for this layer
+						cnn_convolutional_layers_weights[i].push_back(W.t());
+						cnn_convolutional_layers_im2col_precomp[i].push_back(cv::Mat_<float>());
+					}
+					else if (layer_type == 2)
+					{
+						cv::Mat_<float> biases;
+						ReadMatBin(detection_validator_stream, biases);
+						cnn_fully_connected_layers_biases[i].push_back(biases);
+
+						// Fully connected layer weights (stored in the file after the biases)
+						cv::Mat_<float> weights;
+						ReadMatBin(detection_validator_stream, weights);
+						cnn_fully_connected_layers_weights[i].push_back(weights);
+					}
+				}
+			}
+			// Read in the piece-wise affine warps
+			paws[i].Read(detection_validator_stream);
+		}
+		
+	}
+	else
+	{
+		std::cout << "WARNING: Can't find the Face checker location" << std::endl;
+	}
+}
+
+//===========================================================================
+// Check if the fitting actually succeeded: the image region spanned by the landmarks is warped with the piece-wise
+// affine warp of the view closest to `orientation` and scored by that view's CNN. Returns a confidence between 0 (low)
+// and 1 (high), or 0 if the landmark bounding box leaves no usable image region. detected_landmarks is a single
+// column holding all x coordinates followed by all y coordinates; only a copy of it is changed here.
+float DetectionValidator::Check(const cv::Vec3d& orientation, const cv::Mat_<uchar>& intensity_img, cv::Mat_<float>& detected_landmarks)
+{
+	const int view_id = GetViewId(orientation);
+
+	// First only use the ROI of the image of interest; the landmarks are adjusted on a private copy
+	cv::Mat_<float> local_landmarks = detected_landmarks.clone();
+
+	float bb_min_x, bb_max_x, bb_min_y, bb_max_y;
+	ExtractBoundingBox(local_landmarks, bb_min_x, bb_max_x, bb_min_y, bb_max_y);
+
+	// Picking the ROI (some extra space for bilinear interpolation), kept inside the image
+	int roi_x0 = (int)(bb_min_x - 3.0f);
+	int roi_x1 = (int)(bb_max_x + 3.0f);
+	int roi_y0 = (int)(bb_min_y - 3.0f);
+	int roi_y1 = (int)(bb_max_y + 3.0f);
+	roi_x0 = (roi_x0 < 0) ? 0 : roi_x0;
+	roi_y0 = (roi_y0 < 0) ? 0 : roi_y0;
+	roi_x1 = (roi_x1 > intensity_img.cols - 1) ? intensity_img.cols - 1 : roi_x1;
+	roi_y1 = (roi_y1 > intensity_img.rows - 1) ? intensity_img.rows - 1 : roi_y1;
+
+	// Express the landmarks relative to the ROI origin. xs and ys are headers over the two halves of
+	// local_landmarks, and the assignments are meant to write the shifted values back through them.
+	const int half = detected_landmarks.rows / 2;
+	cv::Mat_<float> xs = local_landmarks(cv::Rect(0, 0, 1, half));
+	cv::Mat_<float> ys = local_landmarks(cv::Rect(0, half, 1, half));
+	xs = xs - roi_x0;
+	ys = ys - roi_y0;
+
+	// If the ROI is non existent return failure (this could happen if all landmarks are outside of the image)
+	const int roi_width = roi_x1 - roi_x0;
+	const int roi_height = roi_y1 - roi_y0;
+	if (roi_width <= 1 || roi_height <= 1)
+	{
+		return 0.0f;
+	}
+
+	cv::Mat_<float> roi_float;
+	intensity_img(cv::Rect(roi_x0, roi_y0, roi_width, roi_height)).convertTo(roi_float, CV_32F);
+
+	// the piece-wise affine image warping
+	cv::Mat_<float> warped;
+	paws[view_id].Warp(roi_float, warped, local_landmarks);
+
+	// The actual validation step, converted to a more interpretable signal (0 low confidence, 1 high confidence)
+	const double decision = CheckCNN(warped, view_id);
+	return (float)(0.5 * (1.0 - decision));
+}
+
+// Scores a warped face image with the CNN stored for view `view_id`. The masked, normalised pixels are laid back out
+// as an image and passed through the view's layers in order; the position of the largest final output is then
+// mapped to the centre of its bin, with the outputs treated as equal width bins covering the range [-1, 1].
+double DetectionValidator::CheckCNN(const cv::Mat_<float>& warped_img, int view_id)
+{
+
+	cv::Mat_<float> feature_vec;
+	NormaliseWarpedToVector(warped_img, feature_vec, view_id);
+
+	// Create a normalised image from the crop vector
+	cv::Mat_<float> img(warped_img.size(), 0.0);
+	img = img.t();
+
+	cv::Mat mask = paws[view_id].pixel_mask.t();
+	cv::MatIterator_<uchar> mask_it = mask.begin<uchar>();
+	cv::MatIterator_<float> feature_it = feature_vec.begin();
+	cv::MatIterator_<float> img_it = img.begin();
+
+	// Walk the transposed image and mask in step; pixels outside the mask keep their initial value of zero
+	const int n_pixels = img.cols * img.rows;
+	for (int px = 0; px < n_pixels; ++px, ++mask_it, ++img_it)
+	{
+		// assign the feature to image if it is within the mask
+		if (*mask_it)
+		{
+			*img_it = (float)*feature_it++;
+		}
+	}
+	img = img.t();
+
+	// Positions within this view's lists of convolutional and fully connected layers
+	int cnn_layer = 0;
+	int fully_connected_layer = 0;
+
+	std::vector<cv::Mat_<float> > input_maps(1, img);
+	std::vector<cv::Mat_<float> > outputs;
+
+	for (size_t layer = 0; layer < cnn_layer_types[view_id].size(); ++layer)
+	{
+		// Determine layer type
+		const int layer_type = cnn_layer_types[view_id][layer];
+
+		switch (layer_type)
+		{
+		case 0: // Convolutional layer
+		{
+			const auto& first_kernel = cnn_convolutional_layers[view_id][cnn_layer][0][0];
+			convolution_direct_blas(outputs, input_maps, cnn_convolutional_layers_weights[view_id][cnn_layer], first_kernel.rows, first_kernel.cols, cnn_convolutional_layers_im2col_precomp[view_id][cnn_layer]);
+			cnn_layer++;
+			break;
+		}
+		case 1: // Max pooling
+		{
+			max_pooling(outputs, input_maps, 2, 2, 2, 2);
+			break;
+		}
+		case 2: // Fully connected layer
+		{
+			fully_connected(outputs, input_maps, cnn_fully_connected_layers_weights[view_id][fully_connected_layer].t(), cnn_fully_connected_layers_biases[view_id][fully_connected_layer]);
+			fully_connected_layer++;
+			break;
+		}
+		case 3: // ReLU
+		{
+			outputs.clear();
+			for (size_t k = 0; k < input_maps.size(); ++k)
+			{
+				// Apply the ReLU
+				cv::threshold(input_maps[k], input_maps[k], 0, 0, cv::THRESH_TOZERO);
+				outputs.push_back(input_maps[k]);
+			}
+			break;
+		}
+		case 4: // Sigmoid
+		{
+			outputs.clear();
+			for (size_t k = 0; k < input_maps.size(); ++k)
+			{
+				// Apply the sigmoid
+				cv::exp(-input_maps[k], input_maps[k]);
+				input_maps[k] = 1.0 / (1.0 + input_maps[k]);
+				outputs.push_back(input_maps[k]);
+			}
+			break;
+		}
+		default: // Any other type leaves the outputs of the previous layer in place
+		{
+			break;
+		}
+		}
+
+		// Set the outputs of this layer to inputs of the next
+		input_maps = outputs;
+	}
+
+	// Convert the class label to a continuous value
+	double max_val = 0;
+	cv::Point max_loc;
+	cv::minMaxLoc(outputs[0].t(), 0, &max_val, 0, &max_loc);
+
+	// Unquantizing the softmax layer to continuous value
+	const double range_min = -1;
+	const double range_max = 1;
+	const double bin_width = (range_max - range_min) / (double)outputs[0].cols; // This should be saved somewhere
+
+	return range_min + bin_width / 2.0 + max_loc.y * bin_width;
+}
+
+// Turns a warped image into the normalised feature vector of view `view_id`: the pixels selected by the view's mask
+// are packed into a column vector, which is normalised locally (its own mean subtracted, then divided by its own
+// standard deviation, a zero deviation being replaced by 1) and then globally with the view's stored mean image
+// and standard deviations. The result is written to feature_vec.
+void DetectionValidator::NormaliseWarpedToVector(const cv::Mat_<float>& warped_img, cv::Mat_<float>& feature_vec, int view_id)
+{
+
+	// Image and mask are both traversed as their transposes
+	cv::Mat_<float> warped_t = warped_img.t();
+	cv::Mat maskT = paws[view_id].pixel_mask.t();
+
+	// the vector to be filled with paw values
+	cv::Mat_<float> vec(paws[view_id].number_of_pixels,1);
+
+	cv::MatIterator_<float> dst = vec.begin();
+	cv::MatIterator_<float> src = warped_t.begin();
+
+	// the mask indicating if point is within or outside the face region
+	cv::MatIterator_<uchar> inside = maskT.begin<uchar>();
+
+	// Copy the masked pixels, in traversal order, into consecutive slots of the vector;
+	// the image and the mask iterators advance together, the vector iterator only on a hit
+	const int n_pixels = warped_img.cols * warped_img.rows;
+	for (int px = 0; px < n_pixels; ++px, ++inside, ++src)
+	{
+		// if is within mask
+		if (*inside)
+		{
+			*dst++ = *src;
+		}
+	}
+
+	// Local normalisation
+	cv::Scalar mean;
+	cv::Scalar stddev;
+	cv::meanStdDev(vec, mean, stddev);
+
+	// subtract the vector's own mean
+	vec -= mean[0];
+
+	// Normalise the image; a zero deviation is replaced by 1
+	double divisor = stddev[0];
+	if (divisor == 0)
+	{
+		divisor = 1;
+	}
+
+	vec /= divisor;
+
+	// Global normalisation: subtract the view's mean image and divide,
+	// element by element, by the view's standard deviations
+	feature_vec = (vec - mean_images[view_id])  / standard_deviations[view_id];
+}
+
+// Getting the closest view center based on orientation: returns the index of the stored view orientation with the
+// smallest distance to `orientation` (the first one on ties), or 0 when no views are stored
+int DetectionValidator::GetViewId(const cv::Vec3d& orientation) const
+{
+	int closest_view = 0;
+	double closest_distance = -1.0;
+
+	for (size_t view = 0; view < this->orientations.size(); ++view)
+	{
+		const double distance = cv::norm(orientation, this->orientations[view]);
+		// The first view always seeds the search; afterwards only a strictly smaller distance wins
+		const bool is_better = (view == 0) || (distance < closest_distance);
+		if (!is_better)
+		{
+			continue;
+		}
+		closest_distance = distance;
+		closest_view = view;
+	}
+
+	return closest_view;
+}
+
+
--- a/pkg/OpenFace/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp
+++ b/pkg/OpenFace/lib/local/LandmarkDetector/src/LandmarkDetectorFunc.cpp
@@ -0,0 +1,768 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
+// all rights reserved.
+//
+// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
+//
+// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.  
+// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
+//
+// License can be found in OpenFace-license.txt
+//
+//     * Any publications arising from the use of this software, including but
+//       not limited to academic journal and conference publications, technical
+//       reports and manuals, must cite at least one of the following works:
+//
+//       OpenFace 2.0: Facial Behavior Analysis Toolkit
+//       Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
+//       in IEEE International Conference on Automatic Face and Gesture Recognition, 2018  
+//
+//       Convolutional experts constrained local model for facial landmark detection.
+//       A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
+//       in Computer Vision and Pattern Recognition Workshops, 2017.    
+//
+//       Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
+//       Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling 
+//       in IEEE International. Conference on Computer Vision (ICCV),  2015 
+//
+//       Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
+//       Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson 
+//       in Facial Expression Recognition and Analysis Challenge, 
+//       IEEE International Conference on Automatic Face and Gesture Recognition, 2015 
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stdafx.h"
+
+#include "LandmarkDetectorFunc.h"
+#include "RotationHelpers.h"
+#include "ImageManipulationHelpers.h"
+
+// OpenCV includes
+#include <opencv2/core/core.hpp>
+#include <opencv2/calib3d.hpp>
+#include <opencv2/imgproc.hpp>
+
+// System includes
+#include <vector>
+#include <numeric>
+
+using namespace LandmarkDetector;
+
+// Head pose from the currently detected landmarks, corrected for the PDM assuming an orthographic
+// camera (an assumption that only holds close to the centre of the image).
+// The pose is expressed in world coordinates with the camera at the origin (0,0,0).
+//   clnf_model     - model holding the detected landmarks and the global/local parameters
+//   fx, fy, cx, cy - camera focal lengths and optical centre
+// Returns [Tx, Ty, Tz, Eul_x, Eul_y, Eul_z], or all zeros when there are no landmarks or the
+// scale parameter is zero.
+cv::Vec6f LandmarkDetector::GetPose(const CLNF& clnf_model, float fx, float fy, float cx, float cy)
+{
+	// Nothing to estimate from
+	if (clnf_model.detected_landmarks.empty() || clnf_model.params_global[0] == 0)
+	{
+		return cv::Vec6f(0, 0, 0, 0, 0, 0);
+	}
+
+	// Initial estimate handed to the iterative PnP solver
+	const float depth_guess = fx / clnf_model.params_global[0];
+	const float x_guess = ((clnf_model.params_global[4] - cx) * (1.0 / fx)) * depth_guess;
+	const float y_guess = ((clnf_model.params_global[5] - cy) * (1.0 / fy)) * depth_guess;
+
+	cv::Vec3f translation(x_guess, y_guess, depth_guess);
+	cv::Vec3f rotation(clnf_model.params_global[1], clnf_model.params_global[2], clnf_model.params_global[3]);
+
+	// 3D model points, one point per row
+	cv::Mat_<float> shape_3D;
+	clnf_model.pdm.CalcShape3D(shape_3D, clnf_model.params_local);
+	shape_3D = shape_3D.reshape(1, 3).t();
+
+	// Matching 2D image points, one point per row
+	cv::Mat_<float> shape_2D = clnf_model.detected_landmarks;
+	shape_2D = shape_2D.reshape(1, 2).t();
+
+	// Pinhole camera without distortion coefficients
+	cv::Matx33f intrinsics(fx, 0, cx, 0, fy, cy, 0, 0, 1);
+
+	// The last argument tells solvePnP to refine the guess above rather than start from scratch
+	cv::solvePnP(shape_3D, shape_2D, intrinsics, cv::Mat(), rotation, translation, true);
+
+	cv::Vec3f euler_angles = Utilities::AxisAngle2Euler(rotation);
+
+	return cv::Vec6f(translation[0], translation[1], translation[2], euler_angles[0], euler_angles[1], euler_angles[2]);
+}
+
+// Head pose from the currently detected landmarks, corrected for perspective projection.
+// The rotation is expressed with respect to a point camera (NOTE not the world coordinates),
+// which is useful for finding out whether the person is looking at the camera.
+//   clnf_model     - model holding the detected landmarks and the global/local parameters
+//   fx, fy, cx, cy - camera focal lengths and optical centre
+// Returns [Tx, Ty, Tz, Eul_x, Eul_y, Eul_z], or all zeros when there are no landmarks or the
+// scale parameter is zero.
+cv::Vec6f LandmarkDetector::GetPoseWRTCamera(const CLNF& clnf_model, float fx, float fy, float cx, float cy)
+{
+	// Nothing to estimate from
+	if (clnf_model.detected_landmarks.empty() || clnf_model.params_global[0] == 0)
+	{
+		return cv::Vec6f(0, 0, 0, 0, 0, 0);
+	}
+
+	// Initial estimate handed to the iterative PnP solver
+	const float depth_guess = fx / clnf_model.params_global[0];
+	const float x_guess = ((clnf_model.params_global[4] - cx) * (1.0 / fx)) * depth_guess;
+	const float y_guess = ((clnf_model.params_global[5] - cy) * (1.0 / fy)) * depth_guess;
+
+	cv::Vec3f translation(x_guess, y_guess, depth_guess);
+	cv::Vec3f rotation(clnf_model.params_global[1], clnf_model.params_global[2], clnf_model.params_global[3]);
+
+	// 3D model points, one point per row
+	cv::Mat_<float> shape_3D;
+	clnf_model.pdm.CalcShape3D(shape_3D, clnf_model.params_local);
+	shape_3D = shape_3D.reshape(1, 3).t();
+
+	// Matching 2D image points, one point per row
+	cv::Mat_<float> shape_2D = clnf_model.detected_landmarks;
+	shape_2D = shape_2D.reshape(1, 2).t();
+
+	// Pinhole camera without distortion coefficients
+	cv::Matx33f intrinsics(fx, 0, cx, 0, fy, cy, 0, 0, 1);
+
+	cv::solvePnP(shape_3D, shape_2D, intrinsics, cv::Mat(), rotation, translation, true);
+
+	// Angles the line from the camera to the head makes with the optical axis; these are used
+	// to re-express the head rotation relative to the camera
+	float dist_xz = cv::sqrt(translation[0] * translation[0] + translation[2] * translation[2]);
+	float angle_x = atan2(translation[1], dist_xz);
+
+	float dist_yz = cv::sqrt(translation[1] * translation[1] + translation[2] * translation[2]);
+	float angle_y = -atan2(translation[0], dist_yz);
+
+	cv::Matx33f rot_camera = Utilities::Euler2RotationMatrix(cv::Vec3f(angle_x, angle_y, 0));
+	cv::Matx33f rot_head = Utilities::AxisAngle2RotationMatrix(rotation);
+
+	cv::Vec3f euler_angles = Utilities::RotationMatrix2Euler(rot_camera * rot_head);
+
+	return cv::Vec6f(translation[0], translation[1], translation[2], euler_angles[0], euler_angles[1], euler_angles[2]);
+}
+
+// Stores a copy of the image region under the model's current bounding box as the face template
+// used for simple template tracking (called after a successful landmark detection in video)
+void UpdateTemplate(const cv::Mat_<uchar> &grayscale_image, CLNF& clnf_model)
+{
+	cv::Rect_<float> model_box;
+	clnf_model.pdm.CalcBoundingBox(model_box, clnf_model.params_global, clnf_model.params_local);
+
+	// Truncate to whole pixels and clip against the image so that the crop stays in bounds
+	const cv::Rect_<int> truncated_box((int)model_box.x, (int)model_box.y, (int)model_box.width, (int)model_box.height);
+	const cv::Rect_<int> image_area(0, 0, grayscale_image.cols, grayscale_image.rows);
+	model_box = truncated_box & image_area;
+
+	clnf_model.face_template = grayscale_image(model_box).clone();
+}
+
+// Uses basic template matching to shift the model's global translation towards the best match of
+// the stored face template, which allows for better tracking of fast moving faces.
+//   grayscale_image - current frame
+//   clnf_model      - model whose params_global[4] and params_global[5] are shifted; its
+//                     face_template is downscaled in place when the computed scaling is below 1
+//   params          - supplies face_template_scale
+// The model is left without a correction when the search window is empty or smaller than the template.
+void CorrectGlobalParametersVideo(const cv::Mat_<uchar> &grayscale_image, CLNF& clnf_model, const FaceModelParameters& params)
+{
+	cv::Rect_<float> init_box;
+	clnf_model.pdm.CalcBoundingBox(init_box, clnf_model.params_global, clnf_model.params_local);
+
+	// Search in a window twice the size of the face box, clipped to the image
+	cv::Rect roi(init_box.x - init_box.width/2, init_box.y - init_box.height/2, init_box.width * 2, init_box.height * 2);
+	roi = roi & cv::Rect(0, 0, grayscale_image.cols, grayscale_image.rows);
+
+	// The window does not overlap the image, so there is nothing to match against
+	if(roi.width <= 0 || roi.height <= 0)
+	{
+		return;
+	}
+
+	int off_x = roi.x;
+	int off_y = roi.y;
+
+	// Downscale both the template and the search window for speed, never upscale
+	float scaling = params.face_template_scale / clnf_model.params_global[0];
+	cv::Mat_<uchar> image;
+	if(scaling < 1)
+	{
+		cv::resize(clnf_model.face_template, clnf_model.face_template, cv::Size(), scaling, scaling);
+		cv::resize(grayscale_image(roi), image, cv::Size(), scaling, scaling);
+	}
+	else
+	{
+		scaling = 1;
+		image = grayscale_image(roi).clone();
+	}
+
+	// cv::matchTemplate requires the searched image to be at least as large as the template
+	if(clnf_model.face_template.empty() || image.rows < clnf_model.face_template.rows || image.cols < clnf_model.face_template.cols)
+	{
+		return;
+	}
+
+	// Correlate the template with the search window
+	cv::Mat corr_out;
+	cv::matchTemplate(image, clnf_model.face_template, corr_out, cv::TM_CCOEFF_NORMED);
+
+	// Location of the best match, stored as (row, col)
+	int max_loc[2];
+
+	cv::minMaxIdx(corr_out, NULL, NULL, NULL, max_loc);
+
+	// Map the match back to full-resolution image coordinates (x from the column, y from the row;
+	// width from the template columns, height from the template rows)
+	cv::Rect_<float> out_bbox(max_loc[1]/scaling + off_x, max_loc[0]/scaling + off_y, clnf_model.face_template.cols / scaling, clnf_model.face_template.rows / scaling);
+
+	float shift_x = out_bbox.x - init_box.x;
+	float shift_y = out_bbox.y - init_box.y;
+
+	clnf_model.params_global[4] = clnf_model.params_global[4] + shift_x;
+	clnf_model.params_global[5] = clnf_model.params_global[5] + shift_y;
+
+}
+
+// Tracks, or (re)detects, facial landmarks in one frame of a video sequence.
+//   rgb_image       - current frame; handed to the MTCNN face detector and to DetectLandmarksInImage
+//   clnf_model      - tracker state, updated in place
+//   params          - detection parameters; window_sizes_current and multi_view are modified, and
+//                     curr_face_detector is switched to HOG-SVM when the MTCNN model cannot be read
+//   grayscale_image - grayscale version of the frame, computed from rgb_image when empty
+// Returns the outcome of the (re)detection when a face detector found a face in this frame,
+// otherwise clnf_model.detection_success.
+bool LandmarkDetector::DetectLandmarksInVideo(const cv::Mat &rgb_image, CLNF& clnf_model, FaceModelParameters& params, cv::Mat& grayscale_image)
+{
+	// First need to decide if the landmarks should be "detected" or "tracked"
+	// Detected means running face detection and a larger search area, tracked means initialising from previous step
+	// and using a smaller search area
+
+	if(grayscale_image.empty())
+	{
+		Utilities::ConvertToGrayscale_8bit(rgb_image, grayscale_image);
+	}
+
+	// Indicating that this is a first detection in video sequence or after restart
+	bool initial_detection = !clnf_model.tracking_initialised;
+
+	// Only do it if there was a face detection at all
+	if(clnf_model.tracking_initialised)
+	{
+
+		// The area of interest search size will depend if the previous track was successful
+		if(!clnf_model.detection_success)
+		{
+			params.window_sizes_current = params.window_sizes_init;
+		}
+		else
+		{
+			params.window_sizes_current = params.window_sizes_small;
+		}
+
+		// Before the expensive landmark detection step apply a quick template tracking approach
+		if(params.use_face_template && !clnf_model.face_template.empty() && clnf_model.detection_success)
+		{
+			CorrectGlobalParametersVideo(grayscale_image, clnf_model, params);
+		}
+
+		bool track_success = clnf_model.DetectLandmarks(grayscale_image, params);
+
+		if(!track_success)
+		{
+			// Make a record that tracking failed
+			clnf_model.failures_in_a_row++;
+		}
+		else
+		{
+			// indicate that tracking is a success
+			clnf_model.failures_in_a_row = -1;
+
+			if(params.use_face_template)
+			{
+				UpdateTemplate(grayscale_image, clnf_model);
+			}
+		}
+	}
+
+	// This is used for both detection (if the tracking has not been initialised yet) or if the tracking failed (however we do this every n frames, for speed)
+	// This also has the effect of an attempt to reinitialise just after the tracking has failed, which is useful during large motions
+	// NOTE(review): the first clause takes a modulo by (reinit_video_every * 6) without the
+	// "> 0" guard used in the second clause, so reinit_video_every == 0 divides by zero here
+	if((!clnf_model.tracking_initialised && (clnf_model.failures_in_a_row + 1) % (params.reinit_video_every * 6) == 0) 
+		|| (clnf_model.tracking_initialised && !clnf_model.detection_success && params.reinit_video_every > 0 && clnf_model.failures_in_a_row % params.reinit_video_every == 0))
+	{
+
+		cv::Rect_<float> bounding_box;
+
+		// If the face detector has not been initialised and we're using it, then read it in
+		if(clnf_model.face_detector_HAAR.empty() && params.curr_face_detector == params.HAAR_DETECTOR)
+		{
+			clnf_model.face_detector_HAAR.load(params.haar_face_detector_location);
+			clnf_model.haar_face_detector_location = params.haar_face_detector_location;
+		}
+		if (clnf_model.face_detector_MTCNN.empty() && params.curr_face_detector == params.MTCNN_DETECTOR)
+		{
+			clnf_model.face_detector_MTCNN.Read(params.mtcnn_face_detector_location);
+			clnf_model.mtcnn_face_detector_location = params.mtcnn_face_detector_location;
+
+			// If the model is still empty default to HOG
+			if (clnf_model.face_detector_MTCNN.empty())
+			{
+				std::cout << "INFO: defaulting to HOG-SVM face detector" << std::endl;
+				params.curr_face_detector = LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR;
+			}
+
+		}
+
+		// A preferred detection location stored on the model is given relative to the image size;
+		// it is converted to pixels and consumed (reset to -1,-1) here
+		cv::Point preference_det(-1, -1);
+		if(clnf_model.preference_det.x != -1 && clnf_model.preference_det.y != -1)
+		{
+			preference_det.x = clnf_model.preference_det.x * grayscale_image.cols;
+			preference_det.y = clnf_model.preference_det.y * grayscale_image.rows;
+			clnf_model.preference_det = cv::Point(-1, -1);
+		}
+
+		// Stays false when curr_face_detector matches none of the branches below, so that the
+		// flag is never read uninitialised
+		bool face_detection_success = false;
+		if(params.curr_face_detector == FaceModelParameters::HOG_SVM_DETECTOR)
+		{
+			float confidence;
+			face_detection_success = LandmarkDetector::DetectSingleFaceHOG(bounding_box, grayscale_image, clnf_model.face_detector_HOG, confidence, preference_det);
+		}
+		else if(params.curr_face_detector == FaceModelParameters::HAAR_DETECTOR)
+		{
+			face_detection_success = LandmarkDetector::DetectSingleFace(bounding_box, grayscale_image, clnf_model.face_detector_HAAR, preference_det);
+		}
+		else if (params.curr_face_detector == FaceModelParameters::MTCNN_DETECTOR)
+		{
+			float confidence;
+			face_detection_success = LandmarkDetector::DetectSingleFaceMTCNN(bounding_box, rgb_image, clnf_model.face_detector_MTCNN, confidence, preference_det);
+		}
+
+		// Attempt to detect landmarks using the detected face (if unsuccessful the detection will be ignored)
+		if(face_detection_success)
+		{
+			// Indicate that tracking has started as a face was detected
+			clnf_model.tracking_initialised = true;
+
+			// Keep track of old model values so that they can be restored if redetection fails
+			cv::Vec6f params_global_init = clnf_model.params_global;
+			cv::Mat_<float> params_local_init = clnf_model.params_local.clone();
+			float likelihood_init = clnf_model.model_likelihood;
+			cv::Mat_<float> detected_landmarks_init = clnf_model.detected_landmarks.clone();
+			cv::Mat_<float> landmark_likelihoods_init = clnf_model.landmark_likelihoods.clone();
+
+			// Use the detected bounding box and empty local parameters
+			clnf_model.params_local.setTo(0);
+			clnf_model.pdm.CalcParams(clnf_model.params_global, bounding_box, clnf_model.params_local);
+
+			// Make sure the search size is large
+			params.window_sizes_current = params.window_sizes_init;
+
+			// TODO rem (should the multi-hyp version be only for CEN and not CLNF?), otherwise poss too slow, and poss not accurate
+			//bool landmark_detection_success = clnf_model.DetectLandmarks(grayscale_image, params);
+
+			// Do the actual landmark detection (and keep it only if successful)
+			// Perform multi-hypothesis detection here (as face detector can pick up multiple of them)
+			// Note that multi_view is left set to false afterwards, whatever its value was before
+			params.multi_view = true;
+			bool landmark_detection_success = DetectLandmarksInImage(rgb_image, bounding_box, clnf_model, params, grayscale_image);
+			params.multi_view = false;
+
+
+			// If landmark reinitialisation unsuccessful continue from previous estimates
+			// if it's initial detection however, do not care if it was successful as the validator might be wrong, so continue tracking
+			// regardless
+			if(!initial_detection && !landmark_detection_success)
+			{
+
+				// Restore previous estimates
+				clnf_model.params_global = params_global_init;
+				clnf_model.params_local = params_local_init.clone();
+				clnf_model.pdm.CalcShape2D(clnf_model.detected_landmarks, clnf_model.params_local, clnf_model.params_global);
+				clnf_model.model_likelihood = likelihood_init;
+				clnf_model.detected_landmarks = detected_landmarks_init.clone();
+				clnf_model.landmark_likelihoods = landmark_likelihoods_init.clone();
+
+				return false;
+			}
+			else
+			{
+				clnf_model.failures_in_a_row = -1;
+
+				if(params.use_face_template)
+				{
+					UpdateTemplate(grayscale_image, clnf_model);
+				}
+
+				return true;
+			}
+		}
+	}
+
+	// if the model has not been initialised yet class it as a failure
+	if(!clnf_model.tracking_initialised)
+	{
+		clnf_model.failures_in_a_row++;
+	}
+
+	// un-initialise the tracking
+	if(	clnf_model.failures_in_a_row > 100)
+	{
+		clnf_model.tracking_initialised = false;
+	}
+
+	return clnf_model.detection_success;
+
+}
+
+// Variant of DetectLandmarksInVideo that can be seeded with a face bounding box.
+// A box with a positive width re-initialises the model parameters from it and marks tracking as
+// initialised; any other box is ignored. The work is then delegated to the version without a box.
+bool LandmarkDetector::DetectLandmarksInVideo(const cv::Mat &rgb_image, const cv::Rect_<double> bounding_box, CLNF& clnf_model, FaceModelParameters& params, cv::Mat &grayscale_image)
+{
+	const bool box_supplied = bounding_box.width > 0;
+
+	if (box_supplied)
+	{
+		// Start from zeroed local parameters and derive the global parameters from the box
+		// (mapping from the 2D to 3D because camera params are unknown)
+		clnf_model.params_local.setTo(0);
+		clnf_model.pdm.CalcParams(clnf_model.params_global, bounding_box, clnf_model.params_local);
+
+		// A face location is known, so no face detection is needed to start tracking
+		clnf_model.tracking_initialised = true;
+	}
+
+	return DetectLandmarksInVideo(rgb_image, clnf_model, params, grayscale_image);
+}
+
+//================================================================================================================
+// Landmark detection in image, need to provide an image and optionally CLNF model together with parameters (default values work well)
+// Optionally can provide a bounding box in which detection is performed (this is useful if multiple faces are to be detected in images)
+//================================================================================================================
+
+// Runs landmark detection once per rotation hypothesis and keeps the result with the highest
+// model likelihood.
+//   grayscale_image     - image to detect in
+//   rotation_hypotheses - initial orientations to try, in order
+//   bounding_box        - face bounding box used to initialise the global parameters
+//   clnf_model          - model, left holding the best result (including its hierarchical models)
+//   params              - detection parameters; window_sizes_current is set to window_sizes_init
+// Returns the success flag of the best hypothesis, or false when no hypotheses are given.
+bool DetectLandmarksInImageMultiHypBasic(const cv::Mat_<uchar> &grayscale_image, std::vector<cv::Vec3d> rotation_hypotheses, 
+	const cv::Rect_<double> bounding_box, CLNF& clnf_model, FaceModelParameters& params)
+{
+
+	// Use the initialisation size for the landmark detection
+	params.window_sizes_current = params.window_sizes_init;
+
+	// Without hypotheses there is no result to pick, leave the model untouched
+	if (rotation_hypotheses.empty())
+	{
+		return false;
+	}
+
+	// Store the current best estimate
+	float best_likelihood = 0;
+	float best_detection_certainty = 0;
+	cv::Vec6f best_global_parameters;
+	cv::Mat_<float> best_local_parameters;
+	cv::Mat_<float> best_detected_landmarks;
+	cv::Mat_<float> best_landmark_likelihoods;
+	bool best_success = false;
+
+	// The hierarchical model parameters
+	std::vector<float> best_likelihood_h(clnf_model.hierarchical_models.size());
+	std::vector<cv::Vec6f> best_global_parameters_h(clnf_model.hierarchical_models.size());
+	std::vector<cv::Mat_<float>> best_local_parameters_h(clnf_model.hierarchical_models.size());
+	std::vector<cv::Mat_<float>> best_detected_landmarks_h(clnf_model.hierarchical_models.size());
+	std::vector<cv::Mat_<float>> best_landmark_likelihoods_h(clnf_model.hierarchical_models.size());
+
+	for (size_t hypothesis = 0; hypothesis < rotation_hypotheses.size(); ++hypothesis)
+	{
+		// Reset the potentially set clnf_model parameters
+		clnf_model.params_local.setTo(0.0);
+
+		for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part)
+		{
+			clnf_model.hierarchical_models[part].params_local.setTo(0.0);
+		}
+
+		// calculate the local and global parameters from the generated 2D shape (mapping from the 2D to 3D because camera params are unknown)
+		clnf_model.pdm.CalcParams(clnf_model.params_global, bounding_box, clnf_model.params_local, rotation_hypotheses[hypothesis]);
+
+		bool success = clnf_model.DetectLandmarks(grayscale_image, params);
+
+		// The first hypothesis always becomes the initial best
+		if (hypothesis == 0 || best_likelihood < clnf_model.model_likelihood)
+		{
+			// Matrices are cloned because the model keeps writing to its own buffers
+			best_likelihood = clnf_model.model_likelihood;
+			best_global_parameters = clnf_model.params_global;
+			best_local_parameters = clnf_model.params_local.clone();
+			best_detected_landmarks = clnf_model.detected_landmarks.clone();
+			best_landmark_likelihoods = clnf_model.landmark_likelihoods.clone();
+			best_detection_certainty = clnf_model.detection_certainty;
+			best_success = success;
+
+			for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part)
+			{
+				best_likelihood_h[part] = clnf_model.hierarchical_models[part].model_likelihood;
+				best_global_parameters_h[part] = clnf_model.hierarchical_models[part].params_global;
+				best_local_parameters_h[part] = clnf_model.hierarchical_models[part].params_local.clone();
+				best_detected_landmarks_h[part] = clnf_model.hierarchical_models[part].detected_landmarks.clone();
+				best_landmark_likelihoods_h[part] = clnf_model.hierarchical_models[part].landmark_likelihoods.clone();
+			}
+		}
+
+	}
+
+	// Store the best estimates in the clnf_model
+	clnf_model.model_likelihood = best_likelihood;
+	clnf_model.params_global = best_global_parameters;
+	clnf_model.params_local = best_local_parameters.clone();
+	clnf_model.detected_landmarks = best_detected_landmarks.clone();
+	clnf_model.detection_success = best_success;
+	clnf_model.landmark_likelihoods = best_landmark_likelihoods.clone();
+	clnf_model.detection_certainty = best_detection_certainty;
+
+	for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part)
+	{
+		// The likelihood is restored together with the rest of the state so that each
+		// hierarchical model is consistent with the hypothesis that was kept
+		clnf_model.hierarchical_models[part].model_likelihood = best_likelihood_h[part];
+		clnf_model.hierarchical_models[part].params_global = best_global_parameters_h[part];
+		clnf_model.hierarchical_models[part].params_local = best_local_parameters_h[part].clone();
+		clnf_model.hierarchical_models[part].detected_landmarks = best_detected_landmarks_h[part].clone();
+		clnf_model.hierarchical_models[part].landmark_likelihoods = best_landmark_likelihoods_h[part].clone();
+	}
+
+	return best_success;
+
+
+}
+
+// Helper returning the indices of v ordered so that the values they refer to are descending
+template <typename T> std::vector<size_t> sort_indexes(const std::vector<T> &v) {
+
+	// Start from the identity ordering 0, 1, ..., size - 1
+	std::vector<size_t> order(v.size());
+	std::iota(order.begin(), order.end(), 0);
+
+	// Larger values come first
+	const auto by_value_desc = [&v](size_t lhs, size_t rhs) { return v[lhs] > v[rhs]; };
+	std::sort(order.begin(), order.end(), by_value_desc);
+
+	return order;
+}
+
+// Multi-hypothesis landmark detection with early termination.
+// Every rotation hypothesis is first fitted at the first window size only. A hypothesis whose
+// calibrated likelihood exceeds the early termination cutoff of the view used is completed at once
+// and returned. If none does, the (up to) three most likely hypotheses are completed and the one
+// with the highest model likelihood is kept.
+//   grayscale_image     - image to detect in
+//   rotation_hypotheses - initial orientations to try, in order
+//   bounding_box        - face bounding box used to initialise the global parameters
+//   clnf_model          - model, left holding the chosen result
+//   params              - detection parameters; modified while running and restored before returning
+// Returns the success flag of the result left in clnf_model (false when no hypotheses are given).
+bool DetectLandmarksInImageMultiHypEarlyTerm(const cv::Mat_<uchar> &grayscale_image, std::vector<cv::Vec3d> rotation_hypotheses, 
+	const cv::Rect_<double> bounding_box, CLNF& clnf_model, FaceModelParameters& params)
+{
+	FaceModelParameters old_params(params);
+
+	// Use the initialisation size for the landmark detection
+	params.window_sizes_current = params.window_sizes_init;
+
+	bool early_term = false;
+
+	// Setup the parameters accordingly
+	// Only do the first iteration
+	for (size_t i = 1; i < params.window_sizes_current.size(); ++i)
+	{
+		params.window_sizes_current[i] = 0;
+	}
+	params.refine_hierarchical = false;
+	params.validate_detections = false;
+
+	bool success = false;
+
+	// Keeping track of the first-scale results of hypotheses that did not terminate early
+	std::vector<float> likelihoods;
+	std::vector<cv::Vec6f> global_parameters;
+	std::vector<cv::Mat_<float>> local_parameters;
+
+	for (size_t hypothesis = 0; hypothesis < rotation_hypotheses.size(); ++hypothesis)
+	{
+		// Reset the potentially set clnf_model parameters
+		clnf_model.params_local.setTo(0.0);
+
+		for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part)
+		{
+			clnf_model.hierarchical_models[part].params_local.setTo(0.0);
+		}
+
+		// calculate the local and global parameters from the generated 2D shape (mapping from the 2D to 3D because camera params are unknown)
+		clnf_model.pdm.CalcParams(clnf_model.params_global, bounding_box, clnf_model.params_local, rotation_hypotheses[hypothesis]);
+
+		// Perform landmark detection in first scale
+		clnf_model.DetectLandmarks(grayscale_image, params);
+
+		// Likelihood calibrated with the weight and bias of the view that was used
+		float lhood = clnf_model.model_likelihood * clnf_model.patch_experts.early_term_weights[clnf_model.view_used] + clnf_model.patch_experts.early_term_biases[clnf_model.view_used];
+
+		// If likelihood higher than cutoff continue on this model
+		if (lhood > clnf_model.patch_experts.early_term_cutoffs[clnf_model.view_used])
+		{
+			// Run the remaining window sizes (the first one has already been done)
+			params.refine_hierarchical = old_params.refine_hierarchical;
+			params.window_sizes_current = params.window_sizes_init;
+			params.window_sizes_current[0] = 0;
+			params.validate_detections = old_params.validate_detections;
+			success = clnf_model.DetectLandmarks(grayscale_image, params);
+			early_term = true;
+			break;
+		}
+		else
+		{
+			likelihoods.push_back(lhood);
+			global_parameters.push_back(clnf_model.params_global);
+			// cv::Mat copies share their data, so a deep copy is needed to keep this candidate
+			// from being overwritten by the setTo(0.0) of the next hypothesis
+			local_parameters.push_back(clnf_model.params_local.clone());
+		}
+	}
+
+
+	if (!early_term && !likelihoods.empty())
+	{
+
+		// Store the current best estimate
+		float best_likelihood = 0;
+		float best_detection_certainty = 0;
+		cv::Vec6f best_global_parameters;
+		cv::Mat_<float> best_local_parameters;
+		cv::Mat_<float> best_detected_landmarks;
+		cv::Mat_<float> best_landmark_likelihoods;
+		bool best_success = false;
+
+		// The hierarchical model parameters
+		std::vector<float> best_likelihood_h(clnf_model.hierarchical_models.size());
+		std::vector<cv::Vec6f> best_global_parameters_h(clnf_model.hierarchical_models.size());
+		std::vector<cv::Mat_<float>> best_local_parameters_h(clnf_model.hierarchical_models.size());
+		std::vector<cv::Mat_<float>> best_detected_landmarks_h(clnf_model.hierarchical_models.size());
+		std::vector<cv::Mat_<float>> best_landmark_likelihoods_h(clnf_model.hierarchical_models.size());
+
+		// Sort the likelihoods (descending) and pick the best top 3 models
+		std::vector<size_t> indices = sort_indexes(likelihoods);
+
+		// Pick 3 best hypotheses and complete them
+		size_t max = indices.size() >= 3 ? 3 : indices.size();
+
+		// Run the remaining window sizes (the first one has already been done)
+		params.refine_hierarchical = old_params.refine_hierarchical;
+		params.window_sizes_current = params.window_sizes_init;
+		params.window_sizes_current[0] = 0;
+		params.validate_detections = old_params.validate_detections;
+
+
+		for (size_t i = 0; i < max; ++i)
+		{
+			// Continue from the first-scale result of this candidate (deep copy, so that the
+			// stored candidate is not modified by the detection)
+			clnf_model.params_local = local_parameters[indices[i]].clone();
+			clnf_model.params_global = global_parameters[indices[i]];
+			for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part)
+			{
+				clnf_model.hierarchical_models[part].params_local.setTo(0.0);
+			}
+
+			// Perform landmark detection in the remaining scales
+			success = clnf_model.DetectLandmarks(grayscale_image, params);
+
+			// The first candidate always becomes the initial best
+			if (i == 0 || best_likelihood < clnf_model.model_likelihood)
+			{
+				best_likelihood = clnf_model.model_likelihood;
+				best_global_parameters = clnf_model.params_global;
+				best_local_parameters = clnf_model.params_local.clone();
+				best_detected_landmarks = clnf_model.detected_landmarks.clone();
+				best_landmark_likelihoods = clnf_model.landmark_likelihoods.clone();
+				best_detection_certainty = clnf_model.detection_certainty;
+				best_success = success;
+
+				for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part)
+				{
+					best_likelihood_h[part] = clnf_model.hierarchical_models[part].model_likelihood;
+					best_global_parameters_h[part] = clnf_model.hierarchical_models[part].params_global;
+					best_local_parameters_h[part] = clnf_model.hierarchical_models[part].params_local.clone();
+					best_detected_landmarks_h[part] = clnf_model.hierarchical_models[part].detected_landmarks.clone();
+					best_landmark_likelihoods_h[part] = clnf_model.hierarchical_models[part].landmark_likelihoods.clone();
+				}
+			}
+
+		}
+
+		// Store the best estimates in the clnf_model
+		clnf_model.model_likelihood = best_likelihood;
+		clnf_model.params_global = best_global_parameters;
+		clnf_model.params_local = best_local_parameters.clone();
+		clnf_model.detected_landmarks = best_detected_landmarks.clone();
+		clnf_model.detection_success = best_success;
+		clnf_model.landmark_likelihoods = best_landmark_likelihoods.clone();
+		clnf_model.detection_certainty = best_detection_certainty;
+
+		for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part)
+		{
+			clnf_model.hierarchical_models[part].model_likelihood = best_likelihood_h[part];
+			clnf_model.hierarchical_models[part].params_global = best_global_parameters_h[part];
+			clnf_model.hierarchical_models[part].params_local = best_local_parameters_h[part].clone();
+			clnf_model.hierarchical_models[part].detected_landmarks = best_detected_landmarks_h[part].clone();
+			clnf_model.hierarchical_models[part].landmark_likelihoods = best_landmark_likelihoods_h[part].clone();
+		}
+
+		// Report the candidate that was kept, not the one that happened to be tried last
+		success = best_success;
+
+	}
+
+	params = old_params;
+
+	return success;
+
+}
+
+
+// Detects landmarks inside the given face bounding box. This is where the actual work gets done,
+// the other DetectLandmarksInImage calls lead to this one.
+//   rgb_image       - input image, only used to compute grayscale_image when that is empty
+//   bounding_box    - face bounding box to initialise from
+//   clnf_model      - model receiving the detection result
+//   params          - detection parameters; multi_view selects the set of orientation hypotheses
+//   grayscale_image - grayscale version of the image, filled in when empty
+// Returns the success flag reported by the multi-hypothesis detection.
+bool LandmarkDetector::DetectLandmarksInImage(const cv::Mat &rgb_image, const cv::Rect_<double> bounding_box, CLNF& clnf_model, FaceModelParameters& params, cv::Mat &grayscale_image)
+{
+	if (grayscale_image.empty())
+	{
+		Utilities::ConvertToGrayscale_8bit(rgb_image, grayscale_image);
+	}
+
+	// Orientation initialisations to try; the first one assumes a face close to frontal
+	std::vector<cv::Vec3d> rotation_hypotheses;
+
+	if (!params.multi_view)
+	{
+		rotation_hypotheses.push_back(cv::Vec3d(0, 0, 0));
+	}
+	else
+	{
+		// Other orientation hypotheses can easily be added by extending this list
+		rotation_hypotheses = {
+			cv::Vec3d(0, 0, 0),
+			cv::Vec3d(0, -0.5236, 0),
+			cv::Vec3d(0, 0.5236, 0),
+			cv::Vec3d(0, -0.96, 0),
+			cv::Vec3d(0, 0.96, 0),
+			cv::Vec3d(0, 0, 0.5236),
+			cv::Vec3d(0, 0, -0.5236),
+			cv::Vec3d(0, -1.57, 0),
+			cv::Vec3d(0, 1.57, 0),
+			cv::Vec3d(0, -1.22, 0.698),
+			cv::Vec3d(0, 1.22, -0.698)
+		};
+	}
+
+	// Early termination needs its parameters to be present in the patch experts, otherwise fall
+	// back to the basic multi-hypothesis testing
+	const bool has_early_term_params = clnf_model.patch_experts.early_term_biases.size() != 0;
+
+	if (has_early_term_params)
+	{
+		return DetectLandmarksInImageMultiHypEarlyTerm(grayscale_image, rotation_hypotheses, bounding_box, clnf_model, params);
+	}
+
+	return DetectLandmarksInImageMultiHypBasic(grayscale_image, rotation_hypotheses, bounding_box, clnf_model, params);
+}
+
+// Detects a single face with the face detector selected in params and then detects landmarks
+// inside the face found.
+//   rgb_image       - input image; passed to the MTCNN face detector
+//   clnf_model      - model holding the face detectors and receiving the detection result
+//   params          - detection parameters; curr_face_detector is switched to HOG-SVM when the
+//                     MTCNN model cannot be read
+//   grayscale_image - grayscale version of the image, computed from rgb_image when empty
+// Returns false when no face was found (bounding box of zero width), otherwise the result of the
+// landmark detection within the detected bounding box.
+bool LandmarkDetector::DetectLandmarksInImage(const cv::Mat &rgb_image, CLNF& clnf_model, FaceModelParameters& params, cv::Mat &grayscale_image)
+{
+	if (grayscale_image.empty())
+	{
+		Utilities::ConvertToGrayscale_8bit(rgb_image, grayscale_image);
+	}
+
+	cv::Rect_<float> bounding_box;
+
+	// If the face detector has not been initialised read it in
+	if(clnf_model.face_detector_HAAR.empty() && params.curr_face_detector == FaceModelParameters::HAAR_DETECTOR)
+	{
+		clnf_model.face_detector_HAAR.load(params.haar_face_detector_location);
+		clnf_model.haar_face_detector_location = params.haar_face_detector_location;
+	}
+
+	if (clnf_model.face_detector_MTCNN.empty() && params.curr_face_detector == FaceModelParameters::MTCNN_DETECTOR)
+	{
+		clnf_model.face_detector_MTCNN.Read(params.mtcnn_face_detector_location);
+		clnf_model.mtcnn_face_detector_location = params.mtcnn_face_detector_location;
+
+		// If the model is still empty default to HOG
+		if (clnf_model.face_detector_MTCNN.empty())
+		{
+			std::cout << "INFO: defaulting to HOG-SVM face detector" << std::endl;
+			params.curr_face_detector = LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR;
+		}
+
+	}
+
+	// Detect the face first
+	if(params.curr_face_detector == FaceModelParameters::HOG_SVM_DETECTOR)
+	{
+		float confidence;
+		LandmarkDetector::DetectSingleFaceHOG(bounding_box, grayscale_image, clnf_model.face_detector_HOG, confidence);
+	}
+	else if(params.curr_face_detector == FaceModelParameters::HAAR_DETECTOR)
+	{
+		// The HAAR detector is given the grayscale image, as in DetectLandmarksInVideo
+		LandmarkDetector::DetectSingleFace(bounding_box, grayscale_image, clnf_model.face_detector_HAAR);
+	}
+	else if (params.curr_face_detector == FaceModelParameters::MTCNN_DETECTOR)
+	{
+		float confidence;
+		LandmarkDetector::DetectSingleFaceMTCNN(bounding_box, rgb_image, clnf_model.face_detector_MTCNN, confidence);
+	}
+
+	// The bounding box keeps its default zero width when no face was detected
+	if(bounding_box.width == 0)
+	{
+		return false;
+	}
+	else
+	{
+		return DetectLandmarksInImage(rgb_image, bounding_box, clnf_model, params, grayscale_image);
+	}
+}
--- a/pkg/OpenFace/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp
+++ b/pkg/OpenFace/lib/local/LandmarkDetector/src/LandmarkDetectorModel.cpp
--- a/pkg/OpenFace/lib/local/LandmarkDetector/src/LandmarkDetectorParameters.cpp
+++ b/pkg/OpenFace/lib/local/LandmarkDetector/src/LandmarkDetectorParameters.cpp
@@ -0,0 +1,340 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
+// all rights reserved.
+//
+// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
+//
+// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.  
+// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
+//
+// License can be found in OpenFace-license.txt
+//
+//     * Any publications arising from the use of this software, including but
+//       not limited to academic journal and conference publications, technical
+//       reports and manuals, must cite at least one of the following works:
+//
+//       OpenFace 2.0: Facial Behavior Analysis Toolkit
+//       Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
+//       in IEEE International Conference on Automatic Face and Gesture Recognition, 2018  
+//
+//       Convolutional experts constrained local model for facial landmark detection.
+//       A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
+//       in Computer Vision and Pattern Recognition Workshops, 2017.    
+//
+//       Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
+//       Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling 
+//       in IEEE International. Conference on Computer Vision (ICCV),  2015 
+//
+//       Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
+//       Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson 
+//       in Facial Expression Recognition and Analysis Challenge, 
+//       IEEE International Conference on Automatic Face and Gesture Recognition, 2015 
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stdafx.h"
+
+#include "LandmarkDetectorParameters.h"
+
+// System includes
+#include <sstream>
+#include <iostream>
+#include <cstdlib>
+
+#ifndef CONFIG_DIR
+#define CONFIG_DIR "~"
+#endif
+
+using namespace LandmarkDetector;
+
+// Default construction: all parameters take their built-in defaults.
+FaceModelParameters::FaceModelParameters()
+{
+	// Start from the default values ...
+	init();
+
+	// ... and then resolve the default model location against the known search paths
+	check_model_path();
+}
+
+// Builds the parameters from command line style arguments.
+//
+// arguments[0] is reserved for the executable location; its parent directory is used as one of
+// the places where relative model / classifier paths are looked up.
+// The options handled here (-mloc, -fdloc, -sigma, -w_reg, -reg, -multi_view, -validate_detections,
+// -n_iter and -wild) are removed from `arguments` together with their values, every other argument
+// is left untouched for the caller.
+FaceModelParameters::FaceModelParameters(std::vector<std::string> &arguments)
+{
+	// initialise the default values
+	init();
+
+	// First element is reserved for the executable location (useful for finding relative model locs)
+	const fs::path root = arguments.empty() ? fs::path() : fs::path(arguments[0]).parent_path();
+
+	// Marks the arguments that were consumed here and have to be removed afterwards
+	// (a vector rather than a raw new[] array, so nothing is leaked)
+	std::vector<bool> consumed(arguments.size(), false);
+
+	// Consumes the option at position i and, when present, the value that follows it.
+	// Returns false (after reporting it) if the option is the last argument and has no value,
+	// instead of reading past the end of the argument list.
+	auto take_value = [&arguments, &consumed](size_t& i, std::string& value) -> bool
+	{
+		consumed[i] = true;
+		if (i + 1 >= arguments.size())
+		{
+			std::cout << "WARNING: no value provided for option " << arguments[i] << std::endl;
+			return false;
+		}
+		value = arguments[i + 1];
+		consumed[i + 1] = true;
+		++i;
+		return true;
+	};
+
+	for (size_t i = 1; i < arguments.size(); ++i)
+	{
+		// Copy the option name, as i is advanced past the value by take_value
+		const std::string option = arguments[i];
+		std::string value;
+
+		// A single else-if chain: the value of one option is never re-interpreted as another option
+		if (option == "-mloc")
+		{
+			if (take_value(i, value))
+			{
+				model_location = value;
+			}
+		}
+		else if (option == "-fdloc")
+		{
+			if (take_value(i, value))
+			{
+				haar_face_detector_location = value;
+				curr_face_detector = HAAR_DETECTOR;
+			}
+		}
+		else if (option == "-sigma")
+		{
+			if (take_value(i, value))
+			{
+				std::stringstream data(value);
+				data >> sigma;
+			}
+		}
+		else if (option == "-w_reg")
+		{
+			if (take_value(i, value))
+			{
+				std::stringstream data(value);
+				data >> weight_factor;
+			}
+		}
+		else if (option == "-reg")
+		{
+			if (take_value(i, value))
+			{
+				std::stringstream data(value);
+				data >> reg_factor;
+			}
+		}
+		else if (option == "-multi_view")
+		{
+			if (take_value(i, value))
+			{
+				std::stringstream data(value);
+				int m_view = 0;
+				data >> m_view;
+
+				multi_view = (m_view != 0);
+			}
+		}
+		else if (option == "-validate_detections")
+		{
+			if (take_value(i, value))
+			{
+				std::stringstream data(value);
+				int v_det = 0;
+				data >> v_det;
+
+				validate_detections = (v_det != 0);
+			}
+		}
+		else if (option == "-n_iter")
+		{
+			if (take_value(i, value))
+			{
+				std::stringstream data(value);
+				data >> num_optimisation_iteration;
+			}
+		}
+		else if (option == "-wild")
+		{
+			// For in the wild fitting these parameters are suitable
+			window_sizes_init = std::vector<int>(4);
+			window_sizes_init[0] = 15; window_sizes_init[1] = 13; window_sizes_init[2] = 11; window_sizes_init[3] = 11;
+
+			sigma = 1.25;
+			reg_factor = 35;
+			weight_factor = 2.5;
+			num_optimisation_iteration = 10;
+
+			// This option takes no value
+			consumed[i] = true;
+
+			// For in-the-wild images use an in-the wild detector
+			curr_face_detector = MTCNN_DETECTOR;
+
+			// Use multi-view hypotheses if in-the-wild setting
+			multi_view = true;
+		}
+	}
+
+	// Remove the consumed arguments, back to front so that the remaining indices stay valid
+	for (size_t i = arguments.size(); i-- > 0;)
+	{
+		if (consumed[i])
+		{
+			arguments.erase(arguments.begin() + i);
+		}
+	}
+
+	// Make sure model_location is valid
+	// First check working directory, then the executable's directory, then the config path set by the build process.
+	// (done once through check_model_path, so a missing model is only reported once)
+	check_model_path(root.string());
+
+	// The landmark detector type is derived from the model file name (without directory and extension)
+	const std::string model_name = fs::path(model_location).stem().string();
+	if (model_name == "main_ceclm_general")
+	{
+		curr_landmark_detector = CECLM_DETECTOR;
+		sigma = 1.5f * sigma;
+		reg_factor = 0.9f * reg_factor;
+	}
+	else if (model_name == "main_clnf_general")
+	{
+		curr_landmark_detector = CLNF_DETECTOR;
+	}
+	else if (model_name == "main_clm_general")
+	{
+		curr_landmark_detector = CLM_DETECTOR;
+	}
+
+	// Make sure the face detector locations are valid
+	// First check working directory, then the executable's directory, then the config path set by the build process.
+	const fs::path config_path = fs::path(CONFIG_DIR);
+	auto resolve_location = [&root, &config_path](std::string& location, const char* not_found_message)
+	{
+		const fs::path candidate(location);
+		if (fs::exists(candidate))
+		{
+			location = candidate.string();
+		}
+		else if (fs::exists(root / candidate))
+		{
+			location = (root / candidate).string();
+		}
+		else if (fs::exists(config_path / candidate))
+		{
+			location = (config_path / candidate).string();
+		}
+		else
+		{
+			std::cout << not_found_message << std::endl;
+		}
+	};
+
+	resolve_location(haar_face_detector_location, "Could not find the HAAR face detector location");
+	resolve_location(mtcnn_face_detector_location, "Could not find the MTCNN face detector location");
+}
+
+// Resolves model_location to an existing path.
+// The candidates are tried in this order: the location as given, the location relative to `root`,
+// and the location relative to the config directory set by the build process (CONFIG_DIR).
+// The first candidate that exists replaces model_location; if none exists a message is printed
+// and model_location is left as it was.
+void FaceModelParameters::check_model_path(const std::string& root)
+{
+	const fs::path requested(model_location);
+
+	// 1) As given
+	if (fs::exists(requested))
+	{
+		model_location = requested.string();
+		return;
+	}
+
+	// 2) Relative to the supplied root
+	const fs::path under_root = fs::path(root) / requested;
+	if (fs::exists(under_root))
+	{
+		model_location = under_root.string();
+		return;
+	}
+
+	// 3) Relative to the build time config directory
+	const fs::path under_config = fs::path(CONFIG_DIR) / requested;
+	if (fs::exists(under_config))
+	{
+		model_location = under_config.string();
+		return;
+	}
+
+	std::cout << "Could not find the landmark detection model to load" << std::endl;
+}
+
+// Sets every parameter to its default value.
+void FaceModelParameters::init()
+{
+	// --- Model and detector selection ---
+	model_location = "model/main_ceclm_general.txt";
+	curr_landmark_detector = CECLM_DETECTOR;
+
+	// Face detection
+	haar_face_detector_location = "classifiers/haarcascade_frontalface_alt.xml";
+	mtcnn_face_detector_location = "model/mtcnn_detector/MTCNN_detector.txt";
+
+	// By default use MTCNN
+	curr_face_detector = MTCNN_DETECTOR;
+
+	// --- Optimisation ---
+	// number of iterations that will be performed at each scale
+	num_optimisation_iteration = 5;
+
+	sigma = 1.5f;
+	reg_factor = 25.0f;
+	weight_factor = 0.0f; // By default do not use NU-RLMS for videos as it does not work as well for them
+
+	// Using hierarchical refinement by default (can be turned off)
+	refine_hierarchical = true;
+
+	// Refining parameters by default
+	refine_parameters = true;
+
+	limit_pose = true;
+	multi_view = false;
+
+	// --- Validation ---
+	// using an external face checker based on SVM
+	validate_detections = true;
+	validation_boundary = 0.725f;
+
+	reinit_video_every = 2;
+
+	// --- Window sizes ---
+	// For fast tracking
+	window_sizes_small = std::vector<int>{ 0, 9, 7, 0 };
+
+	// Just for initialisation
+	window_sizes_init = std::vector<int>{ 11, 9, 7, 5 };
+
+	// For first frame use the initialisation
+	window_sizes_current = window_sizes_init;
+
+	// --- Face template ---
+	face_template_scale = 0.3f;
+	// Off by default (as it might lead to some slight inaccuracies in slowly moving faces)
+	use_face_template = false;
+}
+
--- a/pkg/OpenFace/lib/local/LandmarkDetector/src/LandmarkDetectorUtils.cpp
+++ b/pkg/OpenFace/lib/local/LandmarkDetector/src/LandmarkDetectorUtils.cpp
@@ -0,0 +1,925 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
+// all rights reserved.
+//
+// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
+//
+// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.  
+// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
+//
+// License can be found in OpenFace-license.txt
+//
+//     * Any publications arising from the use of this software, including but
+//       not limited to academic journal and conference publications, technical
+//       reports and manuals, must cite at least one of the following works:
+//
+//       OpenFace 2.0: Facial Behavior Analysis Toolkit
+//       Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
+//       in IEEE International Conference on Automatic Face and Gesture Recognition, 2018  
+//
+//       Convolutional experts constrained local model for facial landmark detection.
+//       A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
+//       in Computer Vision and Pattern Recognition Workshops, 2017.    
+//
+//       Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
+//       Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling 
+//       in IEEE International. Conference on Computer Vision (ICCV),  2015 
+//
+//       Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
+//       Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson 
+//       in Facial Expression Recognition and Analysis Challenge, 
+//       IEEE International Conference on Automatic Face and Gesture Recognition, 2015 
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stdafx.h"
+
+#include <LandmarkDetectorUtils.h>
+#include <RotationHelpers.h>
+
+// OpenCV includes
+#include <opencv2/core/core.hpp>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/calib3d.hpp>
+
+namespace LandmarkDetector
+{
+
+	//===========================================================================
+	// Fast patch expert response computation (linear model across a ROI) using normalised cross-correlation
+	//===========================================================================
+	// crossCorr_m: DFT based cross-correlation of img with _templ, written into corr (corr must already have its final size, as that size drives the DFT size). img_dft and _templ_dfts are caches that are filled here when needed and reused otherwise.
+	void crossCorr_m(const cv::Mat_<float>& img, cv::Mat_<double>& img_dft, const cv::Mat_<float>& _templ, 
+		std::map<int, cv::Mat_<double> >& _templ_dfts, cv::Mat_<float>& corr)
+	{
+		// Our model will always be under min block size so can ignore this
+		//const double blockScale = 4.5;
+		//const int minBlockSize = 256;
+
+		int maxDepth = CV_64F; // NOTE(review): not referenced anywhere else in this function
+		// DFT size: an efficient DFT size that fits response size + template size - 1 in each dimension
+		cv::Size dftsize;
+
+		dftsize.width = cv::getOptimalDFTSize(corr.cols + _templ.cols - 1);
+		dftsize.height = cv::getOptimalDFTSize(corr.rows + _templ.rows - 1);
+
+		// Compute block size (never larger than the response itself)
+		cv::Size blocksize;
+		blocksize.width = dftsize.width - _templ.cols + 1;
+		blocksize.width = MIN(blocksize.width, corr.cols);
+		blocksize.height = dftsize.height - _templ.rows + 1;
+		blocksize.height = MIN(blocksize.height, corr.rows);
+
+		cv::Mat_<double> dftTempl;
+
+		// if this has not been precomputed, precompute it, otherwise use it (the cache is keyed by the DFT width only)
+		if (_templ_dfts.find(dftsize.width) == _templ_dfts.end())
+		{
+			dftTempl.create(dftsize.height, dftsize.width);
+
+			cv::Mat_<float> src = _templ;
+			// dst views the whole DFT buffer, dst1 only its top-left template sized corner
+			cv::Mat_<double> dst(dftTempl, cv::Rect(0, 0, dftsize.width, dftsize.height));
+
+			cv::Mat_<double> dst1(dftTempl, cv::Rect(0, 0, _templ.cols, _templ.rows));
+			// Copy the template values (converted to double) into the corner of the buffer
+			if (dst1.data != src.data)
+				src.convertTo(dst1, dst1.depth());
+			// Zero the columns to the right of the template within the template rows
+			if (dst.cols > _templ.cols)
+			{
+				cv::Mat_<double> part(dst, cv::Range(0, _templ.rows), cv::Range(_templ.cols, dst.cols));
+				part.setTo(0);
+			}
+
+			// Perform DFT of the template (the last argument tells dft that only the first _templ.rows rows are non-zero)
+			dft(dst, dst, 0, _templ.rows);
+
+			_templ_dfts[dftsize.width] = dftTempl;
+
+		}
+		else
+		{
+			// use the precomputed version
+			dftTempl = _templ_dfts.find(dftsize.width)->second;
+		}
+		// Size of the response block that is written to the top-left of corr
+		cv::Size bsz(std::min(blocksize.width, corr.cols), std::min(blocksize.height, corr.rows));
+		cv::Mat src;
+
+		cv::Mat cdst(corr, cv::Rect(0, 0, bsz.width, bsz.height));
+
+		cv::Mat_<double> dftImg;
+		// The DFT of the image is only computed when the caller has not supplied a cached one
+		if (img_dft.empty())
+		{
+			dftImg.create(dftsize);
+			dftImg.setTo(0.0);
+
+			cv::Size dsz(bsz.width + _templ.cols - 1, bsz.height + _templ.rows - 1);
+			// Part of the image that contributes to the response block, clamped to the image size
+			int x2 = std::min(img.cols, dsz.width);
+			int y2 = std::min(img.rows, dsz.height);
+
+			cv::Mat src0(img, cv::Range(0, y2), cv::Range(0, x2));
+			cv::Mat dst(dftImg, cv::Rect(0, 0, dsz.width, dsz.height));
+			cv::Mat dst1(dftImg, cv::Rect(0, 0, x2, y2));
+
+			src = src0;
+
+			if (dst1.data != src.data)
+				src.convertTo(dst1, dst1.depth());
+
+			dft(dftImg, dftImg, 0, dsz.height);
+			img_dft = dftImg.clone(); // stored in the caller's cache for subsequent calls
+		}
+		// Multiply the image spectrum with the template spectrum (last argument: conjugate the second input), then transform back
+		cv::Mat dftTempl1(dftTempl, cv::Rect(0, 0, dftsize.width, dftsize.height));
+		cv::mulSpectrums(img_dft, dftTempl1, dftImg, 0, true);
+		cv::dft(dftImg, dftImg, cv::DFT_INVERSE + cv::DFT_SCALE, bsz.height);
+		// Keep only the response block and store it as float in corr (cdst is a view into corr)
+		src = dftImg(cv::Rect(0, 0, bsz.width, bsz.height));
+
+		src.convertTo(cdst, CV_32F);
+
+	}
+
+	////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+	// Template matching with caching of the expensive intermediate results.
+	//
+	// input_img        - image (area of interest) the template is matched against
+	// img_dft          - cache for the DFT of input_img, handed to crossCorr_m (computed there when empty)
+	// _integral_img    - cache for the integral image of input_img (computed here when missing)
+	// _integral_img_sq - cache for the squared integral image of input_img (computed here when missing,
+	//                    not needed for cv::TM_CCOEFF)
+	// templ            - the template
+	// templ_dfts       - cache of template DFTs, handed to crossCorr_m
+	// result           - response map; created with size (input - templ + 1) when empty
+	// method           - one of the cv::TM_* template matching methods
+	void matchTemplate_m(const cv::Mat_<float>& input_img, cv::Mat_<double>& img_dft, cv::Mat& _integral_img, 
+		cv::Mat& _integral_img_sq, const cv::Mat_<float>&  templ, std::map<int, cv::Mat_<double> >& templ_dfts, 
+		cv::Mat_<float>& result, int method)
+	{
+
+		// 0 - plain correlation, 1 - correlation coefficient, 2 - squared difference
+		int numType = method == cv::TM_CCORR || method == cv::TM_CCORR_NORMED ? 0 :
+			method == cv::TM_CCOEFF || method == cv::TM_CCOEFF_NORMED ? 1 : 2;
+		bool isNormed = method == cv::TM_CCORR_NORMED ||
+			method == cv::TM_SQDIFF_NORMED ||
+			method == cv::TM_CCOEFF_NORMED;
+
+		// Assume result is defined properly
+		if (result.empty())
+		{
+			cv::Size corrSize(input_img.cols - templ.cols + 1, input_img.rows - templ.rows + 1);
+			result.create(corrSize);
+		}
+		LandmarkDetector::crossCorr_m(input_img, img_dft, templ, templ_dfts, result);
+
+		// Plain cross-correlation needs no further correction
+		if (method == cv::TM_CCORR)
+			return;
+
+		double invArea = 1. / ((double)templ.rows * templ.cols);
+
+		cv::Mat sum, sqsum;
+		cv::Scalar templMean, templSdv;
+		double *q0 = 0, *q1 = 0, *q2 = 0, *q3 = 0;
+		double templNorm = 0, templSum2 = 0;
+
+		if (method == cv::TM_CCOEFF)
+		{
+			// If it has not been precomputed compute it now
+			if (_integral_img.empty())
+			{
+				integral(input_img, _integral_img, CV_64F);
+			}
+			sum = _integral_img;
+
+			templMean = cv::mean(templ);
+		}
+		else
+		{
+			// If it has not been precomputed compute it now.
+			// Both integral images are needed below; the squared one can be missing even though the plain one
+			// is present (e.g. the cache was filled by an earlier cv::TM_CCOEFF call), which would otherwise
+			// lead to reading through null pointers (q0..q3).
+			if (_integral_img.empty() || _integral_img_sq.empty())
+			{
+				integral(input_img, _integral_img, _integral_img_sq, CV_64F);
+			}
+
+			sum = _integral_img;
+			sqsum = _integral_img_sq;
+
+			meanStdDev(templ, templMean, templSdv);
+
+			templNorm = templSdv[0] * templSdv[0] + templSdv[1] * templSdv[1] + templSdv[2] * templSdv[2] + templSdv[3] * templSdv[3];
+
+			// A constant template: every location matches equally well
+			if (templNorm < DBL_EPSILON && method == cv::TM_CCOEFF_NORMED)
+			{
+				result.setTo(1.0);
+				return;
+			}
+
+			templSum2 = templNorm + templMean[0] * templMean[0] + templMean[1] * templMean[1] + templMean[2] * templMean[2] + templMean[3] * templMean[3];
+
+			if (numType != 1)
+			{
+				templMean = cv::Scalar::all(0);
+				templNorm = templSum2;
+			}
+
+			templSum2 /= invArea;
+			templNorm = std::sqrt(templNorm);
+			templNorm /= std::sqrt(invArea); // care of accuracy here
+
+			// Corners of the template sized window in the squared integral image
+			q0 = (double*)sqsum.data;
+			q1 = q0 + templ.cols;
+			q2 = (double*)(sqsum.data + templ.rows*sqsum.step);
+			q3 = q2 + templ.cols;
+		}
+
+		// Corners of the template sized window in the integral image
+		double* p0 = (double*)sum.data;
+		double* p1 = p0 + templ.cols;
+		double* p2 = (double*)(sum.data + templ.rows*sum.step);
+		double* p3 = p2 + templ.cols;
+
+		// Row strides of the integral images expressed in elements (doubles)
+		int sumstep = sum.data ? (int)(sum.step / sizeof(double)) : 0;
+		int sqstep = sqsum.data ? (int)(sqsum.step / sizeof(double)) : 0;
+
+		int i, j;
+
+		for (i = 0; i < result.rows; i++)
+		{
+			float* rrow = result.ptr<float>(i);
+			int idx = i * sumstep;
+			int idx2 = i * sqstep;
+
+			for (j = 0; j < result.cols; j++, idx += 1, idx2 += 1)
+			{
+				double num = rrow[j], t;
+				double wndMean2 = 0, wndSum2 = 0;
+
+				if (numType == 1)
+				{
+
+					// Sum of the window under the template, used to remove the mean
+					t = p0[idx] - p1[idx] - p2[idx] + p3[idx];
+					wndMean2 += t*t;
+					num -= t*templMean[0];
+
+					wndMean2 *= invArea;
+				}
+
+				if (isNormed || numType == 2)
+				{
+
+					// Sum of squares of the window under the template
+					t = q0[idx2] - q1[idx2] - q2[idx2] + q3[idx2];
+					wndSum2 += t;
+
+					if (numType == 2)
+					{
+						num = wndSum2 - 2 * num + templSum2;
+						num = MAX(num, 0.);
+					}
+				}
+
+				if (isNormed)
+				{
+					t = std::sqrt(MAX(wndSum2 - wndMean2, 0))*templNorm;
+					if (fabs(num) < t)
+						num /= t;
+					else if (fabs(num) < t*1.125)
+						num = num > 0 ? 1 : -1;
+					else
+						num = method != cv::TM_SQDIFF_NORMED ? 0 : 1;
+				}
+
+				rrow[j] = (float)num;
+			}
+		}
+	}
+
+	// Computes the axis aligned bounding box of a set of 2D landmarks.
+	// The landmarks are either a single column (2n x 1: the n x values followed by the n y values)
+	// or one landmark per row with x in the first and y in the second column.
+	// The outputs are only written when there is at least one landmark.
+	void ExtractBoundingBox(const cv::Mat_<float>& landmarks, float &min_x, float &max_x, float &min_y, float &max_y)
+	{
+		const bool stacked = (landmarks.cols == 1);
+		const int num_points = stacked ? landmarks.rows / 2 : landmarks.rows;
+
+		for (int p = 0; p < num_points; ++p)
+		{
+			float x;
+			float y;
+			if (stacked)
+			{
+				x = landmarks.at<float>(p);
+				y = landmarks.at<float>(p + num_points);
+			}
+			else
+			{
+				x = landmarks.at<float>(p, 0);
+				y = landmarks.at<float>(p, 1);
+			}
+
+			// The first landmark initialises the box
+			if (p == 0)
+			{
+				min_x = x;
+				max_x = x;
+				min_y = y;
+				max_y = y;
+				continue;
+			}
+
+			// Every further landmark can only grow it
+			if (x < min_x)
+				min_x = x;
+			if (x > max_x)
+				max_x = x;
+			if (y < min_y)
+				min_y = y;
+			if (y > max_y)
+				max_y = y;
+		}
+	}
+
+	// Collects the landmarks that are flagged as visible.
+	// shape2D holds the x values in its first half and the y values in its second half,
+	// visibilities holds one (non-zero = visible) flag per landmark.
+	std::vector<cv::Point2f> CalculateVisibleLandmarks(const cv::Mat_<float>& shape2D, const cv::Mat_<int>& visibilities)
+	{
+		const int num_points = shape2D.rows / 2;
+		std::vector<cv::Point2f> visible;
+
+		for (int p = 0; p < num_points; ++p)
+		{
+			// Skip everything that is not visible
+			if (!visibilities.at<int>(p))
+			{
+				continue;
+			}
+
+			visible.push_back(cv::Point2f(shape2D.at<float>(p), shape2D.at<float>(p + num_points)));
+		}
+
+		return visible;
+	}
+
+	// Converts a shape matrix into a list of points.
+	// Two layouts are understood: a single column with the x values followed by the y values,
+	// or two columns with one (x, y) landmark per row. Any other layout results in an empty list.
+	std::vector<cv::Point2f> CalculateAllLandmarks(const cv::Mat_<float>& shape2D)
+	{
+		std::vector<cv::Point2f> points;
+
+		if (shape2D.cols == 1)
+		{
+			// xs followed by ys
+			const int count = shape2D.rows / 2;
+			for (int p = 0; p < count; ++p)
+			{
+				points.push_back(cv::Point2f(shape2D.at<float>(p), shape2D.at<float>(p + count)));
+			}
+		}
+		else if (shape2D.cols == 2)
+		{
+			// one landmark per row
+			const int count = shape2D.rows;
+			for (int p = 0; p < count; ++p)
+			{
+				points.push_back(cv::Point2f(shape2D.at<float>(p, 0), shape2D.at<float>(p, 1)));
+			}
+		}
+
+		return points;
+	}
+
+	// All landmarks currently detected by the model, as a list of points
+	std::vector<cv::Point2f> CalculateAllLandmarks(const CLNF& clnf_model)
+	{
+		std::vector<cv::Point2f> points = CalculateAllLandmarks(clnf_model.detected_landmarks);
+		return points;
+	}
+
+	// The landmarks of the model that are visible in its current view.
+	// Returns an empty list when the last detection was not successful.
+	std::vector<cv::Point2f> CalculateVisibleLandmarks(const CLNF& clnf_model)
+	{
+		// If the detection was not successful no landmarks are visible
+		if (!clnf_model.detection_success)
+		{
+			return std::vector<cv::Point2f>();
+		}
+
+		// Because we only draw visible points, need to find which points patch experts consider visible at a certain orientation
+		const int view = clnf_model.patch_experts.GetViewIdx(clnf_model.params_global, 0);
+		return CalculateVisibleLandmarks(clnf_model.detected_landmarks, clnf_model.patch_experts.visibilities[0][view]);
+	}
+
+	// Visible landmarks of the hierarchical eye models ("left_eye_28" and "right_eye_28"),
+	// concatenated in the order in which the models are stored
+	std::vector<cv::Point2f> CalculateVisibleEyeLandmarks(const CLNF& clnf_model)
+	{
+		std::vector<cv::Point2f> eye_points;
+
+		const size_t num_models = clnf_model.hierarchical_models.size();
+		for (size_t m = 0; m < num_models; ++m)
+		{
+			// Only the two eye models are of interest here
+			const auto& model_name = clnf_model.hierarchical_model_names[m];
+			if (model_name.compare("left_eye_28") != 0 && model_name.compare("right_eye_28") != 0)
+			{
+				continue;
+			}
+
+			const std::vector<cv::Point2f> part = CalculateVisibleLandmarks(clnf_model.hierarchical_models[m]);
+			eye_points.insert(eye_points.end(), part.begin(), part.end());
+		}
+
+		return eye_points;
+	}
+	// 3D landmarks of the hierarchical eye models ("left_eye_28" and "right_eye_28"), obtained from
+	// GetShape of each eye model with the given fx, fy, cx, cy; one point per column of the returned shape
+	std::vector<cv::Point3f> Calculate3DEyeLandmarks(const CLNF& clnf_model, float fx, float fy, float cx, float cy)
+	{
+		std::vector<cv::Point3f> eye_points;
+
+		const size_t num_models = clnf_model.hierarchical_models.size();
+		for (size_t m = 0; m < num_models; ++m)
+		{
+			// Only the two eye models are of interest here
+			const auto& model_name = clnf_model.hierarchical_model_names[m];
+			if (model_name.compare("left_eye_28") != 0 && model_name.compare("right_eye_28") != 0)
+			{
+				continue;
+			}
+
+			// Rows 0, 1 and 2 of the shape hold x, y and z of each landmark
+			auto shape = clnf_model.hierarchical_models[m].GetShape(fx, fy, cx, cy);
+			for (int col = 0; col < shape.cols; ++col)
+			{
+				eye_points.push_back(cv::Point3f(shape.at<float>(0, col), shape.at<float>(1, col), shape.at<float>(2, col)));
+			}
+		}
+
+		return eye_points;
+	}
+
+	// All landmarks of the hierarchical eye models ("left_eye_28" and "right_eye_28"),
+	// concatenated in the order in which the models are stored
+	std::vector<cv::Point2f> CalculateAllEyeLandmarks(const CLNF& clnf_model)
+	{
+		std::vector<cv::Point2f> eye_points;
+
+		const size_t num_models = clnf_model.hierarchical_models.size();
+		for (size_t m = 0; m < num_models; ++m)
+		{
+			// Only the two eye models are of interest here
+			const auto& model_name = clnf_model.hierarchical_model_names[m];
+			if (model_name.compare("left_eye_28") != 0 && model_name.compare("right_eye_28") != 0)
+			{
+				continue;
+			}
+
+			const std::vector<cv::Point2f> part = CalculateAllLandmarks(clnf_model.hierarchical_models[m]);
+			eye_points.insert(eye_points.end(), part.begin(), part.end());
+		}
+
+		return eye_points;
+	}
+
+	//===========================================================================
+
+	//============================================================================
+	// Face detection helpers
+	//============================================================================
+	// Face detection with a Haar cascade that is loaded from the default location on every call.
+	// Returns false (after printing a message) when the cascade can not be loaded, otherwise the
+	// result of the detection with the loaded classifier.
+	bool DetectFaces(std::vector<cv::Rect_<float> >& o_regions, const cv::Mat_<uchar>& intensity, float min_width, cv::Rect_<float> roi)
+	{
+		cv::CascadeClassifier default_classifier("./classifiers/haarcascade_frontalface_alt.xml");
+
+		if (!default_classifier.empty())
+		{
+			return DetectFaces(o_regions, intensity, default_classifier, min_width, roi);
+		}
+
+		std::cout << "Couldn't load the Haar cascade classifier" << std::endl;
+		return false;
+	}
+
+	// Detects faces with the given Haar cascade and appends the corrected bounding boxes to o_regions.
+	//
+	// o_regions  - detections are appended here (existing entries are kept)
+	// intensity  - grayscale image to search
+	// classifier - a loaded Haar cascade
+	// min_width  - minimal face width in pixels, -1 disables the width and the roi filtering
+	//              (a minimal detection size of 50 x 50 is used in that case)
+	// roi        - region the face has to lie in, expressed as fractions of the image width / height
+	//
+	// Returns true if o_regions contains at least one region afterwards.
+	bool DetectFaces(std::vector<cv::Rect_<float> >& o_regions, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier, float min_width, cv::Rect_<float> roi)
+	{
+		const bool filter_detections = (min_width != -1);
+		const int min_side = filter_detections ? (int)min_width : 50;
+
+		std::vector<cv::Rect> face_detections;
+		classifier.detectMultiScale(intensity, face_detections, 1.2, 2, 0, cv::Size(min_side, min_side));
+
+		// The region of interest in pixels. The vertical bounds are relative to the image height
+		// (the upper bound used to be scaled by the image width, which is wrong for non-square images)
+		const float roi_left = ((float)intensity.cols) * roi.x;
+		const float roi_top = ((float)intensity.rows) * roi.y;
+		const float roi_right = ((float)intensity.cols) * (roi.x + roi.width);
+		const float roi_bottom = ((float)intensity.rows) * (roi.y + roi.height);
+
+		// Convert from an int bounding box to a float one with corrections
+		for (size_t face = 0; face < face_detections.size(); ++face)
+		{
+			const cv::Rect& detection = face_detections[face];
+
+			// OpenCV is overgenerous with face size and y location is off
+			// CLNF detector expects the bounding box to encompass from eyebrow to chin in y, and from cheek outline to cheek outline in x, so we need to compensate
+
+			// The scalings were learned using the Face Detections on LFPW, Helen, AFW and iBUG datasets, using ground truth and detections from openCV
+			cv::Rect_<float> region;
+			// Correct for scale
+			region.width = detection.width * 0.8924f;
+			region.height = detection.height * 0.8676f;
+
+			// Move the face slightly to the right (as the width was made smaller)
+			region.x = detection.x + 0.0578f * detection.width;
+			// Shift face down as OpenCV Haar Cascade detects the forehead as well, and we're not interested
+			region.y = detection.y + detection.height * 0.2166f;
+
+			if (filter_detections)
+			{
+				// Drop faces that are too small or not completely inside the region of interest
+				if (region.width < min_width || region.x < roi_left || region.y < roi_top ||
+					region.x + region.width > roi_right || region.y + region.height > roi_bottom)
+					continue;
+			}
+
+			o_regions.push_back(region);
+		}
+		return o_regions.size() > 0;
+	}
+
+	// Detects a single face with the given Haar cascade.
+	//
+	// o_region        - the selected face, or an empty (0, 0, 0, 0) rectangle when nothing was detected
+	// intensity_image - grayscale image to search
+	// classifier      - a loaded Haar cascade
+	// preference      - preferred face location; when both coordinates differ from -1 the face whose
+	//                   centre is closest to this point is selected, otherwise the widest face
+	// min_width, roi  - passed on to DetectFaces
+	//
+	// Returns true if a face was detected.
+	bool DetectSingleFace(cv::Rect_<float>& o_region, const cv::Mat_<uchar>& intensity_image, cv::CascadeClassifier& classifier, cv::Point preference, float min_width, cv::Rect_<float> roi)
+	{
+		// The tracker can return multiple faces
+		std::vector<cv::Rect_<float> > face_detections;
+
+		const bool detect_success = LandmarkDetector::DetectFaces(face_detections, intensity_image, classifier, min_width, roi);
+
+		if (!detect_success)
+		{
+			// if not detected
+			o_region = cv::Rect_<float>(0, 0, 0, 0);
+			return false;
+		}
+
+		const bool use_preferred = (preference.x != -1) && (preference.y != -1);
+
+		size_t best_index = 0;
+		float best_score = 0;
+
+		for (size_t i = 0; i < face_detections.size(); ++i)
+		{
+			const cv::Rect_<float>& face = face_detections[i];
+
+			float score;
+			bool better;
+
+			if (use_preferred)
+			{
+				// Euclidean distance between the preferred point and the centre of the face
+				// (same measure as DetectSingleFaceHOG), smaller is better
+				const float dx = preference.x - (face.width / 2 + face.x);
+				const float dy = preference.y - (face.height / 2 + face.y);
+				score = sqrt(dx * dx + dy * dy);
+				better = score < best_score;
+			}
+			else
+			{
+				// Without a preference the widest face wins
+				score = face.width;
+				better = score > best_score;
+			}
+
+			// Pick the face closest to the preferred point or the biggest face
+			if (i == 0 || better)
+			{
+				best_index = i;
+				best_score = score;
+			}
+		}
+
+		o_region = face_detections[best_index];
+		return true;
+	}
+
+	// HOG face detection with a dlib frontal face detector that is created for this call
+	bool DetectFacesHOG(std::vector<cv::Rect_<float> >& o_regions, const cv::Mat_<uchar>& intensity, 
+		std::vector<float>& confidences, float min_width, cv::Rect_<float> roi)
+	{
+		dlib::frontal_face_detector default_detector = dlib::get_frontal_face_detector();
+		const bool found = DetectFacesHOG(o_regions, intensity, default_detector, confidences, min_width, roi);
+		return found;
+	}
+
+	// Detects faces with a dlib HOG detector and appends the corrected bounding boxes to o_regions.
+	//
+	// o_regions     - detections are appended here (existing entries are kept)
+	// intensity     - grayscale image to search (it is upsampled by a factor of 1.3 for the detection)
+	// detector      - the dlib detector; initialised with the frontal face detector if it is empty
+	// o_confidences - the detection confidence of every appended region
+	// min_width     - minimal face width in pixels, -1 disables the width and the roi filtering
+	// roi           - region the face has to lie in, expressed as fractions of the image width / height
+	//
+	// Returns true if o_regions contains at least one region afterwards.
+	bool DetectFacesHOG(std::vector<cv::Rect_<float> >& o_regions, const cv::Mat_<uchar>& intensity, 
+		dlib::frontal_face_detector& detector, std::vector<float>& o_confidences, float min_width, cv::Rect_<float> roi)
+	{
+		if (detector.num_detectors() == 0)
+		{
+			detector = dlib::get_frontal_face_detector();
+		}
+
+		cv::Mat_<uchar> upsampled_intensity;
+
+		float scaling = 1.3f;
+
+		cv::resize(intensity, upsampled_intensity, cv::Size((int)(intensity.cols * scaling), (int)(intensity.rows * scaling)));
+
+		dlib::cv_image<uchar> cv_grayscale(upsampled_intensity);
+
+		std::vector<dlib::full_detection> face_detections;
+		detector(cv_grayscale, face_detections, -0.2);
+
+		// The region of interest in pixels. The vertical bounds are relative to the image height
+		// (the upper bound used to be scaled by the image width, which is wrong for non-square images)
+		const float roi_left = ((float)intensity.cols) * roi.x;
+		const float roi_top = ((float)intensity.rows) * roi.y;
+		const float roi_right = ((float)intensity.cols) * (roi.x + roi.width);
+		const float roi_bottom = ((float)intensity.rows) * (roi.y + roi.height);
+
+		// Convert from an int bounding box to a float one with corrections
+		for (size_t face = 0; face < face_detections.size(); ++face)
+		{
+			// CLNF expects the bounding box to encompass from eyebrow to chin in y, and from cheek outline to cheek outline in x, so we need to compensate
+			// The scalings were learned using the Face Detections on LFPW and Helen using ground truth and detections from the HOG detector
+			// All values are divided by the scaling to get back to the coordinates of the original image
+
+			cv::Rect_<float> region;
+			// Move the face slightly to the right (as the width is made smaller)
+			region.x = (face_detections[face].rect.get_rect().tl_corner().x() + 0.0389f * face_detections[face].rect.get_rect().width()) / scaling;
+			// Shift the face down
+			region.y = (face_detections[face].rect.get_rect().tl_corner().y() + 0.1278f * face_detections[face].rect.get_rect().height()) / scaling;
+
+			// Correct for scale
+			region.width = (face_detections[face].rect.get_rect().width() * 0.9611) / scaling;
+			region.height = (face_detections[face].rect.get_rect().height() * 0.9388) / scaling;
+
+			if (min_width != -1)
+			{
+				// Drop faces that are too small or not completely inside the region of interest
+				if (region.width < min_width || region.x < roi_left || region.y < roi_top ||
+					region.x + region.width > roi_right || region.y + region.height > roi_bottom)
+					continue;
+			}
+
+			o_regions.push_back(region);
+			o_confidences.push_back(face_detections[face].detection_confidence);
+		}
+		return o_regions.size() > 0;
+	}
+
+	// Detects a single face with a dlib HOG detector.
+	// When a preference point is given (both coordinates differ from -1) the face whose centre is closest
+	// to it is selected, otherwise the biggest face. On failure o_region is set to an empty rectangle and
+	// confidence to -2. Returns true if a face was detected.
+	bool DetectSingleFaceHOG(cv::Rect_<float>& o_region, const cv::Mat_<uchar>& intensity_img, dlib::frontal_face_detector& detector, float& confidence, cv::Point preference, float min_width, cv::Rect_<float> roi)
+	{
+
+		// Make sure there is a usable detector
+		if (detector.num_detectors() == 0)
+		{
+			detector = dlib::get_frontal_face_detector();
+		}
+
+		// The tracker can return multiple faces
+		std::vector<cv::Rect_<float> > face_detections;
+		std::vector<float> confidences;
+		const bool detect_success = LandmarkDetector::DetectFacesHOG(face_detections, intensity_img, detector, confidences, min_width, roi);
+
+		if (!detect_success)
+		{
+			// if not detected
+			o_region = cv::Rect_<float>(0, 0, 0, 0);
+			// A completely unreliable detection (shouldn't really matter what is returned here)
+			confidence = -2;
+			return detect_success;
+		}
+
+		// In case of multiple faces pick the biggest one
+		const bool use_size = true;
+		const bool use_preferred = (preference.x != -1) && (preference.y != -1);
+
+		// The value a face is ranked by: distance to the preference point (smaller is better),
+		// or its size or confidence (bigger is better)
+		auto score_of = [&](size_t index) -> float
+		{
+			const cv::Rect_<float>& face = face_detections[index];
+			if (use_preferred)
+			{
+				const float dx = preference.x - (face.width / 2 + face.x);
+				const float dy = preference.y - (face.height / 2 + face.y);
+				return sqrt(dx * dx + dy * dy);
+			}
+			if (use_size)
+			{
+				return (face.width + face.height) / 2.0;
+			}
+			return confidences[index];
+		};
+
+		// Start with the first face and replace it whenever a strictly better one turns up
+		int bestIndex = 0;
+		float best_so_far = score_of(0);
+
+		for (size_t i = 1; i < face_detections.size(); ++i)
+		{
+			const float candidate = score_of(i);
+			const bool better = use_preferred ? (candidate < best_so_far) : (candidate > best_so_far);
+
+			if (better)
+			{
+				best_so_far = candidate;
+				bestIndex = i;
+			}
+		}
+
+		o_region = face_detections[bestIndex];
+		confidence = confidences[bestIndex];
+
+		return detect_success;
+	}
+
+// Hands o_regions / o_confidences to detector.DetectFaces for image; true when any region came back
+bool DetectFacesMTCNN(std::vector<cv::Rect_<float> >& o_regions, const cv::Mat& image,
+	LandmarkDetector::FaceDetectorMTCNN& detector, std::vector<float>& o_confidences)
+{
+	detector.DetectFaces(o_regions, image, o_confidences);
+	return !o_regions.empty();
+}
+
+// Picks a single face out of the MTCNN detections of image.
+//   o_region    - receives the chosen rectangle (an empty rectangle when nothing was detected)
+//   confidence  - receives the score of the chosen face (-2 when nothing was detected)
+//   preference  - preferred face centre; only used when neither of its coordinates is -1
+// Selection rule: with a usable preference the face whose centre is nearest to it wins,
+// otherwise the widest face wins.
+// Returns true when at least one face was detected.
+bool DetectSingleFaceMTCNN(cv::Rect_<float>& o_region, const cv::Mat& image, LandmarkDetector::FaceDetectorMTCNN& detector,
+	float& confidence, cv::Point preference)
+{
+	// The detector can return several faces
+	std::vector<cv::Rect_<float> > faces;
+	std::vector<float> scores;
+	detector.DetectFaces(faces, image, scores);
+
+	const bool any_face = faces.size() > 0;
+	if (!any_face)
+	{
+		// No detection: empty rectangle and a completely unreliable confidence
+		o_region = cv::Rect_<float>(0, 0, 0, 0);
+		confidence = -2;
+		return any_face;
+	}
+
+	// A -1 in either coordinate switches the preference off
+	const bool nearest_to_preference = (preference.x != -1) && (preference.y != -1);
+
+	// Index and metric of the best face seen so far
+	size_t chosen = 0;
+	float chosen_metric = 0;
+
+	for (size_t n = 0; n < faces.size(); ++n)
+	{
+		const cv::Rect_<float>& face = faces[n];
+
+		float metric;
+		bool wins;
+
+		if (nearest_to_preference)
+		{
+			// Distance from the rectangle centre to the preferred point; smaller is better
+			const float off_x = preference.x - (face.width / 2 + face.x);
+			const float off_y = preference.y - (face.height / 2 + face.y);
+			metric = sqrt(off_x * off_x + off_y * off_y);
+			wins = metric < chosen_metric;
+		}
+		else
+		{
+			// Rectangle width; larger is better
+			metric = face.width;
+			wins = metric > chosen_metric;
+		}
+
+		// The first face always seeds the running best; later ones must be strictly better
+		if (n == 0 || wins)
+		{
+			chosen = n;
+			chosen_metric = metric;
+		}
+	}
+
+	// Hand the winner back to the caller
+	o_region = faces[chosen];
+	confidence = scores[chosen];
+	return any_face;
+}
+
+
+//============================================================================
+// Matrix reading functionality
+//============================================================================
+
+// Reads one matrix from a text stream laid out as "<rows> <cols> <OpenCV type id>" followed by the
+// whitespace-separated element values. Only the single-channel types CV_64FC1, CV_32FC1, CV_32SC1
+// and CV_8UC1 are handled; any other type id prints an error and aborts the process.
+void ReadMat(std::ifstream& stream, cv::Mat &output_mat)
+{
+	// Rows, columns and data type; zero-initialised so a failed extraction cannot leave indeterminate sizes
+	int row = 0, col = 0, type = 0;
+	stream >> row >> col >> type;
+
+	output_mat = cv::Mat(row, col, type);
+
+	switch (output_mat.type())
+	{
+	case CV_64FC1:
+	{
+		cv::MatIterator_<double> begin_it = output_mat.begin<double>();
+		cv::MatIterator_<double> end_it = output_mat.end<double>();
+		while (begin_it != end_it)
+		{
+			stream >> *begin_it++;
+		}
+	}
+	break;
+	case CV_32FC1:
+	{
+		cv::MatIterator_<float> begin_it = output_mat.begin<float>();
+		cv::MatIterator_<float> end_it = output_mat.end<float>();
+		while (begin_it != end_it)
+		{
+			stream >> *begin_it++;
+		}
+	}
+	break;
+	case CV_32SC1:
+	{
+		cv::MatIterator_<int> begin_it = output_mat.begin<int>();
+		cv::MatIterator_<int> end_it = output_mat.end<int>();
+		while (begin_it != end_it)
+		{
+			stream >> *begin_it++;
+		}
+	}
+	break;
+	case CV_8UC1:
+	{
+		cv::MatIterator_<uchar> begin_it = output_mat.begin<uchar>();
+		cv::MatIterator_<uchar> end_it = output_mat.end<uchar>();
+		while (begin_it != end_it)
+		{
+			// operator>> into an unsigned char extracts one raw character, not a number: read an int and narrow it
+			int value = 0;
+			stream >> value;
+			*begin_it++ = cv::saturate_cast<uchar>(value);
+		}
+	}
+	break;
+	default:
+		printf("ERROR(%s,%d) : Unsupported Matrix type %d!\n", __FILE__, __LINE__, output_mat.type()); abort();
+	}
+}
+
+// Reads one matrix from a binary stream: three 4-byte ints (rows, cols, OpenCV type id) followed by
+// the raw element data. The result replaces output_mat.
+void ReadMatBin(std::ifstream& stream, cv::Mat &output_mat)
+{
+	// Zero-initialised so a short or failed header read cannot feed indeterminate sizes to cv::Mat
+	int row = 0, col = 0, type = 0;
+	stream.read((char*)&row, 4);
+	stream.read((char*)&col, 4);
+	stream.read((char*)&type, 4);
+
+	output_mat = cv::Mat(row, col, type);
+	// Byte count kept out of int arithmetic: rows * cols * elemSize() can overflow an int for large matrices
+	stream.read((char *)output_mat.data, (std::streamsize)(output_mat.total() * output_mat.elemSize()));
+}
+
+// Discards whole lines for as long as the next character is '#', a space, '\n' or '\r'
+void SkipComments(std::ifstream& stream)
+{
+	std::string discarded;
+	for (int next = stream.peek(); next == '#' || next == '\n' || next == ' ' || next == '\r'; next = stream.peek())
+	{
+		std::getline(stream, discarded);
+	}
+}
+
+}
--- a/pkg/OpenFace/lib/local/LandmarkDetector/src/PAW.cpp
+++ b/pkg/OpenFace/lib/local/LandmarkDetector/src/PAW.cpp
@@ -0,0 +1,515 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
+// all rights reserved.
+//
+// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
+//
+// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.  
+// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
+//
+// License can be found in OpenFace-license.txt
+//
+//     * Any publications arising from the use of this software, including but
+//       not limited to academic journal and conference publications, technical
+//       reports and manuals, must cite at least one of the following works:
+//
+//       OpenFace 2.0: Facial Behavior Analysis Toolkit
+//       Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
+//       in IEEE International Conference on Automatic Face and Gesture Recognition, 2018  
+//
+//       Convolutional experts constrained local model for facial landmark detection.
+//       A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
+//       in Computer Vision and Pattern Recognition Workshops, 2017.    
+//
+//       Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
+//       Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling 
+//       in IEEE International. Conference on Computer Vision (ICCV),  2015 
+//
+//       Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
+//       Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson 
+//       in Facial Expression Recognition and Analysis Challenge, 
+//       IEEE International Conference on Automatic Face and Gesture Recognition, 2015 
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stdafx.h"
+
+#include "PAW.h"
+
+// OpenCV includes
+#include <opencv2/core/core.hpp>
+#include <opencv2/imgproc.hpp>
+
+#include "LandmarkDetectorUtils.h"
+
+using namespace LandmarkDetector;
+
+// Copy constructor: every matrix member is clone()d from other, the scalar members are copied by value
+PAW::PAW(const PAW& other) :
+	destination_landmarks(other.destination_landmarks.clone()), source_landmarks(other.source_landmarks.clone()), triangulation(other.triangulation.clone()), triangle_id(other.triangle_id.clone()), pixel_mask(other.pixel_mask.clone()),
+	coefficients(other.coefficients.clone()), alpha(other.alpha.clone()), beta(other.beta.clone()), map_x(other.map_x.clone()), map_y(other.map_y.clone())
+{
+	number_of_pixels = other.number_of_pixels;
+	min_x = other.min_x; min_y = other.min_y;
+}
+
+// Builds the warp for the destination (reference) frame described by destination_shape and triangulation;
+// the frame's bounding box is taken from the extremes of the landmarks.
+//   destination_shape - column vector: the first half of its rows is read as x, the second half as y
+//   triangulation     - one row per triangle, holding three landmark indices
+// NOTE(review): number_of_pixels and source_landmarks are not assigned by this constructor.
+PAW::PAW(const cv::Mat_<float>& destination_shape, const cv::Mat_<int>& triangulation)
+{
+	// Initialise some variables directly
+	this->destination_landmarks = destination_shape;
+	this->triangulation = triangulation;
+	int num_points = destination_shape.rows / 2;
+	int num_tris = triangulation.rows;
+
+	// Pre-compute the rest: one row of three affine coefficients per triangle (filled in the loop below)
+	alpha = cv::Mat_<float>(num_tris, 3);
+	beta = cv::Mat_<float>(num_tris, 3);
+
+	// Views of the x half and the y half of the shape vector
+	cv::Mat_<float> xs = destination_shape(cv::Rect(0, 0, 1, num_points));
+	cv::Mat_<float> ys = destination_shape(cv::Rect(0, num_points, 1, num_points));
+
+	// Create a vector representation of the control points (one 10-element entry per triangle)
+	std::vector<std::vector<float>> destination_points;
+
+	for (int tri = 0; tri < num_tris; ++tri)
+	{
+		// Landmark indices of the three vertices of this triangle
+		int j = triangulation.at<int>(tri, 0);
+		int k = triangulation.at<int>(tri, 1);
+		int l = triangulation.at<int>(tri, 2);
+
+		// Edge vectors starting at vertex j: (c2, c1) leads to vertex l and (c3, c4) to vertex k
+		float c1 = ys.at<float>(l) - ys.at<float>(j);
+		float c2 = xs.at<float>(l) - xs.at<float>(j);
+		float c4 = ys.at<float>(k) - ys.at<float>(j);
+		float c3 = xs.at<float>(k) - xs.at<float>(j);
+
+		// Cross product of the two edges; zero for a degenerate triangle, which the divisions below do not guard against
+		float c5 = c3*c1 - c2*c4;
+		// alpha[0] + alpha[1] * x + alpha[2] * y is 1 at vertex k and 0 at vertices j and l
+		alpha.at<float>(tri, 0) = (ys.at<float>(j) * c2 - xs.at<float>(j) * c1) / c5;
+		alpha.at<float>(tri, 1) = c1 / c5;
+		alpha.at<float>(tri, 2) = -c2 / c5;
+
+		// beta[0] + beta[1] * x + beta[2] * y is 1 at vertex l and 0 at vertices j and k
+		beta.at<float>(tri, 0) = (xs.at<float>(j) * c4 - ys.at<float>(j) * c3) / c5;
+		beta.at<float>(tri, 1) = -c4 / c5;
+		beta.at<float>(tri, 2) = c3 / c5;
+
+		// Add points corresponding to triangles as optimisation:
+		// [0..5] x/y pairs of vertices j, k, l; [6],[7] maximum x/y; [8],[9] minimum x/y
+		std::vector<float> triangle_points(10);
+		triangle_points[0] = xs.at<float>(j);
+		triangle_points[1] = ys.at<float>(j);
+		triangle_points[2] = xs.at<float>(k);
+		triangle_points[3] = ys.at<float>(k);
+		triangle_points[4] = xs.at<float>(l);
+		triangle_points[5] = ys.at<float>(l);
+		cv::Vec3f xs_three(triangle_points[0], triangle_points[2], triangle_points[4]);
+		cv::Vec3f ys_three(triangle_points[1], triangle_points[3], triangle_points[5]);
+		// Bounding box of this triangle; these locals are distinct from the min_x / min_y assigned after the loop
+		double min_x, max_x, min_y, max_y;
+		cv::minMaxIdx(xs_three, &min_x, &max_x);
+		cv::minMaxIdx(ys_three, &min_y, &max_y);
+		triangle_points[6] = (float)max_x;
+		triangle_points[7] = (float)max_y;
+		triangle_points[8] = (float)min_x;
+		triangle_points[9] = (float)min_y;
+		destination_points.push_back(triangle_points);
+	}
+
+	// Bounding box over all landmarks
+	double max_x;
+	double max_y;
+	double min_x_d;
+	double min_y_d;
+	minMaxLoc(xs, &min_x_d, &max_x);
+	minMaxLoc(ys, &min_y_d, &max_y);
+
+	// Top-left corner of the destination frame: pixel (x, y) stands for the point (x + min_x, y + min_y)
+	min_x = min_x_d;
+	min_y = min_y_d;
+
+	// Frame size: the extent truncated after adding 1.5, i.e. rounded to the nearest integer plus one
+	int w = (int)(max_x - min_x + 1.5);
+	int h = (int)(max_y - min_y + 1.5);
+	// Round the min_x and min_y for simplicity?
+
+	// pixel_mask starts as all 0 and triangle_id as all -1; the loop below overwrites the covered pixels
+	pixel_mask = cv::Mat_<uchar>(h, w, (uchar)0);
+	triangle_id = cv::Mat_<int>(h, w, -1);
+	// The previous result is fed back as the guess, which findTriangle tests before scanning all triangles
+	int curr_tri = -1;
+	for (int y = 0; y < pixel_mask.rows; y++)
+	{
+		for (int x = 0; x < pixel_mask.cols; x++)
+		{
+			curr_tri = findTriangle(cv::Point_<float>(x + min_x, y + min_y), destination_points, curr_tri);
+			// If there is a triangle at this location
+			if (curr_tri != -1)
+			{
+				triangle_id.at<int>(y, x) = curr_tri;
+				pixel_mask.at<uchar>(y, x) = 1;
+			}
+		}
+	}
+
+	// Preallocate maps and coefficients
+	coefficients.create(num_tris, 6);
+	map_x.create(pixel_mask.rows, pixel_mask.cols);
+	map_y.create(pixel_mask.rows, pixel_mask.cols);
+}
+
+// Builds the warp for the destination (reference) frame described by destination_shape and triangulation,
+// with the frame's bounding box supplied by the caller (in_min_x, in_min_y, in_max_x, in_max_y).
+//   destination_shape - column vector: the first half of its rows is read as x, the second half as y
+//   triangulation     - one row per triangle, holding three landmark indices
+// NOTE(review): number_of_pixels and source_landmarks are not assigned by this constructor.
+PAW::PAW(const cv::Mat_<float>& destination_shape, const cv::Mat_<int>& triangulation, float in_min_x, float in_min_y, float in_max_x, float in_max_y)
+{
+	// Initialise some variables directly
+	this->destination_landmarks = destination_shape;
+	this->triangulation = triangulation;
+
+	int num_points = destination_shape.rows / 2;
+	int num_tris = triangulation.rows;
+
+	// Pre-compute the rest: one row of three affine coefficients per triangle (filled in the loop below)
+	alpha = cv::Mat_<float>(num_tris, 3);
+	beta = cv::Mat_<float>(num_tris, 3);
+
+	// Views of the x half and the y half of the shape vector
+	cv::Mat_<float> xs = destination_shape(cv::Rect(0, 0, 1, num_points));
+	cv::Mat_<float> ys = destination_shape(cv::Rect(0, num_points, 1, num_points));
+
+	// Create a vector representation of the control points (one 10-element entry per triangle)
+	std::vector<std::vector<float>> destination_points;
+
+	for (int tri = 0; tri < num_tris; ++tri)
+	{
+		// Landmark indices of the three vertices of this triangle
+		int j = triangulation.at<int>(tri, 0);
+		int k = triangulation.at<int>(tri, 1);
+		int l = triangulation.at<int>(tri, 2);
+
+		// Edge vectors starting at vertex j: (c2, c1) leads to vertex l and (c3, c4) to vertex k
+		float c1 = ys.at<float>(l) - ys.at<float>(j);
+		float c2 = xs.at<float>(l) - xs.at<float>(j);
+		float c4 = ys.at<float>(k) - ys.at<float>(j);
+		float c3 = xs.at<float>(k) - xs.at<float>(j);
+
+		// Cross product of the two edges; zero for a degenerate triangle, which the divisions below do not guard against
+		float c5 = c3*c1 - c2*c4;
+		// alpha[0] + alpha[1] * x + alpha[2] * y is 1 at vertex k and 0 at vertices j and l
+		alpha.at<float>(tri, 0) = (ys.at<float>(j) * c2 - xs.at<float>(j) * c1) / c5;
+		alpha.at<float>(tri, 1) = c1 / c5;
+		alpha.at<float>(tri, 2) = -c2 / c5;
+
+		// beta[0] + beta[1] * x + beta[2] * y is 1 at vertex l and 0 at vertices j and k
+		beta.at<float>(tri, 0) = (xs.at<float>(j) * c4 - ys.at<float>(j) * c3) / c5;
+		beta.at<float>(tri, 1) = -c4 / c5;
+		beta.at<float>(tri, 2) = c3 / c5;
+
+		// Add points corresponding to triangles as optimisation:
+		// [0..5] x/y pairs of vertices j, k, l; [6],[7] maximum x/y; [8],[9] minimum x/y
+		std::vector<float> triangle_points(10);
+		triangle_points[0] = xs.at<float>(j);
+		triangle_points[1] = ys.at<float>(j);
+		triangle_points[2] = xs.at<float>(k);
+		triangle_points[3] = ys.at<float>(k);
+		triangle_points[4] = xs.at<float>(l);
+		triangle_points[5] = ys.at<float>(l);
+		cv::Vec3f xs_three(triangle_points[0], triangle_points[2], triangle_points[4]);
+		cv::Vec3f ys_three(triangle_points[1], triangle_points[3], triangle_points[5]);
+		// Bounding box of this triangle; these locals are distinct from the min_x / min_y and max_x / max_y used after the loop
+		double min_x, max_x, min_y, max_y;
+		cv::minMaxIdx(xs_three, &min_x, &max_x);
+		cv::minMaxIdx(ys_three, &min_y, &max_y);
+		triangle_points[6] = (float)max_x;
+		triangle_points[7] = (float)max_y;
+		triangle_points[8] = (float)min_x;
+		triangle_points[9] = (float)min_y;
+		destination_points.push_back(triangle_points);
+	}
+
+	// Bounding box as given by the caller; pixel (x, y) stands for the point (x + min_x, y + min_y)
+	float max_x;
+	float max_y;
+	min_x = in_min_x;
+	min_y = in_min_y;
+	max_x = in_max_x;
+	max_y = in_max_y;
+
+	// Frame size: the extent truncated after adding 1.5, i.e. rounded to the nearest integer plus one
+	int w = (int)(max_x - min_x + 1.5);
+	int h = (int)(max_y - min_y + 1.5);
+	// Round the min_x and min_y for simplicity?
+	// pixel_mask starts as all 0 and triangle_id as all -1; the loop below overwrites the covered pixels
+	pixel_mask = cv::Mat_<uchar>(h, w, (uchar)0);
+	triangle_id = cv::Mat_<int>(h, w, -1);
+	// The previous result is fed back as the guess, which findTriangle tests before scanning all triangles
+	int curr_tri = -1;
+	for (int y = 0; y < pixel_mask.rows; y++)
+	{
+		for (int x = 0; x < pixel_mask.cols; x++)
+		{
+			curr_tri = findTriangle(cv::Point_<float>(x + min_x, y + min_y), destination_points, curr_tri);
+			// If there is a triangle at this location
+			if (curr_tri != -1)
+			{
+				triangle_id.at<int>(y, x) = curr_tri;
+				pixel_mask.at<uchar>(y, x) = 1;
+			}
+		}
+	}
+
+	// Preallocate maps and coefficients
+	coefficients.create(num_tris, 6);
+	map_x.create(pixel_mask.rows, pixel_mask.cols);
+	map_y.create(pixel_mask.rows, pixel_mask.cols);
+}
+
+//===========================================================================
+void PAW::Read(std::ifstream& stream)
+{
+	// Stream order: 4-byte pixel count, 8-byte min x, 8-byte min y, then six ReadMatBin matrices:
+	// destination landmarks, triangulation, triangle ids, pixel mask, alpha and beta.
+	// Landmarks, alpha and beta go through a double temporary and are converted to float.
+	stream.read((char*)&number_of_pixels, 4);
+	double stored_min_x, stored_min_y;
+	stream.read((char*)&stored_min_x, 8);
+	stream.read((char*)&stored_min_y, 8);
+	min_x = (float)stored_min_x;
+	min_y = (float)stored_min_y;
+
+	// Scratch matrix reused for every block that is converted from double
+	cv::Mat_<double> raw;
+	ReadMatBin(stream, raw);
+	raw.convertTo(destination_landmarks, CV_32F);
+
+	// Read straight into the members, without conversion
+	ReadMatBin(stream, triangulation);
+	ReadMatBin(stream, triangle_id);
+
+	cv::Mat mask_raw;
+	ReadMatBin(stream, mask_raw);
+	mask_raw.convertTo(pixel_mask, CV_8U);
+
+	ReadMatBin(stream, raw);
+	raw.convertTo(alpha, CV_32F);
+	ReadMatBin(stream, raw);
+	raw.convertTo(beta, CV_32F);
+
+	// Work buffers sized from what was just read
+	map_x.create(pixel_mask.rows, pixel_mask.cols);
+	map_y.create(pixel_mask.rows, pixel_mask.cols);
+	coefficients.create(NumberOfTriangles(), 6);
+	// The source shape starts out as the destination shape
+	source_landmarks = destination_landmarks;
+}
+
+//=============================================================================
+// Warps image_to_warp into the destination frame of this PAW and writes the result to destination_image
+// (per the original note, used to determine if shape fitting converged successfully).
+// landmarks_to_warp is the shape to sample from; a copy of it is kept in source_landmarks.
+void PAW::Warp(const cv::Mat& image_to_warp, cv::Mat& destination_image, const cv::Mat_<float>& landmarks_to_warp)
+{
+	// Keep a private copy of the shape the pixels are sampled from
+	source_landmarks = landmarks_to_warp.clone();
+
+	// Refresh the per-triangle mapping coefficients for that shape ...
+	CalcCoeff();
+
+	// ... and expand them into per-pixel sampling coordinates (where to warp from)
+	WarpRegion(map_x, map_y);
+
+	// Do the actual warp (with bi-linear interpolation)
+	remap(image_to_warp, destination_image, map_x, map_y, cv::INTER_LINEAR);
+}
+
+
+//=============================================================================
+// Recomputes `coefficients` from source_landmarks. Row t holds six numbers such that a destination point
+// (x, y) lying in triangle t is sampled from the source point
+//   (row[0] + row[1] * x + row[2] * y,  row[3] + row[4] * x + row[5] * y)
+void PAW::CalcCoeff()
+{
+	// Rows [0, y_offset) feed the x mapping, the rows y_offset further down feed the y mapping
+	const int y_offset = this->NumberOfLandmarks();
+	for (int tri = 0; tri < this->NumberOfTriangles(); tri++)
+	{
+		// Landmark indices of the three vertices
+		const int v0 = triangulation.at<int>(tri, 0);
+		const int v1 = triangulation.at<int>(tri, 1);
+		const int v2 = triangulation.at<int>(tri, 2);
+
+		// Source position of the first vertex, and the offsets of the other two from it
+		const float x0 = source_landmarks.at<float>(v0, 0);
+		const float dx1 = source_landmarks.at<float>(v1, 0) - x0;
+		const float dx2 = source_landmarks.at<float>(v2, 0) - x0;
+		const float y0 = source_landmarks.at<float>(v0 + y_offset, 0);
+		const float dy1 = source_landmarks.at<float>(v1 + y_offset, 0) - y0;
+		const float dy2 = source_landmarks.at<float>(v2 + y_offset, 0) - y0;
+
+		// Row being written and the precomputed alpha / beta rows of this triangle
+		float *out = coefficients.ptr<float>(tri);
+		const float *a = alpha.ptr<float>(tri);
+		const float *b = beta.ptr<float>(tri);
+		out[0] = x0 + dx1 * a[0] + dx2 * b[0];
+		out[1] = dx1 * a[1] + dx2 * b[1];
+		out[2] = dx1 * a[2] + dx2 * b[2];
+		out[3] = y0 + dy1 * a[0] + dy2 * b[0];
+		out[4] = dy1 * a[1] + dy2 * b[1];
+		out[5] = dy1 * a[2] + dy2 * b[2];
+	}
+}
+
+//======================================================================
+// Fills mapx / mapy with, for every pixel of the destination frame, the source coordinates that pixel
+// is sampled from. Pixels that are not covered by the mask get (-1, -1).
+// The coefficient rows are read as they are, so CalcCoeff() has to have been run for the shape
+// that is being warped.
+void PAW::WarpRegion(cv::Mat_<float>& mapx, cv::Mat_<float>& mapy)
+{
+	// The two outputs, the mask and the triangle ids are walked in lock-step, one element per pixel
+	cv::MatIterator_<float> out_x = mapx.begin();
+	cv::MatIterator_<float> out_y = mapy.begin();
+	cv::MatIterator_<uchar> mask_it = pixel_mask.begin();
+	cv::MatIterator_<int>   tri_it = triangle_id.begin();
+
+	// Coefficient row of the most recently used triangle; reloaded only when the triangle index changes.
+	// cached_tri starts at -1 so that the first masked pixel loads a row.
+	float *tri_coeffs = nullptr;
+	int cached_tri = -1;
+
+	for (int row = 0; row < pixel_mask.rows; row++)
+	{
+		// Destination-frame y of this row
+		const float yi = float(row) + min_y;
+
+		for (int col = 0; col < pixel_mask.cols; col++)
+		{
+			if (*mask_it == 0)
+			{
+				// Outside every triangle
+				*out_x = -1;
+				*out_y = -1;
+			}
+			else
+			{
+				// Destination-frame x of this pixel
+				const float xi = float(col) + min_x;
+
+				// Triangle covering this pixel
+				const int tri = *tri_it;
+
+				if (tri != cached_tri)
+				{
+					// A different triangle starts here: point at its coefficient row
+					tri_coeffs = coefficients.ptr<float>(tri);
+					cached_tri = tri;
+				}
+
+				// Source x: coefficient 0 is the offset, 1 the scale along x and 2 the scale along y
+				float xo = tri_coeffs[0];
+				xo += tri_coeffs[1] * xi;
+				*out_x = float(xo + tri_coeffs[2] * yi);
+
+				// Source y: the same form with coefficients 3 (offset), 4 (along x) and 5 (along y)
+				float yo = tri_coeffs[3];
+				yo += tri_coeffs[4] * xi;
+				*out_y = float(yo + tri_coeffs[5] * yi);
+			}
+
+			// Step all four iterators to the next pixel
+			++mask_it;
+			++tri_it;
+			++out_x;
+			++out_y;
+		}
+	}
+}
+
+// ============================================================
+// Helper functions to determine which triangle a point lies in
+// ============================================================
+
+// True when (x0, y0) and (x1, y1) are on the same side of the line through (x2, y2) and (x3, y3);
+// a point lying exactly on that line counts as being on the same side
+bool PAW::sameSide(float x0, float y0, float x1, float y1, float x2, float y2, float x3, float y3)
+{
+	const float dir_x = x3 - x2;
+	const float dir_y = y3 - y2;
+	const float side_0 = dir_x * (y0 - y2) - (x0 - x2) * dir_y;
+	const float side_1 = dir_x * (y1 - y2) - (x1 - x2) * dir_y;
+	return side_0 * side_1 >= 0;
+}
+
+// (x0, y0) is inside the triangle (x1,y1), (x2,y2), (x3,y3), or on its border, when for each edge
+// it lies on the same side of that edge as the vertex opposite to it.
+// The three checks have no side effects, so chaining them with && gives the same answer as running all of them.
+bool PAW::pointInTriangle(float x0, float y0, float x1, float y1, float x2, float y2, float x3, float y3)
+{
+	// Each test pairs one vertex with the edge formed by the other two
+	return sameSide(x0, y0, x1, y1, x2, y2, x3, y3)
+		&& sameSide(x0, y0, x2, y2, x1, y1, x3, y3)
+		&& sameSide(x0, y0, x3, y3, x1, y1, x2, y2);
+}
+
+// Returns the index of the first triangle in control_points that contains point, or -1 when none does.
+//   control_points - one entry per triangle: [0..5] are the x/y pairs of its three vertices,
+//                    [6],[7] its maximum x/y and [8],[9] its minimum x/y
+//   guess          - index of a triangle to test before all others (-1 for no guess)
+int PAW::findTriangle(const cv::Point_<float>& point, const std::vector<std::vector<float>>& control_points, int guess)
+{
+	const float px = point.x;
+	const float py = point.y;
+
+	// Allow a guess for speed (so as not to go through all triangles)
+	if (guess != -1)
+	{
+		const std::vector<float>& hinted = control_points[guess];
+		if (pointInTriangle(px, py, hinted[0], hinted[1], hinted[2], hinted[3], hinted[4], hinted[5]))
+		{
+			return guess;
+		}
+	}
+
+	// Full scan, in index order
+	const int triangle_count = control_points.size();
+
+	for (int idx = 0; idx < triangle_count; ++idx)
+	{
+		const std::vector<float>& corners = control_points[idx];
+
+		// Bounding box of the triangle
+		const float box_max_x = corners[6];
+		const float box_max_y = corners[7];
+		const float box_min_x = corners[8];
+		const float box_min_y = corners[9];
+
+		// Skip the exact test when the point is outside the bounding box of the triangle
+		const bool outside_box = box_max_x < px || box_min_x > px || box_max_y < py || box_min_y > py;
+		if (outside_box)
+		{
+			continue;
+		}
+
+		if (pointInTriangle(px, py,
+			corners[0], corners[1],
+			corners[2], corners[3],
+			corners[4], corners[5]))
+		{
+			// The first hit wins
+			return idx;
+		}
+	}
+
+	// No triangle contains the point
+	return -1;
+}
--- a/pkg/OpenFace/lib/local/LandmarkDetector/src/PDM.cpp
+++ b/pkg/OpenFace/lib/local/LandmarkDetector/src/PDM.cpp
@@ -0,0 +1,738 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
+// all rights reserved.
+//
+// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
+//
+// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.  
+// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
+//
+// License can be found in OpenFace-license.txt
+//
+//     * Any publications arising from the use of this software, including but
+//       not limited to academic journal and conference publications, technical
+//       reports and manuals, must cite at least one of the following works:
+//
+//       OpenFace 2.0: Facial Behavior Analysis Toolkit
+//       Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
+//       in IEEE International Conference on Automatic Face and Gesture Recognition, 2018  
+//
+//       Convolutional experts constrained local model for facial landmark detection.
+//       A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
+//       in Computer Vision and Pattern Recognition Workshops, 2017.    
+//
+//       Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
+//       Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling 
+//       in IEEE International. Conference on Computer Vision (ICCV),  2015 
+//
+//       Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
+//       Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson 
+//       in Facial Expression Recognition and Analysis Challenge, 
+//       IEEE International Conference on Automatic Face and Gesture Recognition, 2015 
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stdafx.h"
+
+#include <PDM.h>
+#include <RotationHelpers.h>
+
+// OpenCV include
+#include <opencv2/core/core.hpp>
+#include <opencv2/imgproc.hpp>
+
+// Math includes
+#define _USE_MATH_DEFINES
+#include <cmath>
+
+#ifndef M_PI
+	#define M_PI 3.14159265358979323846
+#endif
+
+#include <LandmarkDetectorUtils.h>
+
+using namespace LandmarkDetector;
+//===========================================================================
+
+//=============================================================================
+// Orthonormalising the 3x3 rotation matrix: R is replaced by U * diag(1, 1, det(U * Vt)) * Vt,
+// where U and Vt come from the singular value decomposition of the incoming R
+void PDM::Orthonormalise(cv::Matx33f &R)
+{
+
+	cv::SVD svd(R, cv::SVD::MODIFY_A);
+
+	// get the orthogonal matrix from the initial rotation matrix
+	cv::Mat_ <float> X = svd.u*svd.vt;
+
+	// This makes sure that the handedness is preserved and no reflection happened
+	// by making sure the determinant is 1 and not -1: scaling the last singular direction
+	// by det(U * Vt) flips it exactly when U * Vt is a reflection
+	cv::Mat_<float> W = cv::Mat_<float>::eye(3,3);
+	W(2,2) = (float)determinant(X);
+	cv::Mat Rt = svd.u*W*svd.vt;
+
+	Rt.copyTo(R);
+}
+
+// Copy constructor: the new PDM is built from clone()s of the mean shape,
+// the principal components and the eigenvalues of other
+PDM::PDM(const PDM& other) :
+	mean_shape(other.mean_shape.clone()),
+	princ_comp(other.princ_comp.clone()),
+	eigen_values(other.eigen_values.clone())
+{
+}
+
+//===========================================================================
+// Clamping the parameter values to be within 3 standard deviations, where the standard deviation of
+// the i-th local parameter is taken to be sqrt(eigen_values[i]).
+// params_global and parameters are not used at present: the pose limiting that would need them is
+// commented out at the end of the function.
+void PDM::Clamp(cv::Mat_<float>& local_params, cv::Vec6f& params_global, const FaceModelParameters& parameters)
+{
+	const float n_sigmas = 3;
+
+	cv::MatConstIterator_<float> eigen_it = this->eigen_values.begin();
+	cv::MatIterator_<float> param_it = local_params.begin();
+
+	// go over all parameters, stepping through the eigenvalues alongside
+	for (; param_it != local_params.end(); ++param_it, ++eigen_it)
+	{
+		// Largest magnitude allowed for this parameter
+		const float limit = n_sigmas*sqrt(*eigen_it);
+
+		// if the value is too extreme clamp it, keeping its sign
+		if (*param_it > limit)
+		{
+			// above the upper bound
+			*param_it = limit;
+		}
+		else if (*param_it < -limit)
+		{
+			// below the lower bound
+			*param_it = -limit;
+		}
+	}
+
+	// do not let the pose get out of hand
+	//if(parameters.limit_pose)
+	//{
+	//	if(params_global[1] > M_PI / 2)
+	//		params_global[1] = M_PI/2;
+	//	if(params_global[1] < -M_PI / 2)
+	//		params_global[1] = -M_PI/2;
+	//	if(params_global[2] > M_PI / 2)
+	//		params_global[2] = M_PI/2;
+	//	if(params_global[2] < -M_PI / 2)
+	//		params_global[2] = -M_PI/2;
+	//	if(params_global[3] > M_PI / 2)
+	//		params_global[3] = M_PI/2;
+	//	if(params_global[3] < -M_PI / 2)
+	//		params_global[3] = -M_PI/2;
+	//}
+
+
+}
+//===========================================================================
+// Compute the 3D representation of shape (in object space) using the local parameters:
+// out_shape = mean_shape + princ_comp * p_local
+void PDM::CalcShape3D(cv::Mat_<float>& out_shape, const cv::Mat_<float>& p_local) const
+{
+	// Start from a copy of the mean shape; the sgemm_ call below adds onto it because beta1 is 1
+	out_shape = mean_shape.clone();
+	// Perform matrix vector multiplication in OpenBLAS (fortran call).
+	// sgemm_ reads its buffers as column-major, so the row-major OpenCV data is seen transposed: the call
+	// computes p_local^T * princ_comp^T, i.e. (princ_comp * p_local)^T. Assumes continuous matrices - TODO confirm
+	float alpha1 = 1.0;
+	float beta1 = 1.0;
+	int p_local_cols = p_local.cols;
+	int princ_comp_rows = princ_comp.rows;
+	int princ_comp_cols = princ_comp.cols;
+	char N[2]; N[0] = 'N'; // 'N': BLAS applies no transpose of its own; N[1] is left unset
+	sgemm_(N, N, &p_local_cols, &princ_comp_rows, &princ_comp_cols, &alpha1, (float*)p_local.data, &p_local_cols, (float*)princ_comp.data, &princ_comp_cols, &beta1, (float*)out_shape.data, &p_local_cols);
+	// Above is a fast (but ugly) version of out_shape = mean_shape + princ_comp * p_local;
+}
+
+//===========================================================================
+// Get the 2D shape (in image space) from global and local parameters: the 3D shape built from
+// params_local is rotated by the Euler angles params_global[1..3], scaled by params_global[0] and
+// shifted by (params_global[4], params_global[5]). out_shape holds n x values followed by n y values.
+void PDM::CalcShape2D(cv::Mat_<float>& out_shape, const cv::Mat_<float>& params_local, const cv::Vec6f& params_global) const
+{
+	const int num_points = this->NumberOfPoints();
+	// Rigid part of the parameters: scale, translation and rotation
+	const float scale = params_global[0];
+	const float shift_x = params_global[4];
+	const float shift_y = params_global[5];
+	const cv::Matx33f rot = Utilities::Euler2RotationMatrix(cv::Vec3f(params_global[1], params_global[2], params_global[3]));
+
+	// Non-rigid part: the 3D shape of the object, read below as all x, then all y, then all z
+	cv::Mat_<float> shape_3d;
+	this->CalcShape3D(shape_3d, params_local);
+
+	// create the 2D shape matrix (if it has not been defined yet)
+	if((out_shape.rows != 2 * mean_shape.rows / 3) || (out_shape.cols != 1))
+	{
+		out_shape.create(2*num_points,1);
+	}
+	for(int pt = 0; pt < num_points; pt++)
+	{
+		const float X = shape_3d.at<float>(pt, 0);
+		const float Y = shape_3d.at<float>(pt + num_points, 0);
+		const float Z = shape_3d.at<float>(pt + num_points * 2, 0);
+		// Transform this using the weak-perspective mapping to 2D from 3D (only the first two rotation rows are used)
+		out_shape.at<float>(pt, 0) = scale * ( rot(0,0) * X + rot(0,1) * Y + rot(0,2) * Z ) + shift_x;
+		out_shape.at<float>(pt + num_points, 0) = scale * ( rot(1,0) * X + rot(1,1) * Y + rot(1,2) * Z ) + shift_y;
+	}
+}
+
+//===========================================================================
+// Provided the bounding box of a face and the local parameters (with optional rotation), generates the global
+// parameters (scale, the given rotation, translation) that can generate the face with the provided bounding box.
+// This all assumes that the bounding box describes face from left outline to right outline of the face and chin to eyebrows
+void PDM::CalcParams(cv::Vec6f& out_params_global, const cv::Rect_<float>& bounding_box, const cv::Mat_<float>& params_local, const cv::Vec3f rotation)
+{
+	// get the shape instance based on local params
+	cv::Mat_<float> current_shape(mean_shape.size());
+	CalcShape3D(current_shape, params_local);
+
+	// rotate the shape: the column vector is reshaped into 3 rows so that all points are rotated at once
+	cv::Matx33f rotation_matrix = Utilities::Euler2RotationMatrix(rotation);
+	cv::Mat_<float> reshaped = current_shape.reshape(1, 3);
+	cv::Mat rotated_shape = (cv::Mat(rotation_matrix) * reshaped);
+
+	// Get the width and height of expected shape from the extremes of rows 0 (x) and 1 (y)
+	double min_x;
+	double max_x;
+	cv::minMaxLoc(rotated_shape.row(0), &min_x, &max_x);
+
+	double min_y;
+	double max_y;
+	cv::minMaxLoc(rotated_shape.row(1), &min_y, &max_y);
+
+	// std::abs from <cmath> picks the floating-point overload; an unqualified abs can bind to the
+	// integer ::abs and silently truncate the extent
+	float width = (float)std::abs(min_x - max_x);
+	float height = (float)std::abs(min_y - max_y);
+
+	// Scale: mean of the horizontal and the vertical ratio between the box and the model extent
+	float scaling = ((bounding_box.width / width) + (bounding_box.height / height)) / 2.0f;
+
+	// The estimate of face center also needs some correction
+	float tx = bounding_box.x + bounding_box.width / 2;
+	float ty = bounding_box.y + bounding_box.height / 2;
+
+	// Correct it so that the bounding box is just around the minimum and maximum point in the initialised face
+	tx = tx - scaling * (min_x + max_x)/2.0f;
+	ty = ty - scaling * (min_y + max_y)/2.0f;
+
+	out_params_global = cv::Vec6f(scaling, rotation[0], rotation[1], rotation[2], tx, ty);
+}
+
+//===========================================================================
+// provided the model parameters, compute the bounding box of a face
+// The bounding box describes face from left outline to right outline of the face and chin to eyebrows
+void PDM::CalcBoundingBox(cv::Rect_<float>& out_bounding_box, const cv::Vec6f& params_global, const cv::Mat_<float>& params_local)
+{
+	// get the 2D (image space) shape instance for these parameters
+	cv::Mat_<float> current_shape;
+	CalcShape2D(current_shape, params_local, params_global);
+
+	// Extremes of the projected shape, as reported by ExtractBoundingBox
+	float min_x, max_x, min_y, max_y;
+	ExtractBoundingBox(current_shape, min_x, max_x, min_y, max_y);
+
+	// std::abs from <cmath> picks the floating-point overload; an unqualified abs can bind to the
+	// integer ::abs, which would silently truncate the box size to whole pixels
+	float width = std::abs(min_x - max_x);
+	float height = std::abs(min_y - max_y);
+	out_bounding_box = cv::Rect_<float>(min_x, min_y, width, height);
+}
+
+//===========================================================================
+// Calculate the PDM's Jacobian over rigid parameters (rotation, translation and scaling), the additional input W represents trust for each of the landmarks and is part of Non-Uniform RLMS 
+//
+// Outputs:
+//   Jacob     - (2 * n) x 6 matrix: the x rows of all points followed by the y rows;
+//               columns are scale, three rotation terms, x translation, y translation
+//   Jacob_t_w - transpose of Jacob after every row was multiplied by the matching diagonal entry of W
+void PDM::ComputeRigidJacobian(const cv::Mat_<float>& p_local, const cv::Vec6f& params_global, cv::Mat_<float> &Jacob, const cv::Mat_<float> W, cv::Mat_<float> &Jacob_t_w)
+{
+
+	const int num_points = this->NumberOfPoints();
+
+	Jacob.create(num_points * 2, 6);
+
+	const float scale = params_global[0];
+
+	// 3D shape produced by the non-rigid parameters, laid out as all x, then all y, then all z
+	cv::Mat_<float> shape_3D;
+	this->CalcShape3D(shape_3D, p_local);
+
+	// Only the first two rows of the rotation matrix take part in the 2D projection
+	const cv::Matx33f rot = Utilities::Euler2RotationMatrix(cv::Vec3f(params_global[1], params_global[2], params_global[3]));
+	const float r11 = rot(0,0);
+	const float r12 = rot(0,1);
+	const float r13 = rot(0,2);
+	const float r21 = rot(1,0);
+	const float r22 = rot(1,1);
+	const float r23 = rot(1,2);
+
+	for (int pt = 0; pt < num_points; ++pt)
+	{
+		const int row_x = pt;
+		const int row_y = pt + num_points;
+
+		const float X = shape_3D.at<float>(pt, 0);
+		const float Y = shape_3D.at<float>(pt + num_points, 0);
+		const float Z = shape_3D.at<float>(pt + num_points * 2, 0);
+
+		// The rotation columns come from the small angle approximation R * R' with
+		// R' = [1, -wz, wy
+		//       wz, 1, -wx
+		//       -wy, wx, 1]
+
+		// scaling term
+		Jacob(row_x, 0) = (X  * r11 + Y * r12 + Z * r13);
+		Jacob(row_y, 0) = (X  * r21 + Y * r22 + Z * r23);
+
+		// rotation terms
+		Jacob(row_x, 1) = (scale * (Y * r13 - Z * r12) );
+		Jacob(row_y, 1) = (scale * (Y * r23 - Z * r22) );
+		Jacob(row_x, 2) = (-scale * (X * r13 - Z * r11));
+		Jacob(row_y, 2) = (-scale * (X * r23 - Z * r21));
+		Jacob(row_x, 3) = (scale * (X * r12 - Y * r11) );
+		Jacob(row_y, 3) = (scale * (X * r22 - Y * r21) );
+
+		// translation terms
+		Jacob(row_x, 4) = 1.0f;
+		Jacob(row_y, 4) = 0.0f;
+		Jacob(row_x, 5) = 0.0f;
+		Jacob(row_y, 5) = 1.0f;
+	}
+
+	// Scale every Jacobian row by the weight found on the diagonal of W
+	cv::Mat_<float> weighted(Jacob.rows, Jacob.cols, 0.0f);
+
+	for (int pt = 0; pt < num_points; ++pt)
+	{
+		const float w_x = W.at<float>(pt, pt);
+		const float w_y = W.at<float>(pt + num_points, pt + num_points);
+
+		for (int col = 0; col < Jacob.cols; ++col)
+		{
+			weighted(pt, col) = Jacob(pt, col) * w_x;
+			weighted(pt + num_points, col) = Jacob(pt + num_points, col) * w_y;
+		}
+	}
+
+	Jacob_t_w = weighted.t();
+}
+
+//===========================================================================
+// Calculate the PDM's Jacobian over all parameters (rigid and non-rigid), the additional input W represents trust for each of the landmarks and is part of Non-Uniform RLMS
+//
+// Outputs:
+//   Jacobian  - (2 * n) x (6 + m) matrix: the x rows of all points followed by the y rows;
+//               columns 0-5 are the rigid terms, columns 6.. one per non-rigid mode
+//   Jacob_t_w - transpose of the Jacobian, with rows scaled by the diagonal of W unless
+//               the trace of W equals its row count
+void PDM::ComputeJacobian(const cv::Mat_<float>& params_local, const cv::Vec6f& params_global, cv::Mat_<float> &Jacobian, const cv::Mat_<float> W, cv::Mat_<float> &Jacob_t_w)
+{ 
+
+	const int num_points = this->NumberOfPoints();
+	const int num_modes = this->NumberOfModes();
+	const int num_cols = 6 + num_modes;
+
+	Jacobian.create(num_points * 2, num_cols);
+
+	const float scale = params_global[0];
+
+	// 3D shape produced by the non-rigid parameters, laid out as all x, then all y, then all z
+	cv::Mat_<float> shape_3D;
+	this->CalcShape3D(shape_3D, params_local);
+
+	// Only the first two rows of the rotation matrix take part in the 2D projection
+	const cv::Matx33f rot = Utilities::Euler2RotationMatrix(cv::Vec3f(params_global[1], params_global[2], params_global[3]));
+	const float r11 = rot(0, 0);
+	const float r12 = rot(0, 1);
+	const float r13 = rot(0, 2);
+	const float r21 = rot(1, 0);
+	const float r22 = rot(1, 1);
+	const float r23 = rot(1, 2);
+
+	// Principal components are consumed in storage order: the x block first, then y and z
+	// blocks, each starting n * m elements after the previous one
+	cv::MatConstIterator_<float> basis_x = this->princ_comp.begin();
+	cv::MatConstIterator_<float> basis_y = basis_x + num_points * num_modes;
+	cv::MatConstIterator_<float> basis_z = basis_y + num_points * num_modes;
+
+	for (int pt = 0; pt < num_points; ++pt)
+	{
+		const int row_x = pt;
+		const int row_y = pt + num_points;
+
+		const float X = shape_3D.at<float>(pt, 0);
+		const float Y = shape_3D.at<float>(pt + num_points, 0);
+		const float Z = shape_3D.at<float>(pt + num_points * 2, 0);
+
+		// The rotation columns come from the small angle approximation R * R' with
+		// R' = [1, -wz, wy
+		//       wz, 1, -wx
+		//       -wy, wx, 1]
+
+		// scaling term
+		Jacobian(row_x, 0) = (X  * r11 + Y * r12 + Z * r13);
+		Jacobian(row_y, 0) = (X  * r21 + Y * r22 + Z * r23);
+
+		// rotation terms
+		Jacobian(row_x, 1) = (scale * (Y * r13 - Z * r12) );
+		Jacobian(row_y, 1) = (scale * (Y * r23 - Z * r22) );
+		Jacobian(row_x, 2) = (-scale * (X * r13 - Z * r11));
+		Jacobian(row_y, 2) = (-scale * (X * r23 - Z * r21));
+		Jacobian(row_x, 3) = (scale * (X * r12 - Y * r11) );
+		Jacobian(row_y, 3) = (scale * (X * r22 - Y * r21) );
+
+		// translation terms
+		Jacobian(row_x, 4) = 1.0f;
+		Jacobian(row_y, 4) = 0.0f;
+		Jacobian(row_x, 5) = 0.0f;
+		Jacobian(row_y, 5) = 1.0f;
+
+		// How much the change of the non-rigid parameters (when object is rotated) affect 2D motion
+		for (int mode = 0; mode < num_modes; ++mode, ++basis_x, ++basis_y, ++basis_z)
+		{
+			Jacobian(row_x, 6 + mode) = ( scale*(r11*(*basis_x) + r12*(*basis_y) + r13*(*basis_z)) );
+			Jacobian(row_y, 6 + mode) = ( scale*(r21*(*basis_x) + r22*(*basis_y) + r23*(*basis_z)) );
+		}
+	}
+
+	// When the trace of W equals its size the weighting pass is skipped
+	if (cv::trace(W)[0] == W.rows)
+	{
+		Jacob_t_w = Jacobian.t();
+		return;
+	}
+
+	// Otherwise scale every Jacobian row by the weight found on the diagonal of W
+	cv::Mat_<float> weighted(Jacobian.rows, Jacobian.cols);
+
+	for (int pt = 0; pt < num_points; ++pt)
+	{
+		const float w_x = W.at<float>(pt, pt);
+		const float w_y = W.at<float>(pt + num_points, pt + num_points);
+
+		for (int col = 0; col < Jacobian.cols; ++col)
+		{
+			weighted(pt, col) = Jacobian(pt, col) * w_x;
+			weighted(pt + num_points, col) = Jacobian(pt + num_points, col) * w_y;
+		}
+	}
+
+	Jacob_t_w = weighted.t();
+}
+
+//===========================================================================
+// Updating the parameters (more details in my thesis)
+//
+// Applies the update vector delta_p in place:
+//   rows 0, 4, 5 - added to scale and the two translations
+//   rows 1-3     - small rotation increments composed with the current rotation
+//   rows 6..     - added to params_local (only when delta_p has more than 6 rows)
+void PDM::UpdateModelParameters(const cv::Mat_<float>& delta_p, cv::Mat_<float>& params_local, cv::Vec6f& params_global)
+{
+
+	// Scale and translation are purely additive
+	params_global[0] += delta_p.at<float>(0,0);
+	params_global[4] += delta_p.at<float>(4,0);
+	params_global[5] += delta_p.at<float>(5,0);
+
+	// Rotation currently stored in the global parameters
+	const cv::Matx33f current_rotation = Utilities::Euler2RotationMatrix(cv::Vec3f(params_global[1], params_global[2], params_global[3]));
+
+	// Incremental rotation R' = [1, -wz, wy
+	//                            wz, 1, -wx
+	//                            -wy, wx, 1]
+	const float wx = delta_p.at<float>(1,0);
+	const float wy = delta_p.at<float>(2,0);
+	const float wz = delta_p.at<float>(3,0);
+
+	cv::Matx33f incremental_rotation(1.0f, -wz,  wy,
+	                                 wz,   1.0f, -wx,
+	                                 -wy,  wx,   1.0f);
+
+	// Make sure it's orthonormal
+	Orthonormalise(incremental_rotation);
+
+	const cv::Matx33f combined_rotation = current_rotation * incremental_rotation;
+
+	// Go through axis angle on the way back to Euler angles to make sure the result is legal
+	const cv::Vec3f axis_angle = Utilities::RotationMatrix2AxisAngle(combined_rotation);
+	cv::Vec3f new_euler = Utilities::AxisAngle2Euler(axis_angle);
+
+	// Temporary fix to numerical instability: any NaN resets the whole rotation to zero
+	const bool has_nan = std::isnan(new_euler[0]) || std::isnan(new_euler[1]) || std::isnan(new_euler[2]);
+	if (has_nan)
+	{
+		new_euler[0] = 0;
+		new_euler[1] = 0;
+		new_euler[2] = 0;
+	}
+
+	for (int axis = 0; axis < 3; ++axis)
+	{
+		params_global[1 + axis] = new_euler[axis];
+	}
+
+	// Non-rigid parameters are also purely additive
+	if (delta_p.rows > 6)
+	{
+		params_local = params_local + delta_p(cv::Rect(0,6,1, this->NumberOfModes()));
+	}
+
+}
+
+// Fits the model to a set of 2D landmark locations, starting from the supplied rotation.
+//   out_params_global  - receives (scale, three Euler angles, tx, ty)
+//   out_params_local   - receives the non-rigid parameters
+//   landmark_locations - all x values followed by all y values; a landmark whose x value is 0 is treated as invisible
+//   rotation           - initial Euler angles
+// The mean shape and principal components of this object are temporarily replaced by versions
+// restricted to the visible landmarks, and restored before the function returns (also when an
+// exception is thrown).
+void PDM::CalcParams(cv::Vec6f& out_params_global, cv::Mat_<float>& out_params_local, const cv::Mat_<float> & landmark_locations, const cv::Vec3f rotation)
+{
+		
+	int m = this->NumberOfModes();
+	int n = this->NumberOfPoints();
+
+	cv::Mat_<int> visi_ind_2D(n * 2, 1, 1);
+	cv::Mat_<int> visi_ind_3D(3 * n , 1, 1);
+
+	int visi_count = n;
+
+	for(int i = 0; i < n; ++i)
+	{
+		// If the landmark is invisible indicate this
+		if(landmark_locations.at<float>(i) == 0)
+		{
+			visi_ind_2D.at<int>(i) = 0;
+			visi_ind_2D.at<int>(i+n) = 0;
+			visi_ind_3D.at<int>(i) = 0;
+			visi_ind_3D.at<int>(i+n) = 0;
+			visi_ind_3D.at<int>(i+2*n) = 0;
+
+			visi_count--;
+		}
+	}
+
+	// As not all landmarks might be visible, subsample the Mean and principal component matrices
+	cv::Mat_<float> M(visi_count * 3, mean_shape.cols, 0.0);
+	cv::Mat_<float> V(visi_count * 3, princ_comp.cols, 0.0);
+	visi_count = 0;
+	for (int i = 0; i < n * 3; ++i)
+	{
+		if (visi_ind_3D.at<int>(i) == 1)
+		{
+			this->mean_shape.row(i).copyTo(M.row(visi_count));
+			this->princ_comp.row(i).copyTo(V.row(visi_count));
+			visi_count++;
+		}
+	}
+
+	// The member functions called below read this->mean_shape / this->princ_comp, so the
+	// subsampled model is swapped in for the duration of the fit. The guard puts the full model
+	// back on every exit path, so an exception from the OpenCV calls below cannot leave the
+	// PDM truncated.
+	struct ModelRestorer
+	{
+		PDM& pdm;
+		cv::Mat_<float> saved_mean_shape;
+		cv::Mat_<float> saved_princ_comp;
+
+		explicit ModelRestorer(PDM& owner) :
+			pdm(owner), saved_mean_shape(owner.mean_shape.clone()), saved_princ_comp(owner.princ_comp.clone())
+		{
+		}
+
+		~ModelRestorer()
+		{
+			pdm.mean_shape = saved_mean_shape;
+			pdm.princ_comp = saved_princ_comp;
+		}
+	};
+	ModelRestorer restore_model(*this);
+
+	this->mean_shape = M;
+	this->princ_comp = V;
+
+	// The new number of points
+	n  = M.rows / 3;
+
+	// Extract the relevant landmark locations
+	cv::Mat_<float> landmark_locs_vis(n*2, 1, 0.0f);
+	int k = 0;
+	for(int i = 0; i < visi_ind_2D.rows; ++i)
+	{
+		if(visi_ind_2D.at<int>(i) == 1)
+		{
+			landmark_locs_vis.at<float>(k) = landmark_locations.at<float>(i);
+			k++;
+		}		
+	}
+
+	// Compute the initial global parameters
+	float min_x, max_x, min_y, max_y;
+	ExtractBoundingBox(landmark_locs_vis, min_x, max_x, min_y, max_y);
+
+	// std::abs picks the floating-point overload; an unqualified abs may resolve to the C int version and truncate
+	float width = std::abs(min_x - max_x);
+	float height = std::abs(min_y - max_y);
+
+	// Bounding box of the (subsampled) model at unit scale, no rotation and zero local parameters
+	cv::Rect_<float> model_bbox;
+	CalcBoundingBox(model_bbox, cv::Vec6f(1.0, 0.0, 0.0, 0.0, 0.0, 0.0), cv::Mat_<float>(this->NumberOfModes(), 1, 0.0));
+
+	float scaling = ((width / model_bbox.width) + (height / model_bbox.height)) / 2.0f;
+        
+	cv::Vec3f rotation_init = rotation;
+	cv::Matx33f R = Utilities::Euler2RotationMatrix(rotation_init);
+	cv::Vec2f translation((min_x + max_x) / 2.0f, (min_y + max_y) / 2.0f);
+    
+	cv::Mat_<float> loc_params(this->NumberOfModes(),1, 0.0);
+	cv::Vec6f glob_params(scaling, rotation_init[0], rotation_init[1], rotation_init[2], translation[0], translation[1]);
+
+	// get the 3D shape of the object	
+	cv::Mat_<float> shape_3D = M + V * loc_params;
+
+	cv::Mat_<float> curr_shape(2*n, 1);
+	
+	// for every vertex
+	for(int i = 0; i < n; i++)
+	{
+		// Transform this using the weak-perspective mapping to 2D from 3D
+		curr_shape.at<float>(i  ,0) = scaling * ( R(0,0) * shape_3D.at<float>(i, 0) + R(0,1) * shape_3D.at<float>(i+n  ,0) + R(0,2) * shape_3D.at<float>(i+n*2,0) ) + translation[0];
+		curr_shape.at<float>(i+n,0) = scaling * ( R(1,0) * shape_3D.at<float>(i, 0) + R(1,1) * shape_3D.at<float>(i+n  ,0) + R(1,2) * shape_3D.at<float>(i+n*2,0) ) + translation[1];
+	}
+		    
+	// Error of the initial estimate, used as the baseline for the convergence test
+	float currError = cv::norm(curr_shape - landmark_locs_vis);
+
+	cv::Mat_<float> regularisations = cv::Mat_<float>::zeros(1, 6 + m);
+
+	float reg_factor = 1;
+
+	// Setting the regularisation to the inverse of eigenvalues (the six rigid parameters stay unregularised)
+	cv::Mat(reg_factor / this->eigen_values).copyTo(regularisations(cv::Rect(6, 0, m, 1)));
+	regularisations = cv::Mat::diag(regularisations.t());
+
+	// Identity weights: every visible landmark is trusted equally
+	cv::Mat_<float> WeightMatrix = cv::Mat_<float>::eye(n*2, n*2);
+
+	int not_improved_in = 0;
+
+	for (size_t i = 0; i < 1000; ++i)
+	{
+		// get the 3D shape of the object
+		shape_3D = M + V * loc_params;
+
+		shape_3D = shape_3D.reshape(1, 3);
+
+		cv::Matx23f R_2D(R(0,0), R(0,1), R(0,2), R(1,0), R(1,1), R(1,2));
+
+		cv::Mat_<float> curr_shape_2D = scaling * shape_3D.t() * cv::Mat(R_2D).t();
+		curr_shape_2D.col(0) = curr_shape_2D.col(0) + translation(0);
+		curr_shape_2D.col(1) = curr_shape_2D.col(1) + translation(1);
+
+		curr_shape_2D = cv::Mat(curr_shape_2D.t()).reshape(1, n * 2);
+		
+		cv::Mat_<float> error_resid;
+		cv::Mat(landmark_locs_vis - curr_shape_2D).convertTo(error_resid, CV_32F);
+        
+		cv::Mat_<float> J, J_w_t;
+		this->ComputeJacobian(loc_params, glob_params, J, WeightMatrix, J_w_t);
+        
+		// projection of the meanshifts onto the jacobians (using the weighted Jacobian, see Baltrusaitis 2013)
+		cv::Mat_<float> J_w_t_m = J_w_t * error_resid;
+
+		// Add the regularisation term
+		J_w_t_m(cv::Rect(0,6,1, m)) = J_w_t_m(cv::Rect(0,6,1, m)) - regularisations(cv::Rect(6,6, m, m)) * loc_params;
+
+		cv::Mat_<float> Hessian = regularisations.clone();
+
+		// Perform matrix multiplication in OpenBLAS (fortran call)
+		float alpha1 = 1.0;
+		float beta1 = 1.0;
+		// Both bytes initialised so the flag buffer is never read uninitialised
+		char N[2] = { 'N', '\0' };
+		sgemm_(N, N, &J.cols, &J_w_t.rows, &J_w_t.cols, &alpha1, (float*)J.data, &J.cols, (float*)J_w_t.data, &J_w_t.cols, &beta1, (float*)Hessian.data, &J.cols);
+
+		// Above is a fast (but ugly) version of 
+		// cv::Mat_<float> Hessian2 = J_w_t * J + regularisations;
+		
+		// Solve for the parameter update (from Baltrusaitis 2013 based on eq (36) Saragih 2011)
+		cv::Mat_<float> param_update;
+		cv::solve(Hessian, J_w_t_m, param_update, cv::DECOMP_CHOLESKY);
+
+		// To not overshoot, have the gradient descent rate a bit smaller
+		param_update = 0.75 * param_update;
+
+		UpdateModelParameters(param_update, loc_params, glob_params);		
+        
+		scaling = glob_params[0];
+		rotation_init[0] = glob_params[1];
+		rotation_init[1] = glob_params[2];
+		rotation_init[2] = glob_params[3];
+
+		translation[0] = glob_params[4];
+		translation[1] = glob_params[5];
+        
+		R = Utilities::Euler2RotationMatrix(rotation_init);
+
+		R_2D(0,0) = R(0,0);R_2D(0,1) = R(0,1); R_2D(0,2) = R(0,2);
+		R_2D(1,0) = R(1,0);R_2D(1,1) = R(1,1); R_2D(1,2) = R(1,2); 
+
+		// NOTE(review): shape_3D was built from loc_params before the update above, so this error
+		// reflects the new rigid parameters but the previous non-rigid shape - confirm this is intended
+		curr_shape_2D = scaling * shape_3D.t() * cv::Mat(R_2D).t();
+		curr_shape_2D.col(0) = curr_shape_2D.col(0) + translation(0);
+		curr_shape_2D.col(1) = curr_shape_2D.col(1) + translation(1);
+
+		curr_shape_2D = cv::Mat(curr_shape_2D.t()).reshape(1, n * 2);
+        
+		float error = cv::norm(curr_shape_2D - landmark_locs_vis);
+        
+		// Stop after the third iteration (not necessarily consecutive) that fails to improve the error by 0.1%
+		if(0.999 * currError < error)
+		{
+			not_improved_in++;
+			if (not_improved_in == 3)
+			{
+				break;
+			}
+		}
+
+		currError = error;
+        
+	}
+
+	out_params_global = glob_params;
+	out_params_local = loc_params;
+
+	// The full mean shape and principal components are put back by restore_model's destructor
+}
+
+// Loads the mean shape, principal components and eigenvalues (in that order) from the
+// file at location. Returns false when the file cannot be opened, true otherwise.
+bool PDM::Read(std::string location)
+{
+
+	std::ifstream model_stream(location, std::ios_base::in);
+	if (!model_stream.is_open())
+	{
+		return false;
+	}
+
+	// Each section is read as doubles and then converted to floats for speed
+
+	// Mean shape
+	LandmarkDetector::SkipComments(model_stream);
+	{
+		cv::Mat_<double> mean_as_double;
+		LandmarkDetector::ReadMat(model_stream, mean_as_double);
+		mean_as_double.convertTo(mean_shape, CV_32F);
+	}
+
+	// Principal components
+	LandmarkDetector::SkipComments(model_stream);
+	{
+		cv::Mat_<double> components_as_double;
+		LandmarkDetector::ReadMat(model_stream, components_as_double);
+		components_as_double.convertTo(princ_comp, CV_32F);
+	}
+
+	// Eigenvalues
+	LandmarkDetector::SkipComments(model_stream);
+	{
+		cv::Mat_<double> eigen_as_double;
+		LandmarkDetector::ReadMat(model_stream, eigen_as_double);
+		eigen_as_double.convertTo(eigen_values, CV_32F);
+	}
+
+	return true;
+}
--- a/pkg/OpenFace/lib/local/LandmarkDetector/src/Patch_experts.cpp
+++ b/pkg/OpenFace/lib/local/LandmarkDetector/src/Patch_experts.cpp
@@ -0,0 +1,696 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
+// all rights reserved.
+//
+// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
+//
+// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.  
+// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
+//
+// License can be found in OpenFace-license.txt
+//
+//     * Any publications arising from the use of this software, including but
+//       not limited to academic journal and conference publications, technical
+//       reports and manuals, must cite at least one of the following works:
+//
+//       OpenFace 2.0: Facial Behavior Analysis Toolkit
+//       Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
+//       in IEEE International Conference on Automatic Face and Gesture Recognition, 2018  
+//
+//       Convolutional experts constrained local model for facial landmark detection.
+//       A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
+//       in Computer Vision and Pattern Recognition Workshops, 2017.    
+//
+//       Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
+//       Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling 
+//       in IEEE International. Conference on Computer Vision (ICCV),  2015 
+//
+//       Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
+//       Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson 
+//       in Facial Expression Recognition and Analysis Challenge, 
+//       IEEE International Conference on Automatic Face and Gesture Recognition, 2015 
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stdafx.h"
+
+#include "Patch_experts.h"
+
+#include "RotationHelpers.h"
+
+// Math includes
+#define _USE_MATH_DEFINES
+#include <cmath>
+
+#ifndef M_PI
+	#define M_PI 3.14159265358979323846
+#endif
+
+#include "LandmarkDetectorUtils.h"
+
+using namespace LandmarkDetector;
+
+// A copy constructor
+// Members in the initialiser list use their own copy constructors; the sigma components and
+// visibilities are cloned matrix by matrix, and the im2col cache is only sized, not copied.
+
+Patch_experts::Patch_experts(const Patch_experts& other) : patch_scaling(other.patch_scaling), centers(other.centers), svr_expert_intensity(other.svr_expert_intensity), 
+														ccnf_expert_intensity(other.ccnf_expert_intensity), cen_expert_intensity(other.cen_expert_intensity),
+														early_term_weights(other.early_term_weights), early_term_biases(other.early_term_biases), early_term_cutoffs(other.early_term_cutoffs),
+														mirror_inds(other.mirror_inds),mirror_views(other.mirror_views)
+{
+
+	// Sigma components: clone every matrix so the copy owns its own data
+	const size_t num_sigma_groups = other.sigma_components.size();
+	this->sigma_components.resize(num_sigma_groups);
+	for (size_t group = 0; group < num_sigma_groups; ++group)
+	{
+		const auto& source_group = other.sigma_components[group];
+		auto& target_group = this->sigma_components[group];
+
+		target_group.resize(source_group.size());
+		for (size_t item = 0; item < source_group.size(); ++item)
+		{
+			target_group[item] = source_group[item].clone();
+		}
+	}
+
+	// Visibilities: same treatment
+	const size_t num_visibility_groups = other.visibilities.size();
+	this->visibilities.resize(num_visibility_groups);
+	for (size_t group = 0; group < num_visibility_groups; ++group)
+	{
+		const auto& source_group = other.visibilities[group];
+		auto& target_group = this->visibilities[group];
+
+		target_group.resize(source_group.size());
+		for (size_t item = 0; item < source_group.size(); ++item)
+		{
+			target_group[item] = source_group[item].clone();
+		}
+	}
+
+	// Only the number of cache slots is carried over; their contents start out empty
+	preallocated_im2col.resize(other.preallocated_im2col.size());
+}
+
+// Returns indices to landmarks that need to have patch responses computed (omits mirrored frontal landmarks for CEN as they will be computed together with their mirrored pair)
+// Nothing is returned for a view whose visibility matrix does not have exactly n rows.
+std::vector<int> Patch_experts::Collect_visible_landmarks(std::vector<std::vector<cv::Mat_<int> > > visibilities, int scale, int view_id, int n)
+{
+	std::vector<int> to_compute;
+
+	for (int lmk = 0; lmk < n; ++lmk)
+	{
+		const cv::Mat_<int>& view_visibility = visibilities[scale][view_id];
+
+		// Skip when the visibility table does not match the model or the landmark is hidden
+		if (view_visibility.rows != n || view_visibility.at<int>(lmk, 0) == 0)
+		{
+			continue;
+		}
+
+		// With CEN experts in the frontal view, an expert without biases is the mirrored half
+		// of a pair and will be done in another part of the loop
+		const bool handled_by_mirror = !cen_expert_intensity.empty()
+			&& view_id == 0
+			&& cen_expert_intensity[scale][view_id][lmk].biases.empty();
+
+		if (!handled_by_mirror)
+		{
+			to_compute.push_back(lmk);
+		}
+	}
+
+	return to_compute;
+
+}
+
+// Returns the patch expert responses given a grayscale image.
+// Additionally returns the transform from the image coordinates to the response coordinates (and vice versa).
+// The computation also requires the current landmark locations to compute response around, the PDM corresponding to the desired model, and the parameters describing its instance
+// Also need to provide the size of the area of interest and the desired scale of analysis
+//
+// Outputs:
+//   patch_expert_responses - indexed by landmark; only entries of landmarks returned by
+//                            Collect_visible_landmarks (and, for CEN frontal views, their mirrored partners) are written.
+//                            This function never resizes the vector itself.
+//   sim_ref_to_img / sim_img_to_ref - 2x2 similarity between the reference shape and the image shape, and its inverse
+// Expert type precedence is CEN, then CCNF, then SVR, decided by which expert containers are non-empty.
+void Patch_experts::Response(std::vector<cv::Mat_<float> >& patch_expert_responses, cv::Matx22f& sim_ref_to_img, 
+	cv::Matx22f& sim_img_to_ref, const cv::Mat_<float>& grayscale_image, const PDM& pdm, const cv::Vec6f& params_global,
+	const cv::Mat_<float>& params_local, int window_size, int scale)
+{
+
+	// View whose centre is closest to the current orientation
+	int view_id = GetViewIdx(params_global, scale);
+
+	int n = pdm.NumberOfPoints();
+
+	// Compute the current landmark locations (around which responses will be computed)
+	cv::Mat_<float> landmark_locations;
+
+	pdm.CalcShape2D(landmark_locations, params_local, params_global);
+
+	cv::Mat_<float> reference_shape;
+
+	// Initialise the reference shape on which we'll be warping
+	// (same local parameters, scale taken from patch_scaling, no rotation or translation)
+	cv::Vec6f global_ref(patch_scaling[scale], 0, 0, 0, 0, 0);
+
+	// Compute the reference shape
+	pdm.CalcShape2D(reference_shape, params_local, global_ref);
+
+	// similarity and inverse similarity transform to and from image and reference shape
+	// (shapes are reshaped to one row per landmark with x and y columns)
+	cv::Mat_<float> reference_shape_2D = (reference_shape.reshape(1, 2).t());
+	cv::Mat_<float> image_shape_2D = landmark_locations.reshape(1, 2).t();
+
+	sim_img_to_ref = Utilities::AlignShapesWithScale(image_shape_2D, reference_shape_2D);
+	sim_ref_to_img = sim_img_to_ref.inv(cv::DECOMP_LU);
+	
+	// Entries of the reference-to-image similarity, reused below to build every affine warp
+	float a1 = sim_ref_to_img(0, 0);
+	float b1 = -sim_ref_to_img(0, 1);
+
+	bool use_ccnf = !this->ccnf_expert_intensity.empty();
+	bool use_cen = !this->cen_expert_intensity.empty();
+
+	// If using CCNF patch experts might need to precalculate Sigmas
+	if (use_ccnf)
+	{
+		// Local vector; note that it shadows the member of the same name
+		std::vector<cv::Mat_<float> > sigma_components;
+
+		// Retrieve the correct sigma component size
+		// (the set whose first matrix has window_size * window_size rows; stays empty if none matches)
+		for (size_t w_size = 0; w_size < this->sigma_components.size(); ++w_size)
+		{
+			if (!this->sigma_components[w_size].empty())
+			{
+				if (window_size*window_size == this->sigma_components[w_size][0].rows)
+				{
+					sigma_components = this->sigma_components[w_size];
+				}
+			}
+		}
+
+		// Go through all of the landmarks and compute the Sigma for each
+		for (int lmark = 0; lmark < n; lmark++)
+		{
+			// Only for visible landmarks
+			if (visibilities[scale][view_id].at<int>(lmark, 0))
+			{
+				// Precompute sigmas if they are not computed yet
+				ccnf_expert_intensity[scale][view_id][lmark].ComputeSigmas(sigma_components, window_size);
+			}
+		}
+
+	}
+
+	// If using CEN precalculate interpolation matrix
+	cv::Mat_<float> interp_mat;
+	if (use_cen)
+	{
+		// Assuming the same size for all experts
+		// NOTE(review): 11 is hard-coded here while the per-landmark code below reads width_support / height_support - confirm they agree
+		int support_region = 11;
+		int area_of_interest_width = window_size + support_region - 1;
+		int area_of_interest_height = window_size + support_region - 1;
+		// By construction this equals window_size
+		int resp_size = area_of_interest_height - support_region + 1;
+		interpolationMatrix(interp_mat, resp_size, resp_size, area_of_interest_width, area_of_interest_height);
+	}
+
+	// We do not want to create threads for invisible landmarks, so construct an index of visible ones
+	std::vector<int> vis_lmk = Collect_visible_landmarks(visibilities, scale, view_id, n);
+
+	// calculate the patch responses for every landmark (this is the heavy lifting of landmark detection)
+	// Everything is captured by reference; each iteration writes to entries keyed by its own landmark index
+	// (and, in the mirrored CEN case, by the mirror index)
+	parallel_for_(cv::Range(0, vis_lmk.size()), [&](const cv::Range& range) {
+		for (int i = range.start; i < range.end; i++)
+		{
+
+			// Work out how big the area of interest has to be to get a response of window size
+			int area_of_interest_width;
+			int area_of_interest_height;
+			int ind = vis_lmk.at(i);
+
+			if (use_cen)
+			{
+				area_of_interest_width = window_size + cen_expert_intensity[scale][view_id][ind].width_support - 1;
+				area_of_interest_height = window_size + cen_expert_intensity[scale][view_id][ind].height_support - 1;
+			}
+			else if (use_ccnf)
+			{
+				area_of_interest_width = window_size + ccnf_expert_intensity[scale][view_id][ind].width - 1;
+				area_of_interest_height = window_size + ccnf_expert_intensity[scale][view_id][ind].height - 1;
+			}
+			else
+			{
+				area_of_interest_width = window_size + svr_expert_intensity[scale][view_id][ind].width - 1;
+				area_of_interest_height = window_size + svr_expert_intensity[scale][view_id][ind].height - 1;
+			}
+
+			// scale and rotate to mean shape to reference frame
+			// NOTE(review): both translation entries use area_of_interest_width; area_of_interest_height is not used here - presumably patches are square, confirm
+			cv::Mat sim = (cv::Mat_<float>(2, 3) << a1, -b1, landmark_locations.at<float>(ind, 0) - a1 * (area_of_interest_width - 1.0f) / 2.0f + b1 * (area_of_interest_width - 1.0f) / 2.0f, b1, a1, landmark_locations.at<float>(ind + n, 0) - a1 * (area_of_interest_width - 1.0f) / 2.0f - b1 * (area_of_interest_width - 1.0f) / 2.0f);
+
+			// Extract the region of interest around the current landmark location
+			cv::Mat_<float> area_of_interest(area_of_interest_height, area_of_interest_width, 0.0f);
+
+			cv::warpAffine(grayscale_image, area_of_interest, sim, area_of_interest.size(), cv::WARP_INVERSE_MAP + cv::INTER_LINEAR);
+
+			// Get intensity response either from the SVR, CCNF, or CEN patch experts (prefer CEN as they are the most accurate so far)
+			if (!cen_expert_intensity.empty())
+			{
+
+				// Key used to look up this landmark's cached im2col matrix
+				int im2col_size = (area_of_interest_width * area_of_interest_height - 1) / 2;
+
+				// Fetched here and stored back under the same key at the end of this branch
+				cv::Mat_<float> prealloc_mat = preallocated_im2col[ind][im2col_size];
+
+				// If frontal view we can do mirrored landmarks together
+				if (view_id == 0)
+				{
+					// If the patch expert does not have values, means it's a mirrored version and will be done in another part of a loop
+					if (!cen_expert_intensity[scale][view_id][ind].biases.empty())
+					{
+						// No mirrored expert, so do normally
+						int mirror_id = mirror_inds.at<int>(ind);
+						if (mirror_id == ind)
+						{
+							cv::Mat_<float> empty(0, 0, 0.0f);
+							cen_expert_intensity[scale][view_id][ind].ResponseSparse(area_of_interest, empty, patch_expert_responses[ind], empty, interp_mat, prealloc_mat, empty);
+						}
+						else
+						{
+
+							// Grab mirrored area of interest
+
+							// scale and rotate to mean shape to reference frame
+							cv::Mat sim_r = (cv::Mat_<float>(2, 3) << a1, -b1, landmark_locations.at<float>(mirror_id, 0) - a1 * (area_of_interest_width - 1.0f) / 2.0f + b1 * (area_of_interest_width - 1.0f) / 2.0f, b1, a1, landmark_locations.at<float>(mirror_id + n, 0) - a1 * (area_of_interest_width - 1.0f) / 2.0f - b1 * (area_of_interest_width - 1.0f) / 2.0f);
+
+							// Extract the region of interest around the current landmark location
+							cv::Mat_<float> area_of_interest_r(area_of_interest_height, area_of_interest_width, 0.0f);
+
+							cv::warpAffine(grayscale_image, area_of_interest_r, sim_r, area_of_interest_r.size(), cv::WARP_INVERSE_MAP + cv::INTER_LINEAR);
+
+							cv::Mat_<float> prealloc_mat_right = preallocated_im2col[mirror_id][im2col_size];
+
+							// One call fills the responses of both this landmark and its mirrored partner
+							cen_expert_intensity[scale][view_id][ind].ResponseSparse(area_of_interest, area_of_interest_r, patch_expert_responses[ind], patch_expert_responses[mirror_id], interp_mat, prealloc_mat, prealloc_mat_right);
+
+							preallocated_im2col[mirror_id][im2col_size] = prealloc_mat_right;
+
+						}
+					}
+				}
+				else
+				{
+					// For space and memory saving use a mirrored patch expert
+					if (!cen_expert_intensity[scale][view_id][ind].biases.empty())
+					{
+						cv::Mat_<float> empty(0, 0, 0.0f);
+						cen_expert_intensity[scale][view_id][ind].ResponseSparse(area_of_interest, empty, patch_expert_responses[ind], empty, interp_mat, prealloc_mat, empty);
+
+						// A slower, but slightly more accurate version
+						//cen_expert_intensity[scale][view_id][ind].Response(area_of_interest, patch_expert_responses[ind]);
+					}
+					else
+					{
+						// This expert has no weights of its own: use the expert of the mirrored view and landmark,
+						// passing the patch and outputs in the second ("right") argument slots
+						cv::Mat_<float> empty(0, 0, 0.0f);
+						cen_expert_intensity[scale][mirror_views.at<int>(view_id)][mirror_inds.at<int>(ind)].ResponseSparse(empty, area_of_interest, empty, patch_expert_responses[ind], interp_mat, empty, prealloc_mat);
+					}
+				}
+
+				preallocated_im2col[ind][im2col_size] = prealloc_mat;
+
+			}
+			else if (!ccnf_expert_intensity.empty())
+			{
+				// get the correct size response window			
+				patch_expert_responses[ind] = cv::Mat_<float>(window_size, window_size);
+
+				// Key used to look up this landmark's cached im2col matrix
+				int im2col_size = area_of_interest_width * area_of_interest_height;
+
+				cv::Mat_<float> prealloc_mat = preallocated_im2col[ind][im2col_size];
+
+				ccnf_expert_intensity[scale][view_id][ind].ResponseOpenBlas(area_of_interest, patch_expert_responses[ind], prealloc_mat);
+
+				preallocated_im2col[ind][im2col_size] = prealloc_mat;
+
+				// Below is an alternative way to compute the same, but that uses FFT instead of OpenBLAS
+				// ccnf_expert_intensity[scale][view_id][ind].Response(area_of_interest, patch_expert_responses[ind]);
+
+			}
+			else
+			{
+				// get the correct size response window			
+				patch_expert_responses[ind] = cv::Mat_<float>(window_size, window_size);
+
+				svr_expert_intensity[scale][view_id][ind].Response(area_of_interest, patch_expert_responses[ind]);
+			}
+		}
+	});
+}
+
+
+//=============================================================================
+// Getting the closest view center based on orientation
+// Compares the three rotation entries of params_global against every view centre of the given
+// scale and returns the index with the smallest squared distance (0 when there are no views).
+int Patch_experts::GetViewIdx(const cv::Vec6f& params_global, int scale) const
+{	
+	int closest_view = 0;
+	float closest_dist = 0.0f;
+
+	for (int view = 0; view < this->nViews(scale); ++view)
+	{
+		const float d0 = params_global[1] - centers[scale][view][0];
+		const float d1 = params_global[2] - centers[scale][view][1];
+		const float d2 = params_global[3] - centers[scale][view][2];
+
+		const float dist = d0*d0 + d1*d1 + d2*d2;
+
+		// The first view always becomes the initial candidate
+		const bool is_better = (view == 0) || (dist < closest_dist);
+		if (is_better)
+		{
+			closest_dist = dist;
+			closest_view = view;
+		}
+	}
+
+	return closest_view;
+}
+
+
+//===========================================================================
+// Loads patch experts from disk, one file per scale, for each of the three expert types
+// (SVR, then CCNF, then CEN), followed by the optional early termination parameters.
+// Later expert types resize the shared per-scale containers and overwrite their contents.
+// Returns false as soon as any file fails to load or the early termination file cannot be opened.
+bool Patch_experts::Read(std::vector<std::string> intensity_svr_expert_locations, std::vector<std::string> intensity_ccnf_expert_locations,
+	std::vector<std::string> intensity_cen_expert_locations, std::string early_term_loc)
+{
+
+	// ---- SVR intensity patch experts ----
+	int svr_scale_count = intensity_svr_expert_locations.size();
+	centers.resize(svr_scale_count);
+	visibilities.resize(svr_scale_count);
+	patch_scaling.resize(svr_scale_count);
+
+	svr_expert_intensity.resize(svr_scale_count);
+
+	for (int s = 0; s < svr_scale_count; ++s)
+	{
+		std::string svr_path = intensity_svr_expert_locations[s];
+		std::cout << "Reading the intensity SVR patch experts from: " << svr_path << "....";
+		if (!Read_SVR_patch_experts(svr_path, centers[s], visibilities[s], svr_expert_intensity[s], patch_scaling[s]))
+		{
+			return false;
+		}
+	}
+
+	// ---- CCNF patch experts (currently only intensity based); these override the SVR ones ----
+	int ccnf_scale_count = intensity_ccnf_expert_locations.size();
+
+	if (ccnf_scale_count > 0)
+	{
+		centers.resize(ccnf_scale_count);
+		visibilities.resize(ccnf_scale_count);
+		patch_scaling.resize(ccnf_scale_count);
+		ccnf_expert_intensity.resize(ccnf_scale_count);
+	}
+
+	for (int s = 0; s < ccnf_scale_count; ++s)
+	{
+		std::string ccnf_path = intensity_ccnf_expert_locations[s];
+		std::cout << "Reading the intensity CCNF patch experts from: " << ccnf_path << "....";
+		if (!Read_CCNF_patch_experts(ccnf_path, centers[s], visibilities[s], ccnf_expert_intensity[s], patch_scaling[s]))
+		{
+			return false;
+		}
+
+		// One im2col cache slot per landmark, sized from the first view of the first scale
+		if (s == 0)
+		{
+			preallocated_im2col.resize(ccnf_expert_intensity[0][0].size());
+		}
+	}
+
+	// ---- CEN patch experts (currently only intensity based); these override SVR and CCNF ----
+	int cen_scale_count = intensity_cen_expert_locations.size();
+
+	if (cen_scale_count > 0)
+	{
+		centers.resize(cen_scale_count);
+		visibilities.resize(cen_scale_count);
+		patch_scaling.resize(cen_scale_count);
+		cen_expert_intensity.resize(cen_scale_count);
+	}
+
+	for (int s = 0; s < cen_scale_count; ++s)
+	{
+		std::string cen_path = intensity_cen_expert_locations[s];
+		std::cout << "Reading the intensity CEN patch experts from: " << cen_path << "....";
+		if (!Read_CEN_patch_experts(cen_path, centers[s], visibilities[s], cen_expert_intensity[s], patch_scaling[s]))
+		{
+			return false;
+		}
+
+		// One im2col cache slot per landmark, sized from the first view of the first scale
+		if (s == 0)
+		{
+			preallocated_im2col.resize(cen_expert_intensity[0][0].size());
+		}
+	}
+
+	// ---- Early termination parameters (optional) ----
+	if (early_term_loc.empty())
+	{
+		return true;
+	}
+
+	std::ifstream early_term_stream(early_term_loc.c_str(), std::ios_base::in);
+	if (!early_term_stream.is_open())
+	{
+		return false;
+	}
+
+	// The file holds one weight per view of the first scale, then the biases, then the cutoffs
+	const size_t view_count = centers[0].size();
+
+	for (size_t v = 0; v < view_count; ++v)
+	{
+		double value;
+		early_term_stream >> value;
+		early_term_weights.push_back(value);
+	}
+
+	for (size_t v = 0; v < view_count; ++v)
+	{
+		double value;
+		early_term_stream >> value;
+		early_term_biases.push_back(value);
+	}
+
+	for (size_t v = 0; v < view_count; ++v)
+	{
+		double value;
+		early_term_stream >> value;
+		early_term_cutoffs.push_back(value);
+	}
+
+	return true;
+}
+//======================= Reading the SVR patch experts =========================================//
+bool Patch_experts::Read_SVR_patch_experts(std::string expert_location, std::vector<cv::Vec3d>& centers,
+	std::vector<cv::Mat_<int> >& visibility, std::vector<std::vector<Multi_SVR_patch_expert> >& patches, double& scale)
+{
+	// Text reader: fills scale, the per-view centers (scaled by pi/180), the per-view visibility
+	// matrices and the patch experts. Returns false if the file cannot be opened or its header is unusable.
+	std::ifstream patchesFile(expert_location.c_str(), std::ios_base::in);
+	if(!patchesFile.is_open())
+	{
+		std::cout << "Can't find/open the patches file" << std::endl;
+		return false;
+	}
+
+	LandmarkDetector::SkipComments(patchesFile);
+	patchesFile >> scale;
+
+	LandmarkDetector::SkipComments(patchesFile);
+	// Start from 0 so a failed extraction can never leave an indeterminate count
+	int numberViews = 0;
+	patchesFile >> numberViews;
+
+	// visibility[0] is dereferenced below, so at least one view is required
+	if(patchesFile.fail() || numberViews <= 0)
+	{
+		std::cout << "Can't read the patches file header" << std::endl;
+		return false;
+	}
+
+	centers.resize(numberViews);
+	visibility.resize(numberViews);
+	patches.resize(numberViews);
+
+	LandmarkDetector::SkipComments(patchesFile);
+
+	// centers of each view (which view corresponds to which orientation)
+	for(size_t i = 0; i < centers.size(); i++)
+	{
+		cv::Mat center;
+		LandmarkDetector::ReadMat(patchesFile, center);
+		center.copyTo(centers[i]);
+		centers[i] = centers[i] * M_PI / 180.0;
+	}
+
+	LandmarkDetector::SkipComments(patchesFile);
+
+	// the visibility of points for each of the views (which verts are visible at a specific view)
+	for(size_t i = 0; i < visibility.size(); i++)
+	{
+		LandmarkDetector::ReadMat(patchesFile, visibility[i]);
+	}
+
+	int numberOfPoints = visibility[0].rows;
+
+	LandmarkDetector::SkipComments(patchesFile);
+
+	// read the patches themselves
+	for(size_t i = 0; i < patches.size(); i++)
+	{
+		// number of patches for each view
+		patches[i].resize(numberOfPoints);
+		// read in each patch
+		for(int j = 0; j < numberOfPoints; j++)
+		{
+			patches[i][j].Read(patchesFile);
+		}
+	}
+	std::cout << "Done" << std::endl;
+	return true;
+}
+
+//======================= Reading the CCNF patch experts =========================================//
+bool Patch_experts::Read_CCNF_patch_experts(std::string patchesFileLocation, std::vector<cv::Vec3d>& centers,
+	std::vector<cv::Mat_<int> >& visibility, std::vector<std::vector<CCNF_patch_expert> >& patches, double& patchScaling)
+{
+	// Binary reader: fills patchScaling, the per-view centers (scaled by pi/180), the per-view visibility matrices,
+	// this->sigma_components and the patch experts. Returns false if the file cannot be opened or a header count is unusable.
+	std::ifstream patchesFile(patchesFileLocation.c_str(), std::ios::in | std::ios::binary);
+	if(!patchesFile.is_open())
+	{
+		std::cout << "Can't find/open the patches file" << std::endl;
+		return false;
+	}
+
+	patchesFile.read ((char*)&patchScaling, 8);
+	// Start from 0 so a short read can never leave an indeterminate count
+	int numberViews = 0;
+	patchesFile.read ((char*)&numberViews, 4);
+	// visibility[0] is dereferenced below, so at least one view is required
+	if(patchesFile.fail() || numberViews <= 0)
+	{
+		std::cout << "Can't read the patches file header" << std::endl;
+		return false;
+	}
+
+	centers.resize(numberViews);
+	visibility.resize(numberViews);
+	patches.resize(numberViews);
+
+	// centers of each view (which view corresponds to which orientation)
+	for(size_t i = 0; i < centers.size(); i++)
+	{
+		cv::Mat center;
+		LandmarkDetector::ReadMatBin(patchesFile, center);
+		center.copyTo(centers[i]);
+		centers[i] = centers[i] * M_PI / 180.0;
+	}
+
+	// the visibility of points for each of the views (which verts are visible at a specific view)
+	for(size_t i = 0; i < visibility.size(); i++)
+	{
+		LandmarkDetector::ReadMatBin(patchesFile, visibility[i]);
+	}
+	int numberOfPoints = visibility[0].rows;
+
+	// Read the possible SigmaInvs (without beta), this will be followed by patch reading (this assumes all of them have the same type, and number of betas)
+	int num_win_sizes = 0;
+	patchesFile.read ((char*)&num_win_sizes, 4);
+	if(patchesFile.fail() || num_win_sizes < 0)
+	{
+		std::cout << "Can't read the patches file header" << std::endl;
+		return false;
+	}
+
+	std::vector<int> windows(num_win_sizes);
+	std::vector<std::vector<cv::Mat_<float> > > sigma_components(num_win_sizes);
+
+	for (int w=0; w < num_win_sizes; ++w)
+	{
+		patchesFile.read ((char*)&windows[w], 4);
+		int num_sigma_comp = 0;
+		patchesFile.read ((char*)&num_sigma_comp, 4);
+		sigma_components[w].resize(num_sigma_comp);
+		for(int s=0; s < num_sigma_comp; ++s)
+		{
+			LandmarkDetector::ReadMatBin(patchesFile, sigma_components[w][s]);
+		}
+	}
+	// The local above shadows the member of the same name, hence the explicit this->
+	this->sigma_components = sigma_components;
+
+	// read the patches themselves
+	for(size_t i = 0; i < patches.size(); i++)
+	{
+		// number of patches for each view
+		patches[i].resize(numberOfPoints);
+		for(int j = 0; j < numberOfPoints; j++)
+		{
+			patches[i][j].Read(patchesFile, windows, sigma_components);
+		}
+	}
+	std::cout << "Done" << std::endl;
+	return true;
+}
+
+//======================= Reading the CEN patch experts =========================================//
+bool Patch_experts::Read_CEN_patch_experts(std::string expert_location, std::vector<cv::Vec3d>& centers,
+	std::vector<cv::Mat_<int> >& visibility, std::vector<std::vector<CEN_patch_expert> >& patches, double& scale)
+{
+	// Binary reader: fills scale, the per-view centers (scaled by pi/180), the per-view visibility matrices,
+	// mirror_inds, mirror_views and the patch experts. Returns false if the file cannot be opened or its header is unusable.
+	std::ifstream patchesFile(expert_location.c_str(), std::ios::in | std::ios::binary);
+	if (!patchesFile.is_open())
+	{
+		std::cout << "Could not find CEN patch experts, for instructions of how to download them, see https://github.com/TadasBaltrusaitis/OpenFace/wiki/Model-download \n" << std::endl;
+		return false;
+	}
+	patchesFile.read((char*)&scale, 8);
+	// Start from 0 so a short read can never leave an indeterminate count
+	int numberViews = 0;
+	patchesFile.read((char*)&numberViews, 4);
+	// visibility[0] is dereferenced below, so at least one view is required
+	if (patchesFile.fail() || numberViews <= 0)
+	{
+		std::cout << "Can't read the patches file header" << std::endl;
+		return false;
+	}
+
+	centers.resize(numberViews);
+	visibility.resize(numberViews);
+	patches.resize(numberViews);
+
+	// centers of each view (which view corresponds to which orientation)
+	for (size_t i = 0; i < centers.size(); i++)
+	{
+		cv::Mat center;
+		LandmarkDetector::ReadMatBin(patchesFile, center);
+		center.copyTo(centers[i]);
+		centers[i] = centers[i] * M_PI / 180.0;
+	}
+
+	// the visibility of points for each of the views (which verts are visible at a specific view)
+	for (size_t i = 0; i < visibility.size(); i++)
+	{
+		LandmarkDetector::ReadMatBin(patchesFile, visibility[i]);
+	}
+	int numberOfPoints = visibility[0].rows;
+
+	LandmarkDetector::ReadMatBin(patchesFile, mirror_inds);
+	LandmarkDetector::ReadMatBin(patchesFile, mirror_views);
+
+	// read the patches themselves
+	for (size_t i = 0; i < patches.size(); i++)
+	{
+		// number of patches for each view
+		patches[i].resize(numberOfPoints);
+		for (int j = 0; j < numberOfPoints; j++)
+		{
+			patches[i][j].Read(patchesFile);
+		}
+	}
+	std::cout << "Done" << std::endl;
+	return true;
+}
--- a/pkg/OpenFace/lib/local/LandmarkDetector/src/SVR_patch_expert.cpp
+++ b/pkg/OpenFace/lib/local/LandmarkDetector/src/SVR_patch_expert.cpp
@@ -0,0 +1,337 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
+// all rights reserved.
+//
+// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
+//
+// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.  
+// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
+//
+// License can be found in OpenFace-license.txt
+//
+//     * Any publications arising from the use of this software, including but
+//       not limited to academic journal and conference publications, technical
+//       reports and manuals, must cite at least one of the following works:
+//
+//       OpenFace 2.0: Facial Behavior Analysis Toolkit
+//       Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
+//       in IEEE International Conference on Automatic Face and Gesture Recognition, 2018  
+//
+//       Convolutional experts constrained local model for facial landmark detection.
+//       A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
+//       in Computer Vision and Pattern Recognition Workshops, 2017.    
+//
+//       Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
+//       Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling 
+//       in IEEE International. Conference on Computer Vision (ICCV),  2015 
+//
+//       Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
+//       Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson 
+//       in Facial Expression Recognition and Analysis Challenge, 
+//       IEEE International Conference on Automatic Face and Gesture Recognition, 2015 
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stdafx.h"
+
+#include "SVR_patch_expert.h"
+
+// OpenCV include
+#include <opencv2/core/core.hpp>
+#include <opencv2/imgproc.hpp>
+
+#include "LandmarkDetectorUtils.h"
+
+using namespace LandmarkDetector;
+
+//===========================================================================
+// Computing the image gradient
+void Grad(const cv::Mat& im, cv::Mat& grad)
+{
+
+	/*float filter[3] = {1, 0, -1};
+	float dfilter[1] = {1};	
+	cv::Mat filterX = cv::Mat(1,3,CV_32F, filter).clone();
+	cv::Mat filterY = cv::Mat(1,1,CV_32F, dfilter).clone();
+			
+	cv::Mat gradX;
+	cv::Mat gradY;
+	cv::sepFilter2D(im, gradX, CV_32F, filterY, filterX, cv::Point(-1,-1), 0);
+	cv::sepFilter2D(im, gradY, CV_32F, filterX.t(), filterY, cv::Point(-1,-1), 0);
+	cv::pow(gradX,2, gradX);
+	cv::pow(gradY,2, gradY);
+	grad = gradX + gradY;
+			
+	grad.row(0).setTo(0);
+	grad.col(0).setTo(0);
+	grad.col(grad.cols-1).setTo(0);
+	grad.row(grad.rows-1).setTo(0);		*/
+
+	// A quicker alternative to the filter-based version above: for every interior pixel
+	//   grad(r,c) = (im(r,c+1) - im(r,c-1))^2 + (im(r+1,c) - im(r-1,c))^2
+	// while the one-pixel border is never written and keeps the initial zero.
+	// Elements of im are accessed as float.
+	const int rows = im.rows;
+	const int cols = im.cols;
+
+	// Initialise the gradient
+	grad.create(im.size(), CV_32F);
+	grad.setTo(0.0f);
+
+	for(int r = 1; r < rows-1; r++)
+	{
+		// Input rows above, at and below the output row
+		const float* above = im.ptr<float>(r-1);
+		const float* here = im.ptr<float>(r);
+		const float* below = im.ptr<float>(r+1);
+		float* out = grad.ptr<float>(r);
+
+		for(int c = 1; c < cols-1; c++)
+		{
+			// Differences between the right/left and the lower/upper neighbours
+			const float dx = here[c+1] - here[c-1];
+			const float dy = below[c] - above[c];
+			// Sum of the two squared differences
+			out[c] = dx*dx + dy*dy;
+		}
+	}
+
+}
+
+// Copy constructor: weights and every matrix stored in weights_dfts are clone()d rather than shared
+SVR_patch_expert::SVR_patch_expert(const SVR_patch_expert& other) : weights(other.weights.clone())
+{
+	type = other.type;
+	scaling = other.scaling;
+	bias = other.bias;
+	confidence = other.confidence;
+	std::map<int, cv::Mat_<double> >::const_iterator entry = other.weights_dfts.begin();
+	while (entry != other.weights_dfts.end())
+	{
+		weights_dfts.insert(std::pair<int, cv::Mat>(entry->first, entry->second.clone()));
+		++entry;
+	}
+}
+
+//===========================================================================
+void SVR_patch_expert::Read(std::ifstream &stream)
+{
+	// The record opens with a type tag, 2 being the value expected here;
+	// the check is an assert, so it is only active where asserts are compiled in
+	int read_type;
+	stream >> read_type;
+	assert(read_type == 2);
+
+	stream >> type >> confidence;
+	stream >> scaling >> bias;
+	LandmarkDetector::ReadMat(stream, weights);
+
+	// OpenCV and Matlab matrix cardinality is different, hence the transpose
+	weights = weights.t();
+}
+
+//===========================================================================
+void SVR_patch_expert::Response(const cv::Mat_<float>& area_of_interest, cv::Mat_<float>& response)
+{
+	// The response map has one value for every placement of the weights
+	// that fits entirely inside the area of interest
+	const int out_rows = area_of_interest.rows - weights.rows + 1;
+	const int out_cols = area_of_interest.cols - weights.cols + 1;
+
+	if(response.rows != out_rows || response.cols != out_cols)
+	{
+		response.create(out_rows, out_cols);
+	}
+
+	// the patch area on which we will calculate responses
+	cv::Mat_<float> prepared_area;
+
+	// If type is raw just normalise mean and standard deviation
+	if(type == 0)
+	{
+		// Statistics are taken across the whole patch
+		cv::Scalar area_mean;
+		cv::Scalar area_std;
+		cv::meanStdDev(area_of_interest, area_mean, area_std);
+
+		// Avoid division by zero: a zero deviation is replaced by 1
+		const double divisor = (area_std[0] == 0) ? 1 : area_std[0];
+		prepared_area = (area_of_interest - area_mean[0]) / divisor;
+	}
+	// If type is gradient, perform the image gradient computation
+	else if(type == 1)
+	{
+		Grad(area_of_interest, prepared_area);
+	}
+	else
+	{
+		// No other type is handled here:
+		// print the offending value and abort
+		printf("ERROR(%s,%d): Unsupported patch type %d!\n", __FILE__,__LINE__, type);
+		abort();
+	}
+
+	cv::Mat_<float> raw_scores;
+
+	// The empty matrix as we don't pass precomputed dft's of image
+	cv::Mat_<double> no_precomputed_0(0,0,0.0);
+	cv::Mat_<float> no_precomputed_1(0,0,0.0);
+	cv::Mat_<float> no_precomputed_2(0,0,0.0);
+
+	// Efficient calc of patch expert SVR response across the area of interest
+	matchTemplate_m(prepared_area, no_precomputed_0, no_precomputed_1, no_precomputed_2, weights, weights_dfts, raw_scores, cv::TM_CCOEFF_NORMED);
+
+	// The output takes the size of the raw score map
+	response.create(raw_scores.size());
+
+	cv::MatIterator_<float> dst = response.begin();
+	const cv::MatIterator_<float> src_end = raw_scores.end();
+
+	for(cv::MatIterator_<float> src = raw_scores.begin(); src != src_end; ++src, ++dst)
+	{
+		// the SVR response passed into logistic regressor
+		*dst = 1.0/(1.0 + exp( -(*src * scaling + bias )));
+	}
+
+}
+
+void SVR_patch_expert::ResponseDepth(const cv::Mat_<float>& area_of_interest, cv::Mat_<float> &response)
+{
+	// How big the response map will be: one value for every placement of the
+	// weights that fits entirely inside the area of interest
+	const int out_rows = area_of_interest.rows - weights.rows + 1;
+	const int out_cols = area_of_interest.cols - weights.cols + 1;
+
+	if(response.rows != out_rows || response.cols != out_cols)
+	{
+		response.create(out_rows, out_cols);
+	}
+
+	// the patch area on which we will calculate responses
+	cv::Mat_<float> prepared_area;
+
+	// Only type 0 is handled by this function
+	if(type == 0)
+	{
+		// ignore missing values: only elements greater than 0 enter the mask
+		cv::Mat_<uchar> valid = area_of_interest > 0;
+
+		// Mean and standard deviation over the masked elements
+		cv::Scalar area_mean;
+		cv::Scalar area_std;
+		cv::meanStdDev(area_of_interest, area_mean, area_std, valid);
+
+		// if all values the same don't divide by 0
+		const double divisor = (area_std[0] == 0) ? 1 : area_std[0];
+		prepared_area = (area_of_interest - area_mean[0]) / divisor;
+
+		// Set the invalid pixels to 0
+		prepared_area.setTo(0, valid == 0);
+	}
+	else
+	{
+		// Any other type is unsupported here:
+		// print the offending value
+		// and abort
+		printf("ERROR(%s,%d): Unsupported patch type %d!\n", __FILE__,__LINE__,type);
+		abort();
+	}
+
+	cv::Mat_<float> raw_scores;
+
+	// The empty matrix as we don't pass precomputed dft's of image
+	cv::Mat_<double> no_precomputed_0(0,0,0.0);
+	cv::Mat_<float> no_precomputed_1(0,0,0.0);
+	cv::Mat_<float> no_precomputed_2(0,0,0.0);
+
+	// Efficient calc of patch expert response across the area of interest
+	matchTemplate_m(prepared_area, no_precomputed_0, no_precomputed_1, no_precomputed_2, weights, weights_dfts, raw_scores, cv::TM_CCOEFF);
+
+	// The output takes the size of the raw score map
+	response.create(raw_scores.size());
+
+	cv::MatIterator_<float> dst = response.begin();
+	const cv::MatIterator_<float> src_end = raw_scores.end();
+
+	for(cv::MatIterator_<float> src = raw_scores.begin(); src != src_end; ++src, ++dst)
+	{
+		// the SVR response passed through a logistic regressor
+		*dst = 1.0/(1.0 + exp( -(*src * scaling + bias )));
+	}
+
+}
+
+// Copy constructor: copy-constructs svr_patch_experts from other and takes over its height and width
+Multi_SVR_patch_expert::Multi_SVR_patch_expert(const Multi_SVR_patch_expert& other) : svr_patch_experts(other.svr_patch_experts)
+{
+	height = other.height;
+	width = other.width;
+}
+
+//===========================================================================
+void Multi_SVR_patch_expert::Read(std::ifstream &stream)
+{
+	// A sanity check when reading patch experts (only active where asserts are compiled in)
+	int type = 0;
+	stream >> type;
+	assert(type == 3);
+
+	// The number of patch experts for this view (with different modalities);
+	// it starts at 0 so a failed extraction can never leave it indeterminate
+	int number_modalities = 0;
+	stream >> width >> height >> number_modalities;
+	// A failed read or a negative count leaves the expert list empty instead of resizing to a bogus size
+	if(stream.fail() || number_modalities < 0) { number_modalities = 0; }
+	svr_patch_experts.resize(number_modalities);
+	for(int i = 0; i < number_modalities; i++)
+		svr_patch_experts[i].Read(stream);
+}
+//===========================================================================
+void Multi_SVR_patch_expert::Response(const cv::Mat_<float> &area_of_interest, cv::Mat_<float> &response)
+{
+	// Response size derived from this expert's height and width
+	const int out_rows = area_of_interest.rows - height + 1;
+	const int out_cols = area_of_interest.cols - width + 1;
+
+	if(response.rows != out_rows || response.cols != out_cols)
+	{
+		response.create(out_rows, out_cols);
+	}
+
+	// A single expert writes its response straight into the output
+	if(svr_patch_experts.size() == 1)
+	{
+		svr_patch_experts[0].Response(area_of_interest, response);
+		return;
+	}
+
+	// responses from multiple patch experts these can be gradients, LBPs etc.:
+	// the output starts at all ones and is multiplied element-wise by each response
+	response.setTo(1.0);
+
+	cv::Mat_<float> single_resp(out_rows, out_cols);
+
+	const size_t num_experts = svr_patch_experts.size();
+	for(size_t e = 0; e != num_experts; ++e)
+	{
+		// Response of one expert, folded into the running product
+		svr_patch_experts[e].Response(area_of_interest, single_resp);
+		response = response.mul(single_resp);
+	}
+
+}
+
+void Multi_SVR_patch_expert::ResponseDepth(const cv::Mat_<float>& area_of_interest, cv::Mat_<float>& response)
+{
+	// Response size derived from this expert's height and width
+	const int out_rows = area_of_interest.rows - height + 1;
+	const int out_cols = area_of_interest.cols - width + 1;
+	const bool size_matches = (response.rows == out_rows) && (response.cols == out_cols);
+	if(!size_matches)
+	{
+		response.create(out_rows, out_cols);
+	}
+	// With depth patch experts only do raw data modality: the first expert in the list is used
+	svr_patch_experts[0].ResponseDepth(area_of_interest, response);
+}
+//===========================================================================
--- a/pkg/OpenFace/lib/local/LandmarkDetector/src/stdafx.cpp
+++ b/pkg/OpenFace/lib/local/LandmarkDetector/src/stdafx.cpp
@@ -0,0 +1,12 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
+// all rights reserved.
+//
+// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
+//
+// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.  
+// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
+//
+// License can be found in OpenFace-license.txt
+//
+#include "stdafx.h"