open source pkg v1
This commit is contained in:
@@ -0,0 +1,514 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
|
||||
// all rights reserved.
|
||||
//
|
||||
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
|
||||
//
|
||||
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
|
||||
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
|
||||
//
|
||||
// License can be found in OpenFace-license.txt
|
||||
//
|
||||
// * Any publications arising from the use of this software, including but
|
||||
// not limited to academic journal and conference publications, technical
|
||||
// reports and manuals, must cite at least one of the following works:
|
||||
//
|
||||
// OpenFace 2.0: Facial Behavior Analysis Toolkit
|
||||
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
|
||||
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
|
||||
//
|
||||
// Convolutional experts constrained local model for facial landmark detection.
|
||||
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
|
||||
// in Computer Vision and Pattern Recognition Workshops, 2017.
|
||||
//
|
||||
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
|
||||
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
|
||||
// in IEEE International. Conference on Computer Vision (ICCV), 2015
|
||||
//
|
||||
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
|
||||
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
|
||||
// in Facial Expression Recognition and Analysis Challenge,
|
||||
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "stdafx.h"
|
||||
|
||||
#include "CCNF_patch_expert.h"
|
||||
|
||||
// OpenCV includes
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
// Local includes
|
||||
#include "LandmarkDetectorUtils.h"
|
||||
|
||||
using namespace LandmarkDetector;
|
||||
|
||||
// Copy constructors of neuron and patch expert
|
||||
CCNF_neuron::CCNF_neuron(const CCNF_neuron& other) : weights(other.weights.clone())
|
||||
{
|
||||
this->neuron_type = other.neuron_type;
|
||||
this->norm_weights = other.norm_weights;
|
||||
this->bias = other.bias;
|
||||
this->alpha = other.alpha;
|
||||
|
||||
for (std::map<int, cv::Mat_<double> >::const_iterator it = other.weights_dfts.begin(); it != other.weights_dfts.end(); it++)
|
||||
{
|
||||
// Make sure the matrix is copied.
|
||||
this->weights_dfts.insert(std::pair<int, cv::Mat>(it->first, it->second.clone()));
|
||||
}
|
||||
}
|
||||
|
||||
// Copy constructor
|
||||
CCNF_patch_expert::CCNF_patch_expert(const CCNF_patch_expert& other) : neurons(other.neurons), window_sizes(other.window_sizes), betas(other.betas)
|
||||
{
|
||||
this->width = other.width;
|
||||
this->height = other.height;
|
||||
this->patch_confidence = other.patch_confidence;
|
||||
|
||||
this->weight_matrix = other.weight_matrix.clone();
|
||||
|
||||
// Copy the Sigmas in a deep way
|
||||
for (std::vector<cv::Mat_<float> >::const_iterator it = other.Sigmas.begin(); it != other.Sigmas.end(); it++)
|
||||
{
|
||||
// Make sure the matrix is copied.
|
||||
this->Sigmas.push_back(it->clone());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Compute sigmas for all landmarks for a particular view and window size
|
||||
void CCNF_patch_expert::ComputeSigmas(std::vector<cv::Mat_<float> > sigma_components, int window_size)
|
||||
{
|
||||
for(size_t i=0; i < window_sizes.size(); ++i)
|
||||
{
|
||||
if( window_sizes[i] == window_size)
|
||||
return;
|
||||
}
|
||||
// Each of the landmarks will have the same connections, hence constant number of sigma components
|
||||
int n_betas = sigma_components.size();
|
||||
|
||||
// calculate the sigmas based on alphas and betas
|
||||
float sum_alphas = 0;
|
||||
|
||||
int n_alphas = this->neurons.size();
|
||||
|
||||
// sum the alphas first
|
||||
for(int a = 0; a < n_alphas; ++a)
|
||||
{
|
||||
sum_alphas = sum_alphas + this->neurons[a].alpha;
|
||||
}
|
||||
|
||||
cv::Mat_<float> q1 = sum_alphas * cv::Mat_<float>::eye(window_size*window_size, window_size*window_size);
|
||||
|
||||
cv::Mat_<float> q2 = cv::Mat_<float>::zeros(window_size*window_size, window_size*window_size);
|
||||
for (int b=0; b < n_betas; ++b)
|
||||
{
|
||||
q2 = q2 + ((float)this->betas[b]) * sigma_components[b];
|
||||
}
|
||||
|
||||
cv::Mat_<float> SigmaInv = 2 * (q1 + q2);
|
||||
|
||||
cv::Mat Sigma_f;
|
||||
cv::invert(SigmaInv, Sigma_f, cv::DECOMP_CHOLESKY);
|
||||
|
||||
window_sizes.push_back(window_size);
|
||||
Sigmas.push_back(Sigma_f);
|
||||
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
void CCNF_neuron::Read(std::ifstream &stream)
|
||||
{
|
||||
// Sanity check
|
||||
int read_type;
|
||||
stream.read ((char*)&read_type, 4);
|
||||
assert(read_type == 2);
|
||||
|
||||
stream.read ((char*)&neuron_type, 4);
|
||||
stream.read ((char*)&norm_weights, 8);
|
||||
stream.read ((char*)&bias, 8);
|
||||
stream.read ((char*)&alpha, 8);
|
||||
|
||||
LandmarkDetector::ReadMatBin(stream, weights);
|
||||
|
||||
}
|
||||
|
||||
// Perform im2col, while at the same time doing contrast normalization and adding a bias term
|
||||
void im2colContrastNormBias(const cv::Mat_<float>& input, const unsigned int width, const unsigned int height, cv::Mat_<float>& output)
|
||||
{
|
||||
const unsigned int m = input.rows;
|
||||
const unsigned int n = input.cols;
|
||||
|
||||
// determine how many blocks there will be with a sliding window of width x height in the input
|
||||
const unsigned int yB = m - height + 1;
|
||||
const unsigned int xB = n - width + 1;
|
||||
|
||||
// Allocate the output size
|
||||
if (output.rows != xB*yB && output.cols != width * height + 1)
|
||||
{
|
||||
output = cv::Mat::ones(xB*yB, width * height + 1, CV_32F);
|
||||
}
|
||||
|
||||
// Iterate over the blocks
|
||||
unsigned int rowIdx = 0;
|
||||
for (unsigned int j = 0; j< xB; j++)
|
||||
{
|
||||
for (unsigned int i = 0; i< yB; i++)
|
||||
{
|
||||
|
||||
float* Mo = output.ptr<float>(rowIdx);
|
||||
|
||||
float sum = 0;
|
||||
|
||||
for (unsigned int yy = 0; yy < height; ++yy)
|
||||
{
|
||||
const float* Mi = input.ptr<float>(i + yy);
|
||||
for (unsigned int xx = 0; xx < width; ++xx)
|
||||
{
|
||||
unsigned int colIdx = xx*height + yy;
|
||||
float in = Mi[j + xx];
|
||||
sum += in;
|
||||
|
||||
Mo[colIdx + 1] = in;
|
||||
}
|
||||
}
|
||||
|
||||
// Working out the mean
|
||||
float mean = sum / (float)(width * height);
|
||||
|
||||
float sum_sq = 0;
|
||||
const unsigned int num_items = width*height + 1;
|
||||
// Working out the sum squared and subtracting the mean
|
||||
for (unsigned int x = 1; x < num_items; ++x)
|
||||
{
|
||||
float in = Mo[x] - mean;
|
||||
Mo[x] = in;
|
||||
sum_sq += in * in;
|
||||
}
|
||||
|
||||
float norm = sqrt(sum_sq);
|
||||
|
||||
// Avoiding division by 0
|
||||
if (norm == 0)
|
||||
{
|
||||
norm = 1;
|
||||
}
|
||||
|
||||
// Flip multiplication to division for speed
|
||||
norm = 1.0 / norm;
|
||||
|
||||
for (unsigned int x = 1; x < num_items; ++x)
|
||||
{
|
||||
Mo[x] *= norm;
|
||||
}
|
||||
|
||||
rowIdx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
void CCNF_neuron::Response(const cv::Mat_<float> &im, cv::Mat_<double> &im_dft, cv::Mat &integral_img, cv::Mat &integral_img_sq, cv::Mat_<float> &resp)
|
||||
{
|
||||
|
||||
int h = im.rows - weights.rows + 1;
|
||||
int w = im.cols - weights.cols + 1;
|
||||
|
||||
// the patch area on which we will calculate reponses
|
||||
cv::Mat_<float> I;
|
||||
|
||||
if(neuron_type == 3)
|
||||
{
|
||||
// Perform normalisation across whole patch (ignoring the invalid values indicated by <= 0
|
||||
|
||||
cv::Scalar mean;
|
||||
cv::Scalar std;
|
||||
|
||||
// ignore missing values
|
||||
cv::Mat_<uchar> mask = im > 0;
|
||||
cv::meanStdDev(im, mean, std, mask);
|
||||
|
||||
// if all values the same don't divide by 0
|
||||
if(std[0] != 0)
|
||||
{
|
||||
I = (im - mean[0]) / std[0];
|
||||
}
|
||||
else
|
||||
{
|
||||
I = (im - mean[0]);
|
||||
}
|
||||
|
||||
I.setTo(0, mask == 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(neuron_type == 0)
|
||||
{
|
||||
I = im;
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("ERROR(%s,%d): Unsupported patch type %d!\n", __FILE__,__LINE__,neuron_type);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
if(resp.empty())
|
||||
{
|
||||
resp.create(h, w);
|
||||
}
|
||||
|
||||
// The response from neuron before activation
|
||||
if(neuron_type == 3)
|
||||
{
|
||||
// In case of depth we use per area, rather than per patch normalisation
|
||||
matchTemplate_m(I, im_dft, integral_img, integral_img_sq, weights, weights_dfts, resp, cv::TM_CCOEFF); // the linear multiplication, efficient calc of response
|
||||
}
|
||||
else
|
||||
{
|
||||
matchTemplate_m(I, im_dft, integral_img, integral_img_sq, weights, weights_dfts, resp, cv::TM_CCOEFF_NORMED); // the linear multiplication, efficient calc of response
|
||||
}
|
||||
|
||||
cv::MatIterator_<float> p = resp.begin();
|
||||
|
||||
cv::MatIterator_<float> q1 = resp.begin(); // respone for each pixel
|
||||
cv::MatIterator_<float> q2 = resp.end();
|
||||
|
||||
// the logistic function (sigmoid) applied to the response
|
||||
while(q1 != q2)
|
||||
{
|
||||
*p++ = (2 * alpha) * 1.0 /(1.0 + exp( -(*q1++ * norm_weights + bias )));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
void CCNF_patch_expert::Read(std::ifstream &stream, std::vector<int> window_sizes, std::vector<std::vector<cv::Mat_<float> > > sigma_components)
|
||||
{
|
||||
|
||||
// Sanity check
|
||||
int read_type;
|
||||
|
||||
stream.read ((char*)&read_type, 4);
|
||||
assert(read_type == 5);
|
||||
|
||||
// the number of neurons for this patch
|
||||
int num_neurons;
|
||||
stream.read ((char*)&width, 4);
|
||||
stream.read ((char*)&height, 4);
|
||||
stream.read ((char*)&num_neurons, 4);
|
||||
|
||||
if(num_neurons == 0)
|
||||
{
|
||||
// empty patch due to landmark being invisible at that orientation
|
||||
|
||||
// read an empty int (due to the way things were written out)
|
||||
stream.read ((char*)&num_neurons, 4);
|
||||
return;
|
||||
}
|
||||
|
||||
neurons.resize(num_neurons);
|
||||
for(int i = 0; i < num_neurons; i++)
|
||||
neurons[i].Read(stream);
|
||||
|
||||
// Combine the neuron weights to one weight matrix for more efficient computation
|
||||
weight_matrix = cv::Mat_<float>(neurons.size(), 1 + neurons[0].weights.rows * neurons[0].weights.cols);
|
||||
for (size_t i = 0; i < neurons.size(); i++)
|
||||
{
|
||||
cv::Mat_<float> w_tmp = neurons[i].weights.t();
|
||||
cv::Mat_<float> weights_flat = w_tmp.reshape(1, neurons[i].weights.rows * neurons[i].weights.cols);
|
||||
weights_flat = weights_flat.t();
|
||||
|
||||
// Incorporate neuron weights directly
|
||||
weights_flat = weights_flat * neurons[i].norm_weights;
|
||||
weights_flat.copyTo(weight_matrix(cv::Rect(1, i, neurons[i].weights.rows * neurons[i].weights.cols, 1)));
|
||||
// Incorporate bias as well
|
||||
weight_matrix.at<float>(i, 0) = neurons[i].bias;
|
||||
}
|
||||
|
||||
// In case we are using OpenBLAS, make sure it is not multi-threading as we are multi-threading outside of it
|
||||
openblas_set_num_threads(1);
|
||||
|
||||
int n_sigmas = window_sizes.size();
|
||||
|
||||
int n_betas = 0;
|
||||
|
||||
if(n_sigmas > 0)
|
||||
{
|
||||
n_betas = sigma_components[0].size();
|
||||
|
||||
betas.resize(n_betas);
|
||||
|
||||
for (int i=0; i < n_betas; ++i)
|
||||
{
|
||||
stream.read ((char*)&betas[i], 8);
|
||||
}
|
||||
}
|
||||
|
||||
// Read the patch confidence
|
||||
stream.read ((char*)&patch_confidence, 8);
|
||||
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
void CCNF_patch_expert::Response(const cv::Mat_<float> &area_of_interest, cv::Mat_<float> &response)
|
||||
{
|
||||
|
||||
int response_height = area_of_interest.rows - height + 1;
|
||||
int response_width = area_of_interest.cols - width + 1;
|
||||
|
||||
if(response.rows != response_height || response.cols != response_width)
|
||||
{
|
||||
response.create(response_height, response_width);
|
||||
}
|
||||
|
||||
response.setTo(0);
|
||||
|
||||
// the placeholder for the DFT of the image, the integral image, and squared integral image so they don't get recalculated for every response
|
||||
cv::Mat_<double> area_of_interest_dft;
|
||||
cv::Mat integral_image, integral_image_sq;
|
||||
|
||||
cv::Mat_<float> neuron_response;
|
||||
|
||||
// responses from the neural layers
|
||||
for(size_t i = 0; i < neurons.size(); i++)
|
||||
{
|
||||
// Do not bother with neuron response if the alpha is tiny and will not contribute much to overall result
|
||||
if(neurons[i].alpha > 1e-4)
|
||||
{
|
||||
|
||||
neurons[i].Response(area_of_interest, area_of_interest_dft, integral_image, integral_image_sq, neuron_response);
|
||||
response = response + neuron_response;
|
||||
}
|
||||
}
|
||||
|
||||
int s_to_use = -1;
|
||||
|
||||
// Find the matching sigma
|
||||
for(size_t i=0; i < window_sizes.size(); ++i)
|
||||
{
|
||||
if(window_sizes[i] == response_height)
|
||||
{
|
||||
// Found the correct sigma
|
||||
s_to_use = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
cv::Mat_<float> resp_vec_f = response.reshape(1, response_height * response_width);
|
||||
|
||||
cv::Mat out = Sigmas[s_to_use] * resp_vec_f;
|
||||
|
||||
response = out.reshape(1, response_height);
|
||||
|
||||
// Making sure the response does not have negative numbers
|
||||
double min;
|
||||
|
||||
minMaxIdx(response, &min, 0);
|
||||
if(min < 0)
|
||||
{
|
||||
response = response - min;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void CCNF_patch_expert::ResponseOpenBlas(const cv::Mat_<float> &area_of_interest, cv::Mat_<float> &response, cv::Mat_<float>& im2col_prealloc)
|
||||
{
|
||||
|
||||
int response_height = area_of_interest.rows - height + 1;
|
||||
int response_width = area_of_interest.cols - width + 1;
|
||||
|
||||
if (response.rows != response_height || response.cols != response_width)
|
||||
{
|
||||
response.create(response_height, response_width);
|
||||
}
|
||||
|
||||
response.setTo(0);
|
||||
if (neurons.size() == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// the placeholder for the column normalized representation of the image, don't get recalculated for every response
|
||||
im2colContrastNormBias(area_of_interest, neurons[0].weights.cols, neurons[0].weights.rows, im2col_prealloc);
|
||||
cv::Mat_<float> normalized_input = im2col_prealloc.t();
|
||||
|
||||
// the placeholder for the DFT of the image, the integral image, and squared integral image so they don't get recalculated for every response
|
||||
cv::Mat_<double> area_of_interest_dft;
|
||||
cv::Mat integral_image, integral_image_sq;
|
||||
|
||||
cv::Mat_<float> neuron_response;
|
||||
|
||||
|
||||
int h = area_of_interest.rows - neurons[0].weights.rows + 1;
|
||||
int w = area_of_interest.cols - neurons[0].weights.cols + 1;
|
||||
|
||||
|
||||
cv::Mat_<float> neuron_resp_full(weight_matrix.rows, normalized_input.cols, 0.0f);
|
||||
// Perform matrix multiplication in OpenBLAS (fortran call)
|
||||
float alpha1 = 1.0;
|
||||
float beta1 = 0.0;
|
||||
char N[2]; N[0] = 'N';
|
||||
sgemm_(N, N, &normalized_input.cols, &weight_matrix.rows, &weight_matrix.cols, &alpha1, (float*)normalized_input.data, &normalized_input.cols, (float*)weight_matrix.data, &weight_matrix.cols, &beta1, (float*)neuron_resp_full.data, &normalized_input.cols);
|
||||
|
||||
// Above is a faster version of this
|
||||
//cv::Mat_<float> neuron_resp_full = this->weight_matrix * normalized_input;
|
||||
|
||||
for (size_t i = 0; i < neurons.size(); i++)
|
||||
{
|
||||
if (neurons[i].alpha > 1e-4)
|
||||
{
|
||||
cv::MatIterator_<float> p = response.begin();
|
||||
|
||||
cv::Mat_<float> rel_row = neuron_resp_full.row(i);
|
||||
cv::MatIterator_<float> q1 = rel_row.begin(); // respone for each pixel
|
||||
cv::MatIterator_<float> q2 = rel_row.end();
|
||||
|
||||
// the logistic function (sigmoid) applied to the response
|
||||
while (q1 != q2)
|
||||
{
|
||||
*p++ += (2.0 * neurons[i].alpha) / (1.0 + exp(-*q1++));
|
||||
}
|
||||
}
|
||||
}
|
||||
response = response.t();
|
||||
|
||||
int s_to_use = -1;
|
||||
|
||||
// Find the matching sigma
|
||||
for (size_t i = 0; i < window_sizes.size(); ++i)
|
||||
{
|
||||
if (window_sizes[i] == response_height)
|
||||
{
|
||||
// Found the correct sigma
|
||||
s_to_use = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
cv::Mat_<float> resp_vec_f = response.reshape(1, response_height * response_width);
|
||||
|
||||
cv::Mat_<float> out(Sigmas[s_to_use].rows, resp_vec_f.cols, 0.0f);
|
||||
|
||||
// Perform matrix multiplication in OpenBLAS (fortran call)
|
||||
alpha1 = 1.0;
|
||||
beta1 = 0.0;
|
||||
sgemm_(N, N, &resp_vec_f.cols, &Sigmas[s_to_use].rows, &Sigmas[s_to_use].cols, &alpha1, (float*)resp_vec_f.data, &resp_vec_f.cols, (float*)Sigmas[s_to_use].data, &Sigmas[s_to_use].cols, &beta1, (float*)out.data, &resp_vec_f.cols);
|
||||
|
||||
// Above is a faster version of this
|
||||
//cv::Mat out = Sigmas[s_to_use] * resp_vec_f;
|
||||
|
||||
response = out.reshape(1, response_height);
|
||||
|
||||
// Making sure the response does not have negative numbers
|
||||
double min;
|
||||
|
||||
minMaxIdx(response, &min, 0);
|
||||
if (min < 0)
|
||||
{
|
||||
response = response - min;
|
||||
}
|
||||
|
||||
}
|
||||
617
pkg/OpenFace/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp
Normal file
617
pkg/OpenFace/lib/local/LandmarkDetector/src/CEN_patch_expert.cpp
Normal file
@@ -0,0 +1,617 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
|
||||
// all rights reserved.
|
||||
//
|
||||
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
|
||||
//
|
||||
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
|
||||
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
|
||||
//
|
||||
// License can be found in OpenFace-license.txt
|
||||
//
|
||||
// * Any publications arising from the use of this software, including but
|
||||
// not limited to academic journal and conference publications, technical
|
||||
// reports and manuals, must cite at least one of the following works:
|
||||
//
|
||||
// OpenFace 2.0: Facial Behavior Analysis Toolkit
|
||||
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
|
||||
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
|
||||
//
|
||||
// Convolutional experts constrained local model for facial landmark detection.
|
||||
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
|
||||
// in Computer Vision and Pattern Recognition Workshops, 2017.
|
||||
//
|
||||
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
|
||||
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
|
||||
// in IEEE International. Conference on Computer Vision (ICCV), 2015
|
||||
//
|
||||
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
|
||||
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
|
||||
// in Facial Expression Recognition and Analysis Challenge,
|
||||
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "stdafx.h"
|
||||
|
||||
#include "CEN_patch_expert.h"
|
||||
|
||||
// OpenCV includes
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
// Local includes
|
||||
#include "LandmarkDetectorUtils.h"
|
||||
|
||||
// For exponential
|
||||
#include <math.h>
|
||||
|
||||
using namespace LandmarkDetector;
|
||||
|
||||
// Copy constructor (do not perform a deep copy of data as it is very large, also there is no real need to stor the copies
|
||||
CEN_patch_expert::CEN_patch_expert(const CEN_patch_expert& other) : confidence(other.confidence), width_support(other.width_support), height_support(other.height_support)
|
||||
{
|
||||
|
||||
// Copy the layer weights in a deep way
|
||||
for (size_t i = 0; i < other.weights.size(); ++i)
|
||||
{
|
||||
this->weights.push_back(other.weights[i]);
|
||||
this->biases.push_back(other.biases[i]);
|
||||
this->activation_function.push_back(other.activation_function[i]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
void CEN_patch_expert::Read(std::ifstream &stream)
|
||||
{
|
||||
|
||||
// Setting up OpenBLAS
|
||||
openblas_set_num_threads(1);
|
||||
|
||||
// Sanity check
|
||||
int read_type;
|
||||
|
||||
stream.read((char*)&read_type, 4);
|
||||
assert(read_type == 6);
|
||||
|
||||
// the number of neurons for this patch
|
||||
int num_layers;
|
||||
stream.read((char*)&width_support, 4);
|
||||
stream.read((char*)&height_support, 4);
|
||||
stream.read((char*)&num_layers, 4);
|
||||
|
||||
if (num_layers == 0)
|
||||
{
|
||||
// empty patch due to landmark being invisible at that orientation (or visible through mirroring)
|
||||
stream.read((char*)&confidence, 8);
|
||||
return;
|
||||
}
|
||||
|
||||
activation_function.resize(num_layers);
|
||||
weights.resize(num_layers);
|
||||
biases.resize(num_layers);
|
||||
|
||||
for (int i = 0; i < num_layers; i++)
|
||||
{
|
||||
int neuron_type;
|
||||
stream.read((char*)&neuron_type, 4);
|
||||
activation_function[i] = neuron_type;
|
||||
|
||||
cv::Mat_<double> bias;
|
||||
LandmarkDetector::ReadMatBin(stream, bias);
|
||||
|
||||
cv::Mat_<double> weight;
|
||||
LandmarkDetector::ReadMatBin(stream, weight);
|
||||
|
||||
weights[i] = weight;
|
||||
biases[i] = bias;
|
||||
}
|
||||
|
||||
// Read the patch confidence
|
||||
stream.read((char*)&confidence, 8);
|
||||
|
||||
}
|
||||
|
||||
// Contrast normalize the input for response map computation
|
||||
void contrastNorm(const cv::Mat_<float>& input, cv::Mat_<float>& output)
|
||||
{
|
||||
|
||||
const unsigned int num_cols = input.cols;
|
||||
|
||||
const unsigned int num_rows = input.rows;
|
||||
|
||||
output = input.clone();
|
||||
|
||||
cv::MatConstIterator_<float> p = input.begin();
|
||||
|
||||
// Compute row wise
|
||||
for (unsigned int y = 0; y < num_rows; ++y)
|
||||
{
|
||||
|
||||
cv::Scalar mean_s = cv::mean(input(cv::Rect(1,y,num_cols-1, 1)));
|
||||
float mean = (float)mean_s[0];
|
||||
|
||||
p++;
|
||||
|
||||
float sum_sq = 0;
|
||||
for (unsigned int x = 1; x < num_cols; ++x)
|
||||
{
|
||||
float curr = *p++;
|
||||
sum_sq += (curr - mean) * (curr - mean);
|
||||
}
|
||||
|
||||
float norm = sqrt(sum_sq);
|
||||
|
||||
if (norm == 0)
|
||||
norm = 1;
|
||||
|
||||
for (unsigned int x = 1; x < num_cols; ++x)
|
||||
{
|
||||
output.at<float>(y, x) = (output.at<float>(y, x) - mean) / norm;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void im2colBias(const cv::Mat_<float>& input, const unsigned int width, const unsigned int height, cv::Mat_<float>& output)
|
||||
{
|
||||
|
||||
const unsigned int m = input.rows;
|
||||
const unsigned int n = input.cols;
|
||||
|
||||
// determine how many blocks there will be with a sliding window of width x height in the input
|
||||
const unsigned int yB = m - height + 1;
|
||||
const unsigned int xB = n - width + 1;
|
||||
|
||||
// Allocate the output size
|
||||
if(output.rows != xB*yB && output.cols != width * height + 1)
|
||||
{
|
||||
output = cv::Mat::ones(xB*yB, width * height + 1, CV_32F);
|
||||
}
|
||||
|
||||
// Iterate over the blocks
|
||||
for (unsigned int j = 0; j< xB; j++)
|
||||
{
|
||||
for (unsigned int i = 0; i< yB; i++)
|
||||
{
|
||||
unsigned int rowIdx = i + j*yB;
|
||||
|
||||
for (unsigned int yy = 0; yy < height; ++yy)
|
||||
for (unsigned int xx = 0; xx < width; ++xx)
|
||||
{
|
||||
unsigned int colIdx = xx*height + yy;
|
||||
output.at<float>(rowIdx, colIdx + 1) = input.at<float>(i + yy, j + xx);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
void CEN_patch_expert::Response(const cv::Mat_<float> &area_of_interest, cv::Mat_<float> &response)
|
||||
{
|
||||
|
||||
int response_height = area_of_interest.rows - height_support + 1;
|
||||
int response_width = area_of_interest.cols - width_support + 1;
|
||||
|
||||
cv::Mat_<float> input_col;
|
||||
im2colBias(area_of_interest, width_support, height_support, input_col);
|
||||
|
||||
// Mean and standard deviation normalization
|
||||
contrastNorm(input_col, response);
|
||||
response = response.t();
|
||||
|
||||
for (size_t layer = 0; layer < activation_function.size(); ++layer)
|
||||
{
|
||||
|
||||
// We are performing response = weights[layers] * response(t), but in OpenBLAS as that is significantly quicker than OpenCV
|
||||
cv::Mat_<float> resp = response;
|
||||
float* m1 = (float*)resp.data;
|
||||
cv::Mat_<float> weight = weights[layer];
|
||||
float* m2 = (float*)weight.data;
|
||||
|
||||
cv::Mat_<float> resp_blas(weight.rows, resp.cols);
|
||||
float* m3 = (float*)resp_blas.data;
|
||||
|
||||
// Perform matrix multiplication in OpenBLAS (fortran call)
|
||||
float alpha1 = 1.0;
|
||||
float beta1 = 0.0;
|
||||
char N[2]; N[0] = 'N';
|
||||
sgemm_(N, N, &resp.cols, &weight.rows, &weight.cols, &alpha1, m1, &resp.cols, m2, &weight.cols, &beta1, m3, &resp.cols);
|
||||
|
||||
// The above is a faster version of this, by calling the fortran version directly
|
||||
//cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, resp.cols, weight.rows, weight.cols, 1, m1, resp.cols, m2, weight.cols, 0.0, m3, resp.cols);
|
||||
|
||||
// Adding the bias (bit ugly, but the fastest way to do this)
|
||||
response = resp_blas;
|
||||
|
||||
float* data = (float*)response.data;
|
||||
size_t height = response.rows;
|
||||
size_t width = response.cols;
|
||||
float* data_b = (float*)biases[layer].data;
|
||||
for (size_t y = 0; y < height; ++y)
|
||||
{
|
||||
float bias = data_b[y];
|
||||
for (size_t x = 0; x < width; ++x)
|
||||
{
|
||||
float in = *data + bias;
|
||||
*data++ = in;
|
||||
}
|
||||
}
|
||||
|
||||
// Perform activation and add bias at the same time
|
||||
if (activation_function[layer] == 0) // Sigmoid
|
||||
{
|
||||
|
||||
size_t resp_size = response.rows * response.cols;
|
||||
|
||||
// Iterate over the data directly
|
||||
float* data = (float*)response.data;
|
||||
|
||||
for (size_t counter = 0; counter < resp_size; ++counter)
|
||||
{
|
||||
float in = *data;
|
||||
*data++ = 1.0 / (1.0 + exp(-(in)));
|
||||
}
|
||||
|
||||
}
|
||||
else if (activation_function[layer] == 2)// ReLU
|
||||
{
|
||||
cv::threshold(response, response, 0, 0, cv::THRESH_TOZERO);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
response = response.t();
|
||||
response = response.reshape(1, response_height);
|
||||
response = response.t();
|
||||
|
||||
}
|
||||
|
||||
// Perform im2col, while at the same time doing contrast normalization and adding a bias term (also skip every other region)
|
||||
void im2colBiasSparseContrastNorm(const cv::Mat_<float>& input, const unsigned int width, const unsigned int height, cv::Mat_<float>& output)
|
||||
{
|
||||
const unsigned int m = input.rows;
|
||||
const unsigned int n = input.cols;
|
||||
|
||||
// determine how many blocks there will be with a sliding window of width x height in the input
|
||||
const unsigned int yB = m - height + 1;
|
||||
const unsigned int xB = n - width + 1;
|
||||
|
||||
// As we will be skipping half of the outputs
|
||||
const unsigned int out_size = (yB*xB - 1) / 2;
|
||||
|
||||
// Allocate the output size
|
||||
if (output.rows != out_size && output.cols != width * height + 1)
|
||||
{
|
||||
output = cv::Mat::ones(out_size, width * height + 1, CV_32F);
|
||||
}
|
||||
|
||||
// Iterate over the blocks, skipping every second block
|
||||
unsigned int rowIdx = 0;
|
||||
unsigned int skipCounter = 0;
|
||||
for (unsigned int j = 0; j< xB; j++)
|
||||
{
|
||||
for (unsigned int i = 0; i< yB; i++)
|
||||
{
|
||||
// Skip every second row
|
||||
skipCounter++;
|
||||
if ((skipCounter + 1) % 2 == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
float* Mo = output.ptr<float>(rowIdx);
|
||||
|
||||
float sum = 0;
|
||||
|
||||
for (unsigned int yy = 0; yy < height; ++yy)
|
||||
{
|
||||
const float* Mi = input.ptr<float>(i + yy);
|
||||
for (unsigned int xx = 0; xx < width; ++xx)
|
||||
{
|
||||
int colIdx = xx*height + yy;
|
||||
float in = Mi[j + xx];
|
||||
sum += in;
|
||||
|
||||
Mo[colIdx+1] = in;
|
||||
}
|
||||
}
|
||||
|
||||
// Working out the mean
|
||||
float mean = sum / (float)(width * height);
|
||||
|
||||
float sum_sq = 0;
|
||||
const unsigned int num_items = width*height + 1;
|
||||
// Working out the sum squared and subtracting the mean
|
||||
for (unsigned int x = 1; x < num_items; ++x)
|
||||
{
|
||||
float in = Mo[x] - mean;
|
||||
Mo[x] = in;
|
||||
sum_sq += in * in;
|
||||
}
|
||||
|
||||
float norm = sqrt(sum_sq);
|
||||
|
||||
// Avoiding division by 0
|
||||
if (norm == 0)
|
||||
{
|
||||
norm = 1;
|
||||
}
|
||||
|
||||
// Flip multiplication to division for speed
|
||||
norm = 1.0 / norm;
|
||||
|
||||
for (unsigned int x = 1; x < num_items; ++x)
|
||||
{
|
||||
Mo[x] *= norm;
|
||||
}
|
||||
|
||||
rowIdx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void im2colBiasSparse(const cv::Mat_<float>& input, const unsigned int width, const unsigned int height, cv::Mat_<float>& output)
|
||||
{
|
||||
|
||||
const unsigned int m = input.rows;
|
||||
const unsigned int n = input.cols;
|
||||
|
||||
// determine how many blocks there will be with a sliding window of width x height in the input
|
||||
const unsigned int yB = m - height + 1;
|
||||
const unsigned int xB = n - width + 1;
|
||||
|
||||
// As we will be skipping half of the outputs
|
||||
const unsigned int out_size = (yB*xB - 1) / 2;
|
||||
|
||||
// Allocate the output size
|
||||
if (output.rows != out_size && output.cols != width * height + 1)
|
||||
{
|
||||
output = cv::Mat::ones(out_size, width * height + 1, CV_32F);
|
||||
}
|
||||
|
||||
// Iterate over the blocks, skipping every second block
|
||||
unsigned int rowIdx = 0;
|
||||
unsigned int skipCounter = 0;
|
||||
for (unsigned int j = 0; j< xB; j++)
|
||||
{
|
||||
for (unsigned int i = 0; i< yB; i++)
|
||||
{
|
||||
// Skip every second row
|
||||
skipCounter++;
|
||||
if ((skipCounter + 1) % 2 == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
for (unsigned int yy = 0; yy < height; ++yy)
|
||||
{
|
||||
for (unsigned int xx = 0; xx < width; ++xx)
|
||||
{
|
||||
unsigned int colIdx = xx*height + yy;
|
||||
output.at<float>(rowIdx, colIdx + 1) = input.at<float>(i + yy, j + xx);
|
||||
}
|
||||
}
|
||||
rowIdx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// As the sparse patch expert output with interpolation, this function creates an interpolation matrix
|
||||
void LandmarkDetector::interpolationMatrix(cv::Mat_<float>& mapMatrix, int response_height, int response_width,
|
||||
int input_width, int input_height)
|
||||
{
|
||||
int m = input_height;
|
||||
int n = input_width;
|
||||
|
||||
// determine how many blocks there will be with a sliding window of width x height in the input
|
||||
int yB = m - 11 + 1;
|
||||
int xB = n - 11 + 1;
|
||||
|
||||
// As we will be skipping half of the outputs
|
||||
int out_size = (yB*xB - 1) / 2;
|
||||
|
||||
mapMatrix.create(out_size, response_height * response_width);
|
||||
mapMatrix.setTo(0.0f);
|
||||
|
||||
// Find a mapping from indices in the computed sparse response and the original full response
|
||||
cv::Mat_<int> value_id_matrix(response_width, response_height, 0);
|
||||
|
||||
int ind = 0;
|
||||
for (int k = 0; k < value_id_matrix.rows * value_id_matrix.cols; ++k)
|
||||
{
|
||||
if (k % 2 != 0)
|
||||
{
|
||||
value_id_matrix.at<int>(k) = ind;
|
||||
ind++;
|
||||
}
|
||||
}
|
||||
value_id_matrix = value_id_matrix.t();
|
||||
|
||||
int skip_counter = 0;
|
||||
for (int x = 0; x < response_width; ++x)
|
||||
{
|
||||
for (int y = 0; y < response_height; ++y)
|
||||
{
|
||||
int mapping_col = x * response_height + y;
|
||||
skip_counter++;
|
||||
if (skip_counter % 2 == 0)
|
||||
{
|
||||
int val_id = value_id_matrix.at<int>(y, x);
|
||||
mapMatrix.at<float>(val_id, mapping_col) = 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
float num_neigh = 0.0;
|
||||
std::vector<int> val_ids;
|
||||
if (x - 1 >= 0)
|
||||
{
|
||||
num_neigh++;
|
||||
val_ids.push_back(value_id_matrix.at<int>(y, x - 1));
|
||||
}
|
||||
if (y - 1 >= 0)
|
||||
{
|
||||
num_neigh++;
|
||||
val_ids.push_back(value_id_matrix.at<int>(y - 1, x));
|
||||
}
|
||||
if (x + 1 < response_width)
|
||||
{
|
||||
num_neigh++;
|
||||
val_ids.push_back(value_id_matrix.at<int>(y, x + 1));
|
||||
}
|
||||
if (y + 1 < response_height)
|
||||
{
|
||||
num_neigh++;
|
||||
val_ids.push_back(value_id_matrix.at<int>(y + 1, x));
|
||||
}
|
||||
|
||||
for (size_t k = 0; k < val_ids.size(); ++k)
|
||||
{
|
||||
mapMatrix.at<float>(val_ids[k], mapping_col) = 1.0 / num_neigh;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CEN_patch_expert::ResponseInternal(cv::Mat_<float>& response)
|
||||
{
|
||||
for (size_t layer = 0; layer < activation_function.size(); ++layer)
|
||||
{
|
||||
|
||||
// We are performing response = weights[layers] * response, but in OpenBLAS as that is significantly quicker than OpenCV
|
||||
cv::Mat_<float> resp = response;
|
||||
float* m1 = (float*)resp.data;
|
||||
float* m2 = (float*)weights[layer].data;
|
||||
|
||||
cv::Mat_<float> resp_blas(weights[layer].rows, resp.cols);
|
||||
float* m3 = (float*)resp_blas.data;
|
||||
|
||||
// Perform matrix multiplication in OpenBLAS (fortran call)
|
||||
float alpha1 = 1.0;
|
||||
float beta1 = 0.0;
|
||||
char N[2]; N[0] = 'N';
|
||||
sgemm_(N, N, &resp.cols, &weights[layer].rows, &weights[layer].cols, &alpha1, m1, &resp.cols, m2, &weights[layer].cols, &beta1, m3, &resp.cols);
|
||||
|
||||
// The above is a faster version of this, by calling the fortran version directly
|
||||
//cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, resp.cols, weight.rows, weight.cols, 1, m1, resp.cols, m2, weight.cols, 0.0, m3, resp.cols);
|
||||
|
||||
response = resp_blas;
|
||||
|
||||
// Alternative is to multiply the responses directly using OpenCV (much slower)
|
||||
//response = weights[layer] * response;
|
||||
|
||||
// Adding the bias (bit ugly, but the fastest way to do this), TODO can this bias be incorporated in the above?
|
||||
float* data = (float*)response.data;
|
||||
const unsigned height = response.rows;
|
||||
const unsigned width = response.cols;
|
||||
float* data_b = (float*)biases[layer].data;
|
||||
for (unsigned int y = 0; y < height; ++y)
|
||||
{
|
||||
float bias = data_b[y];
|
||||
for (unsigned int x = 0; x < width; ++x)
|
||||
{
|
||||
float in = *data + bias;
|
||||
*data++ = in;
|
||||
}
|
||||
}
|
||||
|
||||
// Perform activation and add bias at the same time
|
||||
if (activation_function[layer] == 0) // Sigmoid
|
||||
{
|
||||
|
||||
const unsigned int resp_size = response.rows * response.cols;
|
||||
|
||||
// Iterate over the data directly
|
||||
float* data = (float*)response.data;
|
||||
|
||||
for (unsigned int counter = 0; counter < resp_size; ++counter)
|
||||
{
|
||||
float in = *data;
|
||||
*data++ = 1.0f / (1.0f + exp(-(in)));
|
||||
}
|
||||
|
||||
}
|
||||
else if (activation_function[layer] == 2)// ReLU
|
||||
{
|
||||
cv::threshold(response, response, 0, 0, cv::THRESH_TOZERO);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
void CEN_patch_expert::ResponseSparse(const cv::Mat_<float> &area_of_interest_left, const cv::Mat_<float> &area_of_interest_right, cv::Mat_<float> &response_left, cv::Mat_<float> &response_right, cv::Mat_<float>& mapMatrix, cv::Mat_<float>& im2col_prealloc_left, cv::Mat_<float>& im2col_prealloc_right)
|
||||
{
|
||||
unsigned int response_height = 0;
|
||||
|
||||
const bool left_provided = !area_of_interest_left.empty();
|
||||
const bool right_provided = !area_of_interest_right.empty();
|
||||
|
||||
if(right_provided)
|
||||
{
|
||||
cv::flip(area_of_interest_right, area_of_interest_right, 1);
|
||||
response_height = area_of_interest_right.rows - height_support + 1;
|
||||
im2colBiasSparseContrastNorm(area_of_interest_right, width_support, height_support, im2col_prealloc_right);
|
||||
}
|
||||
|
||||
// Extract im2col but in a sparse way and contrast normalize
|
||||
if(left_provided)
|
||||
{
|
||||
response_height = area_of_interest_left.rows - height_support + 1;
|
||||
im2colBiasSparseContrastNorm(area_of_interest_left, width_support, height_support, im2col_prealloc_left);
|
||||
}
|
||||
|
||||
cv::Mat_<float> response;
|
||||
if(right_provided && left_provided)
|
||||
{
|
||||
cv::vconcat(im2col_prealloc_left, im2col_prealloc_right, response);
|
||||
response = response.t();
|
||||
}
|
||||
else if (left_provided)
|
||||
{
|
||||
response = im2col_prealloc_left.t();
|
||||
}
|
||||
else if (right_provided)
|
||||
{
|
||||
response = im2col_prealloc_right.t();
|
||||
}
|
||||
|
||||
ResponseInternal(response);
|
||||
|
||||
if(left_provided && right_provided)
|
||||
{
|
||||
response_left = response(cv::Rect(0, 0, response.cols / 2, 1));
|
||||
response_right = response(cv::Rect(response.cols / 2, 0, response.cols / 2, 1));
|
||||
}
|
||||
else if (left_provided)
|
||||
{
|
||||
response_left = response;
|
||||
}
|
||||
else if (right_provided)
|
||||
{
|
||||
response_right = response;
|
||||
}
|
||||
|
||||
if(left_provided)
|
||||
{
|
||||
// TODO This could and should be gemm'ed
|
||||
response_left = response_left * mapMatrix;
|
||||
response_left = response_left.t();
|
||||
response_left = response_left.reshape(1, response_height);
|
||||
response_left = response_left.t();
|
||||
}
|
||||
|
||||
if(right_provided)
|
||||
{
|
||||
// TODO This could and should be gemm'ed
|
||||
response_right = response_right * mapMatrix;
|
||||
response_right = response_right.t();
|
||||
response_right = response_right.reshape(1, response_height);
|
||||
response_right = response_right.t();
|
||||
|
||||
cv::flip(response_right, response_right, 1);
|
||||
}
|
||||
}
|
||||
543
pkg/OpenFace/lib/local/LandmarkDetector/src/CNN_utils.cpp
Normal file
543
pkg/OpenFace/lib/local/LandmarkDetector/src/CNN_utils.cpp
Normal file
@@ -0,0 +1,543 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright (C) 2017, Tadas Baltrusaitis, all rights reserved.
|
||||
//
|
||||
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
|
||||
//
|
||||
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
|
||||
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
|
||||
//
|
||||
// License can be found in OpenFace-license.txt
|
||||
//
|
||||
// * Any publications arising from the use of this software, including but
|
||||
// not limited to academic journal and conference publications, technical
|
||||
// reports and manuals, must cite at least one of the following works:
|
||||
//
|
||||
// OpenFace 2.0: Facial Behavior Analysis Toolkit
|
||||
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
|
||||
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
|
||||
//
|
||||
// Convolutional experts constrained local model for facial landmark detection.
|
||||
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
|
||||
// in Computer Vision and Pattern Recognition Workshops, 2017.
|
||||
//
|
||||
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
|
||||
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
|
||||
// in IEEE International. Conference on Computer Vision (ICCV), 2015
|
||||
//
|
||||
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
|
||||
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
|
||||
// in Facial Expression Recognition and Analysis Challenge,
|
||||
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "stdafx.h"
|
||||
|
||||
#include "CNN_utils.h"
|
||||
|
||||
namespace LandmarkDetector
|
||||
{
|
||||
|
||||
// Parametric ReLU with leaky weights (separate ones per channel)
|
||||
void PReLU(std::vector<cv::Mat_<float> >& input_output_maps, cv::Mat_<float> prelu_weights)
|
||||
{
|
||||
|
||||
if (input_output_maps.size() > 1)
|
||||
{
|
||||
unsigned int h = input_output_maps[0].rows;
|
||||
unsigned int w = input_output_maps[0].cols;
|
||||
unsigned int size_in = h * w;
|
||||
|
||||
for (int k = 0; k < (int) input_output_maps.size(); ++k)
|
||||
{
|
||||
// Apply the PReLU
|
||||
auto iter = input_output_maps[k].begin();
|
||||
|
||||
float neg_mult = prelu_weights.at<float>(k);
|
||||
|
||||
for (unsigned int i = 0; i < size_in; ++i)
|
||||
{
|
||||
float in_val = *iter;
|
||||
|
||||
// The prelu step
|
||||
*iter++ = in_val >= 0 ? in_val : in_val * neg_mult;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
int w = input_output_maps[0].cols;
|
||||
|
||||
for (int k = 0; k < prelu_weights.rows; ++k)
|
||||
{
|
||||
auto iter = input_output_maps[0].row(k).begin();
|
||||
float neg_mult = prelu_weights.at<float>(k);
|
||||
|
||||
for (int i = 0; i < w; ++i)
|
||||
{
|
||||
float in_val = *iter;
|
||||
// Apply the PReLU
|
||||
*iter = in_val >= 0 ? in_val : in_val * neg_mult;
|
||||
|
||||
// To deal with OpenCV 3.4s debug mode not allowing to go over iteration boundaries
|
||||
if(i + 1 < w)
|
||||
{
|
||||
iter++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void fully_connected(std::vector<cv::Mat_<float> >& outputs, const std::vector<cv::Mat_<float> >& input_maps, cv::Mat_<float> weights, cv::Mat_<float> biases)
|
||||
{
|
||||
outputs.clear();
|
||||
|
||||
if (input_maps.size() > 1)
|
||||
{
|
||||
// Concatenate all the maps
|
||||
cv::Size orig_size = input_maps[0].size();
|
||||
cv::Mat_<float> input_concat((int)input_maps.size(), input_maps[0].cols * input_maps[0].rows);
|
||||
|
||||
for (int in = 0; in < (int)input_maps.size(); ++in)
|
||||
{
|
||||
cv::Mat_<float> add = input_maps[in];
|
||||
|
||||
// Reshape if all of the data will be flattened
|
||||
if (input_concat.rows != weights.cols)
|
||||
{
|
||||
add = add.t();
|
||||
}
|
||||
|
||||
add = add.reshape(0, 1);
|
||||
add.copyTo(input_concat.row(in));
|
||||
}
|
||||
|
||||
// Treat the input as separate feature maps
|
||||
if (input_concat.rows == weights.cols)
|
||||
{
|
||||
input_concat = weights * input_concat;
|
||||
// Add biases
|
||||
for (int k = 0; k < biases.rows; ++k)
|
||||
{
|
||||
input_concat.row(k) = input_concat.row(k) + biases.at<float>(k);
|
||||
}
|
||||
|
||||
outputs.clear();
|
||||
// Resize and add as output
|
||||
for (int k = 0; k < biases.rows; ++k)
|
||||
{
|
||||
cv::Mat_<float> reshaped = input_concat.row(k).clone();
|
||||
reshaped = reshaped.reshape(1, orig_size.height);
|
||||
outputs.push_back(reshaped);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Flatten the input
|
||||
input_concat = input_concat.reshape(0, input_concat.rows * input_concat.cols);
|
||||
|
||||
input_concat = weights * input_concat + biases;
|
||||
|
||||
outputs.clear();
|
||||
outputs.push_back(input_concat);
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat out = weights * input_maps[0] + biases;
|
||||
outputs.clear();
|
||||
outputs.push_back(out.t());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void max_pooling(std::vector<cv::Mat_<float> >& outputs, const std::vector<cv::Mat_<float> >& input_maps, int stride_x, int stride_y, int kernel_size_x, int kernel_size_y)
|
||||
{
|
||||
std::vector<cv::Mat_<float> > outputs_sub;
|
||||
|
||||
// Iterate over kernel height and width, based on stride
|
||||
for (size_t in = 0; in < input_maps.size(); ++in)
|
||||
{
|
||||
// Help with rounding up a bit, to match caffe style output
|
||||
int out_x = (int)round((float)(input_maps[in].cols - kernel_size_x) / (float)stride_x) + 1;
|
||||
int out_y = (int)round((float)(input_maps[in].rows - kernel_size_y) / (float)stride_y) + 1;
|
||||
|
||||
cv::Mat_<float> sub_out(out_y, out_x, 0.0);
|
||||
cv::Mat_<float> in_map = input_maps[in];
|
||||
|
||||
for (int x = 0; x < input_maps[in].cols; x += stride_x)
|
||||
{
|
||||
int max_x = cv::min(input_maps[in].cols, x + kernel_size_x);
|
||||
int x_in_out = int(x / stride_x);
|
||||
|
||||
if (x_in_out >= out_x)
|
||||
continue;
|
||||
|
||||
for (int y = 0; y < input_maps[in].rows; y += stride_y)
|
||||
{
|
||||
int y_in_out = int(y / stride_y);
|
||||
|
||||
if (y_in_out >= out_y)
|
||||
continue;
|
||||
|
||||
int max_y = cv::min(input_maps[in].rows, y + kernel_size_y);
|
||||
|
||||
float curr_max = -FLT_MAX;
|
||||
|
||||
for (int x_in = x; x_in < max_x; ++x_in)
|
||||
{
|
||||
for (int y_in = y; y_in < max_y; ++y_in)
|
||||
{
|
||||
float curr_val = in_map.at<float>(y_in, x_in);
|
||||
if (curr_val > curr_max)
|
||||
{
|
||||
curr_max = curr_val;
|
||||
}
|
||||
}
|
||||
}
|
||||
sub_out.at<float>(y_in_out, x_in_out) = curr_max;
|
||||
}
|
||||
}
|
||||
|
||||
outputs_sub.push_back(sub_out);
|
||||
|
||||
}
|
||||
outputs = outputs_sub;
|
||||
|
||||
}
|
||||
|
||||
void convolution_single_kern_fft(const std::vector<cv::Mat_<float> >& input_imgs, std::vector<cv::Mat_<double> >& img_dfts,
|
||||
const std::vector<cv::Mat_<float> >& _templs, std::map<int, std::vector<cv::Mat_<double> > >& _templ_dfts, cv::Mat_<float>& result)
|
||||
{
|
||||
// Assume result is defined properly
|
||||
if (result.empty())
|
||||
{
|
||||
cv::Size corrSize(input_imgs[0].cols - _templs[0].cols + 1, input_imgs[0].rows - _templs[0].rows + 1);
|
||||
result.create(corrSize);
|
||||
}
|
||||
|
||||
// Our model will always be under min block size so can ignore this
|
||||
//const double blockScale = 4.5;
|
||||
//const int minBlockSize = 256;
|
||||
|
||||
int maxDepth = CV_64F;
|
||||
|
||||
cv::Size dftsize;
|
||||
|
||||
dftsize.width = cv::getOptimalDFTSize(result.cols + _templs[0].cols - 1);
|
||||
dftsize.height = cv::getOptimalDFTSize(result.rows + _templs[0].rows - 1);
|
||||
|
||||
// Compute block size
|
||||
cv::Size blocksize;
|
||||
blocksize.width = dftsize.width - _templs[0].cols + 1;
|
||||
blocksize.width = MIN(blocksize.width, result.cols);
|
||||
blocksize.height = dftsize.height - _templs[0].rows + 1;
|
||||
blocksize.height = MIN(blocksize.height, result.rows);
|
||||
|
||||
std::vector<cv::Mat_<double>> dftTempl;
|
||||
|
||||
// if this has not been precomputed, precompute it, otherwise use it
|
||||
if (_templ_dfts.find(dftsize.width) == _templ_dfts.end())
|
||||
{
|
||||
dftTempl.resize(_templs.size());
|
||||
for (size_t k = 0; k < _templs.size(); ++k)
|
||||
{
|
||||
dftTempl[k].create(dftsize.height, dftsize.width);
|
||||
|
||||
cv::Mat_<float> src = _templs[k];
|
||||
|
||||
cv::Mat_<double> dst(dftTempl[k], cv::Rect(0, 0, dftsize.width, dftsize.height));
|
||||
|
||||
cv::Mat_<double> dst1(dftTempl[k], cv::Rect(0, 0, _templs[k].cols, _templs[k].rows));
|
||||
|
||||
if (dst1.data != src.data)
|
||||
src.convertTo(dst1, dst1.depth());
|
||||
|
||||
if (dst.cols > _templs[k].cols)
|
||||
{
|
||||
cv::Mat_<double> part(dst, cv::Range(0, _templs[k].rows), cv::Range(_templs[k].cols, dst.cols));
|
||||
part.setTo(0);
|
||||
}
|
||||
|
||||
// Perform DFT of the template
|
||||
dft(dst, dst, 0, _templs[k].rows);
|
||||
|
||||
}
|
||||
_templ_dfts[dftsize.width] = dftTempl;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
dftTempl = _templ_dfts[dftsize.width];
|
||||
}
|
||||
|
||||
cv::Size bsz(std::min(blocksize.width, result.cols), std::min(blocksize.height, result.rows));
|
||||
cv::Mat src;
|
||||
|
||||
cv::Mat cdst(result, cv::Rect(0, 0, bsz.width, bsz.height));
|
||||
|
||||
std::vector<cv::Mat_<double> > dftImgs;
|
||||
dftImgs.resize(input_imgs.size());
|
||||
|
||||
if (img_dfts.empty())
|
||||
{
|
||||
for (size_t k = 0; k < input_imgs.size(); ++k)
|
||||
{
|
||||
dftImgs[k].create(dftsize);
|
||||
dftImgs[k].setTo(0.0);
|
||||
|
||||
cv::Size dsz(bsz.width + _templs[k].cols - 1, bsz.height + _templs[k].rows - 1);
|
||||
|
||||
int x2 = std::min(input_imgs[k].cols, dsz.width);
|
||||
int y2 = std::min(input_imgs[k].rows, dsz.height);
|
||||
|
||||
cv::Mat src0(input_imgs[k], cv::Range(0, y2), cv::Range(0, x2));
|
||||
cv::Mat dst(dftImgs[k], cv::Rect(0, 0, dsz.width, dsz.height));
|
||||
cv::Mat dst1(dftImgs[k], cv::Rect(0, 0, x2, y2));
|
||||
|
||||
src = src0;
|
||||
|
||||
if (dst1.data != src.data)
|
||||
src.convertTo(dst1, dst1.depth());
|
||||
|
||||
dft(dftImgs[k], dftImgs[k], 0, dsz.height);
|
||||
img_dfts.push_back(dftImgs[k].clone());
|
||||
}
|
||||
}
|
||||
|
||||
cv::Mat_<double> dft_img(img_dfts[0].rows, img_dfts[0].cols, 0.0);
|
||||
for (size_t k = 0; k < input_imgs.size(); ++k)
|
||||
{
|
||||
cv::Mat dftTempl1(dftTempl[k], cv::Rect(0, 0, dftsize.width, dftsize.height));
|
||||
if (k == 0)
|
||||
{
|
||||
cv::mulSpectrums(img_dfts[k], dftTempl1, dft_img, 0, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::mulSpectrums(img_dfts[k], dftTempl1, dftImgs[k], 0, true);
|
||||
dft_img = dft_img + dftImgs[k];
|
||||
}
|
||||
}
|
||||
|
||||
cv::dft(dft_img, dft_img, cv::DFT_INVERSE + cv::DFT_SCALE, bsz.height);
|
||||
|
||||
src = dft_img(cv::Rect(0, 0, bsz.width, bsz.height));
|
||||
|
||||
src.convertTo(cdst, CV_32F);
|
||||
|
||||
}
|
||||
|
||||
void convolution_fft2(std::vector<cv::Mat_<float> >& outputs, const std::vector<cv::Mat_<float> >& input_maps,
|
||||
const std::vector<std::vector<cv::Mat_<float> > >& kernels, const std::vector<float >& biases,
|
||||
std::vector<std::map<int, std::vector<cv::Mat_<double> > > >& precomp_dfts)
|
||||
{
|
||||
outputs.clear();
|
||||
|
||||
// Useful precomputed data placeholders for quick correlation (convolution)
|
||||
std::vector<cv::Mat_<double> > input_image_dft;
|
||||
|
||||
for (size_t k = 0; k < kernels.size(); ++k)
|
||||
{
|
||||
|
||||
// The convolution (with precomputation)
|
||||
cv::Mat_<float> output;
|
||||
convolution_single_kern_fft(input_maps, input_image_dft, kernels[k], precomp_dfts[k], output);
|
||||
|
||||
// Combining the maps
|
||||
outputs.push_back(output + biases[k]);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void im2col_t(const cv::Mat_<float>& input, const unsigned int width, const unsigned int height, cv::Mat_<float>& output)
|
||||
{
|
||||
|
||||
const unsigned int m = input.cols;
|
||||
const unsigned int n = input.rows;
|
||||
|
||||
// determine how many blocks there will be with a sliding window of width x height in the input
|
||||
const unsigned int yB = m - height + 1;
|
||||
const unsigned int xB = n - width + 1;
|
||||
|
||||
// Allocate the output size
|
||||
if (output.rows != width * height || output.cols != xB*yB)
|
||||
{
|
||||
output = cv::Mat::ones(width * height, xB*yB, CV_32F);
|
||||
}
|
||||
|
||||
// Iterate over the whole image
|
||||
for (unsigned int i = 0; i< yB; i++)
|
||||
{
|
||||
unsigned int rowIdx = i;
|
||||
for (unsigned int j = 0; j< xB; j++)
|
||||
{
|
||||
//int rowIdx = i; +j*yB;
|
||||
// iterate over the blocks within the image
|
||||
for (unsigned int yy = 0; yy < height; ++yy)
|
||||
{
|
||||
// Faster iteration over the image
|
||||
const float* Mi = input.ptr<float>(j + yy);
|
||||
for (unsigned int xx = 0; xx < width; ++xx)
|
||||
{
|
||||
unsigned int colIdx = xx*height + yy;
|
||||
|
||||
output.at<float>(colIdx, rowIdx) = Mi[i + xx];
|
||||
}
|
||||
}
|
||||
rowIdx += yB;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void im2col(const cv::Mat_<float>& input, const unsigned int width, const unsigned int height, cv::Mat_<float>& output)
|
||||
{
|
||||
|
||||
const unsigned int m = input.rows;
|
||||
const unsigned int n = input.cols;
|
||||
|
||||
// determine how many blocks there will be with a sliding window of width x height in the input
|
||||
const unsigned int yB = m - height + 1;
|
||||
const unsigned int xB = n - width + 1;
|
||||
|
||||
// Allocate the output size
|
||||
if (output.cols != width * height || output.rows != xB*yB)
|
||||
{
|
||||
output = cv::Mat::ones(xB*yB, width * height, CV_32F);
|
||||
}
|
||||
|
||||
// Iterate over the whole image
|
||||
for (unsigned int i = 0; i< yB; i++)
|
||||
{
|
||||
unsigned int rowIdx = i*xB;
|
||||
for (unsigned int j = 0; j< xB; j++)
|
||||
{
|
||||
|
||||
float* Mo = output.ptr<float>(rowIdx);
|
||||
|
||||
// iterate over the blocks within the image
|
||||
for (unsigned int yy = 0; yy < height; ++yy)
|
||||
{
|
||||
// Faster iteration over the image
|
||||
const float* Mi = input.ptr<float>(i + yy);
|
||||
|
||||
for (unsigned int xx = 0; xx < width; ++xx)
|
||||
{
|
||||
unsigned int colIdx = xx*height + yy;
|
||||
//output.at<float>(rowIdx, colIdx) = Mi[j + xx]; //input.at<float>(i + yy, j + xx);
|
||||
Mo[colIdx] = Mi[j + xx];
|
||||
}
|
||||
}
|
||||
rowIdx++;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void im2col_multimap(const std::vector<cv::Mat_<float> >& inputs, const unsigned int width, const unsigned int height,
|
||||
cv::Mat_<float>& output)
|
||||
{
|
||||
|
||||
const unsigned int m = inputs[0].rows;
|
||||
const unsigned int n = inputs[0].cols;
|
||||
|
||||
// determine how many blocks there will be with a sliding window of width x height in the input
|
||||
const unsigned int yB = m - height + 1;
|
||||
const unsigned int xB = n - width + 1;
|
||||
|
||||
int stride = height * width;
|
||||
|
||||
unsigned int num_maps = (unsigned int)inputs.size();
|
||||
|
||||
// Allocate the output size
|
||||
if (output.cols != width * height * inputs.size() + 1 || (unsigned int) output.rows < xB*yB)
|
||||
{
|
||||
output = cv::Mat::ones(xB*yB, width * height * num_maps + 1, CV_32F);
|
||||
}
|
||||
|
||||
// Iterate over the whole image
|
||||
for (unsigned int i = 0; i< yB; i++)
|
||||
{
|
||||
unsigned int rowIdx = i*xB;
|
||||
for (unsigned int j = 0; j< xB; j++)
|
||||
{
|
||||
|
||||
float* Mo = output.ptr<float>(rowIdx);
|
||||
|
||||
// TODO, this should be rearranged and done through mem-copy
|
||||
|
||||
// iterate over the blocks within the image
|
||||
for (unsigned int yy = 0; yy < height; ++yy)
|
||||
{
|
||||
for (unsigned int in_maps = 0; in_maps < num_maps; ++in_maps)
|
||||
{
|
||||
// Faster iteration over the image
|
||||
const float* Mi = inputs[in_maps].ptr<float>(i + yy);
|
||||
|
||||
for (unsigned int xx = 0; xx < width; ++xx)
|
||||
{
|
||||
unsigned int colIdx = xx*height + yy + in_maps * stride;
|
||||
//output.at<float>(rowIdx, colIdx) = Mi[j + xx]; //input.at<float>(i + yy, j + xx);
|
||||
Mo[colIdx] = Mi[j + xx];
|
||||
}
|
||||
}
|
||||
}
|
||||
rowIdx++;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A fast convolution implementation, can provide a pre-allocated im2col as well, if empty, it is created
|
||||
void convolution_direct_blas(std::vector<cv::Mat_<float> >& outputs, const std::vector<cv::Mat_<float> >& input_maps, const cv::Mat_<float>& weight_matrix, int height_k, int width_k, cv::Mat_<float>& pre_alloc_im2col)
|
||||
{
|
||||
outputs.clear();
|
||||
|
||||
int height_in = input_maps[0].rows;
|
||||
int width_n = input_maps[0].cols;
|
||||
|
||||
// determine how many blocks there will be with a sliding window of width x height in the input
|
||||
int yB = height_in - height_k + 1;
|
||||
int xB = width_n - width_k + 1;
|
||||
int num_rows = yB * xB;
|
||||
|
||||
// Instead of re-allocating data use the first rows of already allocated data and re-allocate only if not enough rows are present, this is what makes this non thread safe, as same memory would be used
|
||||
im2col_multimap(input_maps, width_k, height_k, pre_alloc_im2col);
|
||||
|
||||
float* m1 = (float*)pre_alloc_im2col.data;
|
||||
float* m2 = (float*)weight_matrix.data;
|
||||
int m2_cols = weight_matrix.cols;
|
||||
int m2_rows = weight_matrix.rows;
|
||||
|
||||
cv::Mat_<float> out(num_rows, weight_matrix.cols, 1.0);
|
||||
float* m3 = (float*)out.data;
|
||||
|
||||
//cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, weight_t.cols, yB * xB, pre_alloc_im2col.cols, 1, m2, weight_t.cols, m1, pre_alloc_im2col.cols, 0.0, m3, weight_t.cols);
|
||||
float alpha = 1.0f;
|
||||
float beta = 0.0f;
|
||||
// Call fortran directly (faster)
|
||||
char N[2]; N[0] = 'N';
|
||||
sgemm_(N, N, &m2_cols, &num_rows, &pre_alloc_im2col.cols, &alpha, m2, &m2_cols, m1, &pre_alloc_im2col.cols, &beta, m3, &m2_cols);
|
||||
|
||||
// Above is equivalent to out = pre_alloc_im2col * weight_matrix;
|
||||
|
||||
out = out.t();
|
||||
|
||||
// Move back to vectors and reshape accordingly
|
||||
for (int k = 0; k < out.rows; ++k)
|
||||
{
|
||||
outputs.push_back(out.row(k).reshape(1, yB));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,893 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
|
||||
// all rights reserved.
|
||||
//
|
||||
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
|
||||
//
|
||||
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
|
||||
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
|
||||
//
|
||||
// License can be found in OpenFace-license.txt
|
||||
//
|
||||
// * Any publications arising from the use of this software, including but
|
||||
// not limited to academic journal and conference publications, technical
|
||||
// reports and manuals, must cite at least one of the following works:
|
||||
//
|
||||
// OpenFace 2.0: Facial Behavior Analysis Toolkit
|
||||
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
|
||||
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
|
||||
//
|
||||
// Convolutional experts constrained local model for facial landmark detection.
|
||||
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
|
||||
// in Computer Vision and Pattern Recognition Workshops, 2017.
|
||||
//
|
||||
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
|
||||
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
|
||||
// in IEEE International. Conference on Computer Vision (ICCV), 2015
|
||||
//
|
||||
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
|
||||
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
|
||||
// in Facial Expression Recognition and Analysis Challenge,
|
||||
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "stdafx.h"
|
||||
|
||||
#include "FaceDetectorMTCNN.h"
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.14159265358979323846
|
||||
#endif
|
||||
|
||||
#include "LandmarkDetectorUtils.h"
|
||||
|
||||
// CNN includes
|
||||
#include "CNN_utils.h"
|
||||
|
||||
// Instead of including cblas.h (the definitions from OpenBLAS and other BLAS libraries differ, declare the required OpenBLAS functionality here)
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
/* Assume C declarations for C++ */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/*Set the number of threads on runtime.*/
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
}
|
||||
|
||||
using namespace LandmarkDetector;
|
||||
|
||||
// Constructor from model file location
|
||||
FaceDetectorMTCNN::FaceDetectorMTCNN(const std::string& location)
|
||||
{
|
||||
this->Read(location);
|
||||
}
|
||||
// Copy constructor
|
||||
FaceDetectorMTCNN::FaceDetectorMTCNN(const FaceDetectorMTCNN& other) : PNet(other.PNet), RNet(other.RNet), ONet(other.ONet)
|
||||
{
|
||||
}
|
||||
|
||||
CNN::CNN(const CNN& other) : cnn_layer_types(other.cnn_layer_types), cnn_max_pooling_layers(other.cnn_max_pooling_layers), cnn_convolutional_layers_bias(other.cnn_convolutional_layers_bias), conv_layer_pre_alloc_im2col(other.conv_layer_pre_alloc_im2col)
|
||||
{
|
||||
|
||||
this->cnn_convolutional_layers_weights.resize(other.cnn_convolutional_layers_weights.size());
|
||||
for (size_t l = 0; l < other.cnn_convolutional_layers_weights.size(); ++l)
|
||||
{
|
||||
// Make sure the matrix is copied.
|
||||
this->cnn_convolutional_layers_weights[l] = other.cnn_convolutional_layers_weights[l].clone();
|
||||
}
|
||||
|
||||
this->cnn_convolutional_layers.resize(other.cnn_convolutional_layers.size());
|
||||
for (size_t l = 0; l < other.cnn_convolutional_layers.size(); ++l)
|
||||
{
|
||||
this->cnn_convolutional_layers[l].resize(other.cnn_convolutional_layers[l].size());
|
||||
|
||||
for (size_t i = 0; i < other.cnn_convolutional_layers[l].size(); ++i)
|
||||
{
|
||||
this->cnn_convolutional_layers[l][i].resize(other.cnn_convolutional_layers[l][i].size());
|
||||
|
||||
for (size_t k = 0; k < other.cnn_convolutional_layers[l][i].size(); ++k)
|
||||
{
|
||||
// Make sure the matrix is copied.
|
||||
this->cnn_convolutional_layers[l][i][k] = other.cnn_convolutional_layers[l][i][k].clone();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this->cnn_fully_connected_layers_weights.resize(other.cnn_fully_connected_layers_weights.size());
|
||||
|
||||
for (size_t l = 0; l < other.cnn_fully_connected_layers_weights.size(); ++l)
|
||||
{
|
||||
// Make sure the matrix is copied.
|
||||
this->cnn_fully_connected_layers_weights[l] = other.cnn_fully_connected_layers_weights[l].clone();
|
||||
}
|
||||
|
||||
this->cnn_fully_connected_layers_biases.resize(other.cnn_fully_connected_layers_biases.size());
|
||||
|
||||
for (size_t l = 0; l < other.cnn_fully_connected_layers_biases.size(); ++l)
|
||||
{
|
||||
// Make sure the matrix is copied.
|
||||
this->cnn_fully_connected_layers_biases[l] = other.cnn_fully_connected_layers_biases[l].clone();
|
||||
}
|
||||
|
||||
this->cnn_prelu_layer_weights.resize(other.cnn_prelu_layer_weights.size());
|
||||
|
||||
for (size_t l = 0; l < other.cnn_prelu_layer_weights.size(); ++l)
|
||||
{
|
||||
// Make sure the matrix is copied.
|
||||
this->cnn_prelu_layer_weights[l] = other.cnn_prelu_layer_weights[l].clone();
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<cv::Mat_<float>> CNN::Inference(const cv::Mat& input_img, bool direct, bool thread_safe)
|
||||
{
|
||||
if (input_img.channels() == 1)
|
||||
{
|
||||
cv::cvtColor(input_img, input_img, cv::COLOR_GRAY2BGR);
|
||||
}
|
||||
|
||||
int cnn_layer = 0;
|
||||
int fully_connected_layer = 0;
|
||||
int prelu_layer = 0;
|
||||
int max_pool_layer = 0;
|
||||
|
||||
// Slit a BGR image into three chnels
|
||||
cv::Mat channels[3];
|
||||
cv::split(input_img, channels);
|
||||
|
||||
// Flip the BGR order to RGB
|
||||
std::vector<cv::Mat_<float> > input_maps;
|
||||
input_maps.push_back(channels[2]);
|
||||
input_maps.push_back(channels[1]);
|
||||
input_maps.push_back(channels[0]);
|
||||
|
||||
std::vector<cv::Mat_<float> > outputs;
|
||||
|
||||
for (size_t layer = 0; layer < cnn_layer_types.size(); ++layer)
|
||||
{
|
||||
|
||||
// Determine layer type
|
||||
int layer_type = cnn_layer_types[layer];
|
||||
|
||||
// Convolutional layer
|
||||
if (layer_type == 0)
|
||||
{
|
||||
|
||||
// Either perform direct convolution through matrix multiplication or use an FFT optimized version, which one is optimal depends on the kernel and input sizes
|
||||
if (direct)
|
||||
{
|
||||
if(thread_safe)
|
||||
{
|
||||
cv::Mat_<float> pre_alloc;
|
||||
convolution_direct_blas(outputs, input_maps, cnn_convolutional_layers_weights[cnn_layer], cnn_convolutional_layers[cnn_layer][0][0].rows, cnn_convolutional_layers[cnn_layer][0][0].cols, pre_alloc);
|
||||
}
|
||||
else
|
||||
{
|
||||
convolution_direct_blas(outputs, input_maps, cnn_convolutional_layers_weights[cnn_layer], cnn_convolutional_layers[cnn_layer][0][0].rows, cnn_convolutional_layers[cnn_layer][0][0].cols, conv_layer_pre_alloc_im2col[cnn_layer]);
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
convolution_fft2(outputs, input_maps, cnn_convolutional_layers[cnn_layer], cnn_convolutional_layers_bias[cnn_layer], cnn_convolutional_layers_dft[cnn_layer]);
|
||||
}
|
||||
//vector<cv::Mat_<float> > outs;
|
||||
//convolution_fft(outs, input_maps, cnn_convolutional_layers[cnn_layer], cnn_convolutional_layers_bias[cnn_layer], cnn_convolutional_layers_dft[cnn_layer]);
|
||||
|
||||
|
||||
|
||||
cnn_layer++;
|
||||
}
|
||||
if (layer_type == 1)
|
||||
{
|
||||
|
||||
int stride_x = std::get<2>(cnn_max_pooling_layers[max_pool_layer]);
|
||||
int stride_y = std::get<3>(cnn_max_pooling_layers[max_pool_layer]);
|
||||
|
||||
int kernel_size_x = std::get<0>(cnn_max_pooling_layers[max_pool_layer]);
|
||||
int kernel_size_y = std::get<1>(cnn_max_pooling_layers[max_pool_layer]);
|
||||
|
||||
max_pooling(outputs, input_maps, stride_x, stride_y, kernel_size_x, kernel_size_y);
|
||||
max_pool_layer++;
|
||||
}
|
||||
if (layer_type == 2)
|
||||
{
|
||||
fully_connected(outputs, input_maps, cnn_fully_connected_layers_weights[fully_connected_layer], cnn_fully_connected_layers_biases[fully_connected_layer]);
|
||||
fully_connected_layer++;
|
||||
}
|
||||
if (layer_type == 3) // PReLU
|
||||
{
|
||||
// In place prelu computation
|
||||
PReLU(input_maps, cnn_prelu_layer_weights[prelu_layer]);
|
||||
outputs = input_maps;
|
||||
prelu_layer++;
|
||||
}
|
||||
if (layer_type == 4)
|
||||
{
|
||||
outputs.clear();
|
||||
for (size_t k = 0; k < input_maps.size(); ++k)
|
||||
{
|
||||
// Apply the sigmoid
|
||||
cv::exp(-input_maps[k], input_maps[k]);
|
||||
input_maps[k] = 1.0 / (1.0 + input_maps[k]);
|
||||
|
||||
outputs.push_back(input_maps[k]);
|
||||
|
||||
}
|
||||
}
|
||||
// Set the outputs of this layer to inputs of the next one
|
||||
input_maps = outputs;
|
||||
}
|
||||
|
||||
|
||||
return outputs;
|
||||
|
||||
}
|
||||
|
||||
void ReadMatBin(std::ifstream& stream, cv::Mat &output_mat)
|
||||
{
|
||||
// Read in the number of rows, columns and the data type
|
||||
int row, col, type;
|
||||
|
||||
stream.read((char*)&row, 4);
|
||||
stream.read((char*)&col, 4);
|
||||
stream.read((char*)&type, 4);
|
||||
|
||||
output_mat = cv::Mat(row, col, type);
|
||||
int size = output_mat.rows * output_mat.cols * output_mat.elemSize();
|
||||
stream.read((char *)output_mat.data, size);
|
||||
|
||||
}
|
||||
|
||||
void CNN::ClearPrecomp()
|
||||
{
|
||||
for (size_t k1 = 0; k1 < cnn_convolutional_layers_dft.size(); ++k1)
|
||||
{
|
||||
for (size_t k2 = 0; k2 < cnn_convolutional_layers_dft[k1].size(); ++k2)
|
||||
{
|
||||
cnn_convolutional_layers_dft[k1][k2].clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CNN::Read(const std::string& location)
|
||||
{
|
||||
|
||||
openblas_set_num_threads(1);
|
||||
|
||||
std::ifstream cnn_stream(location, std::ios::in | std::ios::binary);
|
||||
if (cnn_stream.is_open())
|
||||
{
|
||||
cnn_stream.seekg(0, std::ios::beg);
|
||||
|
||||
// Reading in CNNs
|
||||
|
||||
int network_depth;
|
||||
cnn_stream.read((char*)&network_depth, 4);
|
||||
|
||||
cnn_layer_types.resize(network_depth);
|
||||
|
||||
for (int layer = 0; layer < network_depth; ++layer)
|
||||
{
|
||||
|
||||
int layer_type;
|
||||
cnn_stream.read((char*)&layer_type, 4);
|
||||
cnn_layer_types[layer] = layer_type;
|
||||
|
||||
// convolutional
|
||||
if (layer_type == 0)
|
||||
{
|
||||
|
||||
// Read the number of input maps
|
||||
int num_in_maps;
|
||||
cnn_stream.read((char*)&num_in_maps, 4);
|
||||
|
||||
// Read the number of kernels for each input map
|
||||
int num_kernels;
|
||||
cnn_stream.read((char*)&num_kernels, 4);
|
||||
|
||||
std::vector<std::vector<cv::Mat_<float> > > kernels;
|
||||
|
||||
kernels.resize(num_in_maps);
|
||||
|
||||
std::vector<float> biases;
|
||||
for (int k = 0; k < num_kernels; ++k)
|
||||
{
|
||||
float bias;
|
||||
cnn_stream.read((char*)&bias, 4);
|
||||
biases.push_back(bias);
|
||||
}
|
||||
|
||||
cnn_convolutional_layers_bias.push_back(biases);
|
||||
|
||||
// For every input map
|
||||
for (int in = 0; in < num_in_maps; ++in)
|
||||
{
|
||||
kernels[in].resize(num_kernels);
|
||||
|
||||
// For every kernel on that input map
|
||||
for (int k = 0; k < num_kernels; ++k)
|
||||
{
|
||||
ReadMatBin(cnn_stream, kernels[in][k]);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// Rearrange the kernels for faster inference with FFT
|
||||
std::vector<std::vector<cv::Mat_<float> > > kernels_rearr;
|
||||
kernels_rearr.resize(num_kernels);
|
||||
|
||||
// Fill up the rearranged layer
|
||||
for (int k = 0; k < num_kernels; ++k)
|
||||
{
|
||||
for (int in = 0; in < num_in_maps; ++in)
|
||||
{
|
||||
kernels_rearr[k].push_back(kernels[in][k]);
|
||||
}
|
||||
}
|
||||
|
||||
cnn_convolutional_layers.push_back(kernels_rearr);
|
||||
|
||||
// Place-holders for DFT precomputation
|
||||
std::vector<std::map<int, std::vector<cv::Mat_<double> > > > cnn_convolutional_layers_dft_curr_layer;
|
||||
cnn_convolutional_layers_dft_curr_layer.resize(num_kernels);
|
||||
cnn_convolutional_layers_dft.push_back(cnn_convolutional_layers_dft_curr_layer);
|
||||
|
||||
// Rearrange the flattened kernels into weight matrices for direct convolution computation
|
||||
cv::Mat_<float> weight_matrix(num_in_maps * kernels_rearr[0][0].rows * kernels_rearr[0][0].cols, num_kernels);
|
||||
for (int k = 0; k < num_kernels; ++k)
|
||||
{
|
||||
for (int i = 0; i < num_in_maps; ++i)
|
||||
{
|
||||
// Flatten the kernel
|
||||
cv::Mat_<float> k_flat = kernels_rearr[k][i].t();
|
||||
k_flat = k_flat.reshape(0, 1).t();
|
||||
k_flat.copyTo(weight_matrix(cv::Rect(k, i * kernels_rearr[0][0].rows * kernels_rearr[0][0].cols, 1, kernels_rearr[0][0].rows * kernels_rearr[0][0].cols)));
|
||||
}
|
||||
}
|
||||
|
||||
// Transpose the weight matrix for more convenient computation
|
||||
weight_matrix = weight_matrix.t();
|
||||
|
||||
// Add a bias term to the weight matrix for efficiency
|
||||
cv::Mat_<float> W(weight_matrix.rows, weight_matrix.cols + 1, 1.0);
|
||||
for (int k = 0; k < weight_matrix.rows; ++k)
|
||||
{
|
||||
W.at<float>(k, weight_matrix.cols) = biases[k];
|
||||
}
|
||||
weight_matrix.copyTo(W(cv::Rect(0, 0, weight_matrix.cols, weight_matrix.rows)));
|
||||
|
||||
cnn_convolutional_layers_weights.push_back(W.t());
|
||||
conv_layer_pre_alloc_im2col.push_back(cv::Mat_<float>());
|
||||
|
||||
}
|
||||
else if (layer_type == 1)
|
||||
{
|
||||
int kernel_x, kernel_y, stride_x, stride_y;
|
||||
cnn_stream.read((char*)&kernel_x, 4);
|
||||
cnn_stream.read((char*)&kernel_y, 4);
|
||||
cnn_stream.read((char*)&stride_x, 4);
|
||||
cnn_stream.read((char*)&stride_y, 4);
|
||||
cnn_max_pooling_layers.push_back(std::tuple<int, int, int, int>(kernel_x, kernel_y, stride_x, stride_y));
|
||||
}
|
||||
else if (layer_type == 2)
|
||||
{
|
||||
cv::Mat_<float> biases;
|
||||
ReadMatBin(cnn_stream, biases);
|
||||
cnn_fully_connected_layers_biases.push_back(biases);
|
||||
|
||||
// Fully connected layer
|
||||
cv::Mat_<float> weights;
|
||||
ReadMatBin(cnn_stream, weights);
|
||||
cnn_fully_connected_layers_weights.push_back(weights.t());
|
||||
}
|
||||
|
||||
else if (layer_type == 3)
|
||||
{
|
||||
cv::Mat_<float> weights;
|
||||
ReadMatBin(cnn_stream, weights);
|
||||
cnn_prelu_layer_weights.push_back(weights);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "WARNING: Can't find the CNN location" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
// Read in the MTCNN detector
|
||||
void FaceDetectorMTCNN::Read(const std::string& location)
|
||||
{
|
||||
|
||||
std::cout << "Reading the MTCNN face detector from: " << location << std::endl;
|
||||
|
||||
std::ifstream locations(location.c_str(), std::ios_base::in);
|
||||
if (!locations.is_open())
|
||||
{
|
||||
std::cout << "MTCNN model file not found or can't be opened" << std::endl;
|
||||
return;
|
||||
}
|
||||
std::string line;
|
||||
|
||||
// The other module locations should be defined as relative paths from the main model
|
||||
fs::path root = fs::path(location).parent_path();
|
||||
|
||||
// The main file contains the references to other files
|
||||
while (!locations.eof())
|
||||
{
|
||||
getline(locations, line);
|
||||
|
||||
std::stringstream lineStream(line);
|
||||
|
||||
std::string module;
|
||||
std::string location;
|
||||
|
||||
// figure out which module is to be read from which file
|
||||
lineStream >> module;
|
||||
|
||||
lineStream >> location;
|
||||
|
||||
// remove carriage return at the end for compatibility with unix systems
|
||||
if (location.size() > 0 && location.at(location.size() - 1) == '\r')
|
||||
{
|
||||
location = location.substr(0, location.size() - 1);
|
||||
}
|
||||
|
||||
// append to root
|
||||
location = (root / location).string();
|
||||
if (module.compare("PNet") == 0)
|
||||
{
|
||||
std::cout << "Reading the PNet module from: " << location << std::endl;
|
||||
PNet.Read(location);
|
||||
}
|
||||
else if(module.compare("RNet") == 0)
|
||||
{
|
||||
std::cout << "Reading the RNet module from: " << location << std::endl;
|
||||
RNet.Read(location);
|
||||
}
|
||||
else if (module.compare("ONet") == 0)
|
||||
{
|
||||
std::cout << "Reading the ONet module from: " << location << std::endl;
|
||||
ONet.Read(location);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Perform non maximum supression on proposal bounding boxes prioritizing boxes with high score/confidence
|
||||
std::vector<int> non_maximum_supression(const std::vector<cv::Rect_<float> >& original_bb, const std::vector<float>& scores, float thresh, bool minimum)
|
||||
{
|
||||
|
||||
// Sort the input bounding boxes by the detection score, using the nice trick of multimap always being sorted internally
|
||||
std::multimap<float, size_t> idxs;
|
||||
for (size_t i = 0; i < original_bb.size(); ++i)
|
||||
{
|
||||
idxs.insert(std::pair<float, size_t>(scores[i], i));
|
||||
}
|
||||
|
||||
std::vector<int> output_ids;
|
||||
|
||||
// keep looping while some indexes still remain in the indexes list
|
||||
while (idxs.size() > 0)
|
||||
{
|
||||
// grab the last rectangle
|
||||
auto lastElem = --std::end(idxs);
|
||||
size_t curr_id = lastElem->second;
|
||||
|
||||
const cv::Rect& rect1 = original_bb[curr_id];
|
||||
|
||||
idxs.erase(lastElem);
|
||||
|
||||
// Iterate through remaining bounding boxes and choose which ones to remove
|
||||
for (auto pos = std::begin(idxs); pos != std::end(idxs); )
|
||||
{
|
||||
// grab the current rectangle
|
||||
const cv::Rect& rect2 = original_bb[pos->second];
|
||||
|
||||
float intArea = (rect1 & rect2).area();
|
||||
float unionArea;
|
||||
if (minimum)
|
||||
{
|
||||
unionArea = cv::min(rect1.area(), rect2.area());
|
||||
}
|
||||
else
|
||||
{
|
||||
unionArea = rect1.area() + rect2.area() - intArea;
|
||||
}
|
||||
float overlap = intArea / unionArea;
|
||||
|
||||
// Remove the bounding boxes with less confidence but with significant overlap with the current one
|
||||
if (overlap > thresh)
|
||||
{
|
||||
pos = idxs.erase(pos);
|
||||
}
|
||||
else
|
||||
{
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
output_ids.push_back(curr_id);
|
||||
|
||||
}
|
||||
|
||||
return output_ids;
|
||||
|
||||
}
|
||||
|
||||
// Helper function for selecting a subset of bounding boxes based on indices
|
||||
void select_subset(const std::vector<int>& to_keep, std::vector<cv::Rect_<float> >& bounding_boxes, std::vector<float>& scores,
|
||||
std::vector<cv::Rect_<float> >& corrections)
|
||||
{
|
||||
std::vector<cv::Rect_<float> > bounding_boxes_tmp;
|
||||
std::vector<float> scores_tmp;
|
||||
std::vector<cv::Rect_<float> > corrections_tmp;
|
||||
|
||||
for (size_t i = 0; i < to_keep.size(); ++i)
|
||||
{
|
||||
bounding_boxes_tmp.push_back(bounding_boxes[to_keep[i]]);
|
||||
scores_tmp.push_back(scores[to_keep[i]]);
|
||||
corrections_tmp.push_back(corrections[to_keep[i]]);
|
||||
}
|
||||
|
||||
bounding_boxes = bounding_boxes_tmp;
|
||||
scores = scores_tmp;
|
||||
corrections = corrections_tmp;
|
||||
}
|
||||
|
||||
// Use the heatmap generated by PNet to generate bounding boxes in the original image space, also generate the correction values and scores of the bounding boxes as well
|
||||
void generate_bounding_boxes(std::vector<cv::Rect_<float> >& o_bounding_boxes, std::vector<float>& o_scores,
|
||||
std::vector<cv::Rect_<float> >& o_corrections, const cv::Mat_<float>& heatmap, const std::vector<cv::Mat_<float> >& corrections,
|
||||
float scale, float threshold, int face_support)
|
||||
{
|
||||
|
||||
// Correction for the pooling
|
||||
int stride = 2;
|
||||
|
||||
o_bounding_boxes.clear();
|
||||
o_scores.clear();
|
||||
o_corrections.clear();
|
||||
|
||||
int counter = 0;
|
||||
for (int x = 0; x < heatmap.cols; ++x)
|
||||
{
|
||||
for(int y = 0; y < heatmap.rows; ++y)
|
||||
{
|
||||
if (heatmap.at<float>(y, x) >= threshold)
|
||||
{
|
||||
float min_x = int((stride * x + 1) / scale);
|
||||
float max_x = int((stride * x + face_support) / scale);
|
||||
float min_y = int((stride * y + 1) / scale);
|
||||
float max_y = int((stride * y + face_support) / scale);
|
||||
|
||||
o_bounding_boxes.push_back(cv::Rect_<float>(min_x, min_y, max_x - min_x, max_y - min_y));
|
||||
o_scores.push_back(heatmap.at<float>(y, x));
|
||||
|
||||
float corr_x = corrections[0].at<float>(y, x);
|
||||
float corr_y = corrections[1].at<float>(y, x);
|
||||
float corr_width = corrections[2].at<float>(y, x);
|
||||
float corr_height = corrections[3].at<float>(y, x);
|
||||
o_corrections.push_back(cv::Rect_<float>(corr_x, corr_y, corr_width, corr_height));
|
||||
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Converting the bounding boxes to squares
|
||||
void rectify(std::vector<cv::Rect_<float> >& total_bboxes)
|
||||
{
|
||||
|
||||
// Apply size and location offsets
|
||||
for (size_t i = 0; i < total_bboxes.size(); ++i)
|
||||
{
|
||||
float height = total_bboxes[i].height;
|
||||
float width = total_bboxes[i].width;
|
||||
|
||||
float max_side = std::max(width, height);
|
||||
|
||||
// Correct the starts based on new size
|
||||
float new_min_x = total_bboxes[i].x + 0.5 * (width - max_side);
|
||||
float new_min_y = total_bboxes[i].y + 0.5 * (height - max_side);
|
||||
|
||||
total_bboxes[i].x = (int)new_min_x;
|
||||
total_bboxes[i].y = (int)new_min_y;
|
||||
total_bboxes[i].width = (int)max_side;
|
||||
total_bboxes[i].height = (int)max_side;
|
||||
}
|
||||
}
|
||||
|
||||
void apply_correction(std::vector<cv::Rect_<float> >& total_bboxes, const std::vector<cv::Rect_<float> > corrections, bool add1)
|
||||
{
|
||||
|
||||
// Apply size and location offsets
|
||||
for (size_t i = 0; i < total_bboxes.size(); ++i)
|
||||
{
|
||||
cv::Rect curr_box = total_bboxes[i];
|
||||
if (add1)
|
||||
{
|
||||
curr_box.width++;
|
||||
curr_box.height++;
|
||||
}
|
||||
|
||||
float new_min_x = curr_box.x + corrections[i].x * curr_box.width;
|
||||
float new_min_y = curr_box.y + corrections[i].y * curr_box.height;
|
||||
float new_max_x = curr_box.x + curr_box.width + curr_box.width * corrections[i].width;
|
||||
float new_max_y = curr_box.y + curr_box.height + curr_box.height * corrections[i].height;
|
||||
total_bboxes[i] = cv::Rect_<float>(new_min_x, new_min_y, new_max_x - new_min_x, new_max_y - new_min_y);
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
// The actual MTCNN face detection step
|
||||
bool FaceDetectorMTCNN::DetectFaces(std::vector<cv::Rect_<float> >& o_regions, const cv::Mat& img_in,
|
||||
std::vector<float>& o_confidences, int min_face_size, float t1, float t2, float t3)
|
||||
{
|
||||
|
||||
int height_orig = img_in.size().height;
|
||||
int width_orig = img_in.size().width;
|
||||
|
||||
// Size ratio of image pyramids
|
||||
double pyramid_factor = 0.709;
|
||||
|
||||
// Face support region is 12x12 px, so from that can work out the largest
|
||||
// scale(which is 12 / min), and work down from there to smallest scale(no smaller than 12x12px)
|
||||
int min_dim = std::min(height_orig, width_orig);
|
||||
|
||||
int face_support = 12;
|
||||
int num_scales = floor(log((double)min_face_size / (double)min_dim) / log(pyramid_factor)) + 1;
|
||||
|
||||
cv::Mat input_img;
|
||||
|
||||
// Force the image to three channels
|
||||
if (img_in.channels() == 1)
|
||||
{
|
||||
cv::cvtColor(img_in, input_img, cv::COLOR_GRAY2RGB);
|
||||
}
|
||||
else
|
||||
{
|
||||
input_img = img_in;
|
||||
}
|
||||
|
||||
cv::Mat img_float;
|
||||
input_img.convertTo(img_float, CV_32FC3);
|
||||
|
||||
std::vector<cv::Rect_<float> > proposal_boxes_all;
|
||||
std::vector<float> scores_all;
|
||||
std::vector<cv::Rect_<float> > proposal_corrections_all;
|
||||
|
||||
// As the scales will be done in parallel have some containers for them
|
||||
std::vector<std::vector<cv::Rect_<float> > > proposal_boxes_cross_scale(num_scales);
|
||||
std::vector<std::vector<float> > scores_cross_scale(num_scales);
|
||||
std::vector<std::vector<cv::Rect_<float> > > proposal_corrections_cross_scale(num_scales);
|
||||
|
||||
for (int i = 0; i < num_scales; ++i)
|
||||
{
|
||||
double scale = ((double)face_support / (double)min_face_size)*cv::pow(pyramid_factor, i);
|
||||
|
||||
int h_pyr = ceil(height_orig * scale);
|
||||
int w_pyr = ceil(width_orig * scale);
|
||||
|
||||
cv::Mat normalised_img;
|
||||
cv::resize(img_float, normalised_img, cv::Size(w_pyr, h_pyr));
|
||||
|
||||
// Normalize the image
|
||||
normalised_img = (normalised_img - 127.5) * 0.0078125;
|
||||
|
||||
// Actual PNet CNN step
|
||||
std::vector<cv::Mat_<float> > pnet_out = PNet.Inference(normalised_img, true, false);
|
||||
|
||||
// Clear the precomputations, as the image sizes will be different
|
||||
PNet.ClearPrecomp();
|
||||
|
||||
// Extract the probabilities from PNet response
|
||||
cv::Mat_<float> prob_heatmap;
|
||||
cv::exp(pnet_out[0]- pnet_out[1], prob_heatmap);
|
||||
prob_heatmap = 1.0 / (1.0 + prob_heatmap);
|
||||
|
||||
// Extract the probabilities from PNet response
|
||||
std::vector<cv::Mat_<float>> corrections_heatmap(pnet_out.begin() + 2, pnet_out.end());
|
||||
|
||||
// Grab the detections
|
||||
std::vector<cv::Rect_<float> > proposal_boxes;
|
||||
std::vector<float> scores;
|
||||
std::vector<cv::Rect_<float> > proposal_corrections;
|
||||
generate_bounding_boxes(proposal_boxes, scores, proposal_corrections, prob_heatmap, corrections_heatmap, scale, t1, face_support);
|
||||
|
||||
proposal_boxes_cross_scale[i] = proposal_boxes;
|
||||
scores_cross_scale[i] = scores;
|
||||
proposal_corrections_cross_scale[i] = proposal_corrections;
|
||||
}
|
||||
//});
|
||||
|
||||
// Perform non-maximum supression on proposals accross scales and combine them
|
||||
for (int i = 0; i < num_scales; ++i)
|
||||
{
|
||||
std::vector<int> to_keep = non_maximum_supression(proposal_boxes_cross_scale[i], scores_cross_scale[i], 0.5, false);
|
||||
select_subset(to_keep, proposal_boxes_cross_scale[i], scores_cross_scale[i], proposal_corrections_cross_scale[i]);
|
||||
|
||||
proposal_boxes_all.insert(proposal_boxes_all.end(), proposal_boxes_cross_scale[i].begin(), proposal_boxes_cross_scale[i].end());
|
||||
scores_all.insert(scores_all.end(), scores_cross_scale[i].begin(), scores_cross_scale[i].end());
|
||||
proposal_corrections_all.insert(proposal_corrections_all.end(), proposal_corrections_cross_scale[i].begin(), proposal_corrections_cross_scale[i].end());
|
||||
}
|
||||
|
||||
// Preparation for RNet step
|
||||
|
||||
// Non maximum supression accross bounding boxes, and their offset correction
|
||||
std::vector<int> to_keep = non_maximum_supression(proposal_boxes_all, scores_all, 0.7, false);
|
||||
select_subset(to_keep, proposal_boxes_all, scores_all, proposal_corrections_all);
|
||||
|
||||
apply_correction(proposal_boxes_all, proposal_corrections_all, false);
|
||||
|
||||
// Convert to rectangles and round
|
||||
rectify(proposal_boxes_all);
|
||||
|
||||
// Creating proposal images from previous step detections
|
||||
std::vector<bool> above_thresh;
|
||||
above_thresh.resize(proposal_boxes_all.size(), false);
|
||||
|
||||
for (size_t k = 0; k < proposal_boxes_all.size(); ++k)
|
||||
{
|
||||
float width_target = proposal_boxes_all[k].width + 1;
|
||||
float height_target = proposal_boxes_all[k].height + 1;
|
||||
|
||||
// Work out the start and end indices in the original image
|
||||
int start_x_in = cv::max((int)(proposal_boxes_all[k].x - 1), 0);
|
||||
int start_y_in = cv::max((int)(proposal_boxes_all[k].y - 1), 0);
|
||||
int end_x_in = cv::min((int)(proposal_boxes_all[k].x + width_target - 1), width_orig);
|
||||
int end_y_in = cv::min((int)(proposal_boxes_all[k].y + height_target - 1), height_orig);
|
||||
|
||||
// Work out the start and end indices in the target image
|
||||
int start_x_out = cv::max((int)(-proposal_boxes_all[k].x + 1), 0);
|
||||
int start_y_out = cv::max((int)(-proposal_boxes_all[k].y + 1), 0);
|
||||
int end_x_out = cv::min(width_target - (proposal_boxes_all[k].x + proposal_boxes_all[k].width - width_orig), width_target);
|
||||
int end_y_out = cv::min(height_target - (proposal_boxes_all[k].y + proposal_boxes_all[k].height - height_orig), height_target);
|
||||
|
||||
cv::Mat tmp(height_target, width_target, CV_32FC3, cv::Scalar(0.0f,0.0f,0.0f));
|
||||
|
||||
img_float(cv::Rect(start_x_in, start_y_in, end_x_in - start_x_in, end_y_in - start_y_in)).copyTo(
|
||||
tmp(cv::Rect(start_x_out, start_y_out, end_x_out - start_x_out, end_y_out - start_y_out)));
|
||||
|
||||
cv::Mat prop_img;
|
||||
cv::resize(tmp, prop_img, cv::Size(24, 24));
|
||||
|
||||
prop_img = (prop_img - 127.5) * 0.0078125;
|
||||
|
||||
// Perform RNet on the proposal image
|
||||
std::vector<cv::Mat_<float> > rnet_out = RNet.Inference(prop_img, true, false);
|
||||
|
||||
float prob = 1.0 / (1.0 + cv::exp(rnet_out[0].at<float>(0) - rnet_out[0].at<float>(1)));
|
||||
scores_all[k] = prob;
|
||||
proposal_corrections_all[k].x = rnet_out[0].at<float>(2);
|
||||
proposal_corrections_all[k].y = rnet_out[0].at<float>(3);
|
||||
proposal_corrections_all[k].width = rnet_out[0].at<float>(4);
|
||||
proposal_corrections_all[k].height = rnet_out[0].at<float>(5);
|
||||
if(prob >= t2)
|
||||
{
|
||||
above_thresh[k] = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
above_thresh[k] = false;
|
||||
}
|
||||
|
||||
}
|
||||
//});
|
||||
|
||||
to_keep.clear();
|
||||
for (size_t i = 0; i < above_thresh.size(); ++i)
|
||||
{
|
||||
if (above_thresh[i])
|
||||
{
|
||||
to_keep.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
// Pick only the bounding boxes above the threshold
|
||||
select_subset(to_keep, proposal_boxes_all, scores_all, proposal_corrections_all);
|
||||
|
||||
// Non maximum supression accross bounding boxes, and their offset correction
|
||||
to_keep = non_maximum_supression(proposal_boxes_all, scores_all, 0.7, false);
|
||||
select_subset(to_keep, proposal_boxes_all, scores_all, proposal_corrections_all);
|
||||
|
||||
apply_correction(proposal_boxes_all, proposal_corrections_all, false);
|
||||
|
||||
// Convert to rectangles and round
|
||||
rectify(proposal_boxes_all);
|
||||
|
||||
// Preparing for the ONet stage
|
||||
above_thresh.clear();
|
||||
above_thresh.resize(proposal_boxes_all.size());
|
||||
|
||||
for (size_t k = 0; k < proposal_boxes_all.size(); ++k)
|
||||
{
|
||||
float width_target = proposal_boxes_all[k].width + 1;
|
||||
float height_target = proposal_boxes_all[k].height + 1;
|
||||
|
||||
// Work out the start and end indices in the original image
|
||||
int start_x_in = cv::max((int)(proposal_boxes_all[k].x - 1), 0);
|
||||
int start_y_in = cv::max((int)(proposal_boxes_all[k].y - 1), 0);
|
||||
int end_x_in = cv::min((int)(proposal_boxes_all[k].x + width_target - 1), width_orig);
|
||||
int end_y_in = cv::min((int)(proposal_boxes_all[k].y + height_target - 1), height_orig);
|
||||
|
||||
// Work out the start and end indices in the target image
|
||||
int start_x_out = cv::max((int)(-proposal_boxes_all[k].x + 1), 0);
|
||||
int start_y_out = cv::max((int)(-proposal_boxes_all[k].y + 1), 0);
|
||||
int end_x_out = cv::min(width_target - (proposal_boxes_all[k].x + proposal_boxes_all[k].width - width_orig), width_target);
|
||||
int end_y_out = cv::min(height_target - (proposal_boxes_all[k].y + proposal_boxes_all[k].height - height_orig), height_target);
|
||||
|
||||
cv::Mat tmp(height_target, width_target, CV_32FC3, cv::Scalar(0.0f, 0.0f, 0.0f));
|
||||
|
||||
img_float(cv::Rect(start_x_in, start_y_in, end_x_in - start_x_in, end_y_in - start_y_in)).copyTo(
|
||||
tmp(cv::Rect(start_x_out, start_y_out, end_x_out - start_x_out, end_y_out - start_y_out)));
|
||||
|
||||
cv::Mat prop_img;
|
||||
cv::resize(tmp, prop_img, cv::Size(48, 48));
|
||||
|
||||
prop_img = (prop_img - 127.5) * 0.0078125;
|
||||
|
||||
// Perform RNet on the proposal image
|
||||
std::vector<cv::Mat_<float> > onet_out = ONet.Inference(prop_img, true, false);
|
||||
|
||||
float prob = 1.0 / (1.0 + cv::exp(onet_out[0].at<float>(0) - onet_out[0].at<float>(1)));
|
||||
scores_all[k] = prob;
|
||||
proposal_corrections_all[k].x = onet_out[0].at<float>(2);
|
||||
proposal_corrections_all[k].y = onet_out[0].at<float>(3);
|
||||
proposal_corrections_all[k].width = onet_out[0].at<float>(4);
|
||||
proposal_corrections_all[k].height = onet_out[0].at<float>(5);
|
||||
if (prob >= t3)
|
||||
{
|
||||
above_thresh[k] = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
above_thresh[k] = false;
|
||||
}
|
||||
}
|
||||
//});
|
||||
|
||||
to_keep.clear();
|
||||
for (size_t i = 0; i < above_thresh.size(); ++i)
|
||||
{
|
||||
if (above_thresh[i])
|
||||
{
|
||||
to_keep.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
// Pick only the bounding boxes above the threshold
|
||||
select_subset(to_keep, proposal_boxes_all, scores_all, proposal_corrections_all);
|
||||
apply_correction(proposal_boxes_all, proposal_corrections_all, true);
|
||||
|
||||
// Non maximum supression accross bounding boxes, and their offset correction
|
||||
to_keep = non_maximum_supression(proposal_boxes_all, scores_all, 0.7, true);
|
||||
select_subset(to_keep, proposal_boxes_all, scores_all, proposal_corrections_all);
|
||||
|
||||
// Correct the box to expectation to be tight around facial landmarks
|
||||
for (size_t k = 0; k < proposal_boxes_all.size(); ++k)
|
||||
{
|
||||
proposal_boxes_all[k].x = proposal_boxes_all[k].width * -0.0075 + proposal_boxes_all[k].x;
|
||||
proposal_boxes_all[k].y = proposal_boxes_all[k].height * 0.2459 + proposal_boxes_all[k].y;
|
||||
proposal_boxes_all[k].width = 1.0323 * proposal_boxes_all[k].width;
|
||||
proposal_boxes_all[k].height = 0.7751 * proposal_boxes_all[k].height;
|
||||
|
||||
o_regions.push_back(cv::Rect_<float>(proposal_boxes_all[k].x, proposal_boxes_all[k].y, proposal_boxes_all[k].width, proposal_boxes_all[k].height));
|
||||
o_confidences.push_back(scores_all[k]);
|
||||
|
||||
}
|
||||
|
||||
if(o_regions.size() > 0)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,550 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
|
||||
// all rights reserved.
|
||||
//
|
||||
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
|
||||
//
|
||||
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
|
||||
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
|
||||
//
|
||||
// License can be found in OpenFace-license.txt
|
||||
//
|
||||
// * Any publications arising from the use of this software, including but
|
||||
// not limited to academic journal and conference publications, technical
|
||||
// reports and manuals, must cite at least one of the following works:
|
||||
//
|
||||
// OpenFace 2.0: Facial Behavior Analysis Toolkit
|
||||
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
|
||||
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
|
||||
//
|
||||
// Convolutional experts constrained local model for facial landmark detection.
|
||||
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
|
||||
// in Computer Vision and Pattern Recognition Workshops, 2017.
|
||||
//
|
||||
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
|
||||
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
|
||||
// in IEEE International. Conference on Computer Vision (ICCV), 2015
|
||||
//
|
||||
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
|
||||
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
|
||||
// in Facial Expression Recognition and Analysis Challenge,
|
||||
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "stdafx.h"
|
||||
|
||||
#include "LandmarkDetectionValidator.h"
|
||||
|
||||
// OpenCV includes
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
// System includes
|
||||
#include <fstream>
|
||||
|
||||
// Math includes
|
||||
#define _USE_MATH_DEFINES
|
||||
#include <cmath>
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.14159265358979323846
|
||||
#endif
|
||||
// Local includes
|
||||
#include "LandmarkDetectorUtils.h"
|
||||
#include "CNN_utils.h"
|
||||
|
||||
using namespace LandmarkDetector;
|
||||
|
||||
// Copy constructor
|
||||
DetectionValidator::DetectionValidator(const DetectionValidator& other) : orientations(other.orientations), paws(other.paws),
|
||||
cnn_subsampling_layers(other.cnn_subsampling_layers), cnn_layer_types(other.cnn_layer_types), cnn_convolutional_layers_im2col_precomp(other.cnn_convolutional_layers_im2col_precomp),
|
||||
cnn_convolutional_layers_weights(other.cnn_convolutional_layers_weights)
|
||||
{
|
||||
|
||||
this->cnn_convolutional_layers.resize(other.cnn_convolutional_layers.size());
|
||||
for (size_t v = 0; v < other.cnn_convolutional_layers.size(); ++v)
|
||||
{
|
||||
this->cnn_convolutional_layers[v].resize(other.cnn_convolutional_layers[v].size());
|
||||
|
||||
for (size_t l = 0; l < other.cnn_convolutional_layers[v].size(); ++l)
|
||||
{
|
||||
this->cnn_convolutional_layers[v][l].resize(other.cnn_convolutional_layers[v][l].size());
|
||||
|
||||
for (size_t i = 0; i < other.cnn_convolutional_layers[v][l].size(); ++i)
|
||||
{
|
||||
this->cnn_convolutional_layers[v][l][i].resize(other.cnn_convolutional_layers[v][l][i].size());
|
||||
|
||||
for (size_t k = 0; k < other.cnn_convolutional_layers[v][l][i].size(); ++k)
|
||||
{
|
||||
// Make sure the matrix is copied.
|
||||
this->cnn_convolutional_layers[v][l][i][k] = other.cnn_convolutional_layers[v][l][i][k].clone();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this->cnn_fully_connected_layers_weights.resize(other.cnn_fully_connected_layers_weights.size());
|
||||
for (size_t v = 0; v < other.cnn_fully_connected_layers_weights.size(); ++v)
|
||||
{
|
||||
this->cnn_fully_connected_layers_weights[v].resize(other.cnn_fully_connected_layers_weights[v].size());
|
||||
|
||||
for (size_t l = 0; l < other.cnn_fully_connected_layers_weights[v].size(); ++l)
|
||||
{
|
||||
// Make sure the matrix is copied.
|
||||
this->cnn_fully_connected_layers_weights[v][l] = other.cnn_fully_connected_layers_weights[v][l].clone();
|
||||
}
|
||||
}
|
||||
|
||||
this->cnn_fully_connected_layers_biases.resize(other.cnn_fully_connected_layers_biases.size());
|
||||
for (size_t v = 0; v < other.cnn_fully_connected_layers_biases.size(); ++v)
|
||||
{
|
||||
this->cnn_fully_connected_layers_biases[v].resize(other.cnn_fully_connected_layers_biases[v].size());
|
||||
|
||||
for (size_t l = 0; l < other.cnn_fully_connected_layers_biases[v].size(); ++l)
|
||||
{
|
||||
// Make sure the matrix is copied.
|
||||
this->cnn_fully_connected_layers_biases[v][l] = other.cnn_fully_connected_layers_biases[v][l].clone();
|
||||
}
|
||||
}
|
||||
|
||||
this->mean_images.resize(other.mean_images.size());
|
||||
for (size_t i = 0; i < other.mean_images.size(); ++i)
|
||||
{
|
||||
// Make sure the matrix is copied.
|
||||
this->mean_images[i] = other.mean_images[i].clone();
|
||||
}
|
||||
|
||||
this->standard_deviations.resize(other.standard_deviations.size());
|
||||
for (size_t i = 0; i < other.standard_deviations.size(); ++i)
|
||||
{
|
||||
// Make sure the matrix is copied.
|
||||
this->standard_deviations[i] = other.standard_deviations[i].clone();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
// Read in the landmark detection validation module
|
||||
void DetectionValidator::Read(std::string location)
|
||||
{
|
||||
|
||||
std::ifstream detection_validator_stream (location, std::ios::in | std::ios::binary);
|
||||
if (detection_validator_stream.is_open())
|
||||
{
|
||||
detection_validator_stream.seekg (0, std::ios::beg);
|
||||
|
||||
// Read validator type
|
||||
int validator_type;
|
||||
detection_validator_stream.read ((char*)&validator_type, 4);
|
||||
|
||||
if (validator_type != 3)
|
||||
{
|
||||
std::cout << "ERROR: Using old face validator, no longer supported" << std::endl;
|
||||
}
|
||||
|
||||
// Read the number of views (orientations) within the validator
|
||||
int n;
|
||||
detection_validator_stream.read ((char*)&n, 4);
|
||||
|
||||
orientations.resize(n);
|
||||
|
||||
for(int i = 0; i < n; i++)
|
||||
{
|
||||
cv::Mat_<double> orientation_tmp;
|
||||
LandmarkDetector::ReadMatBin(detection_validator_stream, orientation_tmp);
|
||||
|
||||
orientations[i] = cv::Vec3d(orientation_tmp.at<double>(0), orientation_tmp.at<double>(1), orientation_tmp.at<double>(2));
|
||||
|
||||
// Convert from degrees to radians
|
||||
orientations[i] = orientations[i] * M_PI / 180.0;
|
||||
}
|
||||
|
||||
// Initialise the piece-wise affine warps, biases and weights
|
||||
paws.resize(n);
|
||||
|
||||
cnn_convolutional_layers_weights.resize(n);
|
||||
cnn_convolutional_layers_im2col_precomp.resize(n);
|
||||
cnn_convolutional_layers.resize(n);
|
||||
cnn_fully_connected_layers_weights.resize(n);
|
||||
cnn_layer_types.resize(n);
|
||||
cnn_fully_connected_layers_biases.resize(n);
|
||||
|
||||
// Initialise the normalisation terms
|
||||
mean_images.resize(n);
|
||||
standard_deviations.resize(n);
|
||||
|
||||
// Read in the validators for each of the views
|
||||
for(int i = 0; i < n; i++)
|
||||
{
|
||||
|
||||
// Read in the mean images
|
||||
cv::Mat_<double> mean_img;
|
||||
LandmarkDetector::ReadMatBin(detection_validator_stream, mean_img);
|
||||
mean_img.convertTo(mean_images[i], CV_32F);
|
||||
mean_images[i] = mean_images[i].t();
|
||||
|
||||
cv::Mat_<double> std_dev;
|
||||
LandmarkDetector::ReadMatBin(detection_validator_stream, std_dev);
|
||||
std_dev.convertTo(standard_deviations[i], CV_32F);
|
||||
|
||||
standard_deviations[i] = standard_deviations[i].t();
|
||||
|
||||
// Model specifics
|
||||
if (validator_type == 3)
|
||||
{
|
||||
int network_depth;
|
||||
detection_validator_stream.read((char*)&network_depth, 4);
|
||||
|
||||
cnn_layer_types[i].resize(network_depth);
|
||||
|
||||
for (int layer = 0; layer < network_depth; ++layer)
|
||||
{
|
||||
|
||||
int layer_type;
|
||||
detection_validator_stream.read((char*)&layer_type, 4);
|
||||
cnn_layer_types[i][layer] = layer_type;
|
||||
|
||||
// convolutional
|
||||
if (layer_type == 0)
|
||||
{
|
||||
|
||||
// Read the number of input maps
|
||||
int num_in_maps;
|
||||
detection_validator_stream.read((char*)&num_in_maps, 4);
|
||||
|
||||
// Read the number of kernels for each input map
|
||||
int num_kernels;
|
||||
detection_validator_stream.read((char*)&num_kernels, 4);
|
||||
|
||||
std::vector<std::vector<cv::Mat_<float> > > kernels;
|
||||
|
||||
kernels.resize(num_in_maps);
|
||||
|
||||
std::vector<float> biases;
|
||||
for (int k = 0; k < num_kernels; ++k)
|
||||
{
|
||||
float bias;
|
||||
detection_validator_stream.read((char*)&bias, 4);
|
||||
biases.push_back(bias);
|
||||
}
|
||||
|
||||
// For every input map
|
||||
for (int in = 0; in < num_in_maps; ++in)
|
||||
{
|
||||
kernels[in].resize(num_kernels);
|
||||
|
||||
// For every kernel on that input map
|
||||
for (int k = 0; k < num_kernels; ++k)
|
||||
{
|
||||
ReadMatBin(detection_validator_stream, kernels[in][k]);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
cnn_convolutional_layers[i].push_back(kernels);
|
||||
|
||||
// Rearrange the kernels for faster inference with FFT
|
||||
std::vector<std::vector<cv::Mat_<float> > > kernels_rearr;
|
||||
kernels_rearr.resize(num_kernels);
|
||||
|
||||
// Fill up the rearranged layer
|
||||
for (int k = 0; k < num_kernels; ++k)
|
||||
{
|
||||
for (int in = 0; in < num_in_maps; ++in)
|
||||
{
|
||||
kernels_rearr[k].push_back(kernels[in][k]);
|
||||
}
|
||||
}
|
||||
|
||||
// Rearrange the flattened kernels into weight matrices for direct convolution computation
|
||||
cv::Mat_<float> weight_matrix(num_in_maps * kernels_rearr[0][0].rows * kernels_rearr[0][0].cols, num_kernels);
|
||||
for (int k = 0; k < num_kernels; ++k)
|
||||
{
|
||||
for (int i = 0; i < num_in_maps; ++i)
|
||||
{
|
||||
// Flatten the kernel
|
||||
cv::Mat_<float> k_flat = kernels_rearr[k][i].t();
|
||||
k_flat = k_flat.reshape(0, 1).t();
|
||||
k_flat.copyTo(weight_matrix(cv::Rect(k, i * kernels_rearr[0][0].rows * kernels_rearr[0][0].cols, 1, kernels_rearr[0][0].rows * kernels_rearr[0][0].cols)));
|
||||
}
|
||||
}
|
||||
|
||||
// Transpose the weight matrix for more convenient computation
|
||||
weight_matrix = weight_matrix.t();
|
||||
|
||||
// Add a bias term to the weight matrix for efficiency
|
||||
cv::Mat_<float> W(weight_matrix.rows, weight_matrix.cols + 1, 1.0);
|
||||
for (int k = 0; k < weight_matrix.rows; ++k)
|
||||
{
|
||||
W.at<float>(k, weight_matrix.cols) = biases[k];
|
||||
}
|
||||
weight_matrix.copyTo(W(cv::Rect(0, 0, weight_matrix.cols, weight_matrix.rows)));
|
||||
|
||||
cnn_convolutional_layers_weights[i].push_back(W.t());
|
||||
cnn_convolutional_layers_im2col_precomp[i].push_back(cv::Mat_<float>());
|
||||
}
|
||||
else if (layer_type == 2)
|
||||
{
|
||||
cv::Mat_<float> biases;
|
||||
ReadMatBin(detection_validator_stream, biases);
|
||||
cnn_fully_connected_layers_biases[i].push_back(biases);
|
||||
|
||||
// Fully connected layer
|
||||
cv::Mat_<float> weights;
|
||||
ReadMatBin(detection_validator_stream, weights);
|
||||
cnn_fully_connected_layers_weights[i].push_back(weights);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Read in the piece-wise affine warps
|
||||
paws[i].Read(detection_validator_stream);
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "WARNING: Can't find the Face checker location" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
// Check if the fitting actually succeeded
|
||||
float DetectionValidator::Check(const cv::Vec3d& orientation, const cv::Mat_<uchar>& intensity_img, cv::Mat_<float>& detected_landmarks)
|
||||
{
|
||||
|
||||
int id = GetViewId(orientation);
|
||||
|
||||
// The warped (cropped) image, corresponding to a face lying withing the detected lanmarks
|
||||
cv::Mat_<float> warped;
|
||||
|
||||
// First only use the ROI of the image of interest
|
||||
cv::Mat_<float> detected_landmarks_local = detected_landmarks.clone();
|
||||
|
||||
float min_x_f, max_x_f, min_y_f, max_y_f;
|
||||
ExtractBoundingBox(detected_landmarks_local, min_x_f, max_x_f, min_y_f, max_y_f);
|
||||
|
||||
cv::Mat_<float> xs = detected_landmarks_local(cv::Rect(0, 0, 1, detected_landmarks.rows / 2));
|
||||
cv::Mat_<float> ys = detected_landmarks_local(cv::Rect(0, detected_landmarks.rows / 2, 1, detected_landmarks.rows / 2));
|
||||
|
||||
// Picking the ROI (some extra space for bilinear interpolation)
|
||||
int min_x = (int)(min_x_f - 3.0f);
|
||||
int max_x = (int)(max_x_f + 3.0f);
|
||||
int min_y = (int)(min_y_f - 3.0f);
|
||||
int max_y = (int)(max_y_f + 3.0f);
|
||||
|
||||
if (min_x < 0) min_x = 0;
|
||||
if (min_y < 0) min_y = 0;
|
||||
if (max_x > intensity_img.cols - 1) max_x = intensity_img.cols - 1;
|
||||
if (max_y > intensity_img.rows - 1) max_y = intensity_img.rows - 1;
|
||||
xs = xs - min_x;
|
||||
ys = ys - min_y;
|
||||
|
||||
// If the ROI is non existent return failure (this could happen if all landmarks are outside of the image)
|
||||
if (max_x - min_x <= 1 || max_y - min_y <= 1)
|
||||
{
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
cv::Mat_<float> intensity_img_float_local;
|
||||
intensity_img(cv::Rect(min_x, min_y, max_x - min_x, max_y - min_y)).convertTo(intensity_img_float_local, CV_32F);
|
||||
|
||||
// the piece-wise affine image warping
|
||||
paws[id].Warp(intensity_img_float_local, warped, detected_landmarks_local);
|
||||
|
||||
// The actual validation step
|
||||
double dec = CheckCNN(warped, id);
|
||||
|
||||
// Convert it to a more interpretable signal (0 low confidence, 1 high confidence)
|
||||
dec = 0.5 * (1.0 - dec);
|
||||
|
||||
return (float)dec;
|
||||
}
|
||||
|
||||
double DetectionValidator::CheckCNN(const cv::Mat_<float>& warped_img, int view_id)
|
||||
{
|
||||
|
||||
cv::Mat_<float> feature_vec;
|
||||
NormaliseWarpedToVector(warped_img, feature_vec, view_id);
|
||||
|
||||
// Create a normalised image from the crop vector
|
||||
cv::Mat_<float> img(warped_img.size(), 0.0);
|
||||
img = img.t();
|
||||
|
||||
cv::Mat mask = paws[view_id].pixel_mask.t();
|
||||
cv::MatIterator_<uchar> mask_it = mask.begin<uchar>();
|
||||
|
||||
cv::MatIterator_<float> feature_it = feature_vec.begin();
|
||||
cv::MatIterator_<float> img_it = img.begin();
|
||||
|
||||
int wInt = img.cols;
|
||||
int hInt = img.rows;
|
||||
|
||||
for (int i = 0; i < wInt; ++i)
|
||||
{
|
||||
for (int j = 0; j < hInt; ++j, ++mask_it, ++img_it)
|
||||
{
|
||||
// if is within mask
|
||||
if (*mask_it)
|
||||
{
|
||||
// assign the feature to image if it is within the mask
|
||||
*img_it = (float)*feature_it++;
|
||||
}
|
||||
}
|
||||
}
|
||||
img = img.t();
|
||||
|
||||
int cnn_layer = 0;
|
||||
int fully_connected_layer = 0;
|
||||
|
||||
std::vector<cv::Mat_<float> > input_maps;
|
||||
input_maps.push_back(img);
|
||||
|
||||
std::vector<cv::Mat_<float> > outputs;
|
||||
|
||||
for (size_t layer = 0; layer < cnn_layer_types[view_id].size(); ++layer)
|
||||
{
|
||||
// Determine layer type
|
||||
int layer_type = cnn_layer_types[view_id][layer];
|
||||
|
||||
// Convolutional layer
|
||||
if (layer_type == 0)
|
||||
{
|
||||
|
||||
convolution_direct_blas(outputs, input_maps, cnn_convolutional_layers_weights[view_id][cnn_layer], cnn_convolutional_layers[view_id][cnn_layer][0][0].rows, cnn_convolutional_layers[view_id][cnn_layer][0][0].cols, cnn_convolutional_layers_im2col_precomp[view_id][cnn_layer]);
|
||||
|
||||
cnn_layer++;
|
||||
}
|
||||
if (layer_type == 1)
|
||||
{
|
||||
max_pooling(outputs, input_maps, 2, 2, 2, 2);
|
||||
}
|
||||
if (layer_type == 2)
|
||||
{
|
||||
|
||||
fully_connected(outputs, input_maps, cnn_fully_connected_layers_weights[view_id][fully_connected_layer].t(), cnn_fully_connected_layers_biases[view_id][fully_connected_layer]);
|
||||
fully_connected_layer++;
|
||||
}
|
||||
if (layer_type == 3) // ReLU
|
||||
{
|
||||
outputs.clear();
|
||||
for (size_t k = 0; k < input_maps.size(); ++k)
|
||||
{
|
||||
// Apply the ReLU
|
||||
cv::threshold(input_maps[k], input_maps[k], 0, 0, cv::THRESH_TOZERO);
|
||||
outputs.push_back(input_maps[k]);
|
||||
|
||||
}
|
||||
}
|
||||
if (layer_type == 4)
|
||||
{
|
||||
outputs.clear();
|
||||
for (size_t k = 0; k < input_maps.size(); ++k)
|
||||
{
|
||||
// Apply the sigmoid
|
||||
cv::exp(-input_maps[k], input_maps[k]);
|
||||
input_maps[k] = 1.0 / (1.0 + input_maps[k]);
|
||||
|
||||
outputs.push_back(input_maps[k]);
|
||||
|
||||
}
|
||||
}
|
||||
// Set the outputs of this layer to inputs of the next
|
||||
input_maps = outputs;
|
||||
|
||||
}
|
||||
|
||||
// Convert the class label to a continuous value
|
||||
double max_val = 0;
|
||||
cv::Point max_loc;
|
||||
cv::minMaxLoc(outputs[0].t(), 0, &max_val, 0, &max_loc);
|
||||
int max_idx = max_loc.y;
|
||||
double max = 1;
|
||||
double min = -1;
|
||||
double bins = (double)outputs[0].cols;
|
||||
// Unquantizing the softmax layer to continuous value
|
||||
double step_size = (max - min) / bins; // This should be saved somewhere
|
||||
double unquantized = min + step_size / 2.0 + max_idx * step_size;
|
||||
|
||||
return unquantized;
|
||||
}
|
||||
|
||||
void DetectionValidator::NormaliseWarpedToVector(const cv::Mat_<float>& warped_img, cv::Mat_<float>& feature_vec, int view_id)
|
||||
{
|
||||
cv::Mat_<float> warped_t = warped_img.t();
|
||||
|
||||
// the vector to be filled with paw values
|
||||
cv::MatIterator_<float> vp;
|
||||
cv::MatIterator_<float> cp;
|
||||
|
||||
cv::Mat_<float> vec(paws[view_id].number_of_pixels,1);
|
||||
vp = vec.begin();
|
||||
|
||||
cp = warped_t.begin();
|
||||
|
||||
int wInt = warped_img.cols;
|
||||
int hInt = warped_img.rows;
|
||||
|
||||
// the mask indicating if point is within or outside the face region
|
||||
|
||||
cv::Mat maskT = paws[view_id].pixel_mask.t();
|
||||
|
||||
cv::MatIterator_<uchar> mp = maskT.begin<uchar>();
|
||||
|
||||
for(int i=0; i < wInt; ++i)
|
||||
{
|
||||
for(int j=0; j < hInt; ++j, ++mp, ++cp)
|
||||
{
|
||||
// if is within mask
|
||||
if(*mp)
|
||||
{
|
||||
*vp++ = *cp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Local normalisation
|
||||
cv::Scalar mean;
|
||||
cv::Scalar std;
|
||||
cv::meanStdDev(vec, mean, std);
|
||||
|
||||
// subtract the mean image
|
||||
vec -= mean[0];
|
||||
|
||||
// Normalise the image
|
||||
if(std[0] == 0)
|
||||
{
|
||||
std[0] = 1;
|
||||
}
|
||||
|
||||
vec /= std[0];
|
||||
|
||||
// Global normalisation
|
||||
feature_vec = (vec - mean_images[view_id]) / standard_deviations[view_id];
|
||||
}
|
||||
|
||||
// Getting the closest view center based on orientation
|
||||
int DetectionValidator::GetViewId(const cv::Vec3d& orientation) const
|
||||
{
|
||||
int id = 0;
|
||||
|
||||
double dbest = -1.0;
|
||||
|
||||
for(size_t i = 0; i < this->orientations.size(); i++)
|
||||
{
|
||||
|
||||
// Distance to current view
|
||||
double d = cv::norm(orientation, this->orientations[i]);
|
||||
|
||||
if(i == 0 || d < dbest)
|
||||
{
|
||||
dbest = d;
|
||||
id = i;
|
||||
}
|
||||
}
|
||||
return id;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,768 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
|
||||
// all rights reserved.
|
||||
//
|
||||
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
|
||||
//
|
||||
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
|
||||
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
|
||||
//
|
||||
// License can be found in OpenFace-license.txt
|
||||
//
|
||||
// * Any publications arising from the use of this software, including but
|
||||
// not limited to academic journal and conference publications, technical
|
||||
// reports and manuals, must cite at least one of the following works:
|
||||
//
|
||||
// OpenFace 2.0: Facial Behavior Analysis Toolkit
|
||||
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
|
||||
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
|
||||
//
|
||||
// Convolutional experts constrained local model for facial landmark detection.
|
||||
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
|
||||
// in Computer Vision and Pattern Recognition Workshops, 2017.
|
||||
//
|
||||
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
|
||||
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
|
||||
// in IEEE International. Conference on Computer Vision (ICCV), 2015
|
||||
//
|
||||
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
|
||||
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
|
||||
// in Facial Expression Recognition and Analysis Challenge,
|
||||
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "stdafx.h"
|
||||
|
||||
#include "LandmarkDetectorFunc.h"
|
||||
#include "RotationHelpers.h"
|
||||
#include "ImageManipulationHelpers.h"
|
||||
|
||||
// OpenCV includes
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/calib3d.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
// System includes
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
|
||||
using namespace LandmarkDetector;
|
||||
|
||||
// Getting a head pose estimate from the currently detected landmarks, with appropriate correction due to the PDM assuming an orthographic camera
|
||||
// which is only correct close to the centre of the image
|
||||
// This method returns a corrected pose estimate with respect to world coordinates with camera at origin (0,0,0)
|
||||
// The format returned is [Tx, Ty, Tz, Eul_x, Eul_y, Eul_z]
|
||||
cv::Vec6f LandmarkDetector::GetPose(const CLNF& clnf_model, float fx, float fy, float cx, float cy)
|
||||
{
|
||||
if (!clnf_model.detected_landmarks.empty() && clnf_model.params_global[0] != 0)
|
||||
{
|
||||
// This is used as an initial estimate for the iterative PnP algorithm
|
||||
float Z = fx / clnf_model.params_global[0];
|
||||
|
||||
float X = ((clnf_model.params_global[4] - cx) * (1.0 / fx)) * Z;
|
||||
float Y = ((clnf_model.params_global[5] - cy) * (1.0 / fy)) * Z;
|
||||
|
||||
// Correction for orientation
|
||||
|
||||
// 2D points
|
||||
cv::Mat_<float> landmarks_2D = clnf_model.detected_landmarks;
|
||||
|
||||
landmarks_2D = landmarks_2D.reshape(1, 2).t();
|
||||
|
||||
// 3D points
|
||||
cv::Mat_<float> landmarks_3D;
|
||||
clnf_model.pdm.CalcShape3D(landmarks_3D, clnf_model.params_local);
|
||||
|
||||
landmarks_3D = landmarks_3D.reshape(1, 3).t();
|
||||
|
||||
// Solving the PNP model
|
||||
|
||||
// The camera matrix
|
||||
cv::Matx33f camera_matrix(fx, 0, cx, 0, fy, cy, 0, 0, 1);
|
||||
|
||||
cv::Vec3f vec_trans(X, Y, Z);
|
||||
cv::Vec3f vec_rot(clnf_model.params_global[1], clnf_model.params_global[2], clnf_model.params_global[3]);
|
||||
|
||||
cv::solvePnP(landmarks_3D, landmarks_2D, camera_matrix, cv::Mat(), vec_rot, vec_trans, true);
|
||||
|
||||
cv::Vec3f euler = Utilities::AxisAngle2Euler(vec_rot);
|
||||
|
||||
return cv::Vec6f(vec_trans[0], vec_trans[1], vec_trans[2], euler[0], euler[1], euler[2]);
|
||||
}
|
||||
else
|
||||
{
|
||||
return cv::Vec6f(0, 0, 0, 0, 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Getting a head pose estimate from the currently detected landmarks, with appropriate correction due to perspective projection
|
||||
// This method returns a corrected pose estimate with respect to a point camera (NOTE not the world coordinates), which is useful to find out if the person is looking at a camera
|
||||
// The format returned is [Tx, Ty, Tz, Eul_x, Eul_y, Eul_z]
|
||||
cv::Vec6f LandmarkDetector::GetPoseWRTCamera(const CLNF& clnf_model, float fx, float fy, float cx, float cy)
|
||||
{
|
||||
if (!clnf_model.detected_landmarks.empty() && clnf_model.params_global[0] != 0)
|
||||
{
|
||||
|
||||
float Z = fx / clnf_model.params_global[0];
|
||||
|
||||
float X = ((clnf_model.params_global[4] - cx) * (1.0 / fx)) * Z;
|
||||
float Y = ((clnf_model.params_global[5] - cy) * (1.0 / fy)) * Z;
|
||||
|
||||
// Correction for orientation
|
||||
|
||||
// 3D points
|
||||
cv::Mat_<float> landmarks_3D;
|
||||
clnf_model.pdm.CalcShape3D(landmarks_3D, clnf_model.params_local);
|
||||
|
||||
landmarks_3D = landmarks_3D.reshape(1, 3).t();
|
||||
|
||||
// 2D points
|
||||
cv::Mat_<float> landmarks_2D = clnf_model.detected_landmarks;
|
||||
|
||||
landmarks_2D = landmarks_2D.reshape(1, 2).t();
|
||||
|
||||
// Solving the PNP model
|
||||
|
||||
// The camera matrix
|
||||
cv::Matx33f camera_matrix(fx, 0, cx, 0, fy, cy, 0, 0, 1);
|
||||
|
||||
cv::Vec3f vec_trans(X, Y, Z);
|
||||
cv::Vec3f vec_rot(clnf_model.params_global[1], clnf_model.params_global[2], clnf_model.params_global[3]);
|
||||
|
||||
cv::solvePnP(landmarks_3D, landmarks_2D, camera_matrix, cv::Mat(), vec_rot, vec_trans, true);
|
||||
|
||||
// Here we correct for the camera orientation, for this need to determine the angle the camera makes with the head pose
|
||||
float z_x = cv::sqrt(vec_trans[0] * vec_trans[0] + vec_trans[2] * vec_trans[2]);
|
||||
float eul_x = atan2(vec_trans[1], z_x);
|
||||
|
||||
float z_y = cv::sqrt(vec_trans[1] * vec_trans[1] + vec_trans[2] * vec_trans[2]);
|
||||
float eul_y = -atan2(vec_trans[0], z_y);
|
||||
|
||||
cv::Matx33f camera_rotation = Utilities::Euler2RotationMatrix(cv::Vec3f(eul_x, eul_y, 0));
|
||||
cv::Matx33f head_rotation = Utilities::AxisAngle2RotationMatrix(vec_rot);
|
||||
|
||||
cv::Matx33f corrected_rotation = camera_rotation * head_rotation;
|
||||
|
||||
cv::Vec3f euler_corrected = Utilities::RotationMatrix2Euler(corrected_rotation);
|
||||
|
||||
return cv::Vec6f(vec_trans[0], vec_trans[1], vec_trans[2], euler_corrected[0], euler_corrected[1], euler_corrected[2]);
|
||||
}
|
||||
else
|
||||
{
|
||||
return cv::Vec6f(0, 0, 0, 0, 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// If landmark detection in video succeeded create a template for use in simple tracking
|
||||
void UpdateTemplate(const cv::Mat_<uchar> &grayscale_image, CLNF& clnf_model)
|
||||
{
|
||||
cv::Rect_<float> bounding_box;
|
||||
clnf_model.pdm.CalcBoundingBox(bounding_box, clnf_model.params_global, clnf_model.params_local);
|
||||
|
||||
// Make sure the box is not out of bounds
|
||||
cv::Rect_<int> bbox_tmp((int)bounding_box.x, (int)bounding_box.y, (int)bounding_box.width, (int)bounding_box.height);
|
||||
bounding_box = bbox_tmp & cv::Rect(0, 0, grayscale_image.cols, grayscale_image.rows);
|
||||
|
||||
clnf_model.face_template = grayscale_image(bounding_box).clone();
|
||||
}
|
||||
|
||||
// This method uses basic template matching in order to allow for better tracking of fast moving faces
|
||||
void CorrectGlobalParametersVideo(const cv::Mat_<uchar> &grayscale_image, CLNF& clnf_model, const FaceModelParameters& params)
|
||||
{
|
||||
cv::Rect_<float> init_box;
|
||||
clnf_model.pdm.CalcBoundingBox(init_box, clnf_model.params_global, clnf_model.params_local);
|
||||
|
||||
cv::Rect roi(init_box.x - init_box.width/2, init_box.y - init_box.height/2, init_box.width * 2, init_box.height * 2);
|
||||
roi = roi & cv::Rect(0, 0, grayscale_image.cols, grayscale_image.rows);
|
||||
|
||||
int off_x = roi.x;
|
||||
int off_y = roi.y;
|
||||
|
||||
float scaling = params.face_template_scale / clnf_model.params_global[0];
|
||||
cv::Mat_<uchar> image;
|
||||
if(scaling < 1)
|
||||
{
|
||||
cv::resize(clnf_model.face_template, clnf_model.face_template, cv::Size(), scaling, scaling);
|
||||
cv::resize(grayscale_image(roi), image, cv::Size(), scaling, scaling);
|
||||
}
|
||||
else
|
||||
{
|
||||
scaling = 1;
|
||||
image = grayscale_image(roi).clone();
|
||||
}
|
||||
|
||||
// Resizing the template
|
||||
cv::Mat corr_out;
|
||||
cv::matchTemplate(image, clnf_model.face_template, corr_out, cv::TM_CCOEFF_NORMED);
|
||||
|
||||
// Actually matching it
|
||||
//double min, max;
|
||||
int max_loc[2];
|
||||
|
||||
cv::minMaxIdx(corr_out, NULL, NULL, NULL, max_loc);
|
||||
|
||||
cv::Rect_<float> out_bbox(max_loc[1]/scaling + off_x, max_loc[0]/scaling + off_y, clnf_model.face_template.rows / scaling, clnf_model.face_template.cols / scaling);
|
||||
|
||||
float shift_x = out_bbox.x - init_box.x;
|
||||
float shift_y = out_bbox.y - init_box.y;
|
||||
|
||||
clnf_model.params_global[4] = clnf_model.params_global[4] + shift_x;
|
||||
clnf_model.params_global[5] = clnf_model.params_global[5] + shift_y;
|
||||
|
||||
}
|
||||
|
||||
bool LandmarkDetector::DetectLandmarksInVideo(const cv::Mat &rgb_image, CLNF& clnf_model, FaceModelParameters& params, cv::Mat& grayscale_image)
|
||||
{
|
||||
// First need to decide if the landmarks should be "detected" or "tracked"
|
||||
// Detected means running face detection and a larger search area, tracked means initialising from previous step
|
||||
// and using a smaller search area
|
||||
|
||||
if(grayscale_image.empty())
|
||||
{
|
||||
Utilities::ConvertToGrayscale_8bit(rgb_image, grayscale_image);
|
||||
}
|
||||
|
||||
// Indicating that this is a first detection in video sequence or after restart
|
||||
bool initial_detection = !clnf_model.tracking_initialised;
|
||||
|
||||
// Only do it if there was a face detection at all
|
||||
if(clnf_model.tracking_initialised)
|
||||
{
|
||||
|
||||
// The area of interest search size will depend if the previous track was successful
|
||||
if(!clnf_model.detection_success)
|
||||
{
|
||||
params.window_sizes_current = params.window_sizes_init;
|
||||
}
|
||||
else
|
||||
{
|
||||
params.window_sizes_current = params.window_sizes_small;
|
||||
}
|
||||
|
||||
// Before the expensive landmark detection step apply a quick template tracking approach
|
||||
if(params.use_face_template && !clnf_model.face_template.empty() && clnf_model.detection_success)
|
||||
{
|
||||
CorrectGlobalParametersVideo(grayscale_image, clnf_model, params);
|
||||
}
|
||||
|
||||
bool track_success = clnf_model.DetectLandmarks(grayscale_image, params);
|
||||
|
||||
if(!track_success)
|
||||
{
|
||||
// Make a record that tracking failed
|
||||
clnf_model.failures_in_a_row++;
|
||||
}
|
||||
else
|
||||
{
|
||||
// indicate that tracking is a success
|
||||
clnf_model.failures_in_a_row = -1;
|
||||
|
||||
if(params.use_face_template)
|
||||
{
|
||||
UpdateTemplate(grayscale_image, clnf_model);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This is used for both detection (if it the tracking has not been initialised yet) or if the tracking failed (however we do this every n frames, for speed)
|
||||
// This also has the effect of an attempt to reinitialise just after the tracking has failed, which is useful during large motions
|
||||
if((!clnf_model.tracking_initialised && (clnf_model.failures_in_a_row + 1) % (params.reinit_video_every * 6) == 0)
|
||||
|| (clnf_model.tracking_initialised && !clnf_model.detection_success && params.reinit_video_every > 0 && clnf_model.failures_in_a_row % params.reinit_video_every == 0))
|
||||
{
|
||||
|
||||
cv::Rect_<float> bounding_box;
|
||||
|
||||
// If the face detector has not been initialised and we're using it, then read it in
|
||||
if(clnf_model.face_detector_HAAR.empty() && params.curr_face_detector == params.HAAR_DETECTOR)
|
||||
{
|
||||
clnf_model.face_detector_HAAR.load(params.haar_face_detector_location);
|
||||
clnf_model.haar_face_detector_location = params.haar_face_detector_location;
|
||||
}
|
||||
if (clnf_model.face_detector_MTCNN.empty() && params.curr_face_detector == params.MTCNN_DETECTOR)
|
||||
{
|
||||
clnf_model.face_detector_MTCNN.Read(params.mtcnn_face_detector_location);
|
||||
clnf_model.mtcnn_face_detector_location = params.mtcnn_face_detector_location;
|
||||
|
||||
// If the model is still empty default to HOG
|
||||
if (clnf_model.face_detector_MTCNN.empty())
|
||||
{
|
||||
std::cout << "INFO: defaulting to HOG-SVM face detector" << std::endl;
|
||||
params.curr_face_detector = LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
cv::Point preference_det(-1, -1);
|
||||
if(clnf_model.preference_det.x != -1 && clnf_model.preference_det.y != -1)
|
||||
{
|
||||
preference_det.x = clnf_model.preference_det.x * grayscale_image.cols;
|
||||
preference_det.y = clnf_model.preference_det.y * grayscale_image.rows;
|
||||
clnf_model.preference_det = cv::Point(-1, -1);
|
||||
}
|
||||
|
||||
bool face_detection_success;
|
||||
if(params.curr_face_detector == FaceModelParameters::HOG_SVM_DETECTOR)
|
||||
{
|
||||
float confidence;
|
||||
face_detection_success = LandmarkDetector::DetectSingleFaceHOG(bounding_box, grayscale_image, clnf_model.face_detector_HOG, confidence, preference_det);
|
||||
}
|
||||
else if(params.curr_face_detector == FaceModelParameters::HAAR_DETECTOR)
|
||||
{
|
||||
face_detection_success = LandmarkDetector::DetectSingleFace(bounding_box, grayscale_image, clnf_model.face_detector_HAAR, preference_det);
|
||||
}
|
||||
else if (params.curr_face_detector == FaceModelParameters::MTCNN_DETECTOR)
|
||||
{
|
||||
float confidence;
|
||||
face_detection_success = LandmarkDetector::DetectSingleFaceMTCNN(bounding_box, rgb_image, clnf_model.face_detector_MTCNN, confidence, preference_det);
|
||||
}
|
||||
|
||||
// Attempt to detect landmarks using the detected face (if unseccessful the detection will be ignored)
|
||||
if(face_detection_success)
|
||||
{
|
||||
// Indicate that tracking has started as a face was detected
|
||||
clnf_model.tracking_initialised = true;
|
||||
|
||||
// Keep track of old model values so that they can be restored if redetection fails
|
||||
cv::Vec6f params_global_init = clnf_model.params_global;
|
||||
cv::Mat_<float> params_local_init = clnf_model.params_local.clone();
|
||||
float likelihood_init = clnf_model.model_likelihood;
|
||||
cv::Mat_<float> detected_landmarks_init = clnf_model.detected_landmarks.clone();
|
||||
cv::Mat_<float> landmark_likelihoods_init = clnf_model.landmark_likelihoods.clone();
|
||||
|
||||
// Use the detected bounding box and empty local parameters
|
||||
clnf_model.params_local.setTo(0);
|
||||
clnf_model.pdm.CalcParams(clnf_model.params_global, bounding_box, clnf_model.params_local);
|
||||
|
||||
// Make sure the search size is large
|
||||
params.window_sizes_current = params.window_sizes_init;
|
||||
|
||||
// TODO rem (should the multi-hyp version be only for CEN and not CLNF?), otherwise poss too slow, and poss not accurate
|
||||
//bool landmark_detection_success = clnf_model.DetectLandmarks(grayscale_image, params);
|
||||
|
||||
// Do the actual landmark detection (and keep it only if successful)
|
||||
// Perform multi-hypothesis detection here (as face detector can pick up multiple of them)
|
||||
params.multi_view = true;
|
||||
bool landmark_detection_success = DetectLandmarksInImage(rgb_image, bounding_box, clnf_model, params, grayscale_image);
|
||||
params.multi_view = false;
|
||||
|
||||
|
||||
// If landmark reinitialisation unsucessful continue from previous estimates
|
||||
// if it's initial detection however, do not care if it was successful as the validator might be wrong, so continue trackig
|
||||
// regardless
|
||||
if(!initial_detection && !landmark_detection_success)
|
||||
{
|
||||
|
||||
// Restore previous estimates
|
||||
clnf_model.params_global = params_global_init;
|
||||
clnf_model.params_local = params_local_init.clone();
|
||||
clnf_model.pdm.CalcShape2D(clnf_model.detected_landmarks, clnf_model.params_local, clnf_model.params_global);
|
||||
clnf_model.model_likelihood = likelihood_init;
|
||||
clnf_model.detected_landmarks = detected_landmarks_init.clone();
|
||||
clnf_model.landmark_likelihoods = landmark_likelihoods_init.clone();
|
||||
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
clnf_model.failures_in_a_row = -1;
|
||||
|
||||
if(params.use_face_template)
|
||||
{
|
||||
UpdateTemplate(grayscale_image, clnf_model);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if the model has not been initialised yet class it as a failure
|
||||
if(!clnf_model.tracking_initialised)
|
||||
{
|
||||
clnf_model.failures_in_a_row++;
|
||||
}
|
||||
|
||||
// un-initialise the tracking
|
||||
if( clnf_model.failures_in_a_row > 100)
|
||||
{
|
||||
clnf_model.tracking_initialised = false;
|
||||
}
|
||||
|
||||
return clnf_model.detection_success;
|
||||
|
||||
}
|
||||
|
||||
bool LandmarkDetector::DetectLandmarksInVideo(const cv::Mat &rgb_image, const cv::Rect_<double> bounding_box, CLNF& clnf_model, FaceModelParameters& params, cv::Mat &grayscale_image)
|
||||
{
|
||||
if(bounding_box.width > 0)
|
||||
{
|
||||
// calculate the local and global parameters from the generated 2D shape (mapping from the 2D to 3D because camera params are unknown)
|
||||
clnf_model.params_local.setTo(0);
|
||||
clnf_model.pdm.CalcParams(clnf_model.params_global, bounding_box, clnf_model.params_local);
|
||||
|
||||
// indicate that face was detected so initialisation is not necessary
|
||||
clnf_model.tracking_initialised = true;
|
||||
}
|
||||
|
||||
return DetectLandmarksInVideo(rgb_image, clnf_model, params, grayscale_image);
|
||||
|
||||
}
|
||||
|
||||
//================================================================================================================
|
||||
// Landmark detection in image, need to provide an image and optionally CLNF model together with parameters (default values work well)
|
||||
// Optionally can provide a bounding box in which detection is performed (this is useful if multiple faces are to be detected in images)
|
||||
//================================================================================================================
|
||||
|
||||
bool DetectLandmarksInImageMultiHypBasic(const cv::Mat_<uchar> &grayscale_image, std::vector<cv::Vec3d> rotation_hypotheses,
|
||||
const cv::Rect_<double> bounding_box, CLNF& clnf_model, FaceModelParameters& params)
|
||||
{
|
||||
|
||||
// Use the initialisation size for the landmark detection
|
||||
params.window_sizes_current = params.window_sizes_init;
|
||||
|
||||
// Store the current best estimate
|
||||
float best_likelihood;
|
||||
float best_detection_certainty;
|
||||
cv::Vec6f best_global_parameters;
|
||||
cv::Mat_<float> best_local_parameters;
|
||||
cv::Mat_<float> best_detected_landmarks;
|
||||
cv::Mat_<float> best_landmark_likelihoods;
|
||||
bool best_success;
|
||||
|
||||
// The hierarchical model parameters
|
||||
std::vector<float> best_likelihood_h(clnf_model.hierarchical_models.size());
|
||||
std::vector<cv::Vec6f> best_global_parameters_h(clnf_model.hierarchical_models.size());
|
||||
std::vector<cv::Mat_<float>> best_local_parameters_h(clnf_model.hierarchical_models.size());
|
||||
std::vector<cv::Mat_<float>> best_detected_landmarks_h(clnf_model.hierarchical_models.size());
|
||||
std::vector<cv::Mat_<float>> best_landmark_likelihoods_h(clnf_model.hierarchical_models.size());
|
||||
|
||||
for (size_t hypothesis = 0; hypothesis < rotation_hypotheses.size(); ++hypothesis)
|
||||
{
|
||||
// Reset the potentially set clnf_model parameters
|
||||
clnf_model.params_local.setTo(0.0);
|
||||
|
||||
for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part)
|
||||
{
|
||||
clnf_model.hierarchical_models[part].params_local.setTo(0.0);
|
||||
}
|
||||
|
||||
// calculate the local and global parameters from the generated 2D shape (mapping from the 2D to 3D because camera params are unknown)
|
||||
clnf_model.pdm.CalcParams(clnf_model.params_global, bounding_box, clnf_model.params_local, rotation_hypotheses[hypothesis]);
|
||||
|
||||
bool success = clnf_model.DetectLandmarks(grayscale_image, params);
|
||||
|
||||
if (hypothesis == 0 || best_likelihood < clnf_model.model_likelihood)
|
||||
{
|
||||
best_likelihood = clnf_model.model_likelihood;
|
||||
best_global_parameters = clnf_model.params_global;
|
||||
best_local_parameters = clnf_model.params_local.clone();
|
||||
best_detected_landmarks = clnf_model.detected_landmarks.clone();
|
||||
best_landmark_likelihoods = clnf_model.landmark_likelihoods.clone();
|
||||
best_detection_certainty = clnf_model.detection_certainty;
|
||||
best_success = success;
|
||||
|
||||
for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part)
|
||||
{
|
||||
best_likelihood_h[part] = clnf_model.hierarchical_models[part].model_likelihood;
|
||||
best_global_parameters_h[part] = clnf_model.hierarchical_models[part].params_global;
|
||||
best_local_parameters_h[part] = clnf_model.hierarchical_models[part].params_local.clone();
|
||||
best_detected_landmarks_h[part] = clnf_model.hierarchical_models[part].detected_landmarks.clone();
|
||||
best_landmark_likelihoods_h[part] = clnf_model.hierarchical_models[part].landmark_likelihoods.clone();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Store the best estimates in the clnf_model
|
||||
clnf_model.model_likelihood = best_likelihood;
|
||||
clnf_model.params_global = best_global_parameters;
|
||||
clnf_model.params_local = best_local_parameters.clone();
|
||||
clnf_model.detected_landmarks = best_detected_landmarks.clone();
|
||||
clnf_model.detection_success = best_success;
|
||||
clnf_model.landmark_likelihoods = best_landmark_likelihoods.clone();
|
||||
clnf_model.detection_certainty = best_detection_certainty;
|
||||
|
||||
for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part)
|
||||
{
|
||||
clnf_model.hierarchical_models[part].params_global = best_global_parameters_h[part];
|
||||
clnf_model.hierarchical_models[part].params_local = best_local_parameters_h[part].clone();
|
||||
clnf_model.hierarchical_models[part].detected_landmarks = best_detected_landmarks_h[part].clone();
|
||||
clnf_model.hierarchical_models[part].landmark_likelihoods = best_landmark_likelihoods_h[part].clone();
|
||||
}
|
||||
|
||||
return best_success;
|
||||
|
||||
|
||||
}
|
||||
|
||||
// Helper index sorting function
|
||||
template <typename T> std::vector<size_t> sort_indexes(const std::vector<T> &v) {
|
||||
|
||||
// initialize original index locations
|
||||
std::vector<size_t> idx(v.size());
|
||||
std::iota(idx.begin(), idx.end(), 0);
|
||||
|
||||
// sort indexes based on comparing values in v
|
||||
std::sort(idx.begin(), idx.end(),
|
||||
[&v](size_t i1, size_t i2) {return v[i1] > v[i2]; });
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
bool DetectLandmarksInImageMultiHypEarlyTerm(const cv::Mat_<uchar> &grayscale_image, std::vector<cv::Vec3d> rotation_hypotheses,
|
||||
const cv::Rect_<double> bounding_box, CLNF& clnf_model, FaceModelParameters& params)
|
||||
{
|
||||
FaceModelParameters old_params(params);
|
||||
|
||||
// Use the initialisation size for the landmark detection
|
||||
params.window_sizes_current = params.window_sizes_init;
|
||||
|
||||
bool early_term = false;
|
||||
|
||||
// Setup the parameters accordingly
|
||||
// Only do the first iteration
|
||||
for (size_t i = 1; i < params.window_sizes_current.size(); ++i)
|
||||
{
|
||||
params.window_sizes_current[i] = 0;
|
||||
}
|
||||
params.refine_hierarchical = false;
|
||||
params.validate_detections = false;
|
||||
|
||||
bool success = false;
|
||||
|
||||
// Keeping track of converges
|
||||
std::vector<float> likelihoods;
|
||||
std::vector<cv::Vec6f> global_parameters;
|
||||
std::vector<cv::Mat_<float>> local_parameters;
|
||||
|
||||
for (size_t hypothesis = 0; hypothesis < rotation_hypotheses.size(); ++hypothesis)
|
||||
{
|
||||
// Reset the potentially set clnf_model parameters
|
||||
clnf_model.params_local.setTo(0.0);
|
||||
|
||||
for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part)
|
||||
{
|
||||
clnf_model.hierarchical_models[part].params_local.setTo(0.0);
|
||||
}
|
||||
|
||||
// calculate the local and global parameters from the generated 2D shape (mapping from the 2D to 3D because camera params are unknown)
|
||||
clnf_model.pdm.CalcParams(clnf_model.params_global, bounding_box, clnf_model.params_local, rotation_hypotheses[hypothesis]);
|
||||
|
||||
// Perform landmark detection in first scale
|
||||
clnf_model.DetectLandmarks(grayscale_image, params);
|
||||
|
||||
float lhood = clnf_model.model_likelihood * clnf_model.patch_experts.early_term_weights[clnf_model.view_used] + clnf_model.patch_experts.early_term_biases[clnf_model.view_used];
|
||||
|
||||
// If likelihood higher than cutoff continue on this model
|
||||
if (lhood > clnf_model.patch_experts.early_term_cutoffs[clnf_model.view_used])
|
||||
{
|
||||
params.refine_hierarchical = old_params.refine_hierarchical;
|
||||
params.window_sizes_current = params.window_sizes_init;
|
||||
params.window_sizes_current[0] = 0;
|
||||
params.validate_detections = old_params.validate_detections;
|
||||
success = clnf_model.DetectLandmarks(grayscale_image, params);
|
||||
early_term = true;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
likelihoods.push_back(lhood);
|
||||
global_parameters.push_back(clnf_model.params_global);
|
||||
local_parameters.push_back(clnf_model.params_local);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (!early_term)
|
||||
{
|
||||
|
||||
// Store the current best estimate
|
||||
float best_likelihood;
|
||||
cv::Vec6f best_global_parameters;
|
||||
cv::Mat_<float> best_local_parameters;
|
||||
cv::Mat_<float> best_detected_landmarks;
|
||||
cv::Mat_<float> best_landmark_likelihoods;
|
||||
bool best_success;
|
||||
|
||||
// The hierarchical model parameters
|
||||
std::vector<float> best_likelihood_h(clnf_model.hierarchical_models.size());
|
||||
std::vector<cv::Vec6f> best_global_parameters_h(clnf_model.hierarchical_models.size());
|
||||
std::vector<cv::Mat_<float>> best_local_parameters_h(clnf_model.hierarchical_models.size());
|
||||
std::vector<cv::Mat_<float>> best_detected_landmarks_h(clnf_model.hierarchical_models.size());
|
||||
std::vector<cv::Mat_<float>> best_landmark_likelihoods_h(clnf_model.hierarchical_models.size());
|
||||
|
||||
// Sort the likelihoods and pick the best top 3 models
|
||||
std::vector<size_t> indices = sort_indexes(likelihoods);
|
||||
|
||||
// Pick 3 best hypotheses and complete them
|
||||
size_t max = indices.size() >= 3 ? 3 : indices.size();
|
||||
|
||||
params.refine_hierarchical = old_params.refine_hierarchical;
|
||||
params.window_sizes_current = params.window_sizes_init;
|
||||
params.window_sizes_current[0] = 0;
|
||||
params.validate_detections = old_params.validate_detections;
|
||||
|
||||
|
||||
for (size_t i = 0; i < max; ++i)
|
||||
{
|
||||
// Reset the potentially set clnf_model parameters
|
||||
clnf_model.params_local = local_parameters[indices[i]];
|
||||
clnf_model.params_global = global_parameters[indices[i]];
|
||||
for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part)
|
||||
{
|
||||
clnf_model.hierarchical_models[part].params_local.setTo(0.0);
|
||||
}
|
||||
|
||||
// Perform landmark detection in first scale
|
||||
success = clnf_model.DetectLandmarks(grayscale_image, params);
|
||||
|
||||
if (i == 0 || best_likelihood < clnf_model.model_likelihood)
|
||||
{
|
||||
best_likelihood = clnf_model.model_likelihood;
|
||||
best_global_parameters = clnf_model.params_global;
|
||||
best_local_parameters = clnf_model.params_local.clone();
|
||||
best_detected_landmarks = clnf_model.detected_landmarks.clone();
|
||||
best_landmark_likelihoods = clnf_model.landmark_likelihoods.clone();
|
||||
best_success = success;
|
||||
|
||||
for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part)
|
||||
{
|
||||
best_likelihood_h[part] = clnf_model.hierarchical_models[part].model_likelihood;
|
||||
best_global_parameters_h[part] = clnf_model.hierarchical_models[part].params_global;
|
||||
best_local_parameters_h[part] = clnf_model.hierarchical_models[part].params_local.clone();
|
||||
best_detected_landmarks_h[part] = clnf_model.hierarchical_models[part].detected_landmarks.clone();
|
||||
best_landmark_likelihoods_h[part] = clnf_model.hierarchical_models[part].landmark_likelihoods.clone();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Store the best estimates in the clnf_model
|
||||
clnf_model.model_likelihood = best_likelihood;
|
||||
clnf_model.params_global = best_global_parameters;
|
||||
clnf_model.params_local = best_local_parameters.clone();
|
||||
clnf_model.detected_landmarks = best_detected_landmarks.clone();
|
||||
clnf_model.detection_success = best_success;
|
||||
clnf_model.landmark_likelihoods = best_landmark_likelihoods.clone();
|
||||
|
||||
for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part)
|
||||
{
|
||||
clnf_model.hierarchical_models[part].params_global = best_global_parameters_h[part];
|
||||
clnf_model.hierarchical_models[part].params_local = best_local_parameters_h[part].clone();
|
||||
clnf_model.hierarchical_models[part].detected_landmarks = best_detected_landmarks_h[part].clone();
|
||||
clnf_model.hierarchical_models[part].landmark_likelihoods = best_landmark_likelihoods_h[part].clone();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
params = old_params;
|
||||
|
||||
return success;
|
||||
|
||||
}
|
||||
|
||||
|
||||
// This is the one where the actual work gets done, other DetectLandmarksInImage calls lead to this one
|
||||
bool LandmarkDetector::DetectLandmarksInImage(const cv::Mat &rgb_image, const cv::Rect_<double> bounding_box, CLNF& clnf_model, FaceModelParameters& params, cv::Mat &grayscale_image)
|
||||
{
|
||||
|
||||
if (grayscale_image.empty())
|
||||
{
|
||||
Utilities::ConvertToGrayscale_8bit(rgb_image, grayscale_image);
|
||||
}
|
||||
|
||||
// Can have multiple hypotheses
|
||||
std::vector<cv::Vec3d> rotation_hypotheses;
|
||||
|
||||
if(params.multi_view)
|
||||
{
|
||||
// Try out different orientation initialisations
|
||||
// It is possible to add other orientation hypotheses easilly by just pushing to this vector
|
||||
rotation_hypotheses.push_back(cv::Vec3d(0,0,0));
|
||||
rotation_hypotheses.push_back(cv::Vec3d(0, -0.5236, 0));
|
||||
rotation_hypotheses.push_back(cv::Vec3d(0, 0.5236,0));
|
||||
rotation_hypotheses.push_back(cv::Vec3d(0, -0.96, 0));
|
||||
rotation_hypotheses.push_back(cv::Vec3d(0, 0.96, 0));
|
||||
rotation_hypotheses.push_back(cv::Vec3d(0, 0, 0.5236));
|
||||
rotation_hypotheses.push_back(cv::Vec3d(0, 0, -0.5236));
|
||||
rotation_hypotheses.push_back(cv::Vec3d(0, -1.57, 0));
|
||||
rotation_hypotheses.push_back(cv::Vec3d(0, 1.57, 0));
|
||||
rotation_hypotheses.push_back(cv::Vec3d(0, -1.22, 0.698));
|
||||
rotation_hypotheses.push_back(cv::Vec3d(0, 1.22, -0.698));
|
||||
}
|
||||
else
|
||||
{
|
||||
// Assume the face is close to frontal
|
||||
rotation_hypotheses.push_back(cv::Vec3d(0,0,0));
|
||||
}
|
||||
|
||||
bool success;
|
||||
|
||||
// Either use basic multi-hypothesis testing or clever testing if early termination parameters are present
|
||||
if(clnf_model.patch_experts.early_term_biases.size() == 0)
|
||||
{
|
||||
success = DetectLandmarksInImageMultiHypBasic(grayscale_image, rotation_hypotheses, bounding_box, clnf_model, params);
|
||||
}
|
||||
else
|
||||
{
|
||||
success = DetectLandmarksInImageMultiHypEarlyTerm(grayscale_image, rotation_hypotheses, bounding_box, clnf_model, params);
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
bool LandmarkDetector::DetectLandmarksInImage(const cv::Mat &rgb_image, CLNF& clnf_model, FaceModelParameters& params, cv::Mat &grayscale_image)
|
||||
{
|
||||
if (grayscale_image.empty())
|
||||
{
|
||||
Utilities::ConvertToGrayscale_8bit(rgb_image, grayscale_image);
|
||||
}
|
||||
|
||||
cv::Rect_<float> bounding_box;
|
||||
|
||||
// If the face detector has not been initialised read it in
|
||||
if(clnf_model.face_detector_HAAR.empty() && params.curr_face_detector == FaceModelParameters::HAAR_DETECTOR)
|
||||
{
|
||||
clnf_model.face_detector_HAAR.load(params.haar_face_detector_location);
|
||||
clnf_model.haar_face_detector_location = params.haar_face_detector_location;
|
||||
}
|
||||
|
||||
if (clnf_model.face_detector_MTCNN.empty() && params.curr_face_detector == FaceModelParameters::MTCNN_DETECTOR)
|
||||
{
|
||||
clnf_model.face_detector_MTCNN.Read(params.mtcnn_face_detector_location);
|
||||
clnf_model.mtcnn_face_detector_location = params.mtcnn_face_detector_location;
|
||||
|
||||
// If the model is still empty default to HOG
|
||||
if (clnf_model.face_detector_MTCNN.empty())
|
||||
{
|
||||
std::cout << "INFO: defaulting to HOG-SVM face detector" << std::endl;
|
||||
params.curr_face_detector = LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Detect the face first
|
||||
if(params.curr_face_detector == FaceModelParameters::HOG_SVM_DETECTOR)
|
||||
{
|
||||
float confidence;
|
||||
LandmarkDetector::DetectSingleFaceHOG(bounding_box, grayscale_image, clnf_model.face_detector_HOG, confidence);
|
||||
}
|
||||
else if(params.curr_face_detector == FaceModelParameters::HAAR_DETECTOR)
|
||||
{
|
||||
LandmarkDetector::DetectSingleFace(bounding_box, rgb_image, clnf_model.face_detector_HAAR);
|
||||
}
|
||||
else if (params.curr_face_detector == FaceModelParameters::MTCNN_DETECTOR)
|
||||
{
|
||||
float confidence;
|
||||
LandmarkDetector::DetectSingleFaceMTCNN(bounding_box, rgb_image, clnf_model.face_detector_MTCNN, confidence);
|
||||
}
|
||||
|
||||
if(bounding_box.width == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
return DetectLandmarksInImage(rgb_image, bounding_box, clnf_model, params, grayscale_image);
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,340 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
|
||||
// all rights reserved.
|
||||
//
|
||||
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
|
||||
//
|
||||
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
|
||||
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
|
||||
//
|
||||
// License can be found in OpenFace-license.txt
|
||||
//
|
||||
// * Any publications arising from the use of this software, including but
|
||||
// not limited to academic journal and conference publications, technical
|
||||
// reports and manuals, must cite at least one of the following works:
|
||||
//
|
||||
// OpenFace 2.0: Facial Behavior Analysis Toolkit
|
||||
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
|
||||
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
|
||||
//
|
||||
// Convolutional experts constrained local model for facial landmark detection.
|
||||
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
|
||||
// in Computer Vision and Pattern Recognition Workshops, 2017.
|
||||
//
|
||||
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
|
||||
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
|
||||
// in IEEE International. Conference on Computer Vision (ICCV), 2015
|
||||
//
|
||||
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
|
||||
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
|
||||
// in Facial Expression Recognition and Analysis Challenge,
|
||||
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "stdafx.h"
|
||||
|
||||
#include "LandmarkDetectorParameters.h"
|
||||
|
||||
// System includes
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
|
||||
#ifndef CONFIG_DIR
|
||||
#define CONFIG_DIR "~"
|
||||
#endif
|
||||
|
||||
using namespace LandmarkDetector;
|
||||
|
||||
FaceModelParameters::FaceModelParameters()
|
||||
{
|
||||
// initialise the default values
|
||||
init();
|
||||
check_model_path();
|
||||
|
||||
}
|
||||
|
||||
FaceModelParameters::FaceModelParameters(std::vector<std::string> &arguments)
|
||||
{
|
||||
// initialise the default values
|
||||
init();
|
||||
|
||||
// First element is reserved for the executable location (useful for finding relative model locs)
|
||||
fs::path root = fs::path(arguments[0]).parent_path();
|
||||
|
||||
bool* valid = new bool[arguments.size()];
|
||||
valid[0] = true;
|
||||
|
||||
for (size_t i = 1; i < arguments.size(); ++i)
|
||||
{
|
||||
valid[i] = true;
|
||||
|
||||
if (arguments[i].compare("-mloc") == 0)
|
||||
{
|
||||
std::string model_loc = arguments[i + 1];
|
||||
model_location = model_loc;
|
||||
valid[i] = false;
|
||||
valid[i + 1] = false;
|
||||
i++;
|
||||
|
||||
}
|
||||
if (arguments[i].compare("-fdloc") ==0)
|
||||
{
|
||||
std::string face_detector_loc = arguments[i + 1];
|
||||
haar_face_detector_location = face_detector_loc;
|
||||
curr_face_detector = HAAR_DETECTOR;
|
||||
valid[i] = false;
|
||||
valid[i + 1] = false;
|
||||
i++;
|
||||
}
|
||||
if (arguments[i].compare("-sigma") == 0)
|
||||
{
|
||||
std::stringstream data(arguments[i + 1]);
|
||||
data >> sigma;
|
||||
valid[i] = false;
|
||||
valid[i + 1] = false;
|
||||
i++;
|
||||
}
|
||||
else if (arguments[i].compare("-w_reg") == 0)
|
||||
{
|
||||
std::stringstream data(arguments[i + 1]);
|
||||
data >> weight_factor;
|
||||
valid[i] = false;
|
||||
valid[i + 1] = false;
|
||||
i++;
|
||||
}
|
||||
else if (arguments[i].compare("-reg") == 0)
|
||||
{
|
||||
std::stringstream data(arguments[i + 1]);
|
||||
data >> reg_factor;
|
||||
valid[i] = false;
|
||||
valid[i + 1] = false;
|
||||
i++;
|
||||
}
|
||||
else if (arguments[i].compare("-multi_view") == 0)
|
||||
{
|
||||
|
||||
std::stringstream data(arguments[i + 1]);
|
||||
int m_view;
|
||||
data >> m_view;
|
||||
|
||||
multi_view = (bool)(m_view != 0);
|
||||
valid[i] = false;
|
||||
valid[i + 1] = false;
|
||||
i++;
|
||||
}
|
||||
else if (arguments[i].compare("-validate_detections") == 0)
|
||||
{
|
||||
std::stringstream data(arguments[i + 1]);
|
||||
int v_det;
|
||||
data >> v_det;
|
||||
|
||||
validate_detections = (bool)(v_det != 0);
|
||||
valid[i] = false;
|
||||
valid[i + 1] = false;
|
||||
i++;
|
||||
}
|
||||
else if (arguments[i].compare("-n_iter") == 0)
|
||||
{
|
||||
std::stringstream data(arguments[i + 1]);
|
||||
data >> num_optimisation_iteration;
|
||||
|
||||
valid[i] = false;
|
||||
valid[i + 1] = false;
|
||||
i++;
|
||||
}
|
||||
else if (arguments[i].compare("-wild") == 0)
|
||||
{
|
||||
// For in the wild fitting these parameters are suitable
|
||||
window_sizes_init = std::vector<int>(4);
|
||||
window_sizes_init[0] = 15; window_sizes_init[1] = 13; window_sizes_init[2] = 11; window_sizes_init[3] = 11;
|
||||
|
||||
sigma = 1.25;
|
||||
reg_factor = 35;
|
||||
weight_factor = 2.5;
|
||||
num_optimisation_iteration = 10;
|
||||
|
||||
valid[i] = false;
|
||||
|
||||
// For in-the-wild images use an in-the wild detector
|
||||
curr_face_detector = MTCNN_DETECTOR;
|
||||
|
||||
// Use multi-view hypotheses if in-the-wild setting
|
||||
multi_view = true;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = (int)arguments.size() - 1; i >= 0; --i)
|
||||
{
|
||||
if (!valid[i])
|
||||
{
|
||||
arguments.erase(arguments.begin() + i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Make sure model_location is valid
|
||||
// First check working directory, then the executable's directory, then the config path set by the build process.
|
||||
fs::path config_path = fs::path(CONFIG_DIR);
|
||||
fs::path model_path = fs::path(model_location);
|
||||
if (fs::exists(model_path))
|
||||
{
|
||||
model_location = model_path.string();
|
||||
}
|
||||
else if (fs::exists(root/model_path))
|
||||
{
|
||||
model_location = (root/model_path).string();
|
||||
}
|
||||
else if (fs::exists(config_path/model_path))
|
||||
{
|
||||
model_location = (config_path/model_path).string();
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "Could not find the landmark detection model to load" << std::endl;
|
||||
}
|
||||
|
||||
if (model_path.stem().string().compare("main_ceclm_general") == 0)
|
||||
{
|
||||
curr_landmark_detector = CECLM_DETECTOR;
|
||||
sigma = 1.5f * sigma;
|
||||
reg_factor = 0.9f * reg_factor;
|
||||
}
|
||||
else if (model_path.stem().string().compare("main_clnf_general") == 0)
|
||||
{
|
||||
curr_landmark_detector = CLNF_DETECTOR;
|
||||
}
|
||||
else if (model_path.stem().string().compare("main_clm_general") == 0)
|
||||
{
|
||||
curr_landmark_detector = CLM_DETECTOR;
|
||||
}
|
||||
|
||||
// Make sure face detector location is valid
|
||||
// First check working directory, then the executable's directory, then the config path set by the build process.
|
||||
model_path = fs::path(haar_face_detector_location);
|
||||
if (fs::exists(model_path))
|
||||
{
|
||||
haar_face_detector_location = model_path.string();
|
||||
}
|
||||
else if (fs::exists(root / model_path))
|
||||
{
|
||||
haar_face_detector_location = (root / model_path).string();
|
||||
}
|
||||
else if (fs::exists(config_path / model_path))
|
||||
{
|
||||
haar_face_detector_location = (config_path / model_path).string();
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "Could not find the HAAR face detector location" << std::endl;
|
||||
}
|
||||
|
||||
// Make sure face detector location is valid
|
||||
// First check working directory, then the executable's directory, then the config path set by the build process.
|
||||
model_path = fs::path(mtcnn_face_detector_location);
|
||||
if (fs::exists(model_path))
|
||||
{
|
||||
mtcnn_face_detector_location = model_path.string();
|
||||
}
|
||||
else if (fs::exists(root / model_path))
|
||||
{
|
||||
mtcnn_face_detector_location = (root / model_path).string();
|
||||
}
|
||||
else if (fs::exists(config_path / model_path))
|
||||
{
|
||||
mtcnn_face_detector_location = (config_path / model_path).string();
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "Could not find the MTCNN face detector location" << std::endl;
|
||||
}
|
||||
check_model_path(root.string());
|
||||
}
|
||||
|
||||
void FaceModelParameters::check_model_path(const std::string& root)
|
||||
{
|
||||
// Make sure model_location is valid
|
||||
// First check working directory, then the executable's directory, then the config path set by the build process.
|
||||
fs::path config_path = fs::path(CONFIG_DIR);
|
||||
fs::path model_path = fs::path(model_location);
|
||||
fs::path root_path = fs::path(root);
|
||||
|
||||
if (fs::exists(model_path))
|
||||
{
|
||||
model_location = model_path.string();
|
||||
}
|
||||
else if (fs::exists(root_path / model_path))
|
||||
{
|
||||
model_location = (root_path / model_path).string();
|
||||
}
|
||||
else if (fs::exists(config_path / model_path))
|
||||
{
|
||||
model_location = (config_path / model_path).string();
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "Could not find the landmark detection model to load" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void FaceModelParameters::init()
|
||||
{
|
||||
|
||||
// number of iterations that will be performed at each scale
|
||||
num_optimisation_iteration = 5;
|
||||
|
||||
// using an external face checker based on SVM
|
||||
validate_detections = true;
|
||||
|
||||
// Using hierarchical refinement by default (can be turned off)
|
||||
refine_hierarchical = true;
|
||||
|
||||
// Refining parameters by default
|
||||
refine_parameters = true;
|
||||
|
||||
window_sizes_small = std::vector<int>(4);
|
||||
window_sizes_init = std::vector<int>(4);
|
||||
|
||||
// For fast tracking
|
||||
window_sizes_small[0] = 0;
|
||||
window_sizes_small[1] = 9;
|
||||
window_sizes_small[2] = 7;
|
||||
window_sizes_small[3] = 0;
|
||||
|
||||
// Just for initialisation
|
||||
window_sizes_init.at(0) = 11;
|
||||
window_sizes_init.at(1) = 9;
|
||||
window_sizes_init.at(2) = 7;
|
||||
window_sizes_init.at(3) = 5;
|
||||
|
||||
face_template_scale = 0.3f;
|
||||
// Off by default (as it might lead to some slight inaccuracies in slowly moving faces)
|
||||
use_face_template = false;
|
||||
|
||||
// For first frame use the initialisation
|
||||
window_sizes_current = window_sizes_init;
|
||||
|
||||
model_location = "model/main_ceclm_general.txt";
|
||||
curr_landmark_detector = CECLM_DETECTOR;
|
||||
|
||||
sigma = 1.5f;
|
||||
reg_factor = 25.0f;
|
||||
weight_factor = 0.0f; // By default do not use NU-RLMS for videos as it does not work as well for them
|
||||
|
||||
validation_boundary = 0.725f;
|
||||
|
||||
limit_pose = true;
|
||||
multi_view = false;
|
||||
|
||||
reinit_video_every = 2;
|
||||
|
||||
// Face detection
|
||||
haar_face_detector_location = "classifiers/haarcascade_frontalface_alt.xml";
|
||||
mtcnn_face_detector_location = "model/mtcnn_detector/MTCNN_detector.txt";
|
||||
|
||||
// By default use MTCNN
|
||||
curr_face_detector = MTCNN_DETECTOR;
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,925 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
|
||||
// all rights reserved.
|
||||
//
|
||||
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
|
||||
//
|
||||
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
|
||||
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
|
||||
//
|
||||
// License can be found in OpenFace-license.txt
|
||||
//
|
||||
// * Any publications arising from the use of this software, including but
|
||||
// not limited to academic journal and conference publications, technical
|
||||
// reports and manuals, must cite at least one of the following works:
|
||||
//
|
||||
// OpenFace 2.0: Facial Behavior Analysis Toolkit
|
||||
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
|
||||
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
|
||||
//
|
||||
// Convolutional experts constrained local model for facial landmark detection.
|
||||
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
|
||||
// in Computer Vision and Pattern Recognition Workshops, 2017.
|
||||
//
|
||||
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
|
||||
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
|
||||
// in IEEE International. Conference on Computer Vision (ICCV), 2015
|
||||
//
|
||||
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
|
||||
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
|
||||
// in Facial Expression Recognition and Analysis Challenge,
|
||||
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "stdafx.h"
|
||||
|
||||
#include <LandmarkDetectorUtils.h>
|
||||
#include <RotationHelpers.h>
|
||||
|
||||
// OpenCV includes
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/calib3d.hpp>
|
||||
|
||||
namespace LandmarkDetector
|
||||
{
|
||||
|
||||
//===========================================================================
|
||||
// Fast patch expert response computation (linear model across a ROI) using normalised cross-correlation
|
||||
//===========================================================================
|
||||
|
||||
void crossCorr_m(const cv::Mat_<float>& img, cv::Mat_<double>& img_dft, const cv::Mat_<float>& _templ,
|
||||
std::map<int, cv::Mat_<double> >& _templ_dfts, cv::Mat_<float>& corr)
|
||||
{
|
||||
// Our model will always be under min block size so can ignore this
|
||||
//const double blockScale = 4.5;
|
||||
//const int minBlockSize = 256;
|
||||
|
||||
int maxDepth = CV_64F;
|
||||
|
||||
cv::Size dftsize;
|
||||
|
||||
dftsize.width = cv::getOptimalDFTSize(corr.cols + _templ.cols - 1);
|
||||
dftsize.height = cv::getOptimalDFTSize(corr.rows + _templ.rows - 1);
|
||||
|
||||
// Compute block size
|
||||
cv::Size blocksize;
|
||||
blocksize.width = dftsize.width - _templ.cols + 1;
|
||||
blocksize.width = MIN(blocksize.width, corr.cols);
|
||||
blocksize.height = dftsize.height - _templ.rows + 1;
|
||||
blocksize.height = MIN(blocksize.height, corr.rows);
|
||||
|
||||
cv::Mat_<double> dftTempl;
|
||||
|
||||
// if this has not been precomputed, precompute it, otherwise use it
|
||||
if (_templ_dfts.find(dftsize.width) == _templ_dfts.end())
|
||||
{
|
||||
dftTempl.create(dftsize.height, dftsize.width);
|
||||
|
||||
cv::Mat_<float> src = _templ;
|
||||
|
||||
cv::Mat_<double> dst(dftTempl, cv::Rect(0, 0, dftsize.width, dftsize.height));
|
||||
|
||||
cv::Mat_<double> dst1(dftTempl, cv::Rect(0, 0, _templ.cols, _templ.rows));
|
||||
|
||||
if (dst1.data != src.data)
|
||||
src.convertTo(dst1, dst1.depth());
|
||||
|
||||
if (dst.cols > _templ.cols)
|
||||
{
|
||||
cv::Mat_<double> part(dst, cv::Range(0, _templ.rows), cv::Range(_templ.cols, dst.cols));
|
||||
part.setTo(0);
|
||||
}
|
||||
|
||||
// Perform DFT of the template
|
||||
dft(dst, dst, 0, _templ.rows);
|
||||
|
||||
_templ_dfts[dftsize.width] = dftTempl;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
// use the precomputed version
|
||||
dftTempl = _templ_dfts.find(dftsize.width)->second;
|
||||
}
|
||||
|
||||
cv::Size bsz(std::min(blocksize.width, corr.cols), std::min(blocksize.height, corr.rows));
|
||||
cv::Mat src;
|
||||
|
||||
cv::Mat cdst(corr, cv::Rect(0, 0, bsz.width, bsz.height));
|
||||
|
||||
cv::Mat_<double> dftImg;
|
||||
|
||||
if (img_dft.empty())
|
||||
{
|
||||
dftImg.create(dftsize);
|
||||
dftImg.setTo(0.0);
|
||||
|
||||
cv::Size dsz(bsz.width + _templ.cols - 1, bsz.height + _templ.rows - 1);
|
||||
|
||||
int x2 = std::min(img.cols, dsz.width);
|
||||
int y2 = std::min(img.rows, dsz.height);
|
||||
|
||||
cv::Mat src0(img, cv::Range(0, y2), cv::Range(0, x2));
|
||||
cv::Mat dst(dftImg, cv::Rect(0, 0, dsz.width, dsz.height));
|
||||
cv::Mat dst1(dftImg, cv::Rect(0, 0, x2, y2));
|
||||
|
||||
src = src0;
|
||||
|
||||
if (dst1.data != src.data)
|
||||
src.convertTo(dst1, dst1.depth());
|
||||
|
||||
dft(dftImg, dftImg, 0, dsz.height);
|
||||
img_dft = dftImg.clone();
|
||||
}
|
||||
|
||||
cv::Mat dftTempl1(dftTempl, cv::Rect(0, 0, dftsize.width, dftsize.height));
|
||||
cv::mulSpectrums(img_dft, dftTempl1, dftImg, 0, true);
|
||||
cv::dft(dftImg, dftImg, cv::DFT_INVERSE + cv::DFT_SCALE, bsz.height);
|
||||
|
||||
src = dftImg(cv::Rect(0, 0, bsz.width, bsz.height));
|
||||
|
||||
src.convertTo(cdst, CV_32F);
|
||||
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void matchTemplate_m(const cv::Mat_<float>& input_img, cv::Mat_<double>& img_dft, cv::Mat& _integral_img,
|
||||
cv::Mat& _integral_img_sq, const cv::Mat_<float>& templ, std::map<int, cv::Mat_<double> >& templ_dfts,
|
||||
cv::Mat_<float>& result, int method)
|
||||
{
|
||||
|
||||
int numType = method == cv::TM_CCORR || method == cv::TM_CCORR_NORMED ? 0 :
|
||||
method == cv::TM_CCOEFF || method == cv::TM_CCOEFF_NORMED ? 1 : 2;
|
||||
bool isNormed = method == cv::TM_CCORR_NORMED ||
|
||||
method == cv::TM_SQDIFF_NORMED ||
|
||||
method == cv::TM_CCOEFF_NORMED;
|
||||
|
||||
// Assume result is defined properly
|
||||
if (result.empty())
|
||||
{
|
||||
cv::Size corrSize(input_img.cols - templ.cols + 1, input_img.rows - templ.rows + 1);
|
||||
result.create(corrSize);
|
||||
}
|
||||
LandmarkDetector::crossCorr_m(input_img, img_dft, templ, templ_dfts, result);
|
||||
|
||||
if (method == cv::TM_CCORR)
|
||||
return;
|
||||
|
||||
double invArea = 1. / ((double)templ.rows * templ.cols);
|
||||
|
||||
cv::Mat sum, sqsum;
|
||||
cv::Scalar templMean, templSdv;
|
||||
double *q0 = 0, *q1 = 0, *q2 = 0, *q3 = 0;
|
||||
double templNorm = 0, templSum2 = 0;
|
||||
|
||||
if (method == cv::TM_CCOEFF)
|
||||
{
|
||||
// If it has not been precomputed compute it now
|
||||
if (_integral_img.empty())
|
||||
{
|
||||
integral(input_img, _integral_img, CV_64F);
|
||||
}
|
||||
sum = _integral_img;
|
||||
|
||||
templMean = cv::mean(templ);
|
||||
}
|
||||
else
|
||||
{
|
||||
// If it has not been precomputed compute it now
|
||||
if (_integral_img.empty())
|
||||
{
|
||||
integral(input_img, _integral_img, _integral_img_sq, CV_64F);
|
||||
}
|
||||
|
||||
sum = _integral_img;
|
||||
sqsum = _integral_img_sq;
|
||||
|
||||
meanStdDev(templ, templMean, templSdv);
|
||||
|
||||
templNorm = templSdv[0] * templSdv[0] + templSdv[1] * templSdv[1] + templSdv[2] * templSdv[2] + templSdv[3] * templSdv[3];
|
||||
|
||||
if (templNorm < DBL_EPSILON && method == cv::TM_CCOEFF_NORMED)
|
||||
{
|
||||
result.setTo(1.0);
|
||||
return;
|
||||
}
|
||||
|
||||
templSum2 = templNorm + templMean[0] * templMean[0] + templMean[1] * templMean[1] + templMean[2] * templMean[2] + templMean[3] * templMean[3];
|
||||
|
||||
if (numType != 1)
|
||||
{
|
||||
templMean = cv::Scalar::all(0);
|
||||
templNorm = templSum2;
|
||||
}
|
||||
|
||||
templSum2 /= invArea;
|
||||
templNorm = std::sqrt(templNorm);
|
||||
templNorm /= std::sqrt(invArea); // care of accuracy here
|
||||
|
||||
q0 = (double*)sqsum.data;
|
||||
q1 = q0 + templ.cols;
|
||||
q2 = (double*)(sqsum.data + templ.rows*sqsum.step);
|
||||
q3 = q2 + templ.cols;
|
||||
}
|
||||
|
||||
double* p0 = (double*)sum.data;
|
||||
double* p1 = p0 + templ.cols;
|
||||
double* p2 = (double*)(sum.data + templ.rows*sum.step);
|
||||
double* p3 = p2 + templ.cols;
|
||||
|
||||
int sumstep = sum.data ? (int)(sum.step / sizeof(double)) : 0;
|
||||
int sqstep = sqsum.data ? (int)(sqsum.step / sizeof(double)) : 0;
|
||||
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < result.rows; i++)
|
||||
{
|
||||
float* rrow = result.ptr<float>(i);
|
||||
int idx = i * sumstep;
|
||||
int idx2 = i * sqstep;
|
||||
|
||||
for (j = 0; j < result.cols; j++, idx += 1, idx2 += 1)
|
||||
{
|
||||
double num = rrow[j], t;
|
||||
double wndMean2 = 0, wndSum2 = 0;
|
||||
|
||||
if (numType == 1)
|
||||
{
|
||||
|
||||
t = p0[idx] - p1[idx] - p2[idx] + p3[idx];
|
||||
wndMean2 += t*t;
|
||||
num -= t*templMean[0];
|
||||
|
||||
wndMean2 *= invArea;
|
||||
}
|
||||
|
||||
if (isNormed || numType == 2)
|
||||
{
|
||||
|
||||
t = q0[idx2] - q1[idx2] - q2[idx2] + q3[idx2];
|
||||
wndSum2 += t;
|
||||
|
||||
if (numType == 2)
|
||||
{
|
||||
num = wndSum2 - 2 * num + templSum2;
|
||||
num = MAX(num, 0.);
|
||||
}
|
||||
}
|
||||
|
||||
if (isNormed)
|
||||
{
|
||||
t = std::sqrt(MAX(wndSum2 - wndMean2, 0))*templNorm;
|
||||
if (fabs(num) < t)
|
||||
num /= t;
|
||||
else if (fabs(num) < t*1.125)
|
||||
num = num > 0 ? 1 : -1;
|
||||
else
|
||||
num = method != cv::TM_SQDIFF_NORMED ? 0 : 1;
|
||||
}
|
||||
|
||||
rrow[j] = (float)num;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Useful utility for grabing a bounding box around a set of 2D landmarks (as a 1D 2n x 1 vector of xs followed by doubles or as an n x 2 vector)
|
||||
void ExtractBoundingBox(const cv::Mat_<float>& landmarks, float &min_x, float &max_x, float &min_y, float &max_y)
|
||||
{
|
||||
|
||||
if (landmarks.cols == 1)
|
||||
{
|
||||
int n = landmarks.rows / 2;
|
||||
cv::MatConstIterator_<float> landmarks_it = landmarks.begin();
|
||||
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
float val = *landmarks_it++;
|
||||
|
||||
if (i == 0 || val < min_x)
|
||||
min_x = val;
|
||||
|
||||
if (i == 0 || val > max_x)
|
||||
max_x = val;
|
||||
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
float val = *landmarks_it++;
|
||||
|
||||
if (i == 0 || val < min_y)
|
||||
min_y = val;
|
||||
|
||||
if (i == 0 || val > max_y)
|
||||
max_y = val;
|
||||
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int n = landmarks.rows;
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
float val_x = landmarks.at<float>(i, 0);
|
||||
float val_y = landmarks.at<float>(i, 1);
|
||||
|
||||
if (i == 0 || val_x < min_x)
|
||||
min_x = val_x;
|
||||
|
||||
if (i == 0 || val_x > max_x)
|
||||
max_x = val_x;
|
||||
|
||||
if (i == 0 || val_y < min_y)
|
||||
min_y = val_y;
|
||||
|
||||
if (i == 0 || val_y > max_y)
|
||||
max_y = val_y;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
// Computing landmarks (to be drawn later possibly)
|
||||
std::vector<cv::Point2f> CalculateVisibleLandmarks(const cv::Mat_<float>& shape2D, const cv::Mat_<int>& visibilities)
|
||||
{
|
||||
int n = shape2D.rows / 2;
|
||||
std::vector<cv::Point2f> landmarks;
|
||||
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
if (visibilities.at<int>(i))
|
||||
{
|
||||
cv::Point2f featurePoint(shape2D.at<float>(i), shape2D.at<float>(i + n));
|
||||
|
||||
landmarks.push_back(featurePoint);
|
||||
}
|
||||
}
|
||||
|
||||
return landmarks;
|
||||
}
|
||||
|
||||
// Computing landmarks (to be drawn later possibly)
|
||||
std::vector<cv::Point2f> CalculateAllLandmarks(const cv::Mat_<float>& shape2D)
|
||||
{
|
||||
|
||||
int n = 0;
|
||||
std::vector<cv::Point2f> landmarks;
|
||||
|
||||
if (shape2D.cols == 2)
|
||||
{
|
||||
n = shape2D.rows;
|
||||
}
|
||||
else if (shape2D.cols == 1)
|
||||
{
|
||||
n = shape2D.rows / 2;
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
cv::Point2f featurePoint;
|
||||
if (shape2D.cols == 1)
|
||||
{
|
||||
featurePoint = cv::Point2f(shape2D.at<float>(i), shape2D.at<float>(i + n));
|
||||
}
|
||||
else
|
||||
{
|
||||
featurePoint = cv::Point2f(shape2D.at<float>(i, 0), shape2D.at<float>(i, 1));
|
||||
}
|
||||
|
||||
landmarks.push_back(featurePoint);
|
||||
}
|
||||
|
||||
return landmarks;
|
||||
}
|
||||
|
||||
// Computing landmarks (to be drawn later possibly)
|
||||
std::vector<cv::Point2f> CalculateAllLandmarks(const CLNF& clnf_model)
|
||||
{
|
||||
return CalculateAllLandmarks(clnf_model.detected_landmarks);
|
||||
}
|
||||
|
||||
// Computing landmarks (to be drawn later possibly)
|
||||
std::vector<cv::Point2f> CalculateVisibleLandmarks(const CLNF& clnf_model)
|
||||
{
|
||||
// If the detection was not successful no landmarks are visible
|
||||
if (clnf_model.detection_success)
|
||||
{
|
||||
int idx = clnf_model.patch_experts.GetViewIdx(clnf_model.params_global, 0);
|
||||
// Because we only draw visible points, need to find which points patch experts consider visible at a certain orientation
|
||||
return CalculateVisibleLandmarks(clnf_model.detected_landmarks, clnf_model.patch_experts.visibilities[0][idx]);
|
||||
}
|
||||
else
|
||||
{
|
||||
return std::vector<cv::Point2f>();
|
||||
}
|
||||
}
|
||||
|
||||
// Computing eye landmarks (to be drawn later or in different interfaces)
|
||||
std::vector<cv::Point2f> CalculateVisibleEyeLandmarks(const CLNF& clnf_model)
|
||||
{
|
||||
|
||||
std::vector<cv::Point2f> to_return;
|
||||
// If the model has hierarchical updates draw those too
|
||||
for (size_t i = 0; i < clnf_model.hierarchical_models.size(); ++i)
|
||||
{
|
||||
|
||||
if (clnf_model.hierarchical_model_names[i].compare("left_eye_28") == 0 ||
|
||||
clnf_model.hierarchical_model_names[i].compare("right_eye_28") == 0)
|
||||
{
|
||||
|
||||
auto lmks = CalculateVisibleLandmarks(clnf_model.hierarchical_models[i]);
|
||||
for (auto lmk : lmks)
|
||||
{
|
||||
to_return.push_back(lmk);
|
||||
}
|
||||
}
|
||||
}
|
||||
return to_return;
|
||||
}
|
||||
// Computing the 3D eye landmarks
|
||||
std::vector<cv::Point3f> Calculate3DEyeLandmarks(const CLNF& clnf_model, float fx, float fy, float cx, float cy)
|
||||
{
|
||||
|
||||
std::vector<cv::Point3f> to_return;
|
||||
|
||||
for (size_t i = 0; i < clnf_model.hierarchical_models.size(); ++i)
|
||||
{
|
||||
|
||||
if (clnf_model.hierarchical_model_names[i].compare("left_eye_28") == 0 ||
|
||||
clnf_model.hierarchical_model_names[i].compare("right_eye_28") == 0)
|
||||
{
|
||||
|
||||
auto lmks = clnf_model.hierarchical_models[i].GetShape(fx, fy, cx, cy);
|
||||
|
||||
int num_landmarks = lmks.cols;
|
||||
|
||||
for (int lmk = 0; lmk < num_landmarks; ++lmk)
|
||||
{
|
||||
cv::Point3f curr_lmk(lmks.at<float>(0, lmk), lmks.at<float>(1, lmk), lmks.at<float>(2, lmk));
|
||||
to_return.push_back(curr_lmk);
|
||||
}
|
||||
}
|
||||
}
|
||||
return to_return;
|
||||
}
|
||||
|
||||
// Computing eye landmarks (to be drawn later or in different interfaces)
|
||||
std::vector<cv::Point2f> CalculateAllEyeLandmarks(const CLNF& clnf_model)
|
||||
{
|
||||
|
||||
std::vector<cv::Point2f> to_return;
|
||||
// If the model has hierarchical updates draw those too
|
||||
for (size_t i = 0; i < clnf_model.hierarchical_models.size(); ++i)
|
||||
{
|
||||
|
||||
if (clnf_model.hierarchical_model_names[i].compare("left_eye_28") == 0 ||
|
||||
clnf_model.hierarchical_model_names[i].compare("right_eye_28") == 0)
|
||||
{
|
||||
|
||||
auto lmks = CalculateAllLandmarks(clnf_model.hierarchical_models[i]);
|
||||
for (auto lmk : lmks)
|
||||
{
|
||||
to_return.push_back(lmk);
|
||||
}
|
||||
}
|
||||
}
|
||||
return to_return;
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
|
||||
//============================================================================
|
||||
// Face detection helpers
|
||||
//============================================================================
|
||||
bool DetectFaces(std::vector<cv::Rect_<float> >& o_regions, const cv::Mat_<uchar>& intensity, float min_width, cv::Rect_<float> roi)
|
||||
{
|
||||
cv::CascadeClassifier classifier("./classifiers/haarcascade_frontalface_alt.xml");
|
||||
if (classifier.empty())
|
||||
{
|
||||
std::cout << "Couldn't load the Haar cascade classifier" << std::endl;
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
return DetectFaces(o_regions, intensity, classifier, min_width, roi);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
bool DetectFaces(std::vector<cv::Rect_<float> >& o_regions, const cv::Mat_<uchar>& intensity, cv::CascadeClassifier& classifier, float min_width, cv::Rect_<float> roi)
|
||||
{
|
||||
|
||||
std::vector<cv::Rect> face_detections;
|
||||
if (min_width == -1)
|
||||
{
|
||||
classifier.detectMultiScale(intensity, face_detections, 1.2, 2, 0, cv::Size(50, 50));
|
||||
}
|
||||
else
|
||||
{
|
||||
classifier.detectMultiScale(intensity, face_detections, 1.2, 2, 0, cv::Size(min_width, min_width));
|
||||
}
|
||||
|
||||
// Convert from int bounding box do a double one with corrections
|
||||
for (size_t face = 0; face < face_detections.size(); ++face)
|
||||
{
|
||||
// OpenCV is overgenerous with face size and y location is off
|
||||
// CLNF detector expects the bounding box to encompass from eyebrow to chin in y, and from cheeck outline to cheeck outline in x, so we need to compensate
|
||||
|
||||
// The scalings were learned using the Face Detections on LFPW, Helen, AFW and iBUG datasets, using ground truth and detections from openCV
|
||||
cv::Rect_<float> region;
|
||||
// Correct for scale
|
||||
region.width = face_detections[face].width * 0.8924f;
|
||||
region.height = face_detections[face].height * 0.8676f;
|
||||
|
||||
// Move the face slightly to the right (as the width was made smaller)
|
||||
region.x = face_detections[face].x + 0.0578f * face_detections[face].width;
|
||||
// Shift face down as OpenCV Haar Cascade detects the forehead as well, and we're not interested
|
||||
region.y = face_detections[face].y + face_detections[face].height * 0.2166f;
|
||||
|
||||
if (min_width != -1)
|
||||
{
|
||||
if (region.width < min_width || region.x < ((float)intensity.cols) * roi.x || region.y < ((float)intensity.cols) * roi.y || region.x + region.width >((float)intensity.cols) * (roi.x + roi.width) || region.y + region.height >((float)intensity.rows) * (roi.y + roi.height))
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
o_regions.push_back(region);
|
||||
}
|
||||
return o_regions.size() > 0;
|
||||
}
|
||||
|
||||
bool DetectSingleFace(cv::Rect_<float>& o_region, const cv::Mat_<uchar>& intensity_image, cv::CascadeClassifier& classifier, cv::Point preference, float min_width, cv::Rect_<float> roi)
|
||||
{
|
||||
// The tracker can return multiple faces
|
||||
std::vector<cv::Rect_<float> > face_detections;
|
||||
|
||||
bool detect_success = LandmarkDetector::DetectFaces(face_detections, intensity_image, classifier, min_width, roi);
|
||||
|
||||
if (detect_success)
|
||||
{
|
||||
|
||||
bool use_preferred = (preference.x != -1) && (preference.y != -1);
|
||||
|
||||
if (face_detections.size() > 1)
|
||||
{
|
||||
// keep the closest one if preference point not set
|
||||
float best = -1;
|
||||
int bestIndex = -1;
|
||||
for (size_t i = 0; i < face_detections.size(); ++i)
|
||||
{
|
||||
float dist;
|
||||
bool better;
|
||||
|
||||
if (use_preferred)
|
||||
{
|
||||
dist = sqrt((preference.x) * (face_detections[i].width / 2 + face_detections[i].x) +
|
||||
(preference.y) * (face_detections[i].height / 2 + face_detections[i].y));
|
||||
better = dist < best;
|
||||
}
|
||||
else
|
||||
{
|
||||
dist = face_detections[i].width;
|
||||
better = face_detections[i].width > best;
|
||||
}
|
||||
|
||||
// Pick a closest face to preffered point or the biggest face
|
||||
if (i == 0 || better)
|
||||
{
|
||||
bestIndex = i;
|
||||
best = dist;
|
||||
}
|
||||
}
|
||||
|
||||
o_region = face_detections[bestIndex];
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
o_region = face_detections[0];
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
// if not detected
|
||||
o_region = cv::Rect_<float>(0, 0, 0, 0);
|
||||
}
|
||||
return detect_success;
|
||||
}
|
||||
|
||||
bool DetectFacesHOG(std::vector<cv::Rect_<float> >& o_regions, const cv::Mat_<uchar>& intensity,
|
||||
std::vector<float>& confidences, float min_width, cv::Rect_<float> roi)
|
||||
{
|
||||
dlib::frontal_face_detector detector = dlib::get_frontal_face_detector();
|
||||
|
||||
return DetectFacesHOG(o_regions, intensity, detector, confidences, min_width, roi);
|
||||
|
||||
}
|
||||
|
||||
bool DetectFacesHOG(std::vector<cv::Rect_<float> >& o_regions, const cv::Mat_<uchar>& intensity,
|
||||
dlib::frontal_face_detector& detector, std::vector<float>& o_confidences, float min_width, cv::Rect_<float> roi)
|
||||
{
|
||||
if (detector.num_detectors() == 0)
|
||||
{
|
||||
detector = dlib::get_frontal_face_detector();
|
||||
}
|
||||
|
||||
cv::Mat_<uchar> upsampled_intensity;
|
||||
|
||||
float scaling = 1.3f;
|
||||
|
||||
cv::resize(intensity, upsampled_intensity, cv::Size((int)(intensity.cols * scaling), (int)(intensity.rows * scaling)));
|
||||
|
||||
dlib::cv_image<uchar> cv_grayscale(upsampled_intensity);
|
||||
|
||||
std::vector<dlib::full_detection> face_detections;
|
||||
detector(cv_grayscale, face_detections, -0.2);
|
||||
|
||||
// Convert from int bounding box do a double one with corrections
|
||||
for (size_t face = 0; face < face_detections.size(); ++face)
|
||||
{
|
||||
// CLNF expects the bounding box to encompass from eyebrow to chin in y, and from cheeck outline to cheeck outline in x, so we need to compensate
|
||||
|
||||
cv::Rect_<float> region;
|
||||
// Move the face slightly to the right (as the width was made smaller)
|
||||
region.x = (face_detections[face].rect.get_rect().tl_corner().x() + 0.0389f * face_detections[face].rect.get_rect().width()) / scaling;
|
||||
// Shift face down as OpenCV Haar Cascade detects the forehead as well, and we're not interested
|
||||
region.y = (face_detections[face].rect.get_rect().tl_corner().y() + 0.1278f * face_detections[face].rect.get_rect().height()) / scaling;
|
||||
|
||||
// Correct for scale
|
||||
region.width = (face_detections[face].rect.get_rect().width() * 0.9611) / scaling;
|
||||
region.height = (face_detections[face].rect.get_rect().height() * 0.9388) / scaling;
|
||||
|
||||
// The scalings were learned using the Face Detections on LFPW and Helen using ground truth and detections from the HOG detector
|
||||
if (min_width != -1)
|
||||
{
|
||||
if (region.width < min_width || region.x < ((float)intensity.cols) * roi.x || region.y < ((float)intensity.cols) * roi.y ||
|
||||
region.x + region.width >((float)intensity.cols) * (roi.x + roi.width) || region.y + region.height >((float)intensity.rows) * (roi.y + roi.height))
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
o_regions.push_back(region);
|
||||
o_confidences.push_back(face_detections[face].detection_confidence);
|
||||
|
||||
|
||||
}
|
||||
return o_regions.size() > 0;
|
||||
}
|
||||
|
||||
bool DetectSingleFaceHOG(cv::Rect_<float>& o_region, const cv::Mat_<uchar>& intensity_img, dlib::frontal_face_detector& detector, float& confidence, cv::Point preference, float min_width, cv::Rect_<float> roi)
|
||||
{
|
||||
|
||||
if (detector.num_detectors() == 0)
|
||||
{
|
||||
detector = dlib::get_frontal_face_detector();
|
||||
}
|
||||
|
||||
// The tracker can return multiple faces
|
||||
std::vector<cv::Rect_<float> > face_detections;
|
||||
std::vector<float> confidences;
|
||||
bool detect_success = LandmarkDetector::DetectFacesHOG(face_detections, intensity_img, detector, confidences, min_width, roi);
|
||||
|
||||
// In case of multiple faces pick the biggest one
|
||||
bool use_size = true;
|
||||
|
||||
if (detect_success)
|
||||
{
|
||||
|
||||
bool use_preferred = (preference.x != -1) && (preference.y != -1);
|
||||
|
||||
// keep the most confident one or the one closest to preference point if set
|
||||
float best_so_far;
|
||||
if (use_preferred)
|
||||
{
|
||||
best_so_far = sqrt((preference.x - (face_detections[0].width / 2 + face_detections[0].x)) * (preference.x - (face_detections[0].width / 2 + face_detections[0].x)) +
|
||||
(preference.y - (face_detections[0].height / 2 + face_detections[0].y)) * (preference.y - (face_detections[0].height / 2 + face_detections[0].y)));
|
||||
}
|
||||
else if (use_size)
|
||||
{
|
||||
best_so_far = (face_detections[0].width + face_detections[0].height) / 2.0;
|
||||
}
|
||||
else
|
||||
{
|
||||
best_so_far = confidences[0];
|
||||
}
|
||||
int bestIndex = 0;
|
||||
|
||||
for (size_t i = 1; i < face_detections.size(); ++i)
|
||||
{
|
||||
|
||||
float dist;
|
||||
bool better;
|
||||
|
||||
if (use_preferred)
|
||||
{
|
||||
dist = sqrt((preference.x - (face_detections[i].width / 2 + face_detections[i].x)) * (preference.x - (face_detections[i].width / 2 + face_detections[i].x)) +
|
||||
(preference.y - (face_detections[i].height / 2 + face_detections[i].y)) * (preference.y - (face_detections[i].height / 2 + face_detections[i].y)));
|
||||
|
||||
better = dist < best_so_far;
|
||||
}
|
||||
else if (use_size)
|
||||
{
|
||||
dist = (face_detections[i].width + face_detections[i].height) / 2.0;
|
||||
better = dist > best_so_far;
|
||||
}
|
||||
else
|
||||
{
|
||||
dist = confidences[i];
|
||||
better = dist > best_so_far;
|
||||
}
|
||||
|
||||
// Pick a closest face
|
||||
if (better)
|
||||
{
|
||||
best_so_far = dist;
|
||||
bestIndex = i;
|
||||
}
|
||||
}
|
||||
|
||||
o_region = face_detections[bestIndex];
|
||||
confidence = confidences[bestIndex];
|
||||
}
|
||||
else
|
||||
{
|
||||
// if not detected
|
||||
o_region = cv::Rect_<float>(0, 0, 0, 0);
|
||||
// A completely unreliable detection (shouldn't really matter what is returned here)
|
||||
confidence = -2;
|
||||
}
|
||||
return detect_success;
|
||||
}
|
||||
|
||||
bool DetectFacesMTCNN(std::vector<cv::Rect_<float> >& o_regions, const cv::Mat& image, LandmarkDetector::FaceDetectorMTCNN& detector,
|
||||
std::vector<float>& o_confidences)
|
||||
{
|
||||
detector.DetectFaces(o_regions, image, o_confidences);
|
||||
|
||||
return o_regions.size() > 0;
|
||||
}
|
||||
|
||||
bool DetectSingleFaceMTCNN(cv::Rect_<float>& o_region, const cv::Mat& image, LandmarkDetector::FaceDetectorMTCNN& detector,
|
||||
float& confidence, cv::Point preference)
|
||||
{
|
||||
// The tracker can return multiple faces
|
||||
std::vector<cv::Rect_<float> > face_detections;
|
||||
std::vector<float> confidences;
|
||||
|
||||
detector.DetectFaces(face_detections, image, confidences);
|
||||
|
||||
bool detect_success = face_detections.size() > 0;
|
||||
if (detect_success)
|
||||
{
|
||||
|
||||
bool use_preferred = (preference.x != -1) && (preference.y != -1);
|
||||
|
||||
// keep the most confident one or the one closest to preference point if set
|
||||
float best_so_far;
|
||||
if (use_preferred)
|
||||
{
|
||||
best_so_far = sqrt((preference.x - (face_detections[0].width / 2 + face_detections[0].x)) * (preference.x - (face_detections[0].width / 2 + face_detections[0].x)) +
|
||||
(preference.y - (face_detections[0].height / 2 + face_detections[0].y)) * (preference.y - (face_detections[0].height / 2 + face_detections[0].y)));
|
||||
}
|
||||
else
|
||||
{
|
||||
best_so_far = face_detections[0].width;
|
||||
}
|
||||
int bestIndex = 0;
|
||||
|
||||
for (size_t i = 1; i < face_detections.size(); ++i)
|
||||
{
|
||||
|
||||
float dist;
|
||||
bool better;
|
||||
|
||||
if (use_preferred)
|
||||
{
|
||||
dist = sqrt((preference.x - (face_detections[i].width / 2 + face_detections[i].x)) * (preference.x - (face_detections[i].width / 2 + face_detections[i].x)) +
|
||||
(preference.y - (face_detections[i].height / 2 + face_detections[i].y)) * (preference.y - (face_detections[i].height / 2 + face_detections[i].y)));
|
||||
better = dist < best_so_far;
|
||||
}
|
||||
else
|
||||
{
|
||||
dist = face_detections[i].width;
|
||||
better = dist > best_so_far;
|
||||
}
|
||||
|
||||
// Pick a closest face
|
||||
if (better)
|
||||
{
|
||||
best_so_far = dist;
|
||||
bestIndex = i;
|
||||
}
|
||||
}
|
||||
|
||||
o_region = face_detections[bestIndex];
|
||||
confidence = confidences[bestIndex];
|
||||
}
|
||||
else
|
||||
{
|
||||
// if not detected
|
||||
o_region = cv::Rect_<float>(0, 0, 0, 0);
|
||||
// A completely unreliable detection (shouldn't really matter what is returned here)
|
||||
confidence = -2;
|
||||
}
|
||||
return detect_success;
|
||||
}
|
||||
|
||||
|
||||
//============================================================================
|
||||
// Matrix reading functionality
|
||||
//============================================================================
|
||||
|
||||
// Reading in a matrix from a stream
|
||||
void ReadMat(std::ifstream& stream, cv::Mat &output_mat)
|
||||
{
|
||||
// Read in the number of rows, columns and the data type
|
||||
int row, col, type;
|
||||
|
||||
stream >> row >> col >> type;
|
||||
|
||||
output_mat = cv::Mat(row, col, type);
|
||||
|
||||
switch (output_mat.type())
|
||||
{
|
||||
case CV_64FC1:
|
||||
{
|
||||
cv::MatIterator_<double> begin_it = output_mat.begin<double>();
|
||||
cv::MatIterator_<double> end_it = output_mat.end<double>();
|
||||
|
||||
while (begin_it != end_it)
|
||||
{
|
||||
stream >> *begin_it++;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case CV_32FC1:
|
||||
{
|
||||
cv::MatIterator_<float> begin_it = output_mat.begin<float>();
|
||||
cv::MatIterator_<float> end_it = output_mat.end<float>();
|
||||
|
||||
while (begin_it != end_it)
|
||||
{
|
||||
stream >> *begin_it++;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case CV_32SC1:
|
||||
{
|
||||
cv::MatIterator_<int> begin_it = output_mat.begin<int>();
|
||||
cv::MatIterator_<int> end_it = output_mat.end<int>();
|
||||
while (begin_it != end_it)
|
||||
{
|
||||
stream >> *begin_it++;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case CV_8UC1:
|
||||
{
|
||||
cv::MatIterator_<uchar> begin_it = output_mat.begin<uchar>();
|
||||
cv::MatIterator_<uchar> end_it = output_mat.end<uchar>();
|
||||
while (begin_it != end_it)
|
||||
{
|
||||
stream >> *begin_it++;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
printf("ERROR(%s,%d) : Unsupported Matrix type %d!\n", __FILE__, __LINE__, output_mat.type()); abort();
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void ReadMatBin(std::ifstream& stream, cv::Mat &output_mat)
|
||||
{
|
||||
// Read in the number of rows, columns and the data type
|
||||
int row, col, type;
|
||||
|
||||
stream.read((char*)&row, 4);
|
||||
stream.read((char*)&col, 4);
|
||||
stream.read((char*)&type, 4);
|
||||
|
||||
output_mat = cv::Mat(row, col, type);
|
||||
int size = output_mat.rows * output_mat.cols * output_mat.elemSize();
|
||||
stream.read((char *)output_mat.data, size);
|
||||
|
||||
}
|
||||
|
||||
// Skipping lines that start with # (together with empty lines)
|
||||
void SkipComments(std::ifstream& stream)
|
||||
{
|
||||
while (stream.peek() == '#' || stream.peek() == '\n' || stream.peek() == ' ' || stream.peek() == '\r')
|
||||
{
|
||||
std::string skipped;
|
||||
std::getline(stream, skipped);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
515
pkg/OpenFace/lib/local/LandmarkDetector/src/PAW.cpp
Normal file
515
pkg/OpenFace/lib/local/LandmarkDetector/src/PAW.cpp
Normal file
@@ -0,0 +1,515 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
|
||||
// all rights reserved.
|
||||
//
|
||||
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
|
||||
//
|
||||
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
|
||||
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
|
||||
//
|
||||
// License can be found in OpenFace-license.txt
|
||||
//
|
||||
// * Any publications arising from the use of this software, including but
|
||||
// not limited to academic journal and conference publications, technical
|
||||
// reports and manuals, must cite at least one of the following works:
|
||||
//
|
||||
// OpenFace 2.0: Facial Behavior Analysis Toolkit
|
||||
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
|
||||
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
|
||||
//
|
||||
// Convolutional experts constrained local model for facial landmark detection.
|
||||
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
|
||||
// in Computer Vision and Pattern Recognition Workshops, 2017.
|
||||
//
|
||||
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
|
||||
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
|
||||
// in IEEE International. Conference on Computer Vision (ICCV), 2015
|
||||
//
|
||||
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
|
||||
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
|
||||
// in Facial Expression Recognition and Analysis Challenge,
|
||||
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "stdafx.h"
|
||||
|
||||
#include "PAW.h"
|
||||
|
||||
// OpenCV includes
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
#include "LandmarkDetectorUtils.h"
|
||||
|
||||
using namespace LandmarkDetector;
|
||||
|
||||
// Copy constructor
|
||||
PAW::PAW(const PAW& other) : destination_landmarks(other.destination_landmarks.clone()), source_landmarks(other.source_landmarks.clone()), triangulation(other.triangulation.clone()),
|
||||
triangle_id(other.triangle_id.clone()), pixel_mask(other.pixel_mask.clone()), coefficients(other.coefficients.clone()), alpha(other.alpha.clone()), beta(other.beta.clone()), map_x(other.map_x.clone()), map_y(other.map_y.clone())
|
||||
{
|
||||
this->number_of_pixels = other.number_of_pixels;
|
||||
this->min_x = other.min_x;
|
||||
this->min_y = other.min_y;
|
||||
}
|
||||
|
||||
// A constructor from destination shape and triangulation
|
||||
PAW::PAW(const cv::Mat_<float>& destination_shape, const cv::Mat_<int>& triangulation)
|
||||
{
|
||||
// Initialise some variables directly
|
||||
this->destination_landmarks = destination_shape;
|
||||
this->triangulation = triangulation;
|
||||
|
||||
int num_points = destination_shape.rows / 2;
|
||||
|
||||
int num_tris = triangulation.rows;
|
||||
|
||||
// Pre-compute the rest
|
||||
alpha = cv::Mat_<float>(num_tris, 3);
|
||||
beta = cv::Mat_<float>(num_tris, 3);
|
||||
|
||||
cv::Mat_<float> xs = destination_shape(cv::Rect(0, 0, 1, num_points));
|
||||
cv::Mat_<float> ys = destination_shape(cv::Rect(0, num_points, 1, num_points));
|
||||
|
||||
// Create a vector representation of the control points
|
||||
std::vector<std::vector<float>> destination_points;
|
||||
|
||||
for (int tri = 0; tri < num_tris; ++tri)
|
||||
{
|
||||
int j = triangulation.at<int>(tri, 0);
|
||||
int k = triangulation.at<int>(tri, 1);
|
||||
int l = triangulation.at<int>(tri, 2);
|
||||
|
||||
float c1 = ys.at<float>(l) - ys.at<float>(j);
|
||||
float c2 = xs.at<float>(l) - xs.at<float>(j);
|
||||
float c4 = ys.at<float>(k) - ys.at<float>(j);
|
||||
float c3 = xs.at<float>(k) - xs.at<float>(j);
|
||||
|
||||
float c5 = c3*c1 - c2*c4;
|
||||
|
||||
alpha.at<float>(tri, 0) = (ys.at<float>(j) * c2 - xs.at<float>(j) * c1) / c5;
|
||||
alpha.at<float>(tri, 1) = c1 / c5;
|
||||
alpha.at<float>(tri, 2) = -c2 / c5;
|
||||
|
||||
beta.at<float>(tri, 0) = (xs.at<float>(j) * c4 - ys.at<float>(j) * c3) / c5;
|
||||
beta.at<float>(tri, 1) = -c4 / c5;
|
||||
beta.at<float>(tri, 2) = c3 / c5;
|
||||
|
||||
// Add points corresponding to triangles as optimisation
|
||||
std::vector<float> triangle_points(10);
|
||||
|
||||
triangle_points[0] = xs.at<float>(j);
|
||||
triangle_points[1] = ys.at<float>(j);
|
||||
triangle_points[2] = xs.at<float>(k);
|
||||
triangle_points[3] = ys.at<float>(k);
|
||||
triangle_points[4] = xs.at<float>(l);
|
||||
triangle_points[5] = ys.at<float>(l);
|
||||
|
||||
cv::Vec3f xs_three(triangle_points[0], triangle_points[2], triangle_points[4]);
|
||||
cv::Vec3f ys_three(triangle_points[1], triangle_points[3], triangle_points[5]);
|
||||
|
||||
double min_x, max_x, min_y, max_y;
|
||||
cv::minMaxIdx(xs_three, &min_x, &max_x);
|
||||
cv::minMaxIdx(ys_three, &min_y, &max_y);
|
||||
|
||||
triangle_points[6] = (float)max_x;
|
||||
triangle_points[7] = (float)max_y;
|
||||
|
||||
triangle_points[8] = (float)min_x;
|
||||
triangle_points[9] = (float)min_y;
|
||||
|
||||
destination_points.push_back(triangle_points);
|
||||
|
||||
}
|
||||
|
||||
double max_x;
|
||||
double max_y;
|
||||
double min_x_d;
|
||||
double min_y_d;
|
||||
|
||||
minMaxLoc(xs, &min_x_d, &max_x);
|
||||
minMaxLoc(ys, &min_y_d, &max_y);
|
||||
|
||||
min_x = min_x_d;
|
||||
min_y = min_y_d;
|
||||
|
||||
int w = (int)(max_x - min_x + 1.5);
|
||||
int h = (int)(max_y - min_y + 1.5);
|
||||
|
||||
// Round the min_x and min_y for simplicity?
|
||||
|
||||
pixel_mask = cv::Mat_<uchar>(h, w, (uchar)0);
|
||||
triangle_id = cv::Mat_<int>(h, w, -1);
|
||||
|
||||
int curr_tri = -1;
|
||||
|
||||
for (int y = 0; y < pixel_mask.rows; y++)
|
||||
{
|
||||
for (int x = 0; x < pixel_mask.cols; x++)
|
||||
{
|
||||
curr_tri = findTriangle(cv::Point_<float>(x + min_x, y + min_y), destination_points, curr_tri);
|
||||
// If there is a triangle at this location
|
||||
if (curr_tri != -1)
|
||||
{
|
||||
triangle_id.at<int>(y, x) = curr_tri;
|
||||
pixel_mask.at<uchar>(y, x) = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Preallocate maps and coefficients
|
||||
coefficients.create(num_tris, 6);
|
||||
map_x.create(pixel_mask.rows, pixel_mask.cols);
|
||||
map_y.create(pixel_mask.rows, pixel_mask.cols);
|
||||
|
||||
|
||||
}
|
||||
|
||||
// Manually define min and max values
|
||||
PAW::PAW(const cv::Mat_<float>& destination_shape, const cv::Mat_<int>& triangulation, float in_min_x, float in_min_y, float in_max_x, float in_max_y)
|
||||
{
|
||||
// Initialise some variables directly
|
||||
this->destination_landmarks = destination_shape;
|
||||
this->triangulation = triangulation;
|
||||
|
||||
int num_points = destination_shape.rows / 2;
|
||||
|
||||
int num_tris = triangulation.rows;
|
||||
|
||||
// Pre-compute the rest
|
||||
alpha = cv::Mat_<float>(num_tris, 3);
|
||||
beta = cv::Mat_<float>(num_tris, 3);
|
||||
|
||||
cv::Mat_<float> xs = destination_shape(cv::Rect(0, 0, 1, num_points));
|
||||
cv::Mat_<float> ys = destination_shape(cv::Rect(0, num_points, 1, num_points));
|
||||
|
||||
// Create a vector representation of the control points
|
||||
std::vector<std::vector<float>> destination_points;
|
||||
|
||||
for (int tri = 0; tri < num_tris; ++tri)
|
||||
{
|
||||
int j = triangulation.at<int>(tri, 0);
|
||||
int k = triangulation.at<int>(tri, 1);
|
||||
int l = triangulation.at<int>(tri, 2);
|
||||
|
||||
float c1 = ys.at<float>(l) - ys.at<float>(j);
|
||||
float c2 = xs.at<float>(l) - xs.at<float>(j);
|
||||
float c4 = ys.at<float>(k) - ys.at<float>(j);
|
||||
float c3 = xs.at<float>(k) - xs.at<float>(j);
|
||||
|
||||
float c5 = c3*c1 - c2*c4;
|
||||
|
||||
alpha.at<float>(tri, 0) = (ys.at<float>(j) * c2 - xs.at<float>(j) * c1) / c5;
|
||||
alpha.at<float>(tri, 1) = c1 / c5;
|
||||
alpha.at<float>(tri, 2) = -c2 / c5;
|
||||
|
||||
beta.at<float>(tri, 0) = (xs.at<float>(j) * c4 - ys.at<float>(j) * c3) / c5;
|
||||
beta.at<float>(tri, 1) = -c4 / c5;
|
||||
beta.at<float>(tri, 2) = c3 / c5;
|
||||
|
||||
// Add points corresponding to triangles as optimisation
|
||||
std::vector<float> triangle_points(10);
|
||||
|
||||
triangle_points[0] = xs.at<float>(j);
|
||||
triangle_points[1] = ys.at<float>(j);
|
||||
triangle_points[2] = xs.at<float>(k);
|
||||
triangle_points[3] = ys.at<float>(k);
|
||||
triangle_points[4] = xs.at<float>(l);
|
||||
triangle_points[5] = ys.at<float>(l);
|
||||
|
||||
cv::Vec3f xs_three(triangle_points[0], triangle_points[2], triangle_points[4]);
|
||||
cv::Vec3f ys_three(triangle_points[1], triangle_points[3], triangle_points[5]);
|
||||
|
||||
double min_x, max_x, min_y, max_y;
|
||||
cv::minMaxIdx(xs_three, &min_x, &max_x);
|
||||
cv::minMaxIdx(ys_three, &min_y, &max_y);
|
||||
|
||||
triangle_points[6] = (float)max_x;
|
||||
triangle_points[7] = (float)max_y;
|
||||
|
||||
triangle_points[8] = (float)min_x;
|
||||
triangle_points[9] = (float)min_y;
|
||||
|
||||
destination_points.push_back(triangle_points);
|
||||
|
||||
}
|
||||
|
||||
float max_x;
|
||||
float max_y;
|
||||
|
||||
min_x = in_min_x;
|
||||
min_y = in_min_y;
|
||||
|
||||
max_x = in_max_x;
|
||||
max_y = in_max_y;
|
||||
|
||||
int w = (int)(max_x - min_x + 1.5);
|
||||
int h = (int)(max_y - min_y + 1.5);
|
||||
|
||||
// Round the min_x and min_y for simplicity?
|
||||
|
||||
pixel_mask = cv::Mat_<uchar>(h, w, (uchar)0);
|
||||
triangle_id = cv::Mat_<int>(h, w, -1);
|
||||
|
||||
int curr_tri = -1;
|
||||
|
||||
for (int y = 0; y < pixel_mask.rows; y++)
|
||||
{
|
||||
for (int x = 0; x < pixel_mask.cols; x++)
|
||||
{
|
||||
curr_tri = findTriangle(cv::Point_<float>(x + min_x, y + min_y), destination_points, curr_tri);
|
||||
// If there is a triangle at this location
|
||||
if (curr_tri != -1)
|
||||
{
|
||||
triangle_id.at<int>(y, x) = curr_tri;
|
||||
pixel_mask.at<uchar>(y, x) = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Preallocate maps and coefficients
|
||||
coefficients.create(num_tris, 6);
|
||||
map_x.create(pixel_mask.rows, pixel_mask.cols);
|
||||
map_y.create(pixel_mask.rows, pixel_mask.cols);
|
||||
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
void PAW::Read(std::ifstream& stream)
|
||||
{
|
||||
|
||||
stream.read((char*)&number_of_pixels, 4);
|
||||
double min_x_d, min_y_d;
|
||||
stream.read((char*)&min_x_d, 8);
|
||||
stream.read((char*)&min_y_d, 8);
|
||||
min_x = (float)min_x_d;
|
||||
min_y = (float)min_y_d;
|
||||
|
||||
cv::Mat_<double> destination_landmarks_d;
|
||||
ReadMatBin(stream, destination_landmarks_d);
|
||||
destination_landmarks_d.convertTo(destination_landmarks, CV_32F);
|
||||
|
||||
ReadMatBin(stream, triangulation);
|
||||
|
||||
ReadMatBin(stream, triangle_id);
|
||||
|
||||
cv::Mat tmpMask;
|
||||
ReadMatBin(stream, tmpMask);
|
||||
tmpMask.convertTo(pixel_mask, CV_8U);
|
||||
|
||||
cv::Mat_<double> alpha_d;
|
||||
ReadMatBin(stream, alpha_d);
|
||||
alpha_d.convertTo(alpha, CV_32F);
|
||||
|
||||
cv::Mat_<double> beta_d;
|
||||
ReadMatBin(stream, beta_d);
|
||||
beta_d.convertTo(beta, CV_32F);
|
||||
|
||||
map_x.create(pixel_mask.rows, pixel_mask.cols);
|
||||
map_y.create(pixel_mask.rows, pixel_mask.cols);
|
||||
|
||||
coefficients.create(this->NumberOfTriangles(), 6);
|
||||
|
||||
source_landmarks = destination_landmarks;
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// cropping from the source image to the destination image using the shape in s, used to determine if shape fitting converged successfully
|
||||
void PAW::Warp(const cv::Mat& image_to_warp, cv::Mat& destination_image, const cv::Mat_<float>& landmarks_to_warp)
|
||||
{
|
||||
|
||||
// set the current shape
|
||||
source_landmarks = landmarks_to_warp.clone();
|
||||
|
||||
// prepare the mapping coefficients using the current shape
|
||||
this->CalcCoeff();
|
||||
|
||||
// Do the actual mapping computation (where to warp from)
|
||||
this->WarpRegion(map_x, map_y);
|
||||
|
||||
// Do the actual warp (with bi-linear interpolation)
|
||||
remap(image_to_warp, destination_image, map_x, map_y, cv::INTER_LINEAR);
|
||||
|
||||
}
|
||||
|
||||
|
||||
//=============================================================================
|
||||
// Calculate the warping coefficients
|
||||
void PAW::CalcCoeff()
|
||||
{
|
||||
int p = this->NumberOfLandmarks();
|
||||
|
||||
for (int l = 0; l < this->NumberOfTriangles(); l++)
|
||||
{
|
||||
|
||||
int i = triangulation.at<int>(l, 0);
|
||||
int j = triangulation.at<int>(l, 1);
|
||||
int k = triangulation.at<int>(l, 2);
|
||||
|
||||
float c1 = source_landmarks.at<float>(i, 0);
|
||||
float c2 = source_landmarks.at<float>(j, 0) - c1;
|
||||
float c3 = source_landmarks.at<float>(k, 0) - c1;
|
||||
float c4 = source_landmarks.at<float>(i + p, 0);
|
||||
float c5 = source_landmarks.at<float>(j + p, 0) - c4;
|
||||
float c6 = source_landmarks.at<float>(k + p, 0) - c4;
|
||||
|
||||
// Get a pointer to the coefficient we will be precomputing
|
||||
float *coeff = coefficients.ptr<float>(l);
|
||||
|
||||
// Extract the relevant alphas and betas
|
||||
float *c_alpha = alpha.ptr<float>(l);
|
||||
float *c_beta = beta.ptr<float>(l);
|
||||
|
||||
coeff[0] = c1 + c2 * c_alpha[0] + c3 * c_beta[0];
|
||||
coeff[1] = c2 * c_alpha[1] + c3 * c_beta[1];
|
||||
coeff[2] = c2 * c_alpha[2] + c3 * c_beta[2];
|
||||
coeff[3] = c4 + c5 * c_alpha[0] + c6 * c_beta[0];
|
||||
coeff[4] = c5 * c_alpha[1] + c6 * c_beta[1];
|
||||
coeff[5] = c5 * c_alpha[2] + c6 * c_beta[2];
|
||||
}
|
||||
}
|
||||
|
||||
//======================================================================
|
||||
// Compute the mapping coefficients
|
||||
void PAW::WarpRegion(cv::Mat_<float>& mapx, cv::Mat_<float>& mapy)
|
||||
{
|
||||
|
||||
cv::MatIterator_<float> xp = mapx.begin();
|
||||
cv::MatIterator_<float> yp = mapy.begin();
|
||||
cv::MatIterator_<uchar> mp = pixel_mask.begin();
|
||||
cv::MatIterator_<int> tp = triangle_id.begin();
|
||||
|
||||
// The coefficients corresponding to the current triangle
|
||||
float * a;
|
||||
|
||||
// Current triangle being processed
|
||||
int k = -1;
|
||||
|
||||
for (int y = 0; y < pixel_mask.rows; y++)
|
||||
{
|
||||
float yi = float(y) + min_y;
|
||||
|
||||
for (int x = 0; x < pixel_mask.cols; x++)
|
||||
{
|
||||
float xi = float(x) + min_x;
|
||||
|
||||
if (*mp == 0)
|
||||
{
|
||||
*xp = -1;
|
||||
*yp = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
// triangle corresponding to the current pixel
|
||||
int j = *tp;
|
||||
|
||||
// If it is different from the previous triangle point to new coefficients
|
||||
// This will always be the case in the first iteration, hence a will not point to nothing
|
||||
if (j != k)
|
||||
{
|
||||
// Update the coefficient pointer if a new triangle is being processed
|
||||
a = coefficients.ptr<float>(j);
|
||||
k = j;
|
||||
}
|
||||
|
||||
//ap is now the pointer to the coefficients
|
||||
float *ap = a;
|
||||
|
||||
//look at the first coefficient (and increment). first coefficient is an x offset
|
||||
float xo = *ap++;
|
||||
//second coefficient is an x scale as a function of x
|
||||
xo += *ap++ * xi;
|
||||
//third coefficient ap(2) is an x scale as a function of y
|
||||
*xp = float(xo + *ap++ * yi);
|
||||
|
||||
//then fourth coefficient ap(3) is a y offset
|
||||
float yo = *ap++;
|
||||
//fifth coeff adds coeff[4]*x to y
|
||||
yo += *ap++ * xi;
|
||||
//final coeff adds coeff[5]*y to y
|
||||
*yp = float(yo + *ap++ * yi);
|
||||
|
||||
}
|
||||
mp++; tp++; xp++; yp++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// Helper functions to determine which point a triangle lies in
|
||||
// ============================================================
|
||||
|
||||
// Is the point (x0,y0) on same side as a half-plane defined by (x1,y1), (x2, y2), and (x3, y3)
|
||||
bool PAW::sameSide(float x0, float y0, float x1, float y1, float x2, float y2, float x3, float y3)
|
||||
{
|
||||
|
||||
float x = (x3 - x2)*(y0 - y2) - (x0 - x2)*(y3 - y2);
|
||||
float y = (x3 - x2)*(y1 - y2) - (x1 - x2)*(y3 - y2);
|
||||
|
||||
return x*y >= 0;
|
||||
|
||||
}
|
||||
|
||||
// if point (x0, y0) is on same side for all three half-planes it is in a triangle
|
||||
bool PAW::pointInTriangle(float x0, float y0, float x1, float y1, float x2, float y2, float x3, float y3)
|
||||
{
|
||||
bool same_1 = sameSide(x0, y0, x1, y1, x2, y2, x3, y3);
|
||||
bool same_2 = sameSide(x0, y0, x2, y2, x1, y1, x3, y3);
|
||||
bool same_3 = sameSide(x0, y0, x3, y3, x1, y1, x2, y2);
|
||||
|
||||
return same_1 && same_2 && same_3;
|
||||
|
||||
}
|
||||
|
||||
// Find if a given point lies in the triangles
|
||||
int PAW::findTriangle(const cv::Point_<float>& point, const std::vector<std::vector<float>>& control_points, int guess)
|
||||
{
|
||||
|
||||
int num_tris = control_points.size();
|
||||
|
||||
int tri = -1;
|
||||
|
||||
float x0 = point.x;
|
||||
float y0 = point.y;
|
||||
|
||||
// Allow a guess for speed (so as not to go through all triangles)
|
||||
if (guess != -1)
|
||||
{
|
||||
|
||||
bool in_triangle = pointInTriangle(x0, y0, control_points[guess][0], control_points[guess][1], control_points[guess][2], control_points[guess][3], control_points[guess][4], control_points[guess][5]);
|
||||
if (in_triangle)
|
||||
{
|
||||
return guess;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (int i = 0; i < num_tris; ++i)
|
||||
{
|
||||
|
||||
float max_x = control_points[i][6];
|
||||
float max_y = control_points[i][7];
|
||||
|
||||
float min_x = control_points[i][8];
|
||||
float min_y = control_points[i][9];
|
||||
|
||||
// Skip the check if the point is outside the bounding box of the triangle
|
||||
|
||||
if (max_x < x0 || min_x > x0 || max_y < y0 || min_y > y0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
bool in_triangle = pointInTriangle(x0, y0,
|
||||
control_points[i][0], control_points[i][1],
|
||||
control_points[i][2], control_points[i][3],
|
||||
control_points[i][4], control_points[i][5]);
|
||||
|
||||
if (in_triangle)
|
||||
{
|
||||
tri = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return tri;
|
||||
}
|
||||
738
pkg/OpenFace/lib/local/LandmarkDetector/src/PDM.cpp
Normal file
738
pkg/OpenFace/lib/local/LandmarkDetector/src/PDM.cpp
Normal file
@@ -0,0 +1,738 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
|
||||
// all rights reserved.
|
||||
//
|
||||
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
|
||||
//
|
||||
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
|
||||
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
|
||||
//
|
||||
// License can be found in OpenFace-license.txt
|
||||
//
|
||||
// * Any publications arising from the use of this software, including but
|
||||
// not limited to academic journal and conference publications, technical
|
||||
// reports and manuals, must cite at least one of the following works:
|
||||
//
|
||||
// OpenFace 2.0: Facial Behavior Analysis Toolkit
|
||||
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
|
||||
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
|
||||
//
|
||||
// Convolutional experts constrained local model for facial landmark detection.
|
||||
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
|
||||
// in Computer Vision and Pattern Recognition Workshops, 2017.
|
||||
//
|
||||
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
|
||||
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
|
||||
// in IEEE International. Conference on Computer Vision (ICCV), 2015
|
||||
//
|
||||
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
|
||||
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
|
||||
// in Facial Expression Recognition and Analysis Challenge,
|
||||
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "stdafx.h"
|
||||
|
||||
#include <PDM.h>
|
||||
#include <RotationHelpers.h>
|
||||
|
||||
// OpenCV include
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
// Math includes
|
||||
#define _USE_MATH_DEFINES
|
||||
#include <cmath>
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.14159265358979323846
|
||||
#endif
|
||||
|
||||
#include <LandmarkDetectorUtils.h>
|
||||
|
||||
using namespace LandmarkDetector;
|
||||
//===========================================================================
|
||||
|
||||
//=============================================================================
|
||||
// Orthonormalising the 3x3 rotation matrix
|
||||
void PDM::Orthonormalise(cv::Matx33f &R)
|
||||
{
|
||||
|
||||
cv::SVD svd(R, cv::SVD::MODIFY_A);
|
||||
|
||||
// get the orthogonal matrix from the initial rotation matrix
|
||||
cv::Mat_ <float> X = svd.u*svd.vt;
|
||||
|
||||
// This makes sure that the handedness is preserved and no reflection happened
|
||||
// by making sure the determinant is 1 and not -1
|
||||
cv::Mat_<float> W = cv::Mat_<float>::eye(3,3);
|
||||
float d = determinant(X);
|
||||
W(2,2) = determinant(X);
|
||||
cv::Mat Rt = svd.u*W*svd.vt;
|
||||
|
||||
Rt.copyTo(R);
|
||||
|
||||
}
|
||||
|
||||
// A copy constructor
|
||||
PDM::PDM(const PDM& other) {
|
||||
|
||||
// Make sure the matrices are allocated properly
|
||||
this->mean_shape = other.mean_shape.clone();
|
||||
this->princ_comp = other.princ_comp.clone();
|
||||
this->eigen_values = other.eigen_values.clone();
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
// Clamping the parameter values to be within 3 standard deviations
|
||||
void PDM::Clamp(cv::Mat_<float>& local_params, cv::Vec6f& params_global, const FaceModelParameters& parameters)
|
||||
{
|
||||
float n_sigmas = 3;
|
||||
cv::MatConstIterator_<float> e_it = this->eigen_values.begin();
|
||||
cv::MatIterator_<float> p_it = local_params.begin();
|
||||
|
||||
float v;
|
||||
|
||||
// go over all parameters
|
||||
for(; p_it != local_params.end(); ++p_it, ++e_it)
|
||||
{
|
||||
// Work out the maximum value
|
||||
v = n_sigmas*sqrt(*e_it);
|
||||
|
||||
// if the values is too extreme clamp it
|
||||
if(fabs(*p_it) > v)
|
||||
{
|
||||
// Dealing with positive and negative cases
|
||||
if(*p_it > 0.0)
|
||||
{
|
||||
*p_it=v;
|
||||
}
|
||||
else
|
||||
{
|
||||
*p_it=-v;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// do not let the pose get out of hand
|
||||
//if(parameters.limit_pose)
|
||||
//{
|
||||
// if(params_global[1] > M_PI / 2)
|
||||
// params_global[1] = M_PI/2;
|
||||
// if(params_global[1] < -M_PI / 2)
|
||||
// params_global[1] = -M_PI/2;
|
||||
// if(params_global[2] > M_PI / 2)
|
||||
// params_global[2] = M_PI/2;
|
||||
// if(params_global[2] < -M_PI / 2)
|
||||
// params_global[2] = -M_PI/2;
|
||||
// if(params_global[3] > M_PI / 2)
|
||||
// params_global[3] = M_PI/2;
|
||||
// if(params_global[3] < -M_PI / 2)
|
||||
// params_global[3] = -M_PI/2;
|
||||
//}
|
||||
|
||||
|
||||
}
|
||||
//===========================================================================
|
||||
// Compute the 3D representation of shape (in object space) using the local parameters
|
||||
void PDM::CalcShape3D(cv::Mat_<float>& out_shape, const cv::Mat_<float>& p_local) const
|
||||
{
|
||||
out_shape = mean_shape.clone();
|
||||
|
||||
// Perform matrix vector multiplication in OpenBLAS (fortran call)
|
||||
float alpha1 = 1.0;
|
||||
float beta1 = 1.0;
|
||||
int p_local_cols = p_local.cols;
|
||||
int princ_comp_rows = princ_comp.rows;
|
||||
int princ_comp_cols = princ_comp.cols;
|
||||
char N[2]; N[0] = 'N';
|
||||
sgemm_(N, N, &p_local_cols, &princ_comp_rows, &princ_comp_cols, &alpha1, (float*)p_local.data, &p_local_cols, (float*)princ_comp.data, &princ_comp_cols, &beta1, (float*)out_shape.data, &p_local_cols);
|
||||
|
||||
// Above is a fast (but ugly) version of
|
||||
// out_shape = mean_shape + princ_comp * p_local;
|
||||
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
// Get the 2D shape (in image space) from global and local parameters
|
||||
void PDM::CalcShape2D(cv::Mat_<float>& out_shape, const cv::Mat_<float>& params_local, const cv::Vec6f& params_global) const
|
||||
{
|
||||
|
||||
int n = this->NumberOfPoints();
|
||||
|
||||
float s = params_global[0]; // scaling factor
|
||||
float tx = params_global[4]; // x offset
|
||||
float ty = params_global[5]; // y offset
|
||||
|
||||
// get the rotation matrix from the euler angles
|
||||
cv::Vec3f euler(params_global[1], params_global[2], params_global[3]);
|
||||
cv::Matx33f currRot = Utilities::Euler2RotationMatrix(euler);
|
||||
|
||||
// get the 3D shape of the object
|
||||
cv::Mat_<float> Shape_3D;
|
||||
this->CalcShape3D(Shape_3D, params_local);
|
||||
|
||||
// create the 2D shape matrix (if it has not been defined yet)
|
||||
if((out_shape.rows != 2 * mean_shape.rows / 3) || (out_shape.cols != 1))
|
||||
{
|
||||
out_shape.create(2*n,1);
|
||||
}
|
||||
// for every vertex
|
||||
for(int i = 0; i < n; i++)
|
||||
{
|
||||
// Transform this using the weak-perspective mapping to 2D from 3D
|
||||
out_shape.at<float>(i ,0) = s * ( currRot(0,0) * Shape_3D.at<float>(i, 0) + currRot(0,1) * Shape_3D.at<float>(i+n ,0) + currRot(0,2) * Shape_3D.at<float>(i+n*2,0) ) + tx;
|
||||
out_shape.at<float>(i+n,0) = s * ( currRot(1,0) * Shape_3D.at<float>(i, 0) + currRot(1,1) * Shape_3D.at<float>(i+n ,0) + currRot(1,2) * Shape_3D.at<float>(i+n*2,0) ) + ty;
|
||||
}
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
// provided the bounding box of a face and the local parameters (with optional rotation), generates the global parameters that can generate the face with the provided bounding box
|
||||
// This all assumes that the bounding box describes face from left outline to right outline of the face and chin to eyebrows
|
||||
void PDM::CalcParams(cv::Vec6f& out_params_global, const cv::Rect_<float>& bounding_box, const cv::Mat_<float>& params_local, const cv::Vec3f rotation)
|
||||
{
|
||||
|
||||
// get the shape instance based on local params
|
||||
cv::Mat_<float> current_shape(mean_shape.size());
|
||||
|
||||
CalcShape3D(current_shape, params_local);
|
||||
|
||||
// rotate the shape
|
||||
cv::Matx33f rotation_matrix = Utilities::Euler2RotationMatrix(rotation);
|
||||
|
||||
cv::Mat_<float> reshaped = current_shape.reshape(1, 3);
|
||||
|
||||
cv::Mat rotated_shape = (cv::Mat(rotation_matrix) * reshaped);
|
||||
|
||||
// Get the width of expected shape
|
||||
double min_x;
|
||||
double max_x;
|
||||
cv::minMaxLoc(rotated_shape.row(0), &min_x, &max_x);
|
||||
|
||||
double min_y;
|
||||
double max_y;
|
||||
cv::minMaxLoc(rotated_shape.row(1), &min_y, &max_y);
|
||||
|
||||
float width = (float) abs(min_x - max_x);
|
||||
float height = (float)abs(min_y - max_y);
|
||||
|
||||
float scaling = ((bounding_box.width / width) + (bounding_box.height / height)) / 2.0f;
|
||||
|
||||
// The estimate of face center also needs some correction
|
||||
float tx = bounding_box.x + bounding_box.width / 2;
|
||||
float ty = bounding_box.y + bounding_box.height / 2;
|
||||
|
||||
// Correct it so that the bounding box is just around the minimum and maximum point in the initialised face
|
||||
tx = tx - scaling * (min_x + max_x)/2.0f;
|
||||
ty = ty - scaling * (min_y + max_y)/2.0f;
|
||||
|
||||
out_params_global = cv::Vec6f(scaling, rotation[0], rotation[1], rotation[2], tx, ty);
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
// provided the model parameters, compute the bounding box of a face
|
||||
// The bounding box describes face from left outline to right outline of the face and chin to eyebrows
|
||||
void PDM::CalcBoundingBox(cv::Rect_<float>& out_bounding_box, const cv::Vec6f& params_global, const cv::Mat_<float>& params_local)
|
||||
{
|
||||
|
||||
// get the shape instance based on local params
|
||||
cv::Mat_<float> current_shape;
|
||||
CalcShape2D(current_shape, params_local, params_global);
|
||||
|
||||
// Get the width of expected shape
|
||||
float min_x, max_x, min_y, max_y;
|
||||
ExtractBoundingBox(current_shape, min_x, max_x, min_y, max_y);
|
||||
|
||||
float width = abs(min_x - max_x);
|
||||
float height = abs(min_y - max_y);
|
||||
|
||||
out_bounding_box = cv::Rect_<float>(min_x, min_y, width, height);
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
// Calculate the PDM's Jacobian over rigid parameters (rotation, translation and scaling), the additional input W represents trust for each of the landmarks and is part of Non-Uniform RLMS
|
||||
void PDM::ComputeRigidJacobian(const cv::Mat_<float>& p_local, const cv::Vec6f& params_global, cv::Mat_<float> &Jacob, const cv::Mat_<float> W, cv::Mat_<float> &Jacob_t_w)
|
||||
{
|
||||
|
||||
// number of verts
|
||||
int n = this->NumberOfPoints();
|
||||
|
||||
Jacob.create(n * 2, 6);
|
||||
|
||||
float X,Y,Z;
|
||||
|
||||
float s = params_global[0];
|
||||
|
||||
cv::Mat_<float> shape_3D;
|
||||
this->CalcShape3D(shape_3D, p_local);
|
||||
|
||||
// Get the rotation matrix
|
||||
cv::Vec3f euler(params_global[1], params_global[2], params_global[3]);
|
||||
cv::Matx33f currRot = Utilities::Euler2RotationMatrix(euler);
|
||||
|
||||
float r11 = currRot(0,0);
|
||||
float r12 = currRot(0,1);
|
||||
float r13 = currRot(0,2);
|
||||
float r21 = currRot(1,0);
|
||||
float r22 = currRot(1,1);
|
||||
float r23 = currRot(1,2);
|
||||
float r31 = currRot(2,0);
|
||||
float r32 = currRot(2,1);
|
||||
float r33 = currRot(2,2);
|
||||
|
||||
cv::MatIterator_<float> Jx = Jacob.begin();
|
||||
cv::MatIterator_<float> Jy = Jx + n * 6;
|
||||
|
||||
for(int i = 0; i < n; i++)
|
||||
{
|
||||
|
||||
X = shape_3D.at<float>(i, 0);
|
||||
Y = shape_3D.at<float>(i + n, 0);
|
||||
Z = shape_3D.at<float>(i + n * 2, 0);
|
||||
|
||||
// The rigid jacobian from the axis angle rotation matrix approximation using small angle assumption (R * R')
|
||||
// where R' = [1, -wz, wy
|
||||
// wz, 1, -wx
|
||||
// -wy, wx, 1]
|
||||
// And this is derived using the small angle assumption on the axis angle rotation matrix parametrisation
|
||||
|
||||
// scaling term
|
||||
*Jx++ = (X * r11 + Y * r12 + Z * r13);
|
||||
*Jy++ = (X * r21 + Y * r22 + Z * r23);
|
||||
|
||||
// rotation terms
|
||||
*Jx++ = (s * (Y * r13 - Z * r12) );
|
||||
*Jy++ = (s * (Y * r23 - Z * r22) );
|
||||
*Jx++ = (-s * (X * r13 - Z * r11));
|
||||
*Jy++ = (-s * (X * r23 - Z * r21));
|
||||
*Jx++ = (s * (X * r12 - Y * r11) );
|
||||
*Jy++ = (s * (X * r22 - Y * r21) );
|
||||
|
||||
// translation terms
|
||||
*Jx++ = 1.0f;
|
||||
*Jy++ = 0.0f;
|
||||
*Jx++ = 0.0f;
|
||||
*Jy++ = 1.0f;
|
||||
|
||||
}
|
||||
|
||||
cv::Mat Jacob_w = cv::Mat::zeros(Jacob.rows, Jacob.cols, Jacob.type());
|
||||
|
||||
Jx = Jacob.begin();
|
||||
Jy = Jx + n*6;
|
||||
|
||||
cv::MatIterator_<float> Jx_w = Jacob_w.begin<float>();
|
||||
cv::MatIterator_<float> Jy_w = Jx_w + n*6;
|
||||
|
||||
// Iterate over all Jacobian values and multiply them by the weight in diagonal of W
|
||||
for(int i = 0; i < n; i++)
|
||||
{
|
||||
float w_x = W.at<float>(i, i);
|
||||
float w_y = W.at<float>(i+n, i+n);
|
||||
|
||||
for(int j = 0; j < Jacob.cols; ++j)
|
||||
{
|
||||
*Jx_w++ = *Jx++ * w_x;
|
||||
*Jy_w++ = *Jy++ * w_y;
|
||||
}
|
||||
}
|
||||
|
||||
Jacob_t_w = Jacob_w.t();
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
// Calculate the PDM's Jacobian over all parameters (rigid and non-rigid), the additional input W represents trust for each of the landmarks and is part of Non-Uniform RLMS
|
||||
void PDM::ComputeJacobian(const cv::Mat_<float>& params_local, const cv::Vec6f& params_global, cv::Mat_<float> &Jacobian, const cv::Mat_<float> W, cv::Mat_<float> &Jacob_t_w)
|
||||
{
|
||||
|
||||
// number of vertices
|
||||
int n = this->NumberOfPoints();
|
||||
|
||||
// number of non-rigid parameters
|
||||
int m = this->NumberOfModes();
|
||||
|
||||
Jacobian.create(n * 2, 6 + m);
|
||||
|
||||
float X,Y,Z;
|
||||
|
||||
float s = params_global[0];
|
||||
|
||||
cv::Mat_<float> shape_3D;
|
||||
this->CalcShape3D(shape_3D, params_local);
|
||||
|
||||
cv::Vec3f euler(params_global[1], params_global[2], params_global[3]);
|
||||
cv::Matx33f currRot = Utilities::Euler2RotationMatrix(euler);
|
||||
|
||||
float r11 = currRot(0, 0);
|
||||
float r12 = currRot(0, 1);
|
||||
float r13 = currRot(0, 2);
|
||||
float r21 = currRot(1, 0);
|
||||
float r22 = currRot(1, 1);
|
||||
float r23 = currRot(1, 2);
|
||||
float r31 = currRot(2, 0);
|
||||
float r32 = currRot(2, 1);
|
||||
float r33 = currRot(2, 2);
|
||||
|
||||
cv::MatIterator_<float> Jx = Jacobian.begin();
|
||||
cv::MatIterator_<float> Jy = Jx + n * (6 + m);
|
||||
cv::MatConstIterator_<float> Vx = this->princ_comp.begin();
|
||||
cv::MatConstIterator_<float> Vy = Vx + n*m;
|
||||
cv::MatConstIterator_<float> Vz = Vy + n*m;
|
||||
|
||||
for(int i = 0; i < n; i++)
|
||||
{
|
||||
|
||||
X = shape_3D.at<float>(i, 0);
|
||||
Y = shape_3D.at<float>(i + n, 0);
|
||||
Z = shape_3D.at<float>(i + n * 2, 0);
|
||||
|
||||
// The rigid jacobian from the axis angle rotation matrix approximation using small angle assumption (R * R')
|
||||
// where R' = [1, -wz, wy
|
||||
// wz, 1, -wx
|
||||
// -wy, wx, 1]
|
||||
// And this is derived using the small angle assumption on the axis angle rotation matrix parametrisation
|
||||
|
||||
// scaling term
|
||||
*Jx++ = (X * r11 + Y * r12 + Z * r13);
|
||||
*Jy++ = (X * r21 + Y * r22 + Z * r23);
|
||||
|
||||
// rotation terms
|
||||
*Jx++ = (s * (Y * r13 - Z * r12) );
|
||||
*Jy++ = (s * (Y * r23 - Z * r22) );
|
||||
*Jx++ = (-s * (X * r13 - Z * r11));
|
||||
*Jy++ = (-s * (X * r23 - Z * r21));
|
||||
*Jx++ = (s * (X * r12 - Y * r11) );
|
||||
*Jy++ = (s * (X * r22 - Y * r21) );
|
||||
|
||||
// translation terms
|
||||
*Jx++ = 1.0f;
|
||||
*Jy++ = 0.0f;
|
||||
*Jx++ = 0.0f;
|
||||
*Jy++ = 1.0f;
|
||||
|
||||
for(int j = 0; j < m; j++,++Vx,++Vy,++Vz)
|
||||
{
|
||||
// How much the change of the non-rigid parameters (when object is rotated) affect 2D motion
|
||||
*Jx++ = ( s*(r11*(*Vx) + r12*(*Vy) + r13*(*Vz)) );
|
||||
*Jy++ = ( s*(r21*(*Vx) + r22*(*Vy) + r23*(*Vz)) );
|
||||
}
|
||||
}
|
||||
|
||||
// Adding the weights here
|
||||
if(cv::trace(W)[0] != W.rows)
|
||||
{
|
||||
cv::Mat Jacob_w = Jacobian.clone();
|
||||
Jx = Jacobian.begin();
|
||||
Jy = Jx + n*(6+m);
|
||||
|
||||
cv::MatIterator_<float> Jx_w = Jacob_w.begin<float>();
|
||||
cv::MatIterator_<float> Jy_w = Jx_w + n*(6+m);
|
||||
|
||||
// Iterate over all Jacobian values and multiply them by the weight in diagonal of W
|
||||
for(int i = 0; i < n; i++)
|
||||
{
|
||||
float w_x = W.at<float>(i, i);
|
||||
float w_y = W.at<float>(i+n, i+n);
|
||||
|
||||
for(int j = 0; j < Jacobian.cols; ++j)
|
||||
{
|
||||
*Jx_w++ = *Jx++ * w_x;
|
||||
*Jy_w++ = *Jy++ * w_y;
|
||||
}
|
||||
}
|
||||
Jacob_t_w = Jacob_w.t();
|
||||
}
|
||||
else
|
||||
{
|
||||
Jacob_t_w = Jacobian.t();
|
||||
}
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
// Updating the parameters (more details in my thesis)
|
||||
void PDM::UpdateModelParameters(const cv::Mat_<float>& delta_p, cv::Mat_<float>& params_local, cv::Vec6f& params_global)
|
||||
{
|
||||
|
||||
// The scaling and translation parameters can be just added
|
||||
params_global[0] += delta_p.at<float>(0,0);
|
||||
params_global[4] += delta_p.at<float>(4,0);
|
||||
params_global[5] += delta_p.at<float>(5,0);
|
||||
|
||||
// get the original rotation matrix
|
||||
cv::Vec3f eulerGlobal(params_global[1], params_global[2], params_global[3]);
|
||||
|
||||
cv::Matx33f R1 = Utilities::Euler2RotationMatrix(eulerGlobal);
|
||||
|
||||
// construct R' = [1, -wz, wy
|
||||
// wz, 1, -wx
|
||||
// -wy, wx, 1]
|
||||
cv::Matx33f R2 = cv::Matx33f::eye();
|
||||
|
||||
R2(1,2) = -1.0*(R2(2,1) = delta_p.at<float>(1,0));
|
||||
R2(2,0) = -1.0*(R2(0,2) = delta_p.at<float>(2,0));
|
||||
R2(0,1) = -1.0*(R2(1,0) = delta_p.at<float>(3,0));
|
||||
|
||||
// Make sure it's orthonormal
|
||||
Orthonormalise(R2);
|
||||
|
||||
// Combine rotations
|
||||
cv::Matx33f R3 = R1 *R2;
|
||||
|
||||
// Extract euler angle (through axis angle first to make sure it's legal)
|
||||
cv::Vec3f axis_angle = Utilities::RotationMatrix2AxisAngle(R3);
|
||||
|
||||
cv::Vec3f euler = Utilities::AxisAngle2Euler(axis_angle);
|
||||
|
||||
// Temporary fix to numerical instability
|
||||
if (std::isnan(euler[0]) || std::isnan(euler[1]) || std::isnan(euler[2]))
|
||||
{
|
||||
euler[0] = 0;
|
||||
euler[1] = 0;
|
||||
euler[2] = 0;
|
||||
|
||||
}
|
||||
|
||||
params_global[1] = euler[0];
|
||||
params_global[2] = euler[1];
|
||||
params_global[3] = euler[2];
|
||||
|
||||
// Local parameter update, just simple addition
|
||||
if(delta_p.rows > 6)
|
||||
{
|
||||
params_local = params_local + delta_p(cv::Rect(0,6,1, this->NumberOfModes()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void PDM::CalcParams(cv::Vec6f& out_params_global, cv::Mat_<float>& out_params_local, const cv::Mat_<float> & landmark_locations, const cv::Vec3f rotation)
|
||||
{
|
||||
|
||||
int m = this->NumberOfModes();
|
||||
int n = this->NumberOfPoints();
|
||||
|
||||
cv::Mat_<int> visi_ind_2D(n * 2, 1, 1);
|
||||
cv::Mat_<int> visi_ind_3D(3 * n , 1, 1);
|
||||
|
||||
int visi_count = n;
|
||||
|
||||
for(int i = 0; i < n; ++i)
|
||||
{
|
||||
// If the landmark is invisible indicate this
|
||||
if(landmark_locations.at<float>(i) == 0)
|
||||
{
|
||||
visi_ind_2D.at<int>(i) = 0;
|
||||
visi_ind_2D.at<int>(i+n) = 0;
|
||||
visi_ind_3D.at<int>(i) = 0;
|
||||
visi_ind_3D.at<int>(i+n) = 0;
|
||||
visi_ind_3D.at<int>(i+2*n) = 0;
|
||||
|
||||
visi_count--;
|
||||
}
|
||||
}
|
||||
|
||||
// As not all landmarks might be visible, subsample the Mean and principal component matrices
|
||||
cv::Mat_<float> M(visi_count * 3, mean_shape.cols, 0.0);
|
||||
cv::Mat_<float> V(visi_count * 3, princ_comp.cols, 0.0);
|
||||
visi_count = 0;
|
||||
for (int i = 0; i < n * 3; ++i)
|
||||
{
|
||||
if (visi_ind_3D.at<int>(i) == 1)
|
||||
{
|
||||
this->mean_shape.row(i).copyTo(M.row(visi_count));
|
||||
this->princ_comp.row(i).copyTo(V.row(visi_count));
|
||||
visi_count++;
|
||||
}
|
||||
}
|
||||
|
||||
cv::Mat_<float> m_old = this->mean_shape.clone();
|
||||
cv::Mat_<float> v_old = this->princ_comp.clone();
|
||||
|
||||
this->mean_shape = M;
|
||||
this->princ_comp = V;
|
||||
|
||||
// The new number of points
|
||||
n = M.rows / 3;
|
||||
|
||||
// Extract the relevant landmark locations
|
||||
cv::Mat_<float> landmark_locs_vis(n*2, 1, 0.0f);
|
||||
int k = 0;
|
||||
for(int i = 0; i < visi_ind_2D.rows; ++i)
|
||||
{
|
||||
if(visi_ind_2D.at<int>(i) == 1)
|
||||
{
|
||||
landmark_locs_vis.at<float>(k) = landmark_locations.at<float>(i);
|
||||
k++;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the initial global parameters
|
||||
float min_x, max_x, min_y, max_y;
|
||||
ExtractBoundingBox(landmark_locs_vis, min_x, max_x, min_y, max_y);
|
||||
|
||||
float width = abs(min_x - max_x);
|
||||
float height = abs(min_y - max_y);
|
||||
|
||||
cv::Rect_<float> model_bbox;
|
||||
CalcBoundingBox(model_bbox, cv::Vec6f(1.0, 0.0, 0.0, 0.0, 0.0, 0.0), cv::Mat_<float>(this->NumberOfModes(), 1, 0.0));
|
||||
|
||||
cv::Rect_<float> bbox(min_x, min_y, width, height);
|
||||
|
||||
float scaling = ((width / model_bbox.width) + (height / model_bbox.height)) / 2.0f;
|
||||
|
||||
cv::Vec3f rotation_init = rotation;
|
||||
cv::Matx33f R = Utilities::Euler2RotationMatrix(rotation_init);
|
||||
cv::Vec2f translation((min_x + max_x) / 2.0f, (min_y + max_y) / 2.0f);
|
||||
|
||||
cv::Mat_<float> loc_params(this->NumberOfModes(),1, 0.0);
|
||||
cv::Vec6f glob_params(scaling, rotation_init[0], rotation_init[1], rotation_init[2], translation[0], translation[1]);
|
||||
|
||||
// get the 3D shape of the object
|
||||
cv::Mat_<float> shape_3D = M + V * loc_params;
|
||||
|
||||
cv::Mat_<float> curr_shape(2*n, 1);
|
||||
|
||||
// for every vertex
|
||||
for(int i = 0; i < n; i++)
|
||||
{
|
||||
// Transform this using the weak-perspective mapping to 2D from 3D
|
||||
curr_shape.at<float>(i ,0) = scaling * ( R(0,0) * shape_3D.at<float>(i, 0) + R(0,1) * shape_3D.at<float>(i+n ,0) + R(0,2) * shape_3D.at<float>(i+n*2,0) ) + translation[0];
|
||||
curr_shape.at<float>(i+n,0) = scaling * ( R(1,0) * shape_3D.at<float>(i, 0) + R(1,1) * shape_3D.at<float>(i+n ,0) + R(1,2) * shape_3D.at<float>(i+n*2,0) ) + translation[1];
|
||||
}
|
||||
|
||||
float currError = cv::norm(curr_shape - landmark_locs_vis);
|
||||
|
||||
cv::Mat_<float> regularisations = cv::Mat_<float>::zeros(1, 6 + m);
|
||||
|
||||
float reg_factor = 1;
|
||||
|
||||
// Setting the regularisation to the inverse of eigenvalues
|
||||
cv::Mat(reg_factor / this->eigen_values).copyTo(regularisations(cv::Rect(6, 0, m, 1)));
|
||||
regularisations = cv::Mat::diag(regularisations.t());
|
||||
|
||||
cv::Mat_<float> WeightMatrix = cv::Mat_<float>::eye(n*2, n*2);
|
||||
|
||||
int not_improved_in = 0;
|
||||
|
||||
for (size_t i = 0; i < 1000; ++i)
|
||||
{
|
||||
// get the 3D shape of the object
|
||||
shape_3D = M + V * loc_params;
|
||||
|
||||
shape_3D = shape_3D.reshape(1, 3);
|
||||
|
||||
cv::Matx23f R_2D(R(0,0), R(0,1), R(0,2), R(1,0), R(1,1), R(1,2));
|
||||
|
||||
cv::Mat_<float> curr_shape_2D = scaling * shape_3D.t() * cv::Mat(R_2D).t();
|
||||
curr_shape_2D.col(0) = curr_shape_2D.col(0) + translation(0);
|
||||
curr_shape_2D.col(1) = curr_shape_2D.col(1) + translation(1);
|
||||
|
||||
curr_shape_2D = cv::Mat(curr_shape_2D.t()).reshape(1, n * 2);
|
||||
|
||||
cv::Mat_<float> error_resid;
|
||||
cv::Mat(landmark_locs_vis - curr_shape_2D).convertTo(error_resid, CV_32F);
|
||||
|
||||
cv::Mat_<float> J, J_w_t;
|
||||
this->ComputeJacobian(loc_params, glob_params, J, WeightMatrix, J_w_t);
|
||||
|
||||
// projection of the meanshifts onto the jacobians (using the weighted Jacobian, see Baltrusaitis 2013)
|
||||
cv::Mat_<float> J_w_t_m = J_w_t * error_resid;
|
||||
|
||||
// Add the regularisation term
|
||||
J_w_t_m(cv::Rect(0,6,1, m)) = J_w_t_m(cv::Rect(0,6,1, m)) - regularisations(cv::Rect(6,6, m, m)) * loc_params;
|
||||
|
||||
cv::Mat_<float> Hessian = regularisations.clone();
|
||||
|
||||
// Perform matrix multiplication in OpenBLAS (fortran call)
|
||||
float alpha1 = 1.0;
|
||||
float beta1 = 1.0;
|
||||
char N[2]; N[0] = 'N';
|
||||
sgemm_(N, N, &J.cols, &J_w_t.rows, &J_w_t.cols, &alpha1, (float*)J.data, &J.cols, (float*)J_w_t.data, &J_w_t.cols, &beta1, (float*)Hessian.data, &J.cols);
|
||||
|
||||
// Above is a fast (but ugly) version of
|
||||
// cv::Mat_<float> Hessian2 = J_w_t * J + regularisations;
|
||||
|
||||
// Solve for the parameter update (from Baltrusaitis 2013 based on eq (36) Saragih 2011)
|
||||
cv::Mat_<float> param_update;
|
||||
cv::solve(Hessian, J_w_t_m, param_update, cv::DECOMP_CHOLESKY);
|
||||
|
||||
// To not overshoot, have the gradient decent rate a bit smaller
|
||||
param_update = 0.75 * param_update;
|
||||
|
||||
UpdateModelParameters(param_update, loc_params, glob_params);
|
||||
|
||||
scaling = glob_params[0];
|
||||
rotation_init[0] = glob_params[1];
|
||||
rotation_init[1] = glob_params[2];
|
||||
rotation_init[2] = glob_params[3];
|
||||
|
||||
translation[0] = glob_params[4];
|
||||
translation[1] = glob_params[5];
|
||||
|
||||
R = Utilities::Euler2RotationMatrix(rotation_init);
|
||||
|
||||
R_2D(0,0) = R(0,0);R_2D(0,1) = R(0,1); R_2D(0,2) = R(0,2);
|
||||
R_2D(1,0) = R(1,0);R_2D(1,1) = R(1,1); R_2D(1,2) = R(1,2);
|
||||
|
||||
curr_shape_2D = scaling * shape_3D.t() * cv::Mat(R_2D).t();
|
||||
curr_shape_2D.col(0) = curr_shape_2D.col(0) + translation(0);
|
||||
curr_shape_2D.col(1) = curr_shape_2D.col(1) + translation(1);
|
||||
|
||||
curr_shape_2D = cv::Mat(curr_shape_2D.t()).reshape(1, n * 2);
|
||||
|
||||
float error = cv::norm(curr_shape_2D - landmark_locs_vis);
|
||||
|
||||
if(0.999 * currError < error)
|
||||
{
|
||||
not_improved_in++;
|
||||
if (not_improved_in == 3)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
currError = error;
|
||||
|
||||
}
|
||||
|
||||
out_params_global = glob_params;
|
||||
out_params_local = loc_params;
|
||||
|
||||
this->mean_shape = m_old;
|
||||
this->princ_comp = v_old;
|
||||
|
||||
|
||||
}
|
||||
|
||||
bool PDM::Read(std::string location)
|
||||
{
|
||||
|
||||
std::ifstream pdmLoc(location, std::ios_base::in);
|
||||
if (!pdmLoc.is_open())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
LandmarkDetector::SkipComments(pdmLoc);
|
||||
|
||||
// Reading mean values
|
||||
cv::Mat_<double> mean_shape_d;
|
||||
LandmarkDetector::ReadMat(pdmLoc, mean_shape_d);
|
||||
mean_shape_d.convertTo(mean_shape, CV_32F); // Moving things to floats for speed
|
||||
|
||||
LandmarkDetector::SkipComments(pdmLoc);
|
||||
|
||||
// Reading principal components
|
||||
cv::Mat_<double> princ_comp_d;
|
||||
LandmarkDetector::ReadMat(pdmLoc, princ_comp_d);
|
||||
princ_comp_d.convertTo(princ_comp, CV_32F);
|
||||
|
||||
LandmarkDetector::SkipComments(pdmLoc);
|
||||
|
||||
// Reading eigenvalues
|
||||
cv::Mat_<double> eigen_values_d;
|
||||
LandmarkDetector::ReadMat(pdmLoc, eigen_values_d);
|
||||
eigen_values_d.convertTo(eigen_values, CV_32F);
|
||||
|
||||
return true;
|
||||
}
|
||||
696
pkg/OpenFace/lib/local/LandmarkDetector/src/Patch_experts.cpp
Normal file
696
pkg/OpenFace/lib/local/LandmarkDetector/src/Patch_experts.cpp
Normal file
@@ -0,0 +1,696 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
|
||||
// all rights reserved.
|
||||
//
|
||||
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
|
||||
//
|
||||
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
|
||||
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
|
||||
//
|
||||
// License can be found in OpenFace-license.txt
|
||||
//
|
||||
// * Any publications arising from the use of this software, including but
|
||||
// not limited to academic journal and conference publications, technical
|
||||
// reports and manuals, must cite at least one of the following works:
|
||||
//
|
||||
// OpenFace 2.0: Facial Behavior Analysis Toolkit
|
||||
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
|
||||
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
|
||||
//
|
||||
// Convolutional experts constrained local model for facial landmark detection.
|
||||
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
|
||||
// in Computer Vision and Pattern Recognition Workshops, 2017.
|
||||
//
|
||||
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
|
||||
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
|
||||
// in IEEE International. Conference on Computer Vision (ICCV), 2015
|
||||
//
|
||||
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
|
||||
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
|
||||
// in Facial Expression Recognition and Analysis Challenge,
|
||||
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "stdafx.h"
|
||||
|
||||
#include "Patch_experts.h"
|
||||
|
||||
#include "RotationHelpers.h"
|
||||
|
||||
// Math includes
|
||||
#define _USE_MATH_DEFINES
|
||||
#include <cmath>
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.14159265358979323846
|
||||
#endif
|
||||
|
||||
#include "LandmarkDetectorUtils.h"
|
||||
|
||||
using namespace LandmarkDetector;
|
||||
|
||||
// A copy constructor
|
||||
|
||||
Patch_experts::Patch_experts(const Patch_experts& other) : patch_scaling(other.patch_scaling), centers(other.centers), svr_expert_intensity(other.svr_expert_intensity),
|
||||
ccnf_expert_intensity(other.ccnf_expert_intensity), cen_expert_intensity(other.cen_expert_intensity),
|
||||
early_term_weights(other.early_term_weights), early_term_biases(other.early_term_biases), early_term_cutoffs(other.early_term_cutoffs),
|
||||
mirror_inds(other.mirror_inds),mirror_views(other.mirror_views)
|
||||
{
|
||||
|
||||
// Make sure the matrices are allocated properly
|
||||
this->sigma_components.resize(other.sigma_components.size());
|
||||
for (size_t i = 0; i < other.sigma_components.size(); ++i)
|
||||
{
|
||||
this->sigma_components[i].resize(other.sigma_components[i].size());
|
||||
|
||||
for (size_t j = 0; j < other.sigma_components[i].size(); ++j)
|
||||
{
|
||||
// Make sure the matrix is copied.
|
||||
this->sigma_components[i][j] = other.sigma_components[i][j].clone();
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure the matrices are allocated properly
|
||||
this->visibilities.resize(other.visibilities.size());
|
||||
for (size_t i = 0; i < other.visibilities.size(); ++i)
|
||||
{
|
||||
this->visibilities[i].resize(other.visibilities[i].size());
|
||||
|
||||
for (size_t j = 0; j < other.visibilities[i].size(); ++j)
|
||||
{
|
||||
// Make sure the matrix is copied.
|
||||
this->visibilities[i][j] = other.visibilities[i][j].clone();
|
||||
}
|
||||
}
|
||||
|
||||
preallocated_im2col.resize(other.preallocated_im2col.size());
|
||||
}
|
||||
|
||||
// Returns indices to landmarks that need to have patch responses computed (omits mirrored frontal landmarks for CEN as they will be computed together with their mirrored pair)
|
||||
std::vector<int> Patch_experts::Collect_visible_landmarks(std::vector<std::vector<cv::Mat_<int> > > visibilities, int scale, int view_id, int n)
|
||||
{
|
||||
std::vector<int> vis_lmk;
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
if (visibilities[scale][view_id].rows == n)
|
||||
{
|
||||
if (visibilities[scale][view_id].at<int>(i, 0) != 0)
|
||||
{
|
||||
// For CEN patch experts and frontal views skip the mirror indices
|
||||
if (!cen_expert_intensity.empty())
|
||||
{
|
||||
|
||||
// If frontal view we can do mirrored landmarks together
|
||||
if (view_id == 0)
|
||||
{
|
||||
// If the patch expert does not have values, means it's a mirrored version and will be done in another part of a loop
|
||||
if (!cen_expert_intensity[scale][view_id][i].biases.empty())
|
||||
{
|
||||
vis_lmk.push_back(i);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
vis_lmk.push_back(i);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
vis_lmk.push_back(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return vis_lmk;
|
||||
|
||||
}
|
||||
|
||||
// Returns the patch expert responses given a grayscale image.
|
||||
// Additionally returns the transform from the image coordinates to the response coordinates (and vice versa).
|
||||
// The computation also requires the current landmark locations to compute response around, the PDM corresponding to the desired model, and the parameters describing its instance
|
||||
// Also need to provide the size of the area of interest and the desired scale of analysis
|
||||
void Patch_experts::Response(std::vector<cv::Mat_<float> >& patch_expert_responses, cv::Matx22f& sim_ref_to_img,
|
||||
cv::Matx22f& sim_img_to_ref, const cv::Mat_<float>& grayscale_image, const PDM& pdm, const cv::Vec6f& params_global,
|
||||
const cv::Mat_<float>& params_local, int window_size, int scale)
|
||||
{
|
||||
|
||||
int view_id = GetViewIdx(params_global, scale);
|
||||
|
||||
int n = pdm.NumberOfPoints();
|
||||
|
||||
// Compute the current landmark locations (around which responses will be computed)
|
||||
cv::Mat_<float> landmark_locations;
|
||||
|
||||
pdm.CalcShape2D(landmark_locations, params_local, params_global);
|
||||
|
||||
cv::Mat_<float> reference_shape;
|
||||
|
||||
// Initialise the reference shape on which we'll be warping
|
||||
cv::Vec6f global_ref(patch_scaling[scale], 0, 0, 0, 0, 0);
|
||||
|
||||
// Compute the reference shape
|
||||
pdm.CalcShape2D(reference_shape, params_local, global_ref);
|
||||
|
||||
// similarity and inverse similarity transform to and from image and reference shape
|
||||
cv::Mat_<float> reference_shape_2D = (reference_shape.reshape(1, 2).t());
|
||||
cv::Mat_<float> image_shape_2D = landmark_locations.reshape(1, 2).t();
|
||||
|
||||
sim_img_to_ref = Utilities::AlignShapesWithScale(image_shape_2D, reference_shape_2D);
|
||||
sim_ref_to_img = sim_img_to_ref.inv(cv::DECOMP_LU);
|
||||
|
||||
float a1 = sim_ref_to_img(0, 0);
|
||||
float b1 = -sim_ref_to_img(0, 1);
|
||||
|
||||
bool use_ccnf = !this->ccnf_expert_intensity.empty();
|
||||
bool use_cen = !this->cen_expert_intensity.empty();
|
||||
|
||||
// If using CCNF patch experts might need to precalculate Sigmas
|
||||
if (use_ccnf)
|
||||
{
|
||||
std::vector<cv::Mat_<float> > sigma_components;
|
||||
|
||||
// Retrieve the correct sigma component size
|
||||
for (size_t w_size = 0; w_size < this->sigma_components.size(); ++w_size)
|
||||
{
|
||||
if (!this->sigma_components[w_size].empty())
|
||||
{
|
||||
if (window_size*window_size == this->sigma_components[w_size][0].rows)
|
||||
{
|
||||
sigma_components = this->sigma_components[w_size];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Go through all of the landmarks and compute the Sigma for each
|
||||
for (int lmark = 0; lmark < n; lmark++)
|
||||
{
|
||||
// Only for visible landmarks
|
||||
if (visibilities[scale][view_id].at<int>(lmark, 0))
|
||||
{
|
||||
// Precompute sigmas if they are not computed yet
|
||||
ccnf_expert_intensity[scale][view_id][lmark].ComputeSigmas(sigma_components, window_size);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// If using CEN precalculate interpolation matrix
|
||||
cv::Mat_<float> interp_mat;
|
||||
if (use_cen)
|
||||
{
|
||||
// Assuming the same size for all experts
|
||||
int support_region = 11;
|
||||
int area_of_interest_width = window_size + support_region - 1;
|
||||
int area_of_interest_height = window_size + support_region - 1;
|
||||
int resp_size = area_of_interest_height - support_region + 1;
|
||||
interpolationMatrix(interp_mat, resp_size, resp_size, area_of_interest_width, area_of_interest_height);
|
||||
}
|
||||
|
||||
// We do not want to create threads for invisible landmarks, so construct an index of visible ones
|
||||
std::vector<int> vis_lmk = Collect_visible_landmarks(visibilities, scale, view_id, n);
|
||||
|
||||
// calculate the patch responses for every landmark (this is the heavy lifting of landmark detection)
|
||||
parallel_for_(cv::Range(0, vis_lmk.size()), [&](const cv::Range& range) {
|
||||
for (int i = range.start; i < range.end; i++)
|
||||
{
|
||||
|
||||
// Work out how big the area of interest has to be to get a response of window size
|
||||
int area_of_interest_width;
|
||||
int area_of_interest_height;
|
||||
int ind = vis_lmk.at(i);
|
||||
|
||||
if (use_cen)
|
||||
{
|
||||
area_of_interest_width = window_size + cen_expert_intensity[scale][view_id][ind].width_support - 1;
|
||||
area_of_interest_height = window_size + cen_expert_intensity[scale][view_id][ind].height_support - 1;
|
||||
}
|
||||
else if (use_ccnf)
|
||||
{
|
||||
area_of_interest_width = window_size + ccnf_expert_intensity[scale][view_id][ind].width - 1;
|
||||
area_of_interest_height = window_size + ccnf_expert_intensity[scale][view_id][ind].height - 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
area_of_interest_width = window_size + svr_expert_intensity[scale][view_id][ind].width - 1;
|
||||
area_of_interest_height = window_size + svr_expert_intensity[scale][view_id][ind].height - 1;
|
||||
}
|
||||
|
||||
// scale and rotate to mean shape to reference frame
|
||||
cv::Mat sim = (cv::Mat_<float>(2, 3) << a1, -b1, landmark_locations.at<float>(ind, 0) - a1 * (area_of_interest_width - 1.0f) / 2.0f + b1 * (area_of_interest_width - 1.0f) / 2.0f, b1, a1, landmark_locations.at<float>(ind + n, 0) - a1 * (area_of_interest_width - 1.0f) / 2.0f - b1 * (area_of_interest_width - 1.0f) / 2.0f);
|
||||
|
||||
// Extract the region of interest around the current landmark location
|
||||
cv::Mat_<float> area_of_interest(area_of_interest_height, area_of_interest_width, 0.0f);
|
||||
|
||||
cv::warpAffine(grayscale_image, area_of_interest, sim, area_of_interest.size(), cv::WARP_INVERSE_MAP + cv::INTER_LINEAR);
|
||||
|
||||
// Get intensity response either from the SVR, CCNF, or CEN patch experts (prefer CEN as they are the most accurate so far)
|
||||
if (!cen_expert_intensity.empty())
|
||||
{
|
||||
|
||||
int im2col_size = (area_of_interest_width * area_of_interest_height - 1) / 2;
|
||||
|
||||
cv::Mat_<float> prealloc_mat = preallocated_im2col[ind][im2col_size];
|
||||
|
||||
// If frontal view we can do mirrored landmarks together
|
||||
if (view_id == 0)
|
||||
{
|
||||
// If the patch expert does not have values, means it's a mirrored version and will be done in another part of a loop
|
||||
if (!cen_expert_intensity[scale][view_id][ind].biases.empty())
|
||||
{
|
||||
// No mirrored expert, so do normally
|
||||
int mirror_id = mirror_inds.at<int>(ind);
|
||||
if (mirror_id == ind)
|
||||
{
|
||||
cv::Mat_<float> empty(0, 0, 0.0f);
|
||||
cen_expert_intensity[scale][view_id][ind].ResponseSparse(area_of_interest, empty, patch_expert_responses[ind], empty, interp_mat, prealloc_mat, empty);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
// Grab mirrored area of interest
|
||||
|
||||
// scale and rotate to mean shape to reference frame
|
||||
cv::Mat sim_r = (cv::Mat_<float>(2, 3) << a1, -b1, landmark_locations.at<float>(mirror_id, 0) - a1 * (area_of_interest_width - 1.0f) / 2.0f + b1 * (area_of_interest_width - 1.0f) / 2.0f, b1, a1, landmark_locations.at<float>(mirror_id + n, 0) - a1 * (area_of_interest_width - 1.0f) / 2.0f - b1 * (area_of_interest_width - 1.0f) / 2.0f);
|
||||
|
||||
// Extract the region of interest around the current landmark location
|
||||
cv::Mat_<float> area_of_interest_r(area_of_interest_height, area_of_interest_width, 0.0f);
|
||||
|
||||
cv::warpAffine(grayscale_image, area_of_interest_r, sim_r, area_of_interest_r.size(), cv::WARP_INVERSE_MAP + cv::INTER_LINEAR);
|
||||
|
||||
cv::Mat_<float> prealloc_mat_right = preallocated_im2col[mirror_id][im2col_size];
|
||||
|
||||
cen_expert_intensity[scale][view_id][ind].ResponseSparse(area_of_interest, area_of_interest_r, patch_expert_responses[ind], patch_expert_responses[mirror_id], interp_mat, prealloc_mat, prealloc_mat_right);
|
||||
|
||||
preallocated_im2col[mirror_id][im2col_size] = prealloc_mat_right;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// For space and memory saving use a mirrored patch expert
|
||||
if (!cen_expert_intensity[scale][view_id][ind].biases.empty())
|
||||
{
|
||||
cv::Mat_<float> empty(0, 0, 0.0f);
|
||||
cen_expert_intensity[scale][view_id][ind].ResponseSparse(area_of_interest, empty, patch_expert_responses[ind], empty, interp_mat, prealloc_mat, empty);
|
||||
|
||||
// A slower, but slightly more accurate version
|
||||
//cen_expert_intensity[scale][view_id][ind].Response(area_of_interest, patch_expert_responses[ind]);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat_<float> empty(0, 0, 0.0f);
|
||||
cen_expert_intensity[scale][mirror_views.at<int>(view_id)][mirror_inds.at<int>(ind)].ResponseSparse(empty, area_of_interest, empty, patch_expert_responses[ind], interp_mat, empty, prealloc_mat);
|
||||
}
|
||||
}
|
||||
|
||||
preallocated_im2col[ind][im2col_size] = prealloc_mat;
|
||||
|
||||
}
|
||||
else if (!ccnf_expert_intensity.empty())
|
||||
{
|
||||
// get the correct size response window
|
||||
patch_expert_responses[ind] = cv::Mat_<float>(window_size, window_size);
|
||||
|
||||
int im2col_size = area_of_interest_width * area_of_interest_height;
|
||||
|
||||
cv::Mat_<float> prealloc_mat = preallocated_im2col[ind][im2col_size];
|
||||
|
||||
ccnf_expert_intensity[scale][view_id][ind].ResponseOpenBlas(area_of_interest, patch_expert_responses[ind], prealloc_mat);
|
||||
|
||||
preallocated_im2col[ind][im2col_size] = prealloc_mat;
|
||||
|
||||
// Below is an alternative way to compute the same, but that uses FFT instead of OpenBLAS
|
||||
// ccnf_expert_intensity[scale][view_id][ind].Response(area_of_interest, patch_expert_responses[ind]);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
// get the correct size response window
|
||||
patch_expert_responses[ind] = cv::Mat_<float>(window_size, window_size);
|
||||
|
||||
svr_expert_intensity[scale][view_id][ind].Response(area_of_interest, patch_expert_responses[ind]);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
//=============================================================================
|
||||
// Getting the closest view center based on orientation
|
||||
int Patch_experts::GetViewIdx(const cv::Vec6f& params_global, int scale) const
|
||||
{
|
||||
int idx = 0;
|
||||
|
||||
float dbest;
|
||||
|
||||
for(int i = 0; i < this->nViews(scale); i++)
|
||||
{
|
||||
float v1 = params_global[1] - centers[scale][i][0];
|
||||
float v2 = params_global[2] - centers[scale][i][1];
|
||||
float v3 = params_global[3] - centers[scale][i][2];
|
||||
|
||||
float d = v1*v1 + v2*v2 + v3*v3;
|
||||
|
||||
if(i == 0 || d < dbest)
|
||||
{
|
||||
dbest = d;
|
||||
idx = i;
|
||||
}
|
||||
}
|
||||
return idx;
|
||||
}
|
||||
|
||||
|
||||
//===========================================================================
|
||||
bool Patch_experts::Read(std::vector<std::string> intensity_svr_expert_locations, std::vector<std::string> intensity_ccnf_expert_locations,
|
||||
std::vector<std::string> intensity_cen_expert_locations, std::string early_term_loc)
|
||||
{
|
||||
|
||||
// initialise the SVR intensity patch expert parameters
|
||||
int num_intensity_svr = intensity_svr_expert_locations.size();
|
||||
centers.resize(num_intensity_svr);
|
||||
visibilities.resize(num_intensity_svr);
|
||||
patch_scaling.resize(num_intensity_svr);
|
||||
|
||||
svr_expert_intensity.resize(num_intensity_svr);
|
||||
|
||||
// Reading in SVR intensity patch experts for each scales it is defined in
|
||||
for(int scale = 0; scale < num_intensity_svr; ++scale)
|
||||
{
|
||||
std::string location = intensity_svr_expert_locations[scale];
|
||||
std::cout << "Reading the intensity SVR patch experts from: " << location << "....";
|
||||
bool success_read = Read_SVR_patch_experts(location, centers[scale], visibilities[scale], svr_expert_intensity[scale], patch_scaling[scale]);
|
||||
if (!success_read)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Initialise and read CCNF patch experts (currently only intensity based),
|
||||
int num_intensity_ccnf = intensity_ccnf_expert_locations.size();
|
||||
|
||||
// CCNF experts override the SVR ones
|
||||
if(num_intensity_ccnf > 0)
|
||||
{
|
||||
centers.resize(num_intensity_ccnf);
|
||||
visibilities.resize(num_intensity_ccnf);
|
||||
patch_scaling.resize(num_intensity_ccnf);
|
||||
ccnf_expert_intensity.resize(num_intensity_ccnf);
|
||||
}
|
||||
|
||||
for(int scale = 0; scale < num_intensity_ccnf; ++scale)
|
||||
{
|
||||
std::string location = intensity_ccnf_expert_locations[scale];
|
||||
std::cout << "Reading the intensity CCNF patch experts from: " << location << "....";
|
||||
bool success_read = Read_CCNF_patch_experts(location, centers[scale], visibilities[scale], ccnf_expert_intensity[scale], patch_scaling[scale]);
|
||||
|
||||
if (!success_read)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (scale == 0)
|
||||
{
|
||||
preallocated_im2col.resize(ccnf_expert_intensity[0][0].size());
|
||||
}
|
||||
}
|
||||
|
||||
// Initialise and read CEN patch experts (currently only intensity based),
|
||||
int num_intensity_cen = intensity_cen_expert_locations.size();
|
||||
|
||||
// CEN experts override the SVR and CCNF ones
|
||||
if (num_intensity_cen > 0)
|
||||
{
|
||||
centers.resize(num_intensity_cen);
|
||||
visibilities.resize(num_intensity_cen);
|
||||
patch_scaling.resize(num_intensity_cen);
|
||||
cen_expert_intensity.resize(num_intensity_cen);
|
||||
}
|
||||
|
||||
for (int scale = 0; scale < num_intensity_cen; ++scale)
|
||||
{
|
||||
std::string location = intensity_cen_expert_locations[scale];
|
||||
std::cout << "Reading the intensity CEN patch experts from: " << location << "....";
|
||||
bool success_read = Read_CEN_patch_experts(location, centers[scale], visibilities[scale], cen_expert_intensity[scale], patch_scaling[scale]);
|
||||
if (!success_read)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (scale == 0)
|
||||
{
|
||||
preallocated_im2col.resize(cen_expert_intensity[0][0].size());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Reading in early termination parameters
|
||||
if (!early_term_loc.empty())
|
||||
{
|
||||
std::ifstream earlyTermFile(early_term_loc.c_str(), std::ios_base::in);
|
||||
|
||||
if (!earlyTermFile.is_open())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reading in weights/biases/cutoffs
|
||||
for (size_t i = 0; i < centers[0].size(); ++i)
|
||||
{
|
||||
double weight;
|
||||
earlyTermFile >> weight;
|
||||
early_term_weights.push_back(weight);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < centers[0].size(); ++i)
|
||||
{
|
||||
double bias;
|
||||
earlyTermFile >> bias;
|
||||
early_term_biases.push_back(bias);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < centers[0].size(); ++i)
|
||||
{
|
||||
double cutoff;
|
||||
earlyTermFile >> cutoff;
|
||||
early_term_cutoffs.push_back(cutoff);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
//======================= Reading the SVR patch experts =========================================//
|
||||
bool Patch_experts::Read_SVR_patch_experts(std::string expert_location, std::vector<cv::Vec3d>& centers,
|
||||
std::vector<cv::Mat_<int> >& visibility, std::vector<std::vector<Multi_SVR_patch_expert> >& patches, double& scale)
|
||||
{
|
||||
|
||||
std::ifstream patchesFile(expert_location.c_str(), std::ios_base::in);
|
||||
|
||||
if(patchesFile.is_open())
|
||||
{
|
||||
LandmarkDetector::SkipComments(patchesFile);
|
||||
|
||||
patchesFile >> scale;
|
||||
|
||||
LandmarkDetector::SkipComments(patchesFile);
|
||||
|
||||
int numberViews;
|
||||
|
||||
patchesFile >> numberViews;
|
||||
|
||||
// read the visibility
|
||||
centers.resize(numberViews);
|
||||
visibility.resize(numberViews);
|
||||
|
||||
patches.resize(numberViews);
|
||||
|
||||
LandmarkDetector::SkipComments(patchesFile);
|
||||
|
||||
// centers of each view (which view corresponds to which orientation)
|
||||
for(size_t i = 0; i < centers.size(); i++)
|
||||
{
|
||||
cv::Mat center;
|
||||
LandmarkDetector::ReadMat(patchesFile, center);
|
||||
center.copyTo(centers[i]);
|
||||
centers[i] = centers[i] * M_PI / 180.0;
|
||||
}
|
||||
|
||||
LandmarkDetector::SkipComments(patchesFile);
|
||||
|
||||
// the visibility of points for each of the views (which verts are visible at a specific view
|
||||
for(size_t i = 0; i < visibility.size(); i++)
|
||||
{
|
||||
LandmarkDetector::ReadMat(patchesFile, visibility[i]);
|
||||
}
|
||||
|
||||
int numberOfPoints = visibility[0].rows;
|
||||
|
||||
LandmarkDetector::SkipComments(patchesFile);
|
||||
|
||||
// read the patches themselves
|
||||
for(size_t i = 0; i < patches.size(); i++)
|
||||
{
|
||||
// number of patches for each view
|
||||
patches[i].resize(numberOfPoints);
|
||||
// read in each patch
|
||||
for(int j = 0; j < numberOfPoints; j++)
|
||||
{
|
||||
patches[i][j].Read(patchesFile);
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Done" << std::endl;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "Can't find/open the patches file" << std::endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
//======================= Reading the CCNF patch experts =========================================//
|
||||
bool Patch_experts::Read_CCNF_patch_experts(std::string patchesFileLocation, std::vector<cv::Vec3d>& centers,
|
||||
std::vector<cv::Mat_<int> >& visibility, std::vector<std::vector<CCNF_patch_expert> >& patches, double& patchScaling)
|
||||
{
|
||||
|
||||
std::ifstream patchesFile(patchesFileLocation.c_str(), std::ios::in | std::ios::binary);
|
||||
|
||||
if(patchesFile.is_open())
|
||||
{
|
||||
patchesFile.read ((char*)&patchScaling, 8);
|
||||
|
||||
int numberViews;
|
||||
patchesFile.read ((char*)&numberViews, 4);
|
||||
|
||||
// read the visibility
|
||||
centers.resize(numberViews);
|
||||
visibility.resize(numberViews);
|
||||
|
||||
patches.resize(numberViews);
|
||||
|
||||
// centers of each view (which view corresponds to which orientation)
|
||||
for(size_t i = 0; i < centers.size(); i++)
|
||||
{
|
||||
cv::Mat center;
|
||||
LandmarkDetector::ReadMatBin(patchesFile, center);
|
||||
center.copyTo(centers[i]);
|
||||
centers[i] = centers[i] * M_PI / 180.0;
|
||||
}
|
||||
|
||||
// the visibility of points for each of the views (which verts are visible at a specific view
|
||||
for(size_t i = 0; i < visibility.size(); i++)
|
||||
{
|
||||
LandmarkDetector::ReadMatBin(patchesFile, visibility[i]);
|
||||
}
|
||||
int numberOfPoints = visibility[0].rows;
|
||||
|
||||
// Read the possible SigmaInvs (without beta), this will be followed by patch reading (this assumes all of them have the same type, and number of betas)
|
||||
int num_win_sizes;
|
||||
int num_sigma_comp;
|
||||
patchesFile.read ((char*)&num_win_sizes, 4);
|
||||
|
||||
std::vector<int> windows;
|
||||
windows.resize(num_win_sizes);
|
||||
|
||||
std::vector<std::vector<cv::Mat_<float> > > sigma_components;
|
||||
sigma_components.resize(num_win_sizes);
|
||||
|
||||
for (int w=0; w < num_win_sizes; ++w)
|
||||
{
|
||||
patchesFile.read ((char*)&windows[w], 4);
|
||||
|
||||
patchesFile.read ((char*)&num_sigma_comp, 4);
|
||||
|
||||
sigma_components[w].resize(num_sigma_comp);
|
||||
|
||||
for(int s=0; s < num_sigma_comp; ++s)
|
||||
{
|
||||
LandmarkDetector::ReadMatBin(patchesFile, sigma_components[w][s]);
|
||||
}
|
||||
}
|
||||
|
||||
this->sigma_components = sigma_components;
|
||||
|
||||
// read the patches themselves
|
||||
for(size_t i = 0; i < patches.size(); i++)
|
||||
{
|
||||
// number of patches for each view
|
||||
patches[i].resize(numberOfPoints);
|
||||
// read in each patch
|
||||
for(int j = 0; j < numberOfPoints; j++)
|
||||
{
|
||||
patches[i][j].Read(patchesFile, windows, sigma_components);
|
||||
}
|
||||
}
|
||||
std::cout << "Done" << std::endl;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "Can't find/open the patches file" << std::endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
//======================= Reading the CEN patch experts =========================================//
|
||||
bool Patch_experts::Read_CEN_patch_experts(std::string expert_location, std::vector<cv::Vec3d>& centers,
|
||||
std::vector<cv::Mat_<int> >& visibility, std::vector<std::vector<CEN_patch_expert> >& patches, double& scale)
|
||||
{
|
||||
|
||||
std::ifstream patchesFile(expert_location.c_str(), std::ios::in | std::ios::binary);
|
||||
|
||||
if (patchesFile.is_open())
|
||||
{
|
||||
patchesFile.read((char*)&scale, 8);
|
||||
|
||||
int numberViews;
|
||||
patchesFile.read((char*)&numberViews, 4);
|
||||
|
||||
// read the visibility
|
||||
centers.resize(numberViews);
|
||||
visibility.resize(numberViews);
|
||||
|
||||
patches.resize(numberViews);
|
||||
|
||||
// centers of each view (which view corresponds to which orientation)
|
||||
for (size_t i = 0; i < centers.size(); i++)
|
||||
{
|
||||
cv::Mat center;
|
||||
LandmarkDetector::ReadMatBin(patchesFile, center);
|
||||
center.copyTo(centers[i]);
|
||||
centers[i] = centers[i] * M_PI / 180.0;
|
||||
}
|
||||
|
||||
// the visibility of points for each of the views (which verts are visible at a specific view
|
||||
for (size_t i = 0; i < visibility.size(); i++)
|
||||
{
|
||||
LandmarkDetector::ReadMatBin(patchesFile, visibility[i]);
|
||||
}
|
||||
int numberOfPoints = visibility[0].rows;
|
||||
|
||||
LandmarkDetector::ReadMatBin(patchesFile, mirror_inds);
|
||||
LandmarkDetector::ReadMatBin(patchesFile, mirror_views);
|
||||
|
||||
// read the patches themselves
|
||||
for (size_t i = 0; i < patches.size(); i++)
|
||||
{
|
||||
// number of patches for each view
|
||||
patches[i].resize(numberOfPoints);
|
||||
// read in each patch
|
||||
for (int j = 0; j < numberOfPoints; j++)
|
||||
{
|
||||
patches[i][j].Read(patchesFile);
|
||||
}
|
||||
}
|
||||
std::cout << "Done" << std::endl;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "Could not find CEN patch experts, for instructions of how to download them, see https://github.com/TadasBaltrusaitis/OpenFace/wiki/Model-download \n" << std::endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
337
pkg/OpenFace/lib/local/LandmarkDetector/src/SVR_patch_expert.cpp
Normal file
337
pkg/OpenFace/lib/local/LandmarkDetector/src/SVR_patch_expert.cpp
Normal file
@@ -0,0 +1,337 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
|
||||
// all rights reserved.
|
||||
//
|
||||
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
|
||||
//
|
||||
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
|
||||
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
|
||||
//
|
||||
// License can be found in OpenFace-license.txt
|
||||
//
|
||||
// * Any publications arising from the use of this software, including but
|
||||
// not limited to academic journal and conference publications, technical
|
||||
// reports and manuals, must cite at least one of the following works:
|
||||
//
|
||||
// OpenFace 2.0: Facial Behavior Analysis Toolkit
|
||||
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
|
||||
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
|
||||
//
|
||||
// Convolutional experts constrained local model for facial landmark detection.
|
||||
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
|
||||
// in Computer Vision and Pattern Recognition Workshops, 2017.
|
||||
//
|
||||
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
|
||||
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
|
||||
// in IEEE International. Conference on Computer Vision (ICCV), 2015
|
||||
//
|
||||
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
|
||||
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
|
||||
// in Facial Expression Recognition and Analysis Challenge,
|
||||
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "stdafx.h"
|
||||
|
||||
#include "SVR_patch_expert.h"
|
||||
|
||||
// OpenCV include
|
||||
#include <opencv2/core/core.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
|
||||
#include "LandmarkDetectorUtils.h"
|
||||
|
||||
using namespace LandmarkDetector;
|
||||
|
||||
//===========================================================================
|
||||
// Computing the image gradient
|
||||
void Grad(const cv::Mat& im, cv::Mat& grad)
|
||||
{
|
||||
|
||||
/*float filter[3] = {1, 0, -1};
|
||||
float dfilter[1] = {1};
|
||||
cv::Mat filterX = cv::Mat(1,3,CV_32F, filter).clone();
|
||||
cv::Mat filterY = cv::Mat(1,1,CV_32F, dfilter).clone();
|
||||
|
||||
cv::Mat gradX;
|
||||
cv::Mat gradY;
|
||||
cv::sepFilter2D(im, gradX, CV_32F, filterY, filterX, cv::Point(-1,-1), 0);
|
||||
cv::sepFilter2D(im, gradY, CV_32F, filterX.t(), filterY, cv::Point(-1,-1), 0);
|
||||
cv::pow(gradX,2, gradX);
|
||||
cv::pow(gradY,2, gradY);
|
||||
grad = gradX + gradY;
|
||||
|
||||
grad.row(0).setTo(0);
|
||||
grad.col(0).setTo(0);
|
||||
grad.col(grad.cols-1).setTo(0);
|
||||
grad.row(grad.rows-1).setTo(0); */
|
||||
|
||||
// A quicker alternative
|
||||
int x,y,h = im.rows,w = im.cols;
|
||||
float vx,vy;
|
||||
|
||||
// Initialise the gradient
|
||||
grad.create(im.size(), CV_32F);
|
||||
grad.setTo(0.0f);
|
||||
|
||||
cv::MatIterator_<float> gp = grad.begin<float>() + w+1;
|
||||
cv::MatConstIterator_<float> px1 = im.begin<float>() + w+2;
|
||||
cv::MatConstIterator_<float> px2 = im.begin<float>() + w;
|
||||
cv::MatConstIterator_<float> py1 = im.begin<float>() + 2*w+1;
|
||||
cv::MatConstIterator_<float> py2 = im.begin<float>() + 1;
|
||||
|
||||
for(y = 1; y < h-1; y++)
|
||||
{
|
||||
for(x = 1; x < w-1; x++)
|
||||
{
|
||||
vx = *px1++ - *px2++;
|
||||
vy = *py1++ - *py2++;
|
||||
*gp++ = vx*vx + vy*vy;
|
||||
}
|
||||
px1 += 2;
|
||||
px2 += 2;
|
||||
py1 += 2;
|
||||
py2 += 2;
|
||||
gp += 2;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// A copy constructor
|
||||
SVR_patch_expert::SVR_patch_expert(const SVR_patch_expert& other) : weights(other.weights.clone())
|
||||
{
|
||||
this->type = other.type;
|
||||
this->scaling = other.scaling;
|
||||
this->bias = other.bias;
|
||||
this->confidence = other.confidence;
|
||||
|
||||
for (std::map<int, cv::Mat_<double> >::const_iterator it = other.weights_dfts.begin(); it != other.weights_dfts.end(); it++)
|
||||
{
|
||||
// Make sure the matrix is copied.
|
||||
this->weights_dfts.insert(std::pair<int, cv::Mat>(it->first, it->second.clone()));
|
||||
}
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
void SVR_patch_expert::Read(std::ifstream &stream)
|
||||
{
|
||||
|
||||
// A sanity check when reading patch experts
|
||||
int read_type;
|
||||
stream >> read_type;
|
||||
assert(read_type == 2);
|
||||
|
||||
stream >> type >> confidence >> scaling >> bias;
|
||||
LandmarkDetector::ReadMat(stream, weights);
|
||||
|
||||
// OpenCV and Matlab matrix cardinality is different, hence the transpose
|
||||
weights = weights.t();
|
||||
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
void SVR_patch_expert::Response(const cv::Mat_<float>& area_of_interest, cv::Mat_<float>& response)
|
||||
{
|
||||
|
||||
int response_height = area_of_interest.rows - weights.rows + 1;
|
||||
int response_width = area_of_interest.cols - weights.cols + 1;
|
||||
|
||||
// the patch area on which we will calculate reponses
|
||||
cv::Mat_<float> normalised_area_of_interest;
|
||||
|
||||
if(response.rows != response_height || response.cols != response_width)
|
||||
{
|
||||
response.create(response_height, response_width);
|
||||
}
|
||||
|
||||
// If type is raw just normalise mean and standard deviation
|
||||
if(type == 0)
|
||||
{
|
||||
// Perform normalisation across whole patch
|
||||
cv::Scalar mean;
|
||||
cv::Scalar std;
|
||||
|
||||
cv::meanStdDev(area_of_interest, mean, std);
|
||||
// Avoid division by zero
|
||||
if(std[0] == 0)
|
||||
{
|
||||
std[0] = 1;
|
||||
}
|
||||
normalised_area_of_interest = (area_of_interest - mean[0]) / std[0];
|
||||
}
|
||||
// If type is gradient, perform the image gradient computation
|
||||
else if(type == 1)
|
||||
{
|
||||
Grad(area_of_interest, normalised_area_of_interest);
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("ERROR(%s,%d): Unsupported patch type %d!\n", __FILE__,__LINE__, type);
|
||||
abort();
|
||||
}
|
||||
|
||||
cv::Mat_<float> svr_response;
|
||||
|
||||
// The empty matrix as we don't pass precomputed dft's of image
|
||||
cv::Mat_<double> empty_matrix_0(0,0,0.0);
|
||||
cv::Mat_<float> empty_matrix_1(0,0,0.0);
|
||||
cv::Mat_<float> empty_matrix_2(0,0,0.0);
|
||||
|
||||
// Efficient calc of patch expert SVR response across the area of interest
|
||||
matchTemplate_m(normalised_area_of_interest, empty_matrix_0, empty_matrix_1, empty_matrix_2, weights, weights_dfts, svr_response, cv::TM_CCOEFF_NORMED);
|
||||
|
||||
response.create(svr_response.size());
|
||||
cv::MatIterator_<float> p = response.begin();
|
||||
|
||||
cv::MatIterator_<float> q1 = svr_response.begin(); // respone for each pixel
|
||||
cv::MatIterator_<float> q2 = svr_response.end();
|
||||
|
||||
while(q1 != q2)
|
||||
{
|
||||
// the SVR response passed into logistic regressor
|
||||
*p++ = 1.0/(1.0 + exp( -(*q1++ * scaling + bias )));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void SVR_patch_expert::ResponseDepth(const cv::Mat_<float>& area_of_interest, cv::Mat_<float> &response)
|
||||
{
|
||||
|
||||
// How big the response map will be
|
||||
int response_height = area_of_interest.rows - weights.rows + 1;
|
||||
int response_width = area_of_interest.cols - weights.cols + 1;
|
||||
|
||||
// the patch area on which we will calculate reponses
|
||||
cv::Mat_<float> normalised_area_of_interest;
|
||||
|
||||
if(response.rows != response_height || response.cols != response_width)
|
||||
{
|
||||
response.create(response_height, response_width);
|
||||
}
|
||||
|
||||
if(type == 0)
|
||||
{
|
||||
// Perform normalisation across whole patch
|
||||
cv::Scalar mean;
|
||||
cv::Scalar std;
|
||||
|
||||
// ignore missing values
|
||||
cv::Mat_<uchar> mask = area_of_interest > 0;
|
||||
cv::meanStdDev(area_of_interest, mean, std, mask);
|
||||
|
||||
// if all values the same don't divide by 0
|
||||
if(std[0] == 0)
|
||||
{
|
||||
std[0] = 1;
|
||||
}
|
||||
|
||||
normalised_area_of_interest = (area_of_interest - mean[0]) / std[0];
|
||||
|
||||
// Set the invalid pixels to 0
|
||||
normalised_area_of_interest.setTo(0, mask == 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("ERROR(%s,%d): Unsupported patch type %d!\n", __FILE__,__LINE__,type);
|
||||
abort();
|
||||
}
|
||||
|
||||
cv::Mat_<float> svr_response;
|
||||
|
||||
// The empty matrix as we don't pass precomputed dft's of image
|
||||
cv::Mat_<double> empty_matrix_0(0,0,0.0);
|
||||
cv::Mat_<float> empty_matrix_1(0,0,0.0);
|
||||
cv::Mat_<float> empty_matrix_2(0,0,0.0);
|
||||
|
||||
// Efficient calc of patch expert response across the area of interest
|
||||
|
||||
matchTemplate_m(normalised_area_of_interest, empty_matrix_0, empty_matrix_1, empty_matrix_2, weights, weights_dfts, svr_response, cv::TM_CCOEFF);
|
||||
|
||||
response.create(svr_response.size());
|
||||
cv::MatIterator_<float> p = response.begin();
|
||||
|
||||
cv::MatIterator_<float> q1 = svr_response.begin(); // respone for each pixel
|
||||
cv::MatIterator_<float> q2 = svr_response.end();
|
||||
|
||||
while(q1 != q2)
|
||||
{
|
||||
// the SVR response passed through a logistic regressor
|
||||
*p++ = 1.0/(1.0 + exp( -(*q1++ * scaling + bias )));
|
||||
}
|
||||
}
|
||||
|
||||
// Copy constructor
|
||||
Multi_SVR_patch_expert::Multi_SVR_patch_expert(const Multi_SVR_patch_expert& other) : svr_patch_experts(other.svr_patch_experts)
|
||||
{
|
||||
this->width = other.width;
|
||||
this->height = other.height;
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
void Multi_SVR_patch_expert::Read(std::ifstream &stream)
|
||||
{
|
||||
// A sanity check when reading patch experts
|
||||
int type;
|
||||
stream >> type;
|
||||
assert(type == 3);
|
||||
|
||||
// The number of patch experts for this view (with different modalities)
|
||||
int number_modalities;
|
||||
|
||||
stream >> width >> height >> number_modalities;
|
||||
|
||||
svr_patch_experts.resize(number_modalities);
|
||||
for(int i = 0; i < number_modalities; i++)
|
||||
svr_patch_experts[i].Read(stream);
|
||||
|
||||
}
|
||||
//===========================================================================
|
||||
void Multi_SVR_patch_expert::Response(const cv::Mat_<float> &area_of_interest, cv::Mat_<float> &response)
|
||||
{
|
||||
|
||||
int response_height = area_of_interest.rows - height + 1;
|
||||
int response_width = area_of_interest.cols - width + 1;
|
||||
|
||||
if(response.rows != response_height || response.cols != response_width)
|
||||
{
|
||||
response.create(response_height, response_width);
|
||||
}
|
||||
|
||||
// For the purposes of the experiment only use the response of normal intensity, for fair comparison
|
||||
|
||||
if(svr_patch_experts.size() == 1)
|
||||
{
|
||||
svr_patch_experts[0].Response(area_of_interest, response);
|
||||
}
|
||||
else
|
||||
{
|
||||
// responses from multiple patch experts these can be gradients, LBPs etc.
|
||||
response.setTo(1.0);
|
||||
|
||||
cv::Mat_<float> modality_resp(response_height, response_width);
|
||||
|
||||
for(size_t i = 0; i < svr_patch_experts.size(); i++)
|
||||
{
|
||||
svr_patch_experts[i].Response(area_of_interest, modality_resp);
|
||||
response = response.mul(modality_resp);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void Multi_SVR_patch_expert::ResponseDepth(const cv::Mat_<float>& area_of_interest, cv::Mat_<float>& response)
|
||||
{
|
||||
int response_height = area_of_interest.rows - height + 1;
|
||||
int response_width = area_of_interest.cols - width + 1;
|
||||
|
||||
if(response.rows != response_height || response.cols != response_width)
|
||||
{
|
||||
response.create(response_height, response_width);
|
||||
}
|
||||
|
||||
// With depth patch experts only do raw data modality
|
||||
svr_patch_experts[0].ResponseDepth(area_of_interest, response);
|
||||
}
|
||||
//===========================================================================
|
||||
12
pkg/OpenFace/lib/local/LandmarkDetector/src/stdafx.cpp
Normal file
12
pkg/OpenFace/lib/local/LandmarkDetector/src/stdafx.cpp
Normal file
@@ -0,0 +1,12 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
|
||||
// all rights reserved.
|
||||
//
|
||||
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
|
||||
//
|
||||
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
|
||||
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
|
||||
//
|
||||
// License can be found in OpenFace-license.txt
|
||||
//
|
||||
#include "stdafx.h"
|
||||
Reference in New Issue
Block a user