open source pkg v1

This commit is contained in:
Vijay Yadev
2020-08-04 19:12:31 -04:00
parent bef213dba9
commit c389fc2c47
3708 changed files with 1624220 additions and 1 deletions

View File

@@ -0,0 +1,415 @@
///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2017, Tadas Baltrusaitis, all rights reserved.
//
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
//
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
//
// License can be found in OpenFace-license.txt
//
// * Any publications arising from the use of this software, including but
// not limited to academic journal and conference publications, technical
// reports and manuals, must cite at least one of the following works:
//
// OpenFace 2.0: Facial Behavior Analysis Toolkit
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
//
// Convolutional experts constrained local model for facial landmark detection.
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
// in Computer Vision and Pattern Recognition Workshops, 2017.
//
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
// in IEEE International. Conference on Computer Vision (ICCV), 2015
//
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
// in Facial Expression Recognition and Analysis Challenge,
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
//
///////////////////////////////////////////////////////////////////////////////
#include "stdafx_ut.h"
#include "ImageCapture.h"
#include "ImageManipulationHelpers.h"
using namespace Utilities;
#define INFO_STREAM( stream ) \
std::cout << stream << std::endl
#define WARN_STREAM( stream ) \
std::cout << "Warning: " << stream << std::endl
#define ERROR_STREAM( stream ) \
std::cout << "Error: " << stream << std::endl
bool ImageCapture::Open(std::vector<std::string>& arguments)
{
// Consuming the input arguments
bool* valid = new bool[arguments.size()];
for (size_t i = 0; i < arguments.size(); ++i)
{
valid[i] = true;
}
// Some default values
std::string input_root = "";
fx = -1; fy = -1; cx = -1; cy = -1;
std::string separator = std::string(1, fs::path::preferred_separator);
// First check if there is a root argument (so that videos and input directories could be defined more easily)
for (size_t i = 0; i < arguments.size(); ++i)
{
if (arguments[i].compare("-root") == 0)
{
input_root = arguments[i + 1] + separator;
i++;
}
if (arguments[i].compare("-inroot") == 0)
{
input_root = arguments[i + 1] + separator;
i++;
}
}
std::string input_directory;
std::string bbox_directory;
bool directory_found = false;
has_bounding_boxes = false;
std::vector<std::string> input_image_files;
for (size_t i = 0; i < arguments.size(); ++i)
{
if (arguments[i].compare("-f") == 0)
{
input_image_files.push_back(input_root + arguments[i + 1]);
valid[i] = false;
valid[i + 1] = false;
i++;
}
else if (arguments[i].compare("-fdir") == 0)
{
if (directory_found)
{
WARN_STREAM("Input directory already found, using the first one:" + input_directory);
}
else
{
input_directory = (input_root + arguments[i + 1]);
valid[i] = false;
valid[i + 1] = false;
i++;
directory_found = true;
}
}
else if (arguments[i].compare("-bboxdir") == 0)
{
bbox_directory = (input_root + arguments[i + 1]);
valid[i] = false;
valid[i + 1] = false;
has_bounding_boxes = true;
i++;
}
else if (arguments[i].compare("-fx") == 0)
{
std::stringstream data(arguments[i + 1]);
data >> fx;
i++;
}
else if (arguments[i].compare("-fy") == 0)
{
std::stringstream data(arguments[i + 1]);
data >> fy;
i++;
}
else if (arguments[i].compare("-cx") == 0)
{
std::stringstream data(arguments[i + 1]);
data >> cx;
i++;
}
else if (arguments[i].compare("-cy") == 0)
{
std::stringstream data(arguments[i + 1]);
data >> cy;
i++;
}
}
for (int i = (int)arguments.size() - 1; i >= 0; --i)
{
if (!valid[i])
{
arguments.erase(arguments.begin() + i);
}
}
// Based on what was read in open the sequence
if (!input_image_files.empty())
{
return OpenImageFiles(input_image_files, fx, fy, cx, cy);
}
if (!input_directory.empty())
{
return OpenDirectory(input_directory, bbox_directory, fx, fy, cx, cy);
}
// If no input found return false and set a flag for it
no_input_specified = true;
return false;
}
bool ImageCapture::OpenImageFiles(const std::vector<std::string>& image_files, float fx, float fy, float cx, float cy)
{
// Setting some defaults
frame_num = 0;
no_input_specified = false;
latest_frame = cv::Mat();
latest_gray_frame = cv::Mat();
this->image_files = image_files;
// Allow for setting the camera intrinsics, but have to be the same ones for every image
if (fx != -1 && fy != -1 )
{
image_focal_length_set = true;
this->fx = fx;
this->fy = fy;
}
else
{
image_focal_length_set = false;
}
if (cx != -1 && cy != -1)
{
this->cx = cx;
this->cy = cy;
image_optical_center_set = true;
}
else
{
image_optical_center_set = false;
}
return true;
}
bool ImageCapture::OpenDirectory(std::string directory, std::string bbox_directory, float fx, float fy, float cx, float cy)
{
INFO_STREAM("Attempting to read from directory: " << directory);
// Setup some defaults
frame_num = 0;
no_input_specified = false;
image_files.clear();
fs::path image_directory(directory);
std::vector<fs::path> file_in_directory;
copy(fs::directory_iterator(image_directory), fs::directory_iterator(), back_inserter(file_in_directory));
// Sort the images in the directory first
sort(file_in_directory.begin(), file_in_directory.end());
std::vector<std::string> curr_dir_files;
for (std::vector<fs::path>::const_iterator file_iterator(file_in_directory.begin()); file_iterator != file_in_directory.end(); ++file_iterator)
{
// Possible image extension .jpg and .png
if (file_iterator->extension().string().compare(".jpg") == 0 || file_iterator->extension().string().compare(".jpeg") == 0 || file_iterator->extension().string().compare(".png") == 0 || file_iterator->extension().string().compare(".bmp") == 0)
{
curr_dir_files.push_back(file_iterator->string());
// If bounding box directory is specified, read the bounding boxes from it
if (!bbox_directory.empty())
{
fs::path current_file = *file_iterator;
fs::path bbox_file = bbox_directory / current_file.filename().replace_extension("txt");
// If there is a bounding box file push it to the list of bounding boxes
if (fs::exists(bbox_file))
{
std::ifstream in_bbox(bbox_file.string().c_str(), std::ios_base::in);
std::vector<cv::Rect_<float> > bboxes_image;
// Keep reading bounding boxes from a file, stop if empty line or
while (!in_bbox.eof())
{
std::string bbox_string;
std::getline(in_bbox, bbox_string);
if (bbox_string.empty())
continue;
std::stringstream ss(bbox_string);
float min_x, min_y, max_x, max_y;
ss >> min_x >> min_y >> max_x >> max_y;
bboxes_image.push_back(cv::Rect_<float>(min_x, min_y, max_x - min_x, max_y - min_y));
}
in_bbox.close();
bounding_boxes.push_back(bboxes_image);
}
else
{
ERROR_STREAM("Could not find the corresponding bounding box for file:" + file_iterator->string());
exit(1);
}
}
}
}
image_files = curr_dir_files;
if (image_files.empty())
{
std::cout << "No images found in the directory: " << directory << std::endl;
return false;
}
// Allow for setting the camera intrinsics, but have to be the same ones for every image
if (fx != -1 && fy != -1)
{
image_focal_length_set = true;
this->fx = fx;
this->fy = fy;
}
else
{
image_focal_length_set = false;
}
if (cx != -1 && cy != -1)
{
this->cx = cx;
this->cy = cy;
image_optical_center_set = true;
}
else
{
image_optical_center_set = false;
}
return true;
}
void ImageCapture::SetCameraIntrinsics(float fx, float fy, float cx, float cy)
{
// If optical centers are not defined just use center of image
if (cx == -1)
{
this->cx = this->image_width / 2.0f;
this->cy = this->image_height / 2.0f;
}
else
{
this->cx = cx;
this->cy = cy;
}
// Use a rough guess-timate of focal length
if (fx == -1)
{
this->fx = 500.0f * (this->image_width / 640.0f);
this->fy = 500.0f * (this->image_height / 480.0f);
this->fx = (this->fx + this->fy) / 2.0f;
this->fy = this->fx;
}
else
{
this->fx = fx;
this->fy = fy;
}
}
// Returns a read image in 3 channel RGB format, also prepares a grayscale frame if needed
cv::Mat ImageCapture::GetNextImage()
{
if (image_files.empty() || frame_num >= image_files.size())
{
// Indicate lack of success by returning an empty image
latest_frame = cv::Mat();
return latest_frame;
}
// Load the image as an 8 bit RGB
latest_frame = cv::imread(image_files[frame_num], cv::IMREAD_COLOR);
if (latest_frame.empty())
{
ERROR_STREAM("Could not open the image: " + image_files[frame_num]);
exit(1);
}
image_height = latest_frame.size().height;
image_width = latest_frame.size().width;
// Reset the intrinsics for every image if they are not set globally
float _fx = -1;
float _fy = -1;
if (image_focal_length_set)
{
_fx = fx;
_fy = fy;
}
float _cx = -1;
float _cy = -1;
if (image_optical_center_set)
{
_cx = cx;
_cy = cy;
}
SetCameraIntrinsics(_fx, _fy, _cx, _cy);
// Set the grayscale frame
ConvertToGrayscale_8bit(latest_frame, latest_gray_frame);
this->name = image_files[frame_num];
frame_num++;
return latest_frame;
}
std::vector<cv::Rect_<float> > ImageCapture::GetBoundingBoxes()
{
if (!bounding_boxes.empty())
{
return bounding_boxes[frame_num - 1];
}
else
{
return std::vector<cv::Rect_<float> >();
}
}
double ImageCapture::GetProgress()
{
return (double)frame_num / (double)image_files.size();
}
cv::Mat_<uchar> ImageCapture::GetGrayFrame()
{
return latest_gray_frame;
}

View File

@@ -0,0 +1,341 @@
///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2017, Tadas Baltrusaitis, all rights reserved.
//
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
//
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
//
// License can be found in OpenFace-license.txt
//
// * Any publications arising from the use of this software, including but
// not limited to academic journal and conference publications, technical
// reports and manuals, must cite at least one of the following works:
//
// OpenFace 2.0: Facial Behavior Analysis Toolkit
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
//
// Convolutional experts constrained local model for facial landmark detection.
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
// in Computer Vision and Pattern Recognition Workshops, 2017.
//
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
// in IEEE International. Conference on Computer Vision (ICCV), 2015
//
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
// in Facial Expression Recognition and Analysis Challenge,
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
//
///////////////////////////////////////////////////////////////////////////////
#include "stdafx_ut.h"
#include "RecorderCSV.h"
using namespace Utilities;
// Default constructor initializes the variables
RecorderCSV::RecorderCSV():output_file(){};
// Making sure full stop is used for decimal point separation
struct fullstop : std::numpunct<char> {
char do_decimal_point() const { return '.'; }
};
// Opening the file and preparing the header for it
bool RecorderCSV::Open(std::string output_file_name, bool is_sequence, bool output_2D_landmarks, bool output_3D_landmarks, bool output_model_params, bool output_pose, bool output_AUs, bool output_gaze,
int num_face_landmarks, int num_model_modes, int num_eye_landmarks, const std::vector<std::string>& au_names_class, const std::vector<std::string>& au_names_reg)
{
output_file.open(output_file_name, std::ios_base::out);
output_file.imbue(std::locale(output_file.getloc(), new fullstop));
if (!output_file.is_open())
return false;
this->is_sequence = is_sequence;
// Set up what we are recording
this->output_2D_landmarks = output_2D_landmarks;
this->output_3D_landmarks = output_3D_landmarks;
this->output_AUs = output_AUs;
this->output_gaze = output_gaze;
this->output_model_params = output_model_params;
this->output_pose = output_pose;
this->au_names_class = au_names_class;
this->au_names_reg = au_names_reg;
// Different headers if we are writing out the results on a sequence or an individual image
if(this->is_sequence)
{
output_file << "frame, face_id, timestamp, confidence, success";
}
else
{
output_file << "face, confidence";
}
if (output_gaze)
{
output_file << ", gaze_0_x, gaze_0_y, gaze_0_z, gaze_1_x, gaze_1_y, gaze_1_z, gaze_angle_x, gaze_angle_y";
for (int i = 0; i < num_eye_landmarks; ++i)
{
output_file << ", eye_lmk_x_" << i;
}
for (int i = 0; i < num_eye_landmarks; ++i)
{
output_file << ", eye_lmk_y_" << i;
}
for (int i = 0; i < num_eye_landmarks; ++i)
{
output_file << ", eye_lmk_X_" << i;
}
for (int i = 0; i < num_eye_landmarks; ++i)
{
output_file << ", eye_lmk_Y_" << i;
}
for (int i = 0; i < num_eye_landmarks; ++i)
{
output_file << ", eye_lmk_Z_" << i;
}
}
if (output_pose)
{
output_file << ", pose_Tx, pose_Ty, pose_Tz, pose_Rx, pose_Ry, pose_Rz";
}
if (output_2D_landmarks)
{
for (int i = 0; i < num_face_landmarks; ++i)
{
output_file << ", x_" << i;
}
for (int i = 0; i < num_face_landmarks; ++i)
{
output_file << ", y_" << i;
}
}
if (output_3D_landmarks)
{
for (int i = 0; i < num_face_landmarks; ++i)
{
output_file << ", X_" << i;
}
for (int i = 0; i < num_face_landmarks; ++i)
{
output_file << ", Y_" << i;
}
for (int i = 0; i < num_face_landmarks; ++i)
{
output_file << ", Z_" << i;
}
}
// Outputting model parameters (rigid and non-rigid), the first parameters are the 6 rigid shape parameters, they are followed by the non rigid shape parameters
if (output_model_params)
{
output_file << ", p_scale, p_rx, p_ry, p_rz, p_tx, p_ty";
for (int i = 0; i < num_model_modes; ++i)
{
output_file << ", p_" << i;
}
}
if (output_AUs)
{
std::sort(this->au_names_reg.begin(), this->au_names_reg.end());
for (std::string reg_name : this->au_names_reg)
{
output_file << ", " << reg_name << "_r";
}
std::sort(this->au_names_class.begin(), this->au_names_class.end());
for (std::string class_name : this->au_names_class)
{
output_file << ", " << class_name << "_c";
}
}
output_file << std::endl;
return true;
}
void RecorderCSV::WriteLine(int face_id, int frame_num, double time_stamp, bool landmark_detection_success, double landmark_confidence,
const cv::Mat_<float>& landmarks_2D, const cv::Mat_<float>& landmarks_3D, const cv::Mat_<float>& pdm_model_params, const cv::Vec6f& rigid_shape_params, cv::Vec6f& pose_estimate,
const cv::Point3f& gazeDirection0, const cv::Point3f& gazeDirection1, const cv::Vec2f& gaze_angle, const std::vector<cv::Point2f>& eye_landmarks2d, const std::vector<cv::Point3f>& eye_landmarks3d,
const std::vector<std::pair<std::string, double> >& au_intensities, const std::vector<std::pair<std::string, double> >& au_occurences)
{
if (!output_file.is_open())
{
std::cout << "The output CSV file is not open, exiting" << std::endl;
exit(1);
}
// Making sure fixed and not scientific notation is used
output_file << std::fixed;
output_file << std::noshowpoint;
if(is_sequence)
{
output_file << std::setprecision(3);
output_file << frame_num << ", " << face_id << ", " << time_stamp;
output_file << std::setprecision(2);
output_file << ", " << landmark_confidence;
output_file << std::setprecision(0);
output_file << ", " << landmark_detection_success;
}
else
{
output_file << std::setprecision(3);
output_file << face_id << ", " << landmark_confidence;
}
// Output the estimated gaze
if (output_gaze)
{
output_file << std::setprecision(6);
output_file << ", " << gazeDirection0.x << ", " << gazeDirection0.y << ", " << gazeDirection0.z
<< ", " << gazeDirection1.x << ", " << gazeDirection1.y << ", " << gazeDirection1.z;
// Output gaze angle (same format as head pose angle)
output_file << std::setprecision(3);
output_file << ", " << gaze_angle[0] << ", " << gaze_angle[1];
// Output the 2D eye landmarks
output_file << std::setprecision(1);
for (auto eye_lmk : eye_landmarks2d)
{
output_file << ", " << eye_lmk.x;
}
for (auto eye_lmk : eye_landmarks2d)
{
output_file << ", " << eye_lmk.y;
}
// Output the 3D eye landmarks
for (auto eye_lmk : eye_landmarks3d)
{
output_file << ", " << eye_lmk.x;
}
for (auto eye_lmk : eye_landmarks3d)
{
output_file << ", " << eye_lmk.y;
}
for (auto eye_lmk : eye_landmarks3d)
{
output_file << ", " << eye_lmk.z;
}
}
// Output the estimated head pose
if (output_pose)
{
output_file << std::setprecision(1);
output_file << ", " << pose_estimate[0] << ", " << pose_estimate[1] << ", " << pose_estimate[2];
output_file << std::setprecision(3);
output_file << ", " << pose_estimate[3] << ", " << pose_estimate[4] << ", " << pose_estimate[5];
}
// Output the detected 2D facial landmarks
if (output_2D_landmarks)
{
output_file.precision(1);
// Output the 2D eye landmarks
for (auto lmk : landmarks_2D)
{
output_file << ", " << lmk;
}
}
// Output the detected 3D facial landmarks
if (output_3D_landmarks)
{
output_file.precision(1);
// Output the 2D eye landmarks
for (auto lmk : landmarks_3D)
{
output_file << ", " << lmk;
}
}
if (output_model_params)
{
output_file.precision(3);
for (int i = 0; i < 6; ++i)
{
output_file << ", " << rigid_shape_params[i];
}
// Output the non_rigid shape parameters
for (auto lmk : pdm_model_params)
{
output_file << ", " << lmk;
}
}
if (output_AUs)
{
// write out ar the correct index
output_file.precision(2);
for (std::string au_name : au_names_reg)
{
for (auto au_reg : au_intensities)
{
if (au_name.compare(au_reg.first) == 0)
{
output_file << ", " << au_reg.second;
break;
}
}
}
if (au_intensities.size() == 0)
{
for (size_t p = 0; p < au_names_reg.size(); ++p)
{
output_file << ", 0";
}
}
output_file.precision(1);
// write out ar the correct index
for (std::string au_name : au_names_class)
{
for (auto au_class : au_occurences)
{
if (au_name.compare(au_class.first) == 0)
{
output_file << ", " << au_class.second;
break;
}
}
}
if (au_occurences.size() == 0)
{
for (size_t p = 0; p < au_names_class.size(); ++p)
{
output_file << ", 0";
}
}
}
output_file << std::endl;
}
// Closing the file and cleaning up
void RecorderCSV::Close()
{
output_file.close();
}

View File

@@ -0,0 +1,102 @@
///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2017, Tadas Baltrusaitis, all rights reserved.
//
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
//
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
//
// License can be found in OpenFace-license.txt
//
// * Any publications arising from the use of this software, including but
// not limited to academic journal and conference publications, technical
// reports and manuals, must cite at least one of the following works:
//
// OpenFace 2.0: Facial Behavior Analysis Toolkit
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
//
// Convolutional experts constrained local model for facial landmark detection.
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
// in Computer Vision and Pattern Recognition Workshops, 2017.
//
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
// in IEEE International. Conference on Computer Vision (ICCV), 2015
//
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
// in Facial Expression Recognition and Analysis Challenge,
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
//
///////////////////////////////////////////////////////////////////////////////
#include "stdafx_ut.h"
#include "RecorderHOG.h"
using namespace Utilities;
// Default constructor initializes the variables
RecorderHOG::RecorderHOG() :hog_file() {};
// Opening the file and preparing the header for it
bool RecorderHOG::Open(std::string output_file_name)
{
hog_file.open(output_file_name, std::ios_base::out | std::ios_base::binary);
return hog_file.is_open();
}
void RecorderHOG::Close()
{
hog_file.close();
}
void RecorderHOG::Write()
{
hog_file.write((char*)(&num_cols), 4);
hog_file.write((char*)(&num_rows), 4);
hog_file.write((char*)(&num_channels), 4);
// Not the best way to store a bool, but will be much easier to read it
float good_frame_float;
if (good_frame)
good_frame_float = 1;
else
good_frame_float = -1;
hog_file.write((char*)(&good_frame_float), 4);
if(hog_descriptor.isContinuous())
{
cv::Mat_<float> desc;
hog_descriptor.convertTo(desc, CV_32F);
hog_file.write((char*)desc.data, 4 * num_cols * num_rows * 31);
}
else
{
cv::MatConstIterator_<double> descriptor_it = hog_descriptor.begin();
for (int y = 0; y < num_cols; ++y)
{
for (int x = 0; x < num_rows; ++x)
{
for (unsigned int o = 0; o < 31; ++o)
{
float hog_data = (float)(*descriptor_it++);
hog_file.write((char*)&hog_data, 4);
}
}
}
}
}
// Writing to a HOG file
void RecorderHOG::SetObservationHOG(bool good_frame, const cv::Mat_<double>& hog_descriptor, int num_cols, int num_rows, int num_channels)
{
this->num_cols = num_cols;
this->num_rows = num_rows;
this->num_channels = num_channels;
this->hog_descriptor = hog_descriptor;
this->good_frame = good_frame;
}

View File

@@ -0,0 +1,525 @@
///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2017, Tadas Baltrusaitis, all rights reserved.
//
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
//
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
//
// License can be found in OpenFace-license.txt
//
// * Any publications arising from the use of this software, including but
// not limited to academic journal and conference publications, technical
// reports and manuals, must cite at least one of the following works:
//
// OpenFace 2.0: Facial Behavior Analysis Toolkit
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
//
// Convolutional experts constrained local model for facial landmark detection.
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
// in Computer Vision and Pattern Recognition Workshops, 2017.
//
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
// in IEEE International. Conference on Computer Vision (ICCV), 2015
//
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
// in Facial Expression Recognition and Analysis Challenge,
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
//
///////////////////////////////////////////////////////////////////////////////
#include "stdafx_ut.h"
#include "RecorderOpenFace.h"
using namespace Utilities;
#define WARN_STREAM( stream ) \
std::cout << "Warning: " << stream << std::endl
void CreateDirectory(std::string output_path)
{
// Removing trailing separators, as that causes issues with directory creation in unix
while (output_path[output_path.size() - 1] == '/' || output_path[output_path.size() - 1] == '\\')
{
output_path = output_path.substr(0, output_path.size() - 1);
}
// Creating the right directory structure
if (!fs::exists(output_path))
{
bool success = fs::create_directories(output_path);
if (!success)
{
std::cout << "ERROR: failed to create output directory:" << output_path << ", do you have permission to create directory" << std::endl;
exit(1);
}
}
}
void RecorderOpenFace::VideoWritingTask(bool is_sequence)
{
std::pair<std::string, cv::Mat> tracked_data;
while (true)
{
vis_to_out_queue.pop(tracked_data);
// Indicate that the thread should complete
if (tracked_data.second.empty())
{
break;
}
if (is_sequence)
{
if (video_writer.isOpened())
{
video_writer.write(tracked_data.second);
}
}
else
{
bool out_success = cv::imwrite(tracked_data.first, tracked_data.second);
if (!out_success)
{
WARN_STREAM("Could not output tracked image");
}
}
}
}
void RecorderOpenFace::AlignedImageWritingTask()
{
std::pair<std::string, cv::Mat> tracked_data;
while (true)
{
aligned_face_queue.pop(tracked_data);
// Empty frame indicates termination
if (tracked_data.second.empty())
break;
bool write_success = cv::imwrite(tracked_data.first, tracked_data.second);
if (!write_success)
{
WARN_STREAM("Could not output similarity aligned image image");
}
}
}
void RecorderOpenFace::PrepareRecording(const std::string& in_filename)
{
// Construct the directories required for the output
CreateDirectory(record_root);
// Create the filename for the general output file that contains all of the meta information about the recording
fs::path of_det_name(out_name);
of_det_name = fs::path(record_root) / fs::path(out_name + "_of_details.txt");
// Write in the of file what we are outputing what is the input etc.
metadata_file.open(of_det_name.string(), std::ios_base::out);
if (!metadata_file.is_open())
{
std::cout << "ERROR: could not open the output file:" << of_det_name << ", either the path of the output directory is wrong or you do not have the permissions to write to it" << std::endl;
exit(1);
}
// Populate relative and full path names in the meta file, unless it is a webcam
if (!params.isFromWebcam())
{
std::string input_filename_relative = in_filename;
std::string input_filename_full = in_filename;
if (!fs::path(input_filename_full).is_absolute())
{
input_filename_full = fs::canonical(input_filename_relative).string();
}
metadata_file << "Input:" << input_filename_relative << std::endl;
metadata_file << "Input full path:" << input_filename_full << std::endl;
}
else
{
// Populate the metadata file
metadata_file << "Input:webcam" << std::endl;
}
metadata_file << "Camera parameters:" << params.getFx() << "," << params.getFy() << "," << params.getCx() << "," << params.getCy() << std::endl;
// Create the required individual recorders, CSV, HOG, aligned, video
csv_filename = out_name + ".csv";
// Consruct HOG recorder here
if (params.outputHOG())
{
// Output the data based on record_root, but do not include record_root in the meta file, as it is also in that directory
std::string hog_filename = out_name + ".hog";
metadata_file << "Output HOG:" << hog_filename << std::endl;
hog_filename = (fs::path(record_root) / hog_filename).string();
hog_recorder.Open(hog_filename);
}
// saving the videos
if (params.outputTracked())
{
if (params.isSequence())
{
// Output the data based on record_root, but do not include record_root in the meta file, as it is also in that directory
this->media_filename = out_name + ".avi";
metadata_file << "Output video:" << this->media_filename << std::endl;
this->media_filename = (fs::path(record_root) / this->media_filename).string();
}
else
{
this->media_filename = out_name + "." + params.imageFormatVisualization();
metadata_file << "Output image:" << this->media_filename << std::endl;
this->media_filename = (fs::path(record_root) / this->media_filename).string();
}
}
// Prepare image recording
if (params.outputAlignedFaces())
{
aligned_output_directory = out_name + "_aligned";
metadata_file << "Output aligned directory:" << this->aligned_output_directory << std::endl;
this->aligned_output_directory = (fs::path(record_root) / this->aligned_output_directory).string();
CreateDirectory(aligned_output_directory);
}
this->frame_number = 0;
this->tracked_writing_thread_started = false;
this->aligned_writing_thread_started = false;
}
RecorderOpenFace::RecorderOpenFace(const std::string in_filename, const RecorderOpenFaceParameters& parameters, std::vector<std::string>& arguments):video_writer(), params(parameters)
{
// From the filename, strip out the name without directory and extension
if (fs::is_directory(in_filename))
{
out_name = fs::canonical(in_filename).filename().string();
}
else
{
out_name = fs::path(in_filename).filename().replace_extension("").string();
}
// Consuming the input arguments
bool* valid = new bool[arguments.size()];
for (size_t i = 0; i < arguments.size(); ++i)
{
valid[i] = true;
}
for (size_t i = 0; i < arguments.size(); ++i)
{
if (arguments[i].compare("-out_dir") == 0)
{
record_root = arguments[i + 1];
}
}
// Determine output directory
bool output_found = false;
for (size_t i = 0; i < arguments.size(); ++i)
{
if (!output_found && arguments[i].compare("-of") == 0)
{
record_root = (fs::path(record_root) / fs::path(arguments[i + 1])).remove_filename().string();
out_name = fs::path(fs::path(arguments[i + 1])).replace_extension("").filename().string();
valid[i] = false;
valid[i + 1] = false;
i++;
output_found = true;
}
}
// If recording directory not set, record to default location
if (record_root.empty())
record_root = default_record_directory;
for (int i = (int)arguments.size() - 1; i >= 0; --i)
{
if (!valid[i])
{
arguments.erase(arguments.begin() + i);
}
}
PrepareRecording(in_filename);
}
RecorderOpenFace::RecorderOpenFace(const std::string in_filename, const RecorderOpenFaceParameters& parameters, std::string output_directory):video_writer(), params(parameters)
{
// From the filename, strip out the name without directory and extension
if (fs::is_directory(in_filename))
{
out_name = fs::canonical(fs::path(in_filename)).filename().string();
}
else
{
out_name = fs::path(in_filename).filename().replace_extension("").string();
}
record_root = output_directory;
// If recording directory not set, record to default location
if (record_root.empty())
record_root = default_record_directory;
PrepareRecording(in_filename);
}
void RecorderOpenFace::SetObservationFaceAlign(const cv::Mat& aligned_face)
{
this->aligned_face = aligned_face;
}
void RecorderOpenFace::SetObservationVisualization(const cv::Mat &vis_track)
{
if (params.outputTracked())
{
vis_to_out = vis_track;
}
}
void RecorderOpenFace::WriteObservation()
{
// Write out the CSV file (it will always be there, even if not outputting anything more but frame/face numbers)
if(!csv_recorder.isOpen())
{
// As we are writing out the header, work out some things like number of landmarks, names of AUs etc.
int num_face_landmarks = landmarks_2D.rows / 2;
int num_eye_landmarks = (int)eye_landmarks2D.size();
int num_model_modes = pdm_params_local.rows;
std::vector<std::string> au_names_class;
for (auto au : au_occurences)
{
au_names_class.push_back(au.first);
}
std::sort(au_names_class.begin(), au_names_class.end());
std::vector<std::string> au_names_reg;
for (auto au : au_intensities)
{
au_names_reg.push_back(au.first);
}
std::sort(au_names_reg.begin(), au_names_reg.end());
metadata_file << "Output csv:" << csv_filename << std::endl;
metadata_file << "Gaze: " << params.outputGaze() << std::endl;
metadata_file << "AUs: " << params.outputAUs() << std::endl;
metadata_file << "Landmarks 2D: " << params.output2DLandmarks() << std::endl;
metadata_file << "Landmarks 3D: " << params.output3DLandmarks() << std::endl;
metadata_file << "Pose: " << params.outputPose() << std::endl;
metadata_file << "Shape parameters: " << params.outputPDMParams() << std::endl;
csv_filename = (fs::path(record_root) / csv_filename).string();
csv_recorder.Open(csv_filename, params.isSequence(), params.output2DLandmarks(), params.output3DLandmarks(), params.outputPDMParams(), params.outputPose(),
params.outputAUs(), params.outputGaze(), num_face_landmarks, num_model_modes, num_eye_landmarks, au_names_class, au_names_reg);
}
this->csv_recorder.WriteLine(face_id, frame_number, timestamp, landmark_detection_success,
landmark_detection_confidence, landmarks_2D, landmarks_3D, pdm_params_local, pdm_params_global, head_pose,
gaze_direction0, gaze_direction1, gaze_angle, eye_landmarks2D, eye_landmarks3D, au_intensities, au_occurences);
if(params.outputHOG())
{
this->hog_recorder.Write();
}
// Write aligned faces
if (params.outputAlignedFaces())
{
if (!aligned_writing_thread_started)
{
aligned_writing_thread_started = true;
int capacity = (1024 * 1024 * ALIGNED_QUEUE_CAPACITY) / (aligned_face.size().width *aligned_face.size().height * aligned_face.channels()) + 1;
aligned_face_queue.set_capacity(capacity);
// Start the alignment output thread
aligned_writing_thread = std::thread(&RecorderOpenFace::AlignedImageWritingTask, this);
}
char name[100];
// Filename is based on frame number (TODO stringstream this)
if(params.isSequence())
std::sprintf(name, "frame_det_%02d_%06d.", face_id, frame_number);
else
std::sprintf(name, "face_det_%06d.", face_id);
// Construct the output filename
std::string out_file = (fs::path(aligned_output_directory) / fs::path(std::string(name) + params.imageFormatAligned())).string();
if(params.outputBadAligned() || landmark_detection_success)
{
aligned_face_queue.push(std::pair<std::string, cv::Mat>(out_file, aligned_face));
}
// Clear the image
aligned_face = cv::Mat();
}
}
void RecorderOpenFace::WriteObservationTracked()
{
if (params.outputTracked())
{
if (!tracked_writing_thread_started)
{
tracked_writing_thread_started = true;
// Set up the queue for video writing based on output size
int capacity = (1024 * 1024 * TRACKED_QUEUE_CAPACITY) / (vis_to_out.size().width * vis_to_out.size().height * vis_to_out.channels()) + 1;
vis_to_out_queue.set_capacity(capacity);
// Initialize the video writer if it has not been opened yet
if (params.isSequence())
{
std::string output_codec = params.outputCodec();
try
{
video_writer.open(media_filename, cv::VideoWriter::fourcc(output_codec[0], output_codec[1], output_codec[2], output_codec[3]), params.outputFps(), vis_to_out.size(), true);
if (!video_writer.isOpened())
{
WARN_STREAM("Could not open VideoWriter, OUTPUT FILE WILL NOT BE WRITTEN.");
}
}
catch (cv::Exception e)
{
WARN_STREAM("Could not open VideoWriter, OUTPUT FILE WILL NOT BE WRITTEN. Currently using codec " << output_codec << ", try using an other one (-oc option)");
}
}
// Start the video and tracked image writing thread
video_writing_thread = std::thread(&RecorderOpenFace::VideoWritingTask, this, params.isSequence());
}
if (vis_to_out.empty())
{
WARN_STREAM("Output tracked video frame is not set");
}
if (params.isSequence())
{
vis_to_out_queue.push(std::pair<std::string, cv::Mat>("", vis_to_out));
}
else
{
vis_to_out_queue.push(std::pair<std::string, cv::Mat>(media_filename, vis_to_out));
}
// Clear the output
vis_to_out = cv::Mat();
}
}
void RecorderOpenFace::SetObservationHOG(bool good_frame, const cv::Mat_<double>& hog_descriptor, int num_cols, int num_rows, int num_channels)
{
this->hog_recorder.SetObservationHOG(good_frame, hog_descriptor, num_cols, num_rows, num_channels);
}
void RecorderOpenFace::SetObservationTimestamp(double timestamp)
{
this->timestamp = timestamp;
}
// Required observations for video/image-sequence
void RecorderOpenFace::SetObservationFrameNumber(int frame_number)
{
this->frame_number = frame_number;
}
// If in multiple face mode, identifying which face was tracked
void RecorderOpenFace::SetObservationFaceID(int face_id)
{
this->face_id = face_id;
}
void RecorderOpenFace::SetObservationLandmarks(const cv::Mat_<float>& landmarks_2D, const cv::Mat_<float>& landmarks_3D,
const cv::Vec6f& pdm_params_global, const cv::Mat_<float>& pdm_params_local, double confidence, bool success)
{
this->landmarks_2D = landmarks_2D;
this->landmarks_3D = landmarks_3D;
this->pdm_params_global = pdm_params_global;
this->pdm_params_local = pdm_params_local;
this->landmark_detection_confidence = confidence;
this->landmark_detection_success = success;
}
void RecorderOpenFace::SetObservationPose(const cv::Vec6f& pose)
{
this->head_pose = pose;
}
void RecorderOpenFace::SetObservationActionUnits(const std::vector<std::pair<std::string, double> >& au_intensities,
const std::vector<std::pair<std::string, double> >& au_occurences)
{
this->au_intensities = au_intensities;
this->au_occurences = au_occurences;
}
void RecorderOpenFace::SetObservationGaze(const cv::Point3f& gaze_direction0, const cv::Point3f& gaze_direction1,
const cv::Vec2f& gaze_angle, const std::vector<cv::Point2f>& eye_landmarks2D, const std::vector<cv::Point3f>& eye_landmarks3D)
{
this->gaze_direction0 = gaze_direction0;
this->gaze_direction1 = gaze_direction1;
this->gaze_angle = gaze_angle;
this->eye_landmarks2D = eye_landmarks2D;
this->eye_landmarks3D = eye_landmarks3D;
}
RecorderOpenFace::~RecorderOpenFace()
{
this->Close();
}
void RecorderOpenFace::Close()
{
// Insert terminating frames to the queues
vis_to_out_queue.push(std::pair<std::string, cv::Mat>("", cv::Mat()));
aligned_face_queue.push(std::pair<std::string, cv::Mat>("", cv::Mat()));
// Make sure the recording threads complete
if (video_writing_thread.joinable())
video_writing_thread.join();
if (aligned_writing_thread.joinable())
aligned_writing_thread.join();
tracked_writing_thread_started = false;
aligned_writing_thread_started = false;
hog_recorder.Close();
csv_recorder.Close();
video_writer.release();
metadata_file.close();
}

View File

@@ -0,0 +1,191 @@
///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2017, Tadas Baltrusaitis, all rights reserved.
//
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
//
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
//
// License can be found in OpenFace-license.txt
//
// * Any publications arising from the use of this software, including but
// not limited to academic journal and conference publications, technical
// reports and manuals, must cite at least one of the following works:
//
// OpenFace 2.0: Facial Behavior Analysis Toolkit
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
//
// Convolutional experts constrained local model for facial landmark detection.
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
// in Computer Vision and Pattern Recognition Workshops, 2017.
//
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
// in IEEE International. Conference on Computer Vision (ICCV), 2015
//
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
// in Facial Expression Recognition and Analysis Challenge,
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
//
///////////////////////////////////////////////////////////////////////////////
#include "stdafx_ut.h"
#include "RecorderOpenFaceParameters.h"
using namespace Utilities;
RecorderOpenFaceParameters::RecorderOpenFaceParameters(std::vector<std::string> &arguments, bool sequence, bool from_webcam, float fx, float fy, float cx, float cy, double fps_vid_out)
{
this->is_sequence = sequence;
this->is_from_webcam = from_webcam;
this->fx = fx;
this->fy = fy;
this->cx = cx;
this->cy = cy;
if(fps_vid_out > 0)
{
this->fps_vid_out = fps_vid_out;
}
else
{
this->fps_vid_out = 30; // If an illegal value for fps provided, default to 30
}
// Default output code
this->output_codec = "DIVX";
this->image_format_aligned = "bmp";
this->image_format_visualization = "jpg";
bool output_set = false;
this->output_2D_landmarks = false;
this->output_3D_landmarks = false;
this->output_model_params = false;
this->output_pose = false;
this->output_AUs = false;
this->output_gaze = false;
this->output_hog = false;
this->output_tracked = false;
this->output_aligned_faces = false;
this->record_aligned_bad = true;
for (size_t i = 0; i < arguments.size(); ++i)
{
if (arguments[i].compare("-format_aligned") == 0)
{
this->image_format_aligned = arguments[i+1];
i++;
}
if (arguments[i].compare("-format_vis_image") == 0)
{
this->image_format_visualization = arguments[i + 1];
i++;
}
if (arguments[i].compare("-nobadaligned") == 0)
{
this->record_aligned_bad = false;
}
if (arguments[i].compare("-simalign") == 0)
{
this->output_aligned_faces = true;
output_set = true;
}
else if (arguments[i].compare("-hogalign") == 0)
{
this->output_hog = true;
output_set = true;
}
else if (arguments[i].compare("-2Dfp") == 0)
{
this->output_2D_landmarks = true;
output_set = true;
}
else if (arguments[i].compare("-3Dfp") == 0)
{
this->output_3D_landmarks = true;
output_set = true;
}
else if (arguments[i].compare("-pdmparams") == 0)
{
this->output_model_params = true;
output_set = true;
}
else if (arguments[i].compare("-pose") == 0)
{
this->output_pose = true;
output_set = true;
}
else if (arguments[i].compare("-aus") == 0)
{
this->output_AUs = true;
output_set = true;
}
else if (arguments[i].compare("-gaze") == 0)
{
this->output_gaze = true;
output_set = true;
}
else if (arguments[i].compare("-tracked") == 0)
{
this->output_tracked = true;
output_set = true;
}
}
// Output everything if nothing has been set
if (!output_set)
{
this->output_2D_landmarks = true;
this->output_3D_landmarks = true;
this->output_model_params = true;
this->output_pose = true;
this->output_AUs = true;
this->output_gaze = true;
this->output_hog = true;
this->output_tracked = true;
this->output_aligned_faces = true;
}
}
RecorderOpenFaceParameters::RecorderOpenFaceParameters(bool sequence, bool is_from_webcam, bool output_2D_landmarks, bool output_3D_landmarks,
bool output_model_params, bool output_pose, bool output_AUs, bool output_gaze, bool output_hog, bool output_tracked,
bool output_aligned_faces, bool record_bad, float fx, float fy, float cx, float cy, double fps_vid_out)
{
this->is_sequence = sequence;
this->is_from_webcam = is_from_webcam;
this->fx = fx;
this->fy = fy;
this->cx = cx;
this->cy = cy;
if (fps_vid_out > 0)
{
this->fps_vid_out = fps_vid_out;
}
else
{
this->fps_vid_out = 30; // If an illegal value for fps provided, default to 30
}
// Default output code
this->output_codec = "DIVX";
this->image_format_aligned = "bmp";
this->image_format_visualization = "jpg";
this->output_2D_landmarks = output_2D_landmarks;
this->output_3D_landmarks = output_3D_landmarks;
this->output_model_params = output_model_params;
this->output_pose = output_pose;
this->output_AUs = output_AUs;
this->output_gaze = output_gaze;
this->output_hog = output_hog;
this->output_tracked = output_tracked;
this->output_aligned_faces = output_aligned_faces;
}

View File

@@ -0,0 +1,540 @@
///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2017, Tadas Baltrusaitis, all rights reserved.
//
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
//
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
//
// License can be found in OpenFace-license.txt
//
// * Any publications arising from the use of this software, including but
// not limited to academic journal and conference publications, technical
// reports and manuals, must cite at least one of the following works:
//
// OpenFace 2.0: Facial Behavior Analysis Toolkit
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
//
// Convolutional experts constrained local model for facial landmark detection.
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
// in Computer Vision and Pattern Recognition Workshops, 2017.
//
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
// in IEEE International. Conference on Computer Vision (ICCV), 2015
//
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
// in Facial Expression Recognition and Analysis Challenge,
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
//
///////////////////////////////////////////////////////////////////////////////
#include "stdafx_ut.h"
#include "SequenceCapture.h"
#include "ImageManipulationHelpers.h"
using namespace Utilities;
#define INFO_STREAM( stream ) \
std::cout << stream << std::endl
#define WARN_STREAM( stream ) \
std::cout << "Warning: " << stream << std::endl
#define ERROR_STREAM( stream ) \
std::cout << "Error: " << stream << std::endl
bool SequenceCapture::Open(std::vector<std::string>& arguments)
{
// Consuming the input arguments
bool* valid = new bool[arguments.size()];
for (size_t i = 0; i < arguments.size(); ++i)
{
valid[i] = true;
}
// Some default values
std::string input_root = "";
fx = -1; fy = -1; cx = -1; cy = -1;
std::string separator = std::string(1, fs::path::preferred_separator);
// First check if there is a root argument (so that videos and input directories could be defined more easily)
for (size_t i = 0; i < arguments.size(); ++i)
{
if (arguments[i].compare("-root") == 0)
{
input_root = arguments[i + 1] + separator;
i++;
}
if (arguments[i].compare("-inroot") == 0)
{
input_root = arguments[i + 1] + separator;
i++;
}
}
std::string input_video_file;
std::string input_sequence_directory;
int device = -1;
int cam_width = 640;
int cam_height = 480;
bool file_found = false;
for (size_t i = 0; i < arguments.size(); ++i)
{
if (!file_found && arguments[i].compare("-f") == 0)
{
input_video_file = (input_root + arguments[i + 1]);
valid[i] = false;
valid[i + 1] = false;
i++;
file_found = true;
}
else if (!file_found && arguments[i].compare("-fdir") == 0)
{
input_sequence_directory = (input_root + arguments[i + 1]);
valid[i] = false;
valid[i + 1] = false;
i++;
file_found = true;
}
else if (arguments[i].compare("-fx") == 0)
{
std::stringstream data(arguments[i + 1]);
data >> fx;
i++;
}
else if (arguments[i].compare("-fy") == 0)
{
std::stringstream data(arguments[i + 1]);
data >> fy;
i++;
}
else if (arguments[i].compare("-cx") == 0)
{
std::stringstream data(arguments[i + 1]);
data >> cx;
i++;
}
else if (arguments[i].compare("-cy") == 0)
{
std::stringstream data(arguments[i + 1]);
data >> cy;
i++;
}
else if (arguments[i].compare("-device") == 0)
{
std::stringstream data(arguments[i + 1]);
data >> device;
valid[i] = false;
valid[i + 1] = false;
i++;
}
else if (arguments[i].compare("-cam_width") == 0)
{
std::stringstream data(arguments[i + 1]);
data >> cam_width;
valid[i] = false;
valid[i + 1] = false;
i++;
}
else if (arguments[i].compare("-cam_height") == 0)
{
std::stringstream data(arguments[i + 1]);
data >> cam_height;
valid[i] = false;
valid[i + 1] = false;
i++;
}
}
for (int i = (int)arguments.size() - 1; i >= 0; --i)
{
if (!valid[i])
{
arguments.erase(arguments.begin() + i);
}
}
no_input_specified = !file_found;
// Based on what was read in open the sequence
if (device != -1)
{
return OpenWebcam(device, cam_width, cam_height, fx, fy, cx, cy);
}
if (!input_video_file.empty())
{
return OpenVideoFile(input_video_file, fx, fy, cx, cy);
}
if (!input_sequence_directory.empty())
{
return OpenImageSequence(input_sequence_directory, fx, fy, cx, cy);
}
// If no input found return false and set a flag for it
no_input_specified = true;
return false;
}
// Get current date/time, format is YYYY-MM-DD.HH:mm, useful for saving data from webcam
const std::string currentDateTime()
{
time_t rawtime;
struct tm * timeinfo;
char buffer[80];
time(&rawtime);
timeinfo = localtime(&rawtime);
strftime(buffer, sizeof(buffer), "%Y-%m-%d-%H-%M", timeinfo);
return buffer;
}
bool SequenceCapture::OpenWebcam(int device, int image_width, int image_height, float fx, float fy, float cx, float cy)
{
INFO_STREAM("Attempting to read from webcam: " << device);
no_input_specified = false;
frame_num = 0;
time_stamp = 0;
if (device < 0)
{
std::cout << "Specify a valid device" << std::endl;
return false;
}
latest_frame = cv::Mat();
latest_gray_frame = cv::Mat();
capture.open(device);
capture.set(cv::CAP_PROP_FRAME_WIDTH, image_width);
capture.set(cv::CAP_PROP_FRAME_HEIGHT, image_height);
is_webcam = true;
is_image_seq = false;
vid_length = 0;
this->frame_width = (int)capture.get(cv::CAP_PROP_FRAME_WIDTH);
this->frame_height = (int)capture.get(cv::CAP_PROP_FRAME_HEIGHT);
if (!capture.isOpened())
{
std::cout << "Failed to open the webcam" << std::endl;
return false;
}
if (frame_width != image_width || frame_height != image_height)
{
std::cout << "Failed to open the webcam with desired resolution" << std::endl;
std::cout << "Defaulting to " << frame_width << "x" << frame_height << std::endl;
}
this->fps = capture.get(cv::CAP_PROP_FPS);
// Check if fps is nan or less than 0
if (fps != fps || fps <= 0)
{
INFO_STREAM("FPS of the webcam cannot be determined, assuming 30");
fps = 30;
}
SetCameraIntrinsics(fx, fy, cx, cy);
std::string time = currentDateTime();
this->name = "webcam_" + time;
start_time = cv::getTickCount();
capturing = true;
return true;
}
void SequenceCapture::Close()
{
// Close the capturing threads
capturing = false;
// If the queue is full it will be blocked, so need to empty it
while (!capture_queue.empty())
{
capture_queue.pop();
}
if (capture_thread.joinable())
capture_thread.join();
// Release the capture objects
if (capture.isOpened())
capture.release();
}
// Destructor that releases the capture
SequenceCapture::~SequenceCapture()
{
Close();
}
bool SequenceCapture::OpenVideoFile(std::string video_file, float fx, float fy, float cx, float cy)
{
INFO_STREAM("Attempting to read from file: " << video_file);
no_input_specified = false;
frame_num = 0;
time_stamp = 0;
latest_frame = cv::Mat();
latest_gray_frame = cv::Mat();
capture.open(video_file);
if (!capture.isOpened())
{
std::cout << "Failed to open the video file at location: " << video_file << std::endl;
return false;
}
this->fps = capture.get(cv::CAP_PROP_FPS);
// Check if fps is nan or less than 0
if (fps != fps || fps <= 0)
{
WARN_STREAM("FPS of the video file cannot be determined, assuming 30");
fps = 30;
}
is_webcam = false;
is_image_seq = false;
this->frame_width = (int)capture.get(cv::CAP_PROP_FRAME_WIDTH);
this->frame_height = (int)capture.get(cv::CAP_PROP_FRAME_HEIGHT);
vid_length = (int)capture.get(cv::CAP_PROP_FRAME_COUNT);
SetCameraIntrinsics(fx, fy, cx, cy);
this->name = video_file;
capturing = true;
capture_thread = std::thread(&SequenceCapture::CaptureThread, this);
return true;
}
bool SequenceCapture::OpenImageSequence(std::string directory, float fx, float fy, float cx, float cy)
{
INFO_STREAM("Attempting to read from directory: " << directory);
no_input_specified = false;
frame_num = 0;
time_stamp = 0;
image_files.clear();
fs::path image_directory(directory);
if (!fs::exists(image_directory))
{
std::cout << "Provided directory does not exist: " << directory << std::endl;
return false;
}
std::vector<fs::path> file_in_directory;
copy(fs::directory_iterator(image_directory), fs::directory_iterator(), back_inserter(file_in_directory));
// Sort the images in the directory first
sort(file_in_directory.begin(), file_in_directory.end());
std::vector<std::string> curr_dir_files;
for (std::vector<fs::path>::const_iterator file_iterator(file_in_directory.begin()); file_iterator != file_in_directory.end(); ++file_iterator)
{
// Possible image extension .jpg and .png
if (file_iterator->extension().string().compare(".jpg") == 0 || file_iterator->extension().string().compare(".jpeg") == 0 || file_iterator->extension().string().compare(".png") == 0 || file_iterator->extension().string().compare(".bmp") == 0)
{
curr_dir_files.push_back(file_iterator->string());
}
}
image_files = curr_dir_files;
if (image_files.empty())
{
std::cout << "No images found in the directory: " << directory << std::endl;
return false;
}
// Assume all images are same size in an image sequence
cv::Mat tmp = cv::imread(image_files[0], cv::IMREAD_COLOR);
this->frame_height = tmp.size().height;
this->frame_width = tmp.size().width;
SetCameraIntrinsics(fx, fy, cx, cy);
// No fps as we have a sequence
this->fps = 0;
this->name = directory;
is_webcam = false;
is_image_seq = true;
vid_length = image_files.size();
capturing = true;
capture_thread = std::thread(&SequenceCapture::CaptureThread, this);
return true;
}
void SequenceCapture::SetCameraIntrinsics(float fx, float fy, float cx, float cy)
{
// If optical centers are not defined just use center of image
if (cx == -1)
{
this->cx = this->frame_width / 2.0f;
this->cy = this->frame_height / 2.0f;
}
else
{
this->cx = cx;
this->cy = cy;
}
// Use a rough guess-timate of focal length
if (fx == -1)
{
this->fx = 500.0f * (this->frame_width / 640.0f);
this->fy = 500.0f * (this->frame_height / 480.0f);
this->fx = (this->fx + this->fy) / 2.0f;
this->fy = this->fx;
}
else
{
this->fx = fx;
this->fy = fy;
}
}
void SequenceCapture::CaptureThread()
{
int capacity = (CAPTURE_CAPACITY * 1024 * 1024) / (4 * frame_width * frame_height);
capture_queue.set_capacity(capacity);
int frame_num_int = 0;
while(capturing)
{
double timestamp_curr = 0;
cv::Mat tmp_frame;
cv::Mat_<uchar> tmp_gray_frame;
if (!is_image_seq)
{
bool success = capture.read(tmp_frame);
if (!success)
{
// Indicate lack of success by returning an empty image
tmp_frame = cv::Mat();
capturing = false;
}
// Recording the timestamp
timestamp_curr = frame_num_int * (1.0 / fps);
}
else if (is_image_seq)
{
if (image_files.empty() || frame_num_int >= (int)image_files.size())
{
// Indicate lack of success by returning an empty image
tmp_frame = cv::Mat();
capturing = false;
}
else
{
tmp_frame = cv::imread(image_files[frame_num_int], cv::IMREAD_COLOR);
}
timestamp_curr = 0;
}
frame_num_int++;
// Set the grayscale frame
ConvertToGrayscale_8bit(tmp_frame, tmp_gray_frame);
capture_queue.push(std::make_tuple(timestamp_curr, tmp_frame, tmp_gray_frame));
}
}
cv::Mat SequenceCapture::GetNextFrame()
{
if(!is_webcam)
{
std::tuple<double, cv::Mat, cv::Mat_<uchar> > data;
data = capture_queue.pop();
time_stamp = std::get<0>(data);
latest_frame = std::get<1>(data);
latest_gray_frame = std::get<2>(data);
}
else
{
// Webcam does not use the threaded interface
bool success = capture.read(latest_frame);
time_stamp = (cv::getTickCount() - start_time) / cv::getTickFrequency();
if (!success)
{
// Indicate lack of success by returning an empty image
latest_frame = cv::Mat();
}
ConvertToGrayscale_8bit(latest_frame, latest_gray_frame);
}
frame_num++;
return latest_frame;
}
double SequenceCapture::GetProgress()
{
if (is_webcam)
{
return -1.0;
}
else
{
return (double)frame_num / (double)vid_length;
}
}
bool SequenceCapture::IsOpened()
{
if (is_webcam || !is_image_seq)
return capture.isOpened();
else
return (image_files.size() > 0 && frame_num < image_files.size());
}
cv::Mat_<uchar> SequenceCapture::GetGrayFrame()
{
return latest_gray_frame;
}

View File

@@ -0,0 +1,185 @@
///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2017, Tadas Baltrusaitis, all rights reserved.
//
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
//
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
//
// License can be found in OpenFace-license.txt
//
// * Any publications arising from the use of this software, including but
// not limited to academic journal and conference publications, technical
// reports and manuals, must cite at least one of the following works:
//
// OpenFace 2.0: Facial Behavior Analysis Toolkit
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
//
// Convolutional experts constrained local model for facial landmark detection.
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
// in Computer Vision and Pattern Recognition Workshops, 2017.
//
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
// in IEEE International. Conference on Computer Vision (ICCV), 2015
//
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
// in Facial Expression Recognition and Analysis Challenge,
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
//
///////////////////////////////////////////////////////////////////////////////
#include "stdafx_ut.h"
#include "VisualizationUtils.h"
#include "RotationHelpers.h"
namespace Utilities
{
FpsTracker::FpsTracker()
{
// Keep two seconds of history
history_length = 2;
}
void FpsTracker::AddFrame()
{
double current_time = cv::getTickCount() / cv::getTickFrequency();
frame_times.push(current_time);
DiscardOldFrames();
}
double FpsTracker::GetFPS()
{
DiscardOldFrames();
if (frame_times.size() == 0)
return 0;
double current_time = cv::getTickCount() / cv::getTickFrequency();
return ((double)frame_times.size()) / (current_time - frame_times.front());
}
void FpsTracker::DiscardOldFrames()
{
double current_time = cv::getTickCount() / cv::getTickFrequency();
// Remove old history
while (frame_times.size() > 0 && (current_time - frame_times.front()) > history_length)
frame_times.pop();
}
void DrawBox(cv::Mat image, cv::Vec6f pose, cv::Scalar color, int thickness, float fx, float fy, float cx, float cy)
{
auto edge_lines = CalculateBox(pose, fx, fy, cx, cy);
DrawBox(edge_lines, image, color, thickness);
}
std::vector<std::pair<cv::Point2f, cv::Point2f>> CalculateBox(cv::Vec6f pose, float fx, float fy, float cx, float cy)
{
float boxVerts[] = { -1, 1, -1,
1, 1, -1,
1, 1, 1,
-1, 1, 1,
1, -1, 1,
1, -1, -1,
-1, -1, -1,
-1, -1, 1 };
std::vector<std::pair<int, int>> edges;
edges.push_back(std::pair<int, int>(0, 1));
edges.push_back(std::pair<int, int>(1, 2));
edges.push_back(std::pair<int, int>(2, 3));
edges.push_back(std::pair<int, int>(0, 3));
edges.push_back(std::pair<int, int>(2, 4));
edges.push_back(std::pair<int, int>(1, 5));
edges.push_back(std::pair<int, int>(0, 6));
edges.push_back(std::pair<int, int>(3, 7));
edges.push_back(std::pair<int, int>(6, 5));
edges.push_back(std::pair<int, int>(5, 4));
edges.push_back(std::pair<int, int>(4, 7));
edges.push_back(std::pair<int, int>(7, 6));
// The size of the head is roughly 200mm x 200mm x 200mm
cv::Mat_<float> box = cv::Mat(8, 3, CV_32F, boxVerts).clone() * 100.0f;
cv::Matx33f rot = Euler2RotationMatrix(cv::Vec3f(pose[3], pose[4], pose[5]));
cv::Mat_<float> rotBox;
// Rotate the box
rotBox = cv::Mat(rot) * box.t();
rotBox = rotBox.t();
// Move the bounding box to head position
rotBox.col(0) = rotBox.col(0) + pose[0];
rotBox.col(1) = rotBox.col(1) + pose[1];
rotBox.col(2) = rotBox.col(2) + pose[2];
// draw the lines
cv::Mat_<float> rotBoxProj;
Project(rotBoxProj, rotBox, fx, fy, cx, cy);
std::vector<std::pair<cv::Point2f, cv::Point2f>> lines;
for (size_t i = 0; i < edges.size(); ++i)
{
cv::Mat_<float> begin;
cv::Mat_<float> end;
rotBoxProj.row(edges[i].first).copyTo(begin);
rotBoxProj.row(edges[i].second).copyTo(end);
cv::Point2f p1(begin.at<float>(0), begin.at<float>(1));
cv::Point2f p2(end.at<float>(0), end.at<float>(1));
lines.push_back(std::pair<cv::Point2f, cv::Point2f>(p1, p2));
}
return lines;
}
void DrawBox(const std::vector<std::pair<cv::Point2f, cv::Point2f>>& lines, cv::Mat image, cv::Scalar color, int thickness)
{
cv::Rect image_rect(0, 0, image.cols, image.rows);
for (size_t i = 0; i < lines.size(); ++i)
{
cv::Point2f p1 = lines.at(i).first;
cv::Point2f p2 = lines.at(i).second;
// Only draw the line if one of the points is inside the image
if (p1.inside(image_rect) || p2.inside(image_rect))
{
cv::line(image, p1, p2, color, thickness, cv::LINE_AA);
}
}
}
void Visualise_FHOG(const cv::Mat_<double>& descriptor, int num_rows, int num_cols, cv::Mat& visualisation)
{
// First convert to dlib format
dlib::array2d<dlib::matrix<float, 31, 1> > hog(num_rows, num_cols);
cv::MatConstIterator_<double> descriptor_it = descriptor.begin();
for (int y = 0; y < num_cols; ++y)
{
for (int x = 0; x < num_rows; ++x)
{
for (unsigned int o = 0; o < 31; ++o)
{
hog[y][x](o) = *descriptor_it++;
}
}
}
// Draw the FHOG to OpenCV format
auto fhog_vis = dlib::draw_fhog(hog);
visualisation = dlib::toMat(fhog_vis).clone();
}
}

View File

@@ -0,0 +1,455 @@
///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2017, Tadas Baltrusaitis, all rights reserved.
//
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
//
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
//
// License can be found in OpenFace-license.txt
//
// * Any publications arising from the use of this software, including but
// not limited to academic journal and conference publications, technical
// reports and manuals, must cite at least one of the following works:
//
// OpenFace 2.0: Facial Behavior Analysis Toolkit
// Tadas Baltrušaitis, Amir Zadeh, Yao Chong Lim, and Louis-Philippe Morency
// in IEEE International Conference on Automatic Face and Gesture Recognition, 2018
//
// Convolutional experts constrained local model for facial landmark detection.
// A. Zadeh, T. Baltrušaitis, and Louis-Philippe Morency,
// in Computer Vision and Pattern Recognition Workshops, 2017.
//
// Rendering of Eyes for Eye-Shape Registration and Gaze Estimation
// Erroll Wood, Tadas Baltrušaitis, Xucong Zhang, Yusuke Sugano, Peter Robinson, and Andreas Bulling
// in IEEE International. Conference on Computer Vision (ICCV), 2015
//
// Cross-dataset learning and person-specific normalisation for automatic Action Unit detection
// Tadas Baltrušaitis, Marwa Mahmoud, and Peter Robinson
// in Facial Expression Recognition and Analysis Challenge,
// IEEE International Conference on Automatic Face and Gesture Recognition, 2015
//
///////////////////////////////////////////////////////////////////////////////
#include "stdafx_ut.h"
#include "Visualizer.h"
#include "VisualizationUtils.h"
#include "RotationHelpers.h"
#include "ImageManipulationHelpers.h"
using namespace Utilities;
// For subpixel accuracy drawing
const int draw_shiftbits = 4;
const int draw_multiplier = 1 << 4;
const std::map<std::string, std::string> AUS_DESCRIPTION = {
{ "AU01", "Inner Brow Raiser " },
{ "AU02", "Outer Brow Raiser " },
{ "AU04", "Brow Lowerer " },
{ "AU05", "Upper Lid Raiser " },
{ "AU06", "Cheek Raiser " },
{ "AU07", "Lid Tightener " },
{ "AU09", "Nose Wrinkler " },
{ "AU10", "Upper Lip Raiser " },
{ "AU12", "Lip Corner Puller " },
{ "AU14", "Dimpler " },
{ "AU15", "Lip Corner Depressor" },
{ "AU17", "Chin Raiser " },
{ "AU20", "Lip stretcher " },
{ "AU23", "Lip Tightener " },
{ "AU25", "Lips part " },
{ "AU26", "Jaw Drop " },
{ "AU28", "Lip Suck " },
{ "AU45", "Blink " },
};
Visualizer::Visualizer(std::vector<std::string> arguments)
{
// By default not visualizing anything
this->vis_track = false;
this->vis_hog = false;
this->vis_align = false;
this->vis_aus = false;
for (size_t i = 0; i < arguments.size(); ++i)
{
if (arguments[i].compare("-verbose") == 0)
{
this->vis_track = true;
this->vis_align = true;
this->vis_hog = true;
this->vis_aus = true;
}
else if (arguments[i].compare("-vis-align") == 0)
{
this->vis_align = true;
}
else if (arguments[i].compare("-vis-hog") == 0)
{
this->vis_hog = true;
}
else if (arguments[i].compare("-vis-track") == 0)
{
this->vis_track = true;
}
else if (arguments[i].compare("-vis-aus") == 0)
{
this->vis_aus = true;
}
}
}
Visualizer::Visualizer(bool vis_track, bool vis_hog, bool vis_align, bool vis_aus)
{
this->vis_track = vis_track;
this->vis_hog = vis_hog;
this->vis_align = vis_align;
this->vis_aus = vis_aus;
}
// Setting the image on which to draw
void Visualizer::SetImage(const cv::Mat& canvas, float fx, float fy, float cx, float cy)
{
// Convert the image to 8 bit RGB
captured_image = canvas.clone();
this->fx = fx;
this->fy = fy;
this->cx = cx;
this->cy = cy;
// Clearing other images
hog_image = cv::Mat();
aligned_face_image = cv::Mat();
action_units_image = cv::Mat();
}
void Visualizer::SetObservationFaceAlign(const cv::Mat& aligned_face)
{
if(this->aligned_face_image.empty())
{
this->aligned_face_image = aligned_face;
}
else
{
cv::vconcat(this->aligned_face_image, aligned_face, this->aligned_face_image);
}
}
void Visualizer::SetObservationHOG(const cv::Mat_<double>& hog_descriptor, int num_cols, int num_rows)
{
if(vis_hog)
{
if (this->hog_image.empty())
{
Visualise_FHOG(hog_descriptor, num_rows, num_cols, this->hog_image);
}
else
{
cv::Mat tmp_hog;
Visualise_FHOG(hog_descriptor, num_rows, num_cols, tmp_hog);
cv::vconcat(this->hog_image, tmp_hog, this->hog_image);
}
}
}
void Visualizer::SetObservationLandmarks(const cv::Mat_<float>& landmarks_2D, double confidence, const cv::Mat_<int>& visibilities)
{
if(confidence > visualisation_boundary)
{
// Draw 2D landmarks on the image
int n = landmarks_2D.rows / 2;
// Drawing feature points
for (int i = 0; i < n; ++i)
{
if (visibilities.empty() || visibilities.at<int>(i))
{
cv::Point featurePoint(cvRound(landmarks_2D.at<float>(i) * (float)draw_multiplier), cvRound(landmarks_2D.at<float>(i + n) * (float)draw_multiplier));
// A rough heuristic for drawn point size
int thickness = (int)std::ceil(3.0* ((double)captured_image.cols) / 640.0);
int thickness_2 = (int)std::ceil(1.0* ((double)captured_image.cols) / 640.0);
cv::circle(captured_image, featurePoint, 1 * draw_multiplier, cv::Scalar(0, 0, 255), thickness, cv::LINE_AA, draw_shiftbits);
cv::circle(captured_image, featurePoint, 1 * draw_multiplier, cv::Scalar(255, 0, 0), thickness_2, cv::LINE_AA, draw_shiftbits);
}
else
{
// Draw a fainter point if the landmark is self occluded
cv::Point featurePoint(cvRound(landmarks_2D.at<float>(i) * (double)draw_multiplier), cvRound(landmarks_2D.at<float>(i + n) * (double)draw_multiplier));
// A rough heuristic for drawn point size
int thickness = (int)std::ceil(2.5* ((double)captured_image.cols) / 640.0);
int thickness_2 = (int)std::ceil(1.0* ((double)captured_image.cols) / 640.0);
cv::circle(captured_image, featurePoint, 1 * draw_multiplier, cv::Scalar(0, 0, 155), thickness, cv::LINE_AA, draw_shiftbits);
cv::circle(captured_image, featurePoint, 1 * draw_multiplier, cv::Scalar(155, 0, 0), thickness_2, cv::LINE_AA, draw_shiftbits);
}
}
}
}
void Visualizer::SetObservationPose(const cv::Vec6f& pose, double confidence)
{
// Only draw if the reliability is reasonable, the value is slightly ad-hoc
if (confidence > visualisation_boundary)
{
double vis_certainty = confidence;
if (vis_certainty > 1)
vis_certainty = 1;
// Scale from 0 to 1, to allow to indicated by colour how confident we are in the tracking
vis_certainty = (vis_certainty - visualisation_boundary) / (1 - visualisation_boundary);
// A rough heuristic for box around the face width
int thickness = (int)std::ceil(2.0* ((double)captured_image.cols) / 640.0);
// Draw it in reddish if uncertain, blueish if certain
DrawBox(captured_image, pose, cv::Scalar(vis_certainty*255.0, 0, (1 - vis_certainty) * 255), thickness, fx, fy, cx, cy);
}
}
void Visualizer::SetObservationActionUnits(const std::vector<std::pair<std::string, double> >& au_intensities,
const std::vector<std::pair<std::string, double> >& au_occurences)
{
if (au_intensities.size() > 0 || au_occurences.size() > 0)
{
std::set<std::string> au_names;
std::map<std::string, bool> occurences_map;
std::map<std::string, double> intensities_map;
for (size_t idx = 0; idx < au_intensities.size(); idx++)
{
au_names.insert(au_intensities[idx].first);
intensities_map[au_intensities[idx].first] = au_intensities[idx].second;
}
for (size_t idx = 0; idx < au_occurences.size(); idx++)
{
au_names.insert(au_occurences[idx].first);
occurences_map[au_occurences[idx].first] = au_occurences[idx].second > 0;
}
const int AU_TRACKBAR_LENGTH = 400;
const int AU_TRACKBAR_HEIGHT = 10;
const int MARGIN_X = 185;
const int MARGIN_Y = 10;
const int nb_aus = (int) au_names.size();
// Do not reinitialize
if (action_units_image.empty())
{
action_units_image = cv::Mat(nb_aus * (AU_TRACKBAR_HEIGHT + 10) + MARGIN_Y * 2, AU_TRACKBAR_LENGTH + MARGIN_X, CV_8UC3, cv::Scalar(255, 255, 255));
}
else
{
action_units_image.setTo(255);
}
std::map<std::string, std::pair<bool, double>> aus;
// first, prepare a mapping "AU name" -> { present, intensity }
for (auto au_name : au_names)
{
// Insert the intensity and AU presense (as these do not always overlap check if they exist first)
bool occurence = false;
if (occurences_map.find(au_name) != occurences_map.end())
{
occurence = occurences_map[au_name] != 0;
}
else
{
// If we do not have an occurence label, trust the intensity one
occurence = intensities_map[au_name] > 1;
}
double intensity = 0.0;
if (intensities_map.find(au_name) != intensities_map.end())
{
intensity = intensities_map[au_name];
}
else
{
// If we do not have an intensity label, trust the occurence one
intensity = occurences_map[au_name] == 0 ? 0 : 5;
}
aus[au_name] = std::make_pair(occurence, intensity);
}
// then, build the graph
unsigned int idx = 0;
for (auto& au : aus)
{
std::string name = au.first;
bool present = au.second.first;
double intensity = au.second.second;
int offset = MARGIN_Y + idx * (AU_TRACKBAR_HEIGHT + 10);
std::ostringstream au_i;
au_i << std::setprecision(2) << std::setw(4) << std::fixed << intensity;
cv::putText(action_units_image, name, cv::Point(10, offset + 10), cv::FONT_HERSHEY_SIMPLEX, 0.5, CV_RGB(present ? 0 : 200, 0, 0), 1, cv::LINE_AA);
cv::putText(action_units_image, AUS_DESCRIPTION.at(name), cv::Point(55, offset + 10), cv::FONT_HERSHEY_SIMPLEX, 0.3, CV_RGB(0, 0, 0), 1, cv::LINE_AA);
if (present)
{
cv::putText(action_units_image, au_i.str(), cv::Point(160, offset + 10), cv::FONT_HERSHEY_SIMPLEX, 0.3, CV_RGB(0, 100, 0), 1, cv::LINE_AA);
cv::rectangle(action_units_image, cv::Point(MARGIN_X, offset),
cv::Point((int)(MARGIN_X + AU_TRACKBAR_LENGTH * intensity / 5.0), offset + AU_TRACKBAR_HEIGHT),
cv::Scalar(128, 128, 128),
cv::FILLED);
}
else
{
cv::putText(action_units_image, "0.00", cv::Point(160, offset + 10), cv::FONT_HERSHEY_SIMPLEX, 0.3, CV_RGB(0, 0, 0), 1, cv::LINE_AA);
}
idx++;
}
}
}
// Eye gaze infomration drawing, first of eye landmarks then of gaze
void Visualizer::SetObservationGaze(const cv::Point3f& gaze_direction0, const cv::Point3f& gaze_direction1, const std::vector<cv::Point2f>& eye_landmarks2d, const std::vector<cv::Point3f>& eye_landmarks3d, double confidence)
{
if(confidence > visualisation_boundary)
{
if (eye_landmarks2d.size() > 0)
{
// First draw the eye region landmarks
for (size_t i = 0; i < eye_landmarks2d.size(); ++i)
{
cv::Point featurePoint(cvRound(eye_landmarks2d[i].x * (double)draw_multiplier), cvRound(eye_landmarks2d[i].y * (double)draw_multiplier));
// A rough heuristic for drawn point size
int thickness = 1;
int thickness_2 = 1;
size_t next_point = i + 1;
if (i == 7)
next_point = 0;
if (i == 19)
next_point = 8;
if (i == 27)
next_point = 20;
if (i == 7 + 28)
next_point = 0 + 28;
if (i == 19 + 28)
next_point = 8 + 28;
if (i == 27 + 28)
next_point = 20 + 28;
cv::Point nextFeaturePoint(cvRound(eye_landmarks2d[next_point].x * (double)draw_multiplier), cvRound(eye_landmarks2d[next_point].y * (double)draw_multiplier));
if ((i < 28 && (i < 8 || i > 19)) || (i >= 28 && (i < 8 + 28 || i > 19 + 28)))
cv::line(captured_image, featurePoint, nextFeaturePoint, cv::Scalar(255, 0, 0), thickness_2, cv::LINE_AA, draw_shiftbits);
else
cv::line(captured_image, featurePoint, nextFeaturePoint, cv::Scalar(0, 0, 255), thickness_2, cv::LINE_AA, draw_shiftbits);
}
// Now draw the gaze lines themselves
cv::Mat cameraMat = (cv::Mat_<float>(3, 3) << fx, 0, cx, 0, fy, cy, 0, 0, 0);
// Grabbing the pupil location, to draw eye gaze need to know where the pupil is
cv::Point3f pupil_left(0, 0, 0);
cv::Point3f pupil_right(0, 0, 0);
for (size_t i = 0; i < 8; ++i)
{
pupil_left = pupil_left + eye_landmarks3d[i];
pupil_right = pupil_right + eye_landmarks3d[i + eye_landmarks3d.size()/2];
}
pupil_left = pupil_left / 8;
pupil_right = pupil_right / 8;
std::vector<cv::Point3f> points_left;
points_left.push_back(cv::Point3f(pupil_left));
points_left.push_back(cv::Point3f(pupil_left) + cv::Point3f(gaze_direction0)*50.0);
std::vector<cv::Point3f> points_right;
points_right.push_back(cv::Point3f(pupil_right));
points_right.push_back(cv::Point3f(pupil_right) + cv::Point3f(gaze_direction1)*50.0);
cv::Mat_<float> proj_points;
cv::Mat_<float> mesh_0 = (cv::Mat_<float>(2, 3) << points_left[0].x, points_left[0].y, points_left[0].z, points_left[1].x, points_left[1].y, points_left[1].z);
Project(proj_points, mesh_0, fx, fy, cx, cy);
cv::line(captured_image, cv::Point(cvRound(proj_points.at<float>(0, 0) * (float)draw_multiplier), cvRound(proj_points.at<float>(0, 1) * (float)draw_multiplier)),
cv::Point(cvRound(proj_points.at<float>(1, 0) * (float)draw_multiplier), cvRound(proj_points.at<float>(1, 1) * (float)draw_multiplier)), cv::Scalar(110, 220, 0), 2, cv::LINE_AA, draw_shiftbits);
cv::Mat_<float> mesh_1 = (cv::Mat_<float>(2, 3) << points_right[0].x, points_right[0].y, points_right[0].z, points_right[1].x, points_right[1].y, points_right[1].z);
Project(proj_points, mesh_1, fx, fy, cx, cy);
cv::line(captured_image, cv::Point(cvRound(proj_points.at<float>(0, 0) * (float)draw_multiplier), cvRound(proj_points.at<float>(0, 1) * (float)draw_multiplier)),
cv::Point(cvRound(proj_points.at<float>(1, 0) * (float)draw_multiplier), cvRound(proj_points.at<float>(1, 1) * (float)draw_multiplier)), cv::Scalar(110, 220, 0), 2, cv::LINE_AA, draw_shiftbits);
}
}
}
void Visualizer::SetFps(double fps)
{
// Write out the framerate on the image before displaying it
char fpsC[255];
std::sprintf(fpsC, "%d", (int)fps);
std::string fpsSt("FPS:");
fpsSt += fpsC;
cv::putText(captured_image, fpsSt, cv::Point(10, 20), cv::FONT_HERSHEY_SIMPLEX, 0.5, CV_RGB(255, 0, 0), 1, cv::LINE_AA);
}
char Visualizer::ShowObservation()
{
bool ovservation_shown = false;
if (vis_align && !aligned_face_image.empty())
{
cv::imshow("sim_warp", aligned_face_image);
ovservation_shown = true;
}
if (vis_hog && !hog_image.empty())
{
cv::imshow("hog", hog_image);
ovservation_shown = true;
}
if (vis_aus && !action_units_image.empty())
{
cv::imshow("action units", action_units_image);
ovservation_shown = true;
}
if (vis_track)
{
cv::imshow("tracking result", captured_image);
ovservation_shown = true;
}
// Only perform waitKey if something was shown
char result = '\0';
if (ovservation_shown)
{
result = cv::waitKey(1);
}
return result;
}
cv::Mat Visualizer::GetVisImage()
{
return captured_image;
}
cv::Mat Visualizer::GetHOGVis()
{
return hog_image;
}

View File

@@ -0,0 +1,12 @@
///////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2017, Carnegie Mellon University and University of Cambridge,
// all rights reserved.
//
// ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
//
// BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT.
// IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
//
// License can be found in OpenFace-license.txt
//
#include "stdafx_ut.h"