open source pkg v1

This commit is contained in:
Vijay Yadev
2020-08-04 19:12:31 -04:00
parent bef213dba9
commit c389fc2c47
3708 changed files with 1624220 additions and 1 deletions

View File

@@ -0,0 +1,106 @@
function [data_train, labels_train, data_devel, labels_devel, raw_devel, PC, means_norm, stds_norm, vid_ids_devel_string] = ...
    Prepare_HOG_AU_data_generic(train_users, devel_users, au_train, rest_aus, semaine_dir, feature_dir)
% Prepare_HOG_AU_data_generic  Assemble train/devel features and labels for one AU.
%
%   Loads the labels for AU au_train and for the AUs in rest_aus, reads the
%   per-frame HOG (appearance) and geometry features, subsamples the training
%   frames, normalises the HOG part, projects it through a PCA basis loaded
%   from disk and appends the geometry features unchanged.
%
%   Inputs
%     train_users, devel_users - recording identifiers handed to the label and
%                                feature readers
%     au_train    - scalar AU number to train for; also selects the PCA file
%     rest_aus    - the other AU numbers, used to pick varied negative frames
%     semaine_dir - forwarded to extract_SEMAINE_labels
%     feature_dir - forwarded to Read_HOG_files / Read_geom_files
%
%   Outputs
%     data_train, labels_train - subsampled training features and labels
%     data_devel, labels_devel - development features and labels (not
%                                subsampled and not filtered by validity)
%     raw_devel   - development HOG and geometry features before
%                   normalisation / projection
%     PC, means_norm, stds_norm - PCA basis and normalisation loaded from the
%                   PCA file, padded (identity / 0 / 1) so that they also cover
%                   the appended geometry columns
%     vid_ids_devel_string - third output of Read_HOG_files for devel_users
%
%   Raises Prepare_HOG_AU_data_generic:unsupportedAU when au_train matches
%   none of the PCA model ranges (e.g. AU 8).

    addpath(genpath('../data extraction/'));

    %% Extract training data
    % First extracting the labels
    [ labels_train, valid_ids_train, vid_ids_train ] = extract_SEMAINE_labels(semaine_dir, train_users, au_train);
    [ labels_other, ~, ~ ] = extract_SEMAINE_labels(semaine_dir, train_users, rest_aus);
    labels_other = cat(1, labels_other{:});

    % Reading in the HOG data (of only relevant frames)
    [train_appearance_data, valid_ids_train_hog, vid_ids_train_string] = Read_HOG_files(train_users, vid_ids_train, feature_dir);
    [train_geom_data] = Read_geom_files(train_users, vid_ids_train, feature_dir);

    %% Subsample the data to make training quicker
    labels_train = cat(1, labels_train{:});
    valid_ids_train = logical(cat(1, valid_ids_train{:}));

    % Every positive frame is kept
    reduced_inds = false(size(labels_train,1),1);
    reduced_inds(labels_train == 1) = true;

    % Aim for as many negatives as positives: an equal share per other AU,
    % topped up with neutral frames in the final iteration
    pos_count = sum(labels_train == 1);
    num_other_aus = size(labels_other, 2);
    num_other = floor(pos_count / num_other_aus);
    inds_all = 1:size(labels_train,1);

    for i=1:num_other_aus+1

        if(i > num_other_aus)
            % fill the rest with a proportion of neutral
            inds_other = inds_all(sum(labels_other,2)==0 & ~labels_train );
            num_other_i = min(numel(inds_other), pos_count - sum(labels_train(reduced_inds,:)==0));
        else
            % take a proportion of each other AU
            inds_other = inds_all(labels_other(:, i) & ~labels_train );
            num_other_i = min(numel(inds_other), num_other);
        end

        % Pick the frames evenly spread over the candidates
        inds_other_to_keep = inds_other(round(linspace(1, numel(inds_other), num_other_i)));
        reduced_inds(inds_other_to_keep) = true;

    end

    % Remove invalid ids based on CLM failing or AU not being labelled
    % (done after balancing, so the final set can hold fewer negatives)
    reduced_inds(~valid_ids_train) = false;
    reduced_inds(~valid_ids_train_hog) = false;

    labels_other = labels_other(reduced_inds, :);
    labels_train = labels_train(reduced_inds,:);
    train_appearance_data = train_appearance_data(reduced_inds,:);
    train_geom_data = train_geom_data(reduced_inds,:);
    vid_ids_train_string = vid_ids_train_string(reduced_inds,:);

    %% Extract devel data
    % First extracting the labels
    [ labels_devel, ~, vid_ids_devel ] = extract_SEMAINE_labels(semaine_dir, devel_users, au_train);

    % Reading in the HOG data (of only relevant frames)
    [devel_appearance_data, ~, vid_ids_devel_string] = Read_HOG_files(devel_users, vid_ids_devel, feature_dir);
    [devel_geom_data] = Read_geom_files(devel_users, vid_ids_devel, feature_dir);

    labels_devel = cat(1, labels_devel{:});

    %% Performing zone specific masking
    if(au_train < 8 || au_train == 43 || au_train == 45)
        % upper face AUs ignore bottom face
        pca_file = '../../pca_generation/generic_face_upper.mat';
    elseif(au_train > 9)
        % lower face AUs ignore upper face and the sides
        pca_file = '../../pca_generation/generic_face_lower.mat';
    elseif(au_train == 9)
        % Central face model
        pca_file = '../../pca_generation/generic_face_rigid.mat';
    else
        % Previously this fell through and failed later with an
        % undefined-variable error on means_norm
        error('Prepare_HOG_AU_data_generic:unsupportedAU', ...
            'No PCA model is defined for AU %g', au_train);
    end

    % Load into a struct so only the three needed variables enter this
    % workspace and nothing else in the file can shadow a local
    pca_model = load(pca_file, 'PC', 'means_norm', 'stds_norm');
    PC = pca_model.PC;
    means_norm = pca_model.means_norm;
    stds_norm = pca_model.stds_norm;

    %% Normalise, project and append geometry
    % Grab all data for validation as want good params for all the data
    raw_devel = cat(2, devel_appearance_data, devel_geom_data);

    devel_appearance_data = bsxfun(@times, bsxfun(@plus, devel_appearance_data, -means_norm), 1./stds_norm);
    train_appearance_data = bsxfun(@times, bsxfun(@plus, train_appearance_data, -means_norm), 1./stds_norm);

    data_train = train_appearance_data * PC;
    data_devel = devel_appearance_data * PC;

    data_train = cat(2, data_train, train_geom_data);
    data_devel = cat(2, data_devel, devel_geom_data);

    % Extend the projection so that [HOG, geometry] * PC reproduces the
    % features above: identity block, zero mean and unit scale for geometry
    num_geom = size(train_geom_data, 2);
    PC_n = zeros(size(PC)+num_geom);
    PC_n(1:size(PC,1), 1:size(PC,2)) = PC;
    PC_n(size(PC,1)+1:end, size(PC,2)+1:end) = eye(num_geom);
    PC = PC_n;

    means_norm = cat(2, means_norm, zeros(1, num_geom));
    stds_norm = cat(2, stds_norm, ones(1, num_geom));

end

View File

@@ -0,0 +1,106 @@
function [data_train, labels_train, data_devel, labels_devel, raw_devel, PC, means_norm, stds_norm, vid_ids_devel_string] = ...
    Prepare_HOG_AU_data_generic_dynamic(train_users, devel_users, au_train, rest_aus, semaine_dir, feature_dir)
% Prepare_HOG_AU_data_generic_dynamic  Assemble train/devel data for one AU (dynamic features).
%
%   Same pipeline as the static variant but the features come from
%   Read_HOG_files_dynamic and Read_geom_files_dynamic. Loads the labels for
%   AU au_train and for the AUs in rest_aus, subsamples the training frames,
%   normalises the HOG part, projects it through a PCA basis loaded from disk
%   and appends the geometry features unchanged.
%
%   Inputs
%     train_users, devel_users - recording identifiers handed to the label and
%                                feature readers
%     au_train    - scalar AU number to train for; also selects the PCA file
%     rest_aus    - the other AU numbers, used to pick varied negative frames
%     semaine_dir - forwarded to extract_SEMAINE_labels
%     feature_dir - forwarded to the *_dynamic feature readers
%
%   Outputs
%     data_train, labels_train - subsampled training features and labels
%     data_devel, labels_devel - development features and labels (not
%                                subsampled and not filtered by validity)
%     raw_devel   - development HOG and geometry features before
%                   normalisation / projection
%     PC, means_norm, stds_norm - PCA basis and normalisation loaded from the
%                   PCA file, padded (identity / 0 / 1) so that they also cover
%                   the appended geometry columns
%     vid_ids_devel_string - third output of Read_HOG_files_dynamic for
%                   devel_users
%
%   Raises Prepare_HOG_AU_data_generic_dynamic:unsupportedAU when au_train
%   matches none of the PCA model ranges (e.g. AU 8).

    addpath(genpath('../data extraction/'));

    %% Extract training data
    % First extracting the labels
    [ labels_train, valid_ids_train, vid_ids_train ] = extract_SEMAINE_labels(semaine_dir, train_users, au_train);
    [ labels_other, ~, ~ ] = extract_SEMAINE_labels(semaine_dir, train_users, rest_aus);
    labels_other = cat(1, labels_other{:});

    % Reading in the HOG data (of only relevant frames)
    [train_appearance_data, valid_ids_train_hog, vid_ids_train_string] = Read_HOG_files_dynamic(train_users, vid_ids_train, feature_dir);
    [train_geom_data] = Read_geom_files_dynamic(train_users, vid_ids_train, feature_dir);

    %% Subsample the data to make training quicker
    labels_train = cat(1, labels_train{:});
    valid_ids_train = logical(cat(1, valid_ids_train{:}));

    % Every positive frame is kept
    reduced_inds = false(size(labels_train,1),1);
    reduced_inds(labels_train == 1) = true;

    % Aim for as many negatives as positives: an equal share per other AU,
    % topped up with neutral frames in the final iteration
    pos_count = sum(labels_train == 1);
    num_other_aus = size(labels_other, 2);
    num_other = floor(pos_count / num_other_aus);
    inds_all = 1:size(labels_train,1);

    for i=1:num_other_aus+1

        if(i > num_other_aus)
            % fill the rest with a proportion of neutral
            inds_other = inds_all(sum(labels_other,2)==0 & ~labels_train );
            num_other_i = min(numel(inds_other), pos_count - sum(labels_train(reduced_inds,:)==0));
        else
            % take a proportion of each other AU
            inds_other = inds_all(labels_other(:, i) & ~labels_train );
            num_other_i = min(numel(inds_other), num_other);
        end

        % Pick the frames evenly spread over the candidates
        inds_other_to_keep = inds_other(round(linspace(1, numel(inds_other), num_other_i)));
        reduced_inds(inds_other_to_keep) = true;

    end

    % Remove invalid ids based on CLM failing or AU not being labelled
    % (done after balancing, so the final set can hold fewer negatives)
    reduced_inds(~valid_ids_train) = false;
    reduced_inds(~valid_ids_train_hog) = false;

    labels_other = labels_other(reduced_inds, :);
    labels_train = labels_train(reduced_inds,:);
    train_appearance_data = train_appearance_data(reduced_inds,:);
    train_geom_data = train_geom_data(reduced_inds,:);
    vid_ids_train_string = vid_ids_train_string(reduced_inds,:);

    %% Extract devel data
    % First extracting the labels
    [ labels_devel, ~, vid_ids_devel ] = extract_SEMAINE_labels(semaine_dir, devel_users, au_train);

    % Reading in the HOG data (of only relevant frames)
    [devel_appearance_data, ~, vid_ids_devel_string] = Read_HOG_files_dynamic(devel_users, vid_ids_devel, feature_dir);
    [devel_geom_data] = Read_geom_files_dynamic(devel_users, vid_ids_devel, feature_dir);

    labels_devel = cat(1, labels_devel{:});

    %% Performing zone specific masking
    if(au_train < 8 || au_train == 43 || au_train == 45)
        % upper face AUs ignore bottom face
        pca_file = '../../pca_generation/generic_face_upper.mat';
    elseif(au_train > 9)
        % lower face AUs ignore upper face and the sides
        pca_file = '../../pca_generation/generic_face_lower.mat';
    elseif(au_train == 9)
        % Central face model
        pca_file = '../../pca_generation/generic_face_rigid.mat';
    else
        % Previously this fell through and failed later with an
        % undefined-variable error on means_norm
        error('Prepare_HOG_AU_data_generic_dynamic:unsupportedAU', ...
            'No PCA model is defined for AU %g', au_train);
    end

    % Load into a struct so only the three needed variables enter this
    % workspace and nothing else in the file can shadow a local
    pca_model = load(pca_file, 'PC', 'means_norm', 'stds_norm');
    PC = pca_model.PC;
    means_norm = pca_model.means_norm;
    stds_norm = pca_model.stds_norm;

    %% Normalise, project and append geometry
    % Grab all data for validation as want good params for all the data
    raw_devel = cat(2, devel_appearance_data, devel_geom_data);

    devel_appearance_data = bsxfun(@times, bsxfun(@plus, devel_appearance_data, -means_norm), 1./stds_norm);
    train_appearance_data = bsxfun(@times, bsxfun(@plus, train_appearance_data, -means_norm), 1./stds_norm);

    data_train = train_appearance_data * PC;
    data_devel = devel_appearance_data * PC;

    data_train = cat(2, data_train, train_geom_data);
    data_devel = cat(2, data_devel, devel_geom_data);

    % Extend the projection so that [HOG, geometry] * PC reproduces the
    % features above: identity block, zero mean and unit scale for geometry
    num_geom = size(train_geom_data, 2);
    PC_n = zeros(size(PC)+num_geom);
    PC_n(1:size(PC,1), 1:size(PC,2)) = PC;
    PC_n(size(PC,1)+1:end, size(PC,2)+1:end) = eye(num_geom);
    PC = PC_n;

    means_norm = cat(2, means_norm, zeros(1, num_geom));
    stds_norm = cat(2, stds_norm, ones(1, num_geom));

end

View File

@@ -0,0 +1,57 @@
function [hog_data, valid_inds, vid_id] = Read_HOG_files(users, vid_ids, hog_data_dir)
% Read_HOG_files  Read a frame range of HOG descriptors for each recording.
%
%   Inputs
%     users        - cell array of recording names; the file read for each is
%                    <hog_data_dir>/train/<name>.hog, or the same name under
%                    /devel/ when the train file does not exist
%     vid_ids      - one row per user; vid_ids(i,2)-vid_ids(i,1) frames are
%                    read after skipping vid_ids(i,1)-1 frame records
%     hog_data_dir - root folder of the feature files
%
%   Outputs
%     hog_data   - one row per frame, HOG features only
%     valid_inds - logical column, true where the per-frame flag stored in
%                  front of the features is > 0
%     vid_id     - cell column with the recording name of every frame read
%
%   File layout as read here: every frame record holds
%   4 + num_rows*num_cols*num_chan 4-byte values. The first three are
%   num_cols, num_rows, num_chan (read as int32 from the first record), the
%   fourth is the validity flag, the rest are the features.

    hog_data = [];
    vid_id = {};
    feats_filled = 0;

    for i=1:numel(users)

        hog_file = [hog_data_dir, '/train/' users{i} '.hog'];
        if(~exist(hog_file, 'file'))
            hog_file = [hog_data_dir, '/devel/' users{i} '.hog'];
        end

        f = fopen(hog_file, 'r');
        if(f < 0)
            error('Read_HOG_files:cannotOpen', 'Could not open HOG file %s', hog_file);
        end

        num_cols = fread(f, 1, 'int32');
        if(isempty(num_cols))
            % Empty file: stop reading, but do not leak the handle
            fclose(f);
            break;
        end

        num_rows = fread(f, 1, 'int32');
        num_chan = fread(f, 1, 'int32');

        % Values per frame record (3 header values + validity flag + features)
        record_len = 4 + num_rows * num_cols * num_chan;
        % Columns kept per frame: validity flag + features
        num_feats = num_rows * num_cols * num_chan + 1;

        % Read only the relevant bits
        % Skip to the right start element (1 indexed). The stride must be the
        % full record length (rows*cols, not rows*rows) or non-square HOG
        % grids seek into the middle of a record.
        % NOTE(review): Read_geom_files takes rows vid_ids(i,1)+1:vid_ids(i,2),
        % i.e. it skips one frame more than this seek does - confirm the two
        % are meant to differ.
        num_frames = vid_ids(i,2) - vid_ids(i,1);
        fseek(f, 4*record_len*(vid_ids(i,1)-1), 'bof');
        feature_vec = fread(f, [record_len, num_frames], 'float32');
        fclose(f);

        % Drop the three header values, keep validity flag + features
        curr_data = feature_vec(4:end,:)';
        curr_ind = size(curr_data,1);

        vid_id_curr = cell(curr_ind,1);
        vid_id_curr(:) = users(i);
        vid_id = cat(1, vid_id, vid_id_curr);

        % Preallocate for all users once, assuming every file has the same
        % feature size as the first one
        if(i==1)
            hog_data = zeros(sum(vid_ids(:,2)-vid_ids(:,1)), num_feats);
        end

        hog_data(feats_filled+1:feats_filled+curr_ind,:) = curr_data;
        feats_filled = feats_filled + curr_ind;

    end

    if(isempty(hog_data))
        % Nothing was read (no users, or the first file was empty)
        valid_inds = false(0,1);
        hog_data = zeros(0,0);
        return;
    end

    valid_inds = hog_data(:,1) > 0;
    hog_data = hog_data(:,2:end);

end

View File

@@ -0,0 +1,59 @@
function [hog_data, valid_inds, vid_id] = Read_HOG_files_dynamic(users, vid_ids, hog_data_dir)
% Read_HOG_files_dynamic  Read HOG descriptors and subtract each recording's median.
%
%   Same reader as the static variant, but the per-feature median over the
%   frames read from a recording is subtracted from that recording's features
%   (the validity flag column is left untouched).
%
%   Inputs
%     users        - cell array of recording names; the file read for each is
%                    <hog_data_dir>/train/<name>.hog, or the same name under
%                    /devel/ when the train file does not exist
%     vid_ids      - one row per user; vid_ids(i,2)-vid_ids(i,1) frames are
%                    read after skipping vid_ids(i,1)-1 frame records
%     hog_data_dir - root folder of the feature files
%
%   Outputs
%     hog_data   - one row per frame, median-subtracted HOG features
%     valid_inds - logical column, true where the per-frame flag stored in
%                  front of the features is > 0
%     vid_id     - cell column with the recording name of every frame read
%
%   File layout as read here: every frame record holds
%   4 + num_rows*num_cols*num_chan 4-byte values. The first three are
%   num_cols, num_rows, num_chan (read as int32 from the first record), the
%   fourth is the validity flag, the rest are the features.

    hog_data = [];
    vid_id = {};
    feats_filled = 0;

    for i=1:numel(users)

        hog_file = [hog_data_dir, '/train/' users{i} '.hog'];
        if(~exist(hog_file, 'file'))
            hog_file = [hog_data_dir, '/devel/' users{i} '.hog'];
        end

        f = fopen(hog_file, 'r');
        if(f < 0)
            error('Read_HOG_files_dynamic:cannotOpen', 'Could not open HOG file %s', hog_file);
        end

        num_cols = fread(f, 1, 'int32');
        if(isempty(num_cols))
            % Empty file: stop reading, but do not leak the handle
            fclose(f);
            break;
        end

        num_rows = fread(f, 1, 'int32');
        num_chan = fread(f, 1, 'int32');

        % Values per frame record (3 header values + validity flag + features)
        record_len = 4 + num_rows * num_cols * num_chan;
        % Columns kept per frame: validity flag + features
        num_feats = num_rows * num_cols * num_chan + 1;

        % Read only the relevant bits
        % Skip to the right start element (1 indexed). The stride must be the
        % full record length (rows*cols, not rows*rows) or non-square HOG
        % grids seek into the middle of a record.
        % NOTE(review): Read_geom_files_dynamic takes rows
        % vid_ids(i,1)+1:vid_ids(i,2), i.e. it skips one frame more than this
        % seek does - confirm the two are meant to differ.
        num_frames = vid_ids(i,2) - vid_ids(i,1);
        fseek(f, 4*record_len*(vid_ids(i,1)-1), 'bof');
        feature_vec = fread(f, [record_len, num_frames], 'float32');
        fclose(f);

        % Drop the three header values, keep validity flag + features
        curr_data = feature_vec(4:end,:)';
        curr_ind = size(curr_data,1);

        vid_id_curr = cell(curr_ind,1);
        vid_id_curr(:) = users(i);
        vid_id = cat(1, vid_id, vid_id_curr);

        % Preallocate for all users once, assuming every file has the same
        % feature size as the first one
        if(i==1)
            hog_data = zeros(sum(vid_ids(:,2)-vid_ids(:,1)), num_feats);
        end

        % Subtract the per-feature median over this recording's frames.
        % The dimension is given explicitly: with a single frame, median(x)
        % would collapse the row to one scalar instead.
        curr_data(:,2:end) = bsxfun(@plus, curr_data(:,2:end), -median(curr_data(:,2:end), 1));

        hog_data(feats_filled+1:feats_filled+curr_ind,:) = curr_data;
        feats_filled = feats_filled + curr_ind;

    end

    if(isempty(hog_data))
        % Nothing was read (no users, or the first file was empty)
        valid_inds = false(0,1);
        hog_data = zeros(0,0);
        return;
    end

    valid_inds = hog_data(:,1) > 0;
    hog_data = hog_data(:,2:end);

end

View File

@@ -0,0 +1,47 @@
function [geom_data, valid_ids] = Read_geom_files(users, vid_ids, hog_data_dir)
% Read_geom_files  Read per-frame shape parameters and derived landmark locations.
%
%   Inputs
%     users        - cell array of recording names; the files used are
%                    <hog_data_dir>/train/<name>.csv (or /devel/ when the
%                    train csv does not exist) and a cache next to it,
%                    <name>.params.mat
%     vid_ids      - one row per user; csv data rows
%                    vid_ids(i,1)+1 : vid_ids(i,2) are used
%     hog_data_dir - root folder of the feature files
%
%   Outputs
%     geom_data - one row per frame: [res * V', res], where res are the
%                 'p_*' csv columns without the first six (global) ones and V
%                 comes from pdm_68_aligned_wild.mat
%     valid_ids - logical column aligned with geom_data rows, true where the
%                 'success' csv column is > 0.7
%
%   NOTE(review): the .params.mat cache is keyed by recording name only, so a
%   cache written for a different vid_ids range would be reused as is.

    geom_data = [];
    valid_ids = [];

    % Only V is needed from the point distribution model file
    pdm = load('../../pca_generation/pdm_68_aligned_wild.mat', 'V');
    V = pdm.V;

    % Column selectors, worked out from the first csv header that is parsed
    valid_ind = [];
    shape_inds = [];

    for i=1:numel(users)

        geom_file = [hog_data_dir, '/train/' users{i} '.csv'];
        m_file = [hog_data_dir, '/train/' users{i} '.params.mat'];
        if(~exist(geom_file, 'file'))
            geom_file = [hog_data_dir, '/devel/' users{i} '.csv'];
            m_file = [hog_data_dir, '/devel/' users{i} '.params.mat'];
        end

        frame_rows = vid_ids(i,1)+1:vid_ids(i,2);

        if(~exist(m_file, 'file'))

            if(isempty(shape_inds))
                tab = readtable(geom_file);
                column_names = tab.Properties.VariableNames;
                % Columns whose name starts with 'success' / 'p_'
                valid_ind = cellfun(@(x) ~isempty(x) && x==1, strfind(column_names, 'success'));
                shape_inds = cellfun(@(x) ~isempty(x) && x==1, strfind(column_names, 'p_'));
            end

            raw = dlmread(geom_file, ',', 1, 0);

            % Restrict validity to the same frames as the parameters, so
            % valid_ids lines up row by row with geom_data
            valid = raw(frame_rows, valid_ind) > 0.7;

            res = raw(frame_rows, shape_inds);
            % Do not consider global parameters
            res = res(:, 7:end);

            save(m_file, 'res', 'valid');
        else
            cached = load(m_file, 'res', 'valid');
            res = cached.res;
            valid = cached.valid;

            % Caches written before validity was restricted to the frame
            % range hold one flag per csv row; cut them down here
            if(size(valid,1) ~= size(res,1) && size(valid,1) >= vid_ids(i,2))
                valid = valid(frame_rows, :);
            end
        end

        actual_locs = res * V';
        res = cat(2, actual_locs, res);

        valid_ids = cat(1, valid_ids, valid);
        geom_data = cat(1, geom_data, res);

    end
end

View File

@@ -0,0 +1,49 @@
function [geom_data, valid_ids] = Read_geom_files_dynamic(users, vid_ids, hog_data_dir)
% Read_geom_files_dynamic  Read shape features and subtract each recording's median.
%
%   Same reader as the static variant, but the per-column median over a
%   recording's frames is subtracted from that recording's features.
%
%   Inputs
%     users        - cell array of recording names; the files used are
%                    <hog_data_dir>/train/<name>.csv (or /devel/ when the
%                    train csv does not exist) and a cache next to it,
%                    <name>.params.mat
%     vid_ids      - one row per user; csv data rows
%                    vid_ids(i,1)+1 : vid_ids(i,2) are used
%     hog_data_dir - root folder of the feature files
%
%   Outputs
%     geom_data - one row per frame: [res * V', res] minus the recording's
%                 per-column median, where res are the 'p_*' csv columns
%                 without the first six (global) ones and V comes from
%                 pdm_68_aligned_wild.mat
%     valid_ids - logical column aligned with geom_data rows, true where the
%                 'success' csv column is > 0.7
%
%   NOTE(review): the .params.mat cache is keyed by recording name only, so a
%   cache written for a different vid_ids range would be reused as is.

    geom_data = [];
    valid_ids = [];

    % Only V is needed from the point distribution model file
    pdm = load('../../pca_generation/pdm_68_aligned_wild.mat', 'V');
    V = pdm.V;

    % Column selectors, worked out from the first csv header that is parsed
    valid_ind = [];
    shape_inds = [];

    for i=1:numel(users)

        geom_file = [hog_data_dir, '/train/' users{i} '.csv'];
        m_file = [hog_data_dir, '/train/' users{i} '.params.mat'];
        if(~exist(geom_file, 'file'))
            geom_file = [hog_data_dir, '/devel/' users{i} '.csv'];
            m_file = [hog_data_dir, '/devel/' users{i} '.params.mat'];
        end

        frame_rows = vid_ids(i,1)+1:vid_ids(i,2);

        if(~exist(m_file, 'file'))

            if(isempty(shape_inds))
                tab = readtable(geom_file);
                column_names = tab.Properties.VariableNames;
                % Columns whose name starts with 'success' / 'p_'
                valid_ind = cellfun(@(x) ~isempty(x) && x==1, strfind(column_names, 'success'));
                shape_inds = cellfun(@(x) ~isempty(x) && x==1, strfind(column_names, 'p_'));
            end

            raw = dlmread(geom_file, ',', 1, 0);

            % Restrict validity to the same frames as the parameters, so
            % valid_ids lines up row by row with geom_data
            valid = raw(frame_rows, valid_ind) > 0.7;

            res = raw(frame_rows, shape_inds);
            % Do not consider global parameters
            res = res(:, 7:end);

            save(m_file, 'res', 'valid');
        else
            cached = load(m_file, 'res', 'valid');
            res = cached.res;
            valid = cached.valid;

            % Caches written before validity was restricted to the frame
            % range hold one flag per csv row; cut them down here
            if(size(valid,1) ~= size(res,1) && size(valid,1) >= vid_ids(i,2))
                valid = valid(frame_rows, :);
            end
        end

        actual_locs = res * V';
        res = cat(2, actual_locs, res);

        valid_ids = cat(1, valid_ids, valid);

        % Subtract the per-column median over this recording's frames.
        % The dimension is given explicitly: with a single frame, median(res)
        % would collapse the row to one scalar instead.
        res = bsxfun(@plus, res, -median(res, 1));

        geom_data = cat(1, geom_data, res);

    end
end

View File

@@ -0,0 +1,77 @@
function Script_HOG_SVM_train()
% Script_HOG_SVM_train  Train and evaluate one static linear SVM per AU.
%
%   For every AU in aus (set by the shared_defs script): prepares the data,
%   grid-searches the hyper-parameters on the development set, retrains with
%   the best ones, writes the model to models/AU_<n>_static.dat and the
%   development-set scores to results_SEMAINE_devel/AU_<n>_static.mat.
%
%   NOTE(review): train_recs, devel_recs, SEMAINE_dir and hog_data_dir are not
%   assigned here; presumably shared_defs / find_SEMAINE create them - confirm.

    % Change to your downloaded location
    addpath('C:\liblinear\matlab')

    addpath('../training_code/');
    addpath('../utilities/');
    addpath('../../data extraction/');

    %% load shared definitions and AU data
    shared_defs;

    % Set up the hyperparameters to be validated
    hyperparams.c = 10.^(-9:0.5:1);
    hyperparams.e = 10.^(-3);

    hyperparams.validate_params = {'c', 'e'};

    % Set the training function
    svm_train = @svm_train_linear;

    % Set the test function (the first output will be used for validation)
    svm_test = @svm_test_linear;

    % Make sure the output folders exist; requesting outputs keeps mkdir
    % quiet when they already do
    [~, ~] = mkdir('models');
    [~, ~] = mkdir('results_SEMAINE_devel');

    %%
    for a=1:numel(aus)

        au = aus(a);

        rest_aus = setdiff(all_aus, au);

        % load the training and testing data for the current fold
        [train_samples, train_labels, valid_samples, valid_labels, ~, PC, means, scaling] = Prepare_HOG_AU_data_generic(train_recs, devel_recs, au, rest_aus, SEMAINE_dir, hog_data_dir);

        train_samples = sparse(train_samples);
        valid_samples = sparse(valid_samples);

        %% Cross-validate here
        [ best_params, ~ ] = validate_grid_search_no_par(svm_train, svm_test, false, train_samples, train_labels, valid_samples, valid_labels, hyperparams);

        model = svm_train(train_labels, train_samples, best_params);

        % Only the predicted labels are used; the other two outputs are
        % discarded instead of overwriting the loop index a
        [prediction, ~, ~] = predict(valid_labels, valid_samples, model);

        % Go from raw data to the prediction: fold the PCA projection and the
        % scaling into the weights so the model applies to mean-centred raw
        % features; the last weight is used as the bias
        w = model.w(1:end-1)';
        b = model.w(end);

        svs = bsxfun(@times, PC, 1./scaling') * w;

        model_file = sprintf('models/AU_%d_static.dat', au);

        pos_lbl = model.Label(1);
        neg_lbl = model.Label(2);

        write_lin_svm(model_file, means, svs, b, pos_lbl, neg_lbl);

        result_file = sprintf('results_SEMAINE_devel/AU_%d_static.mat', au);

        tp = sum(valid_labels == 1 & prediction == 1);
        fp = sum(valid_labels == 0 & prediction == 1);
        fn = sum(valid_labels == 1 & prediction == 0);

        % These are NaN when a denominator is zero (e.g. no positive predictions)
        precision = tp/(tp+fp);
        recall = tp/(tp+fn);

        f1 = 2 * precision * recall / (precision + recall);

        save(result_file, 'model', 'f1', 'precision', 'recall', 'best_params', 'valid_labels', 'prediction');

    end

end

View File

@@ -0,0 +1,74 @@
function Script_HOG_SVM_train_dyn()
% Script_HOG_SVM_train_dyn  Train and evaluate one dynamic linear SVM per AU.
%
%   For every AU in aus (set by the shared_defs script): prepares the
%   median-normalised ("dynamic") data, grid-searches the hyper-parameters on
%   the development set, retrains with the best ones, writes the model to
%   models/AU_<n>_dyn.dat and the development-set scores to
%   results_SEMAINE_devel/AU_<n>_dyn.mat.
%
%   NOTE(review): train_recs, devel_recs, SEMAINE_dir and hog_data_dir are not
%   assigned here; presumably shared_defs / find_SEMAINE create them - confirm.

    % Change to your downloaded location
    addpath('C:\liblinear\matlab')

    % Same helper folders as the static training script, so this one also
    % runs in a fresh session
    addpath('../training_code/');
    addpath('../utilities/');
    addpath('../../data extraction/');

    %% load shared definitions and AU data
    shared_defs;

    % Set up the hyperparameters to be validated
    hyperparams.c = 10.^(-9:0.5:1);
    hyperparams.e = 10.^(-3);

    hyperparams.validate_params = {'c', 'e'};

    % Set the training function
    svm_train = @svm_train_linear;

    % Set the test function (the first output will be used for validation)
    svm_test = @svm_test_linear;

    % Make sure the output folders exist; requesting outputs keeps mkdir
    % quiet when they already do
    [~, ~] = mkdir('models');
    [~, ~] = mkdir('results_SEMAINE_devel');

    %%
    for a=1:numel(aus)

        au = aus(a);

        rest_aus = setdiff(all_aus, au);

        % load the training and testing data for the current fold
        [train_samples, train_labels, valid_samples, valid_labels, ~, PC, means, scaling] = Prepare_HOG_AU_data_generic_dynamic(train_recs, devel_recs, au, rest_aus, SEMAINE_dir, hog_data_dir);

        train_samples = sparse(train_samples);
        valid_samples = sparse(valid_samples);

        %% Cross-validate here
        [ best_params, ~ ] = validate_grid_search_no_par(svm_train, svm_test, false, train_samples, train_labels, valid_samples, valid_labels, hyperparams);

        model = svm_train(train_labels, train_samples, best_params);

        % Only the predicted labels are used; the other two outputs are
        % discarded instead of overwriting the loop index a
        [prediction, ~, ~] = predict(valid_labels, valid_samples, model);

        % Go from raw data to the prediction: fold the PCA projection and the
        % scaling into the weights so the model applies to mean-centred raw
        % features; the last weight is used as the bias
        w = model.w(1:end-1)';
        b = model.w(end);

        svs = bsxfun(@times, PC, 1./scaling') * w;

        model_file = sprintf('models/AU_%d_dyn.dat', au);

        pos_lbl = model.Label(1);
        neg_lbl = model.Label(2);

        write_lin_dyn_svm(model_file, means, svs, b, pos_lbl, neg_lbl);

        result_file = sprintf('results_SEMAINE_devel/AU_%d_dyn.mat', au);

        tp = sum(valid_labels == 1 & prediction == 1);
        fp = sum(valid_labels == 0 & prediction == 1);
        fn = sum(valid_labels == 1 & prediction == 0);

        % These are NaN when a denominator is zero (e.g. no positive predictions)
        precision = tp/(tp+fp);
        recall = tp/(tp+fn);

        f1 = 2 * precision * recall / (precision + recall);

        save(result_file, 'model', 'f1', 'precision', 'recall', 'best_params', 'valid_labels', 'prediction');

    end

end

View File

@@ -0,0 +1,10 @@
% Definitions shared by all experiments, so that every script works with the
% same AU lists and the same dataset setup.

% Full set of AUs considered (all corrs above 0.5)
all_aus = [2 12 17 25 28 45];

% AUs to extract / train models for; currently the full set
aus = all_aus;

% Make the data extraction helpers available, then run the find_SEMAINE script
addpath('../../data extraction/');
find_SEMAINE;