open source pkg v1
This commit is contained in:
37
pkg/OpenFace/matlab_version/face_detection/mtcnn/ONet.m
Normal file
37
pkg/OpenFace/matlab_version/face_detection/mtcnn/ONet.m
Normal file
@@ -0,0 +1,37 @@
|
||||
function [ out_prob, out_correction, out_lmarks ] = ONet( im_data, ONet_mlab )
%ONET Forward pass of the third (output) MTCNN network.
%   [out_prob, out_correction, out_lmarks] = ONet(im_data, ONet_mlab)
%
%   im_data   - input passed straight to the first convolution; the fourth
%               dimension of the convolutional output is treated as the
%               sample (proposal) index.
%   ONet_mlab - struct with the fields weights_conv1..4, biases_conv1..4,
%               prelu_weights_1..4, w_fc1, b_fc1, prelu_fc1, w_fc2, b_fc2.
%
%   out_prob       - 1-by-N two-class softmax probability of the second
%                    network output for each sample.
%   out_correction - 4-by-N, rows 3:6 of the last fully connected layer.
%   out_lmarks     - remaining rows (7:end) of the last fully connected layer.

    net = ONet_mlab;

    % Convolutional trunk: conv + PReLU four times, with max pooling after
    % the first three stages (pooling arguments are presumably kernel size
    % and stride - confirm against max_pooling2)
    feat = convolution(im_data, net.weights_conv1, net.biases_conv1);
    feat = PReLU(feat, net.prelu_weights_1);
    feat = max_pooling2(feat, 3, 2);

    feat = convolution(feat, net.weights_conv2, net.biases_conv2);
    feat = PReLU(feat, net.prelu_weights_2);
    feat = max_pooling2(feat, 3, 2);

    feat = convolution(feat, net.weights_conv3, net.biases_conv3);
    feat = PReLU(feat, net.prelu_weights_3);
    feat = max_pooling2(feat, 2, 2);

    feat = convolution(feat, net.weights_conv4, net.biases_conv4);
    feat = PReLU(feat, net.prelu_weights_4);

    % Flatten every sample into one column (column-major over the first
    % three dimensions), stored as double
    n_features = size(feat,1) * size(feat,2) * size(feat,3);
    n_samples = size(feat,4);
    flat = double(reshape(feat, n_features, n_samples));

    % First fully connected layer works on samples-as-rows
    hidden = flat' * net.w_fc1 + net.b_fc1';
    hidden = PReLU(hidden, net.prelu_fc1);

    % Second fully connected layer, transposed back to outputs-by-samples
    logits = (hidden * net.w_fc2 + net.b_fc2')';

    % Probability of each proposal (softmax over the first two outputs)
    out_prob = 1 ./ (1 + exp(logits(1,:) - logits(2,:)));

    % The correction of each detection
    out_correction = logits(3:6,:);

    % The actual detected landmarks
    out_lmarks = logits(7:end,:);
end
|
||||
|
||||
BIN
pkg/OpenFace/matlab_version/face_detection/mtcnn/ONet_mlab.mat
Normal file
BIN
pkg/OpenFace/matlab_version/face_detection/mtcnn/ONet_mlab.mat
Normal file
Binary file not shown.
26
pkg/OpenFace/matlab_version/face_detection/mtcnn/PNet.m
Normal file
26
pkg/OpenFace/matlab_version/face_detection/mtcnn/PNet.m
Normal file
@@ -0,0 +1,26 @@
|
||||
function [ out_prob, out_correction ] = PNet( im_data, PNet_mlab )
%PNET Forward pass of the first (proposal) MTCNN network.
%   [out_prob, out_correction] = PNet(im_data, PNet_mlab)
%
%   im_data   - input passed straight to the first convolution.
%   PNet_mlab - struct with the fields weights_conv1..3, biases_conv1..3,
%               prelu_weights_1..3, w and b.
%
%   out_prob       - map with the spatial size of the last convolutional
%                    output holding the two-class softmax probability of
%                    the second network output at every location.
%   out_correction - the remaining output channels (3:end) at every location.

    net = PNet_mlab;

    % Convolutional trunk; only the first stage is followed by pooling
    feat = convolution(im_data, net.weights_conv1, net.biases_conv1);
    feat = PReLU(feat, net.prelu_weights_1);
    feat = max_pooling2(feat, 2, 2);

    feat = convolution(feat, net.weights_conv2, net.biases_conv2);
    feat = PReLU(feat, net.prelu_weights_2);

    feat = convolution(feat, net.weights_conv3, net.biases_conv3);
    feat = PReLU(feat, net.prelu_weights_3);

    % The fully connected layer is applied independently at every spatial
    % location: one row per location, one column per channel
    n_rows = size(feat,1);
    n_cols = size(feat,2);
    per_location = double(reshape(feat, n_rows * n_cols, size(feat,3)));
    per_location = per_location * net.w + net.b';

    % Back to a rows-by-cols-by-outputs volume
    maps = reshape(per_location, n_rows, n_cols, size(per_location,2));

    % The alignment probabilities (face heat map)
    out_prob = 1 ./ (1 + exp(maps(:,:,1) - maps(:,:,2)));

    % The correction of the detection
    out_correction = maps(:,:,3:end);
end
|
||||
|
||||
BIN
pkg/OpenFace/matlab_version/face_detection/mtcnn/PNet_mlab.mat
Normal file
BIN
pkg/OpenFace/matlab_version/face_detection/mtcnn/PNet_mlab.mat
Normal file
Binary file not shown.
26
pkg/OpenFace/matlab_version/face_detection/mtcnn/PReLU.m
Normal file
26
pkg/OpenFace/matlab_version/face_detection/mtcnn/PReLU.m
Normal file
@@ -0,0 +1,26 @@
|
||||
function [ out_map ] = PReLU( input_maps, PReLU_params )
%PRELU Parametric rectified linear unit.
%   out_map = PReLU(input_maps, PReLU_params) leaves positive inputs
%   unchanged and multiplies negative inputs by a learned slope.
%
%   Inputs with more than two dimensions use one slope per slice along
%   the third dimension (PReLU_params(i) for input_maps(:,:,i,:)).
%   Two-dimensional inputs use one slope per column.
%
%   The result is always allocated with zeros(), i.e. as double.

    out_map = zeros(size(input_maps));

    if ndims(input_maps) > 2
        n_channels = size(input_maps, 3);
        for c = 1:n_channels
            slope = PReLU_params(c);
            channel = input_maps(:,:,c,:);

            % Per-element gain: 'slope' where the input is not positive,
            % 'slope + (1 - slope)' where it is. This is the fast form of
            %   max(x,0) + min(x,0) * slope
            gain = slope + (1 - slope) * (channel > 0);
            out_map(:,:,c,:) = channel .* gain;
        end
    else
        n_units = size(input_maps, 2);
        for c = 1:n_units
            column = input_maps(:,c);
            is_negative = column < 0;
            column(is_negative) = column(is_negative) * PReLU_params(c);
            out_map(:,c) = column;
        end
    end
end
|
||||
|
||||
31
pkg/OpenFace/matlab_version/face_detection/mtcnn/RNet.m
Normal file
31
pkg/OpenFace/matlab_version/face_detection/mtcnn/RNet.m
Normal file
@@ -0,0 +1,31 @@
|
||||
function [ out_prob, out_correction ] = RNet( im_data, RNet_mlab )
%RNET Forward pass of the second (refinement) MTCNN network.
%   [out_prob, out_correction] = RNet(im_data, RNet_mlab)
%
%   im_data   - input passed straight to the first convolution; the fourth
%               dimension of the convolutional output is treated as the
%               sample (proposal) index.
%   RNet_mlab - struct with the fields weights_conv1..3, biases_conv1..3,
%               prelu_weights_1..3, w_fc1, b_fc1, prelu_fc1, w_fc2, b_fc2.
%
%   out_prob       - 1-by-N two-class softmax probability of the second
%                    network output for each sample.
%   out_correction - rows 3:end of the last fully connected layer, one
%                    column per sample.

    net = RNet_mlab;

    % Convolutional trunk; pooling follows the first two stages only
    feat = convolution(im_data, net.weights_conv1, net.biases_conv1);
    feat = PReLU(feat, net.prelu_weights_1);
    feat = max_pooling2(feat, 3, 2);

    feat = convolution(feat, net.weights_conv2, net.biases_conv2);
    feat = PReLU(feat, net.prelu_weights_2);
    feat = max_pooling2(feat, 3, 2);

    feat = convolution(feat, net.weights_conv3, net.biases_conv3);
    feat = PReLU(feat, net.prelu_weights_3);

    % Flatten every sample into one column (column-major over the first
    % three dimensions), stored as double
    n_features = size(feat,1) * size(feat,2) * size(feat,3);
    n_samples = size(feat,4);
    flat = double(reshape(feat, n_features, n_samples));

    % First fully connected layer works on samples-as-rows
    hidden = flat' * net.w_fc1 + net.b_fc1';
    hidden = PReLU(hidden, net.prelu_fc1);

    % Second fully connected layer, transposed back to outputs-by-samples
    logits = (hidden * net.w_fc2 + net.b_fc2')';

    % Probability of each proposal (softmax over the first two outputs)
    out_prob = 1 ./ (1 + exp(logits(1,:) - logits(2,:)));

    % The correction of each detection
    out_correction = logits(3:end,:);
end
|
||||
|
||||
BIN
pkg/OpenFace/matlab_version/face_detection/mtcnn/RNet_mlab.mat
Normal file
BIN
pkg/OpenFace/matlab_version/face_detection/mtcnn/RNet_mlab.mat
Normal file
Binary file not shown.
@@ -0,0 +1,23 @@
|
||||
function [ total_bboxes ] = apply_correction( total_bboxes, corrections, add1 )
%APPLY_CORRECTION Shift bounding box corners by size-relative offsets.
%   total_bboxes = apply_correction(total_bboxes, corrections, add1)
%
%   total_bboxes - N-by-5 rows of [min_x, min_y, max_x, max_y, score].
%   corrections  - N-by-4 offsets for [min_x, min_y, max_x, max_y]; the x
%                  offsets are multiplied by the box width and the y
%                  offsets by the box height before being added.
%   add1         - when true, width and height are taken as
%                  (max - min + 1) instead of (max - min).
%
%   The score column is passed through unchanged.

    min_x = total_bboxes(:,1);
    min_y = total_bboxes(:,2);
    max_x = total_bboxes(:,3);
    max_y = total_bboxes(:,4);

    box_w = max_x - min_x;
    box_h = max_y - min_y;

    % TODO is this needed?
    if(add1)
        box_w = box_w + 1;
        box_h = box_h + 1;
    end

    % Perform correction based on regression values
    total_bboxes = [ min_x + corrections(:,1) .* box_w, ...
                     min_y + corrections(:,2) .* box_h, ...
                     max_x + corrections(:,3) .* box_w, ...
                     max_y + corrections(:,4) .* box_h, ...
                     total_bboxes(:,5) ];
end
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
PNet PNet.dat
|
||||
RNet RNet.dat
|
||||
ONet ONet.dat
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,70 @@
|
||||
function Write_CNN_to_binary(location_binary, cnn)
%WRITE_CNN_TO_BINARY Serialise a layer-list network description to a binary file.
%   Write_CNN_to_binary(location_binary, cnn)
%
%   location_binary - path of the file to create (opened little-endian).
%   cnn             - struct whose field 'layers' is a 1-by-L cell array of
%                     layer structs. Every layer has a 'type' of 'conv',
%                     'fc', 'max_pooling' or 'prelu'; conv/fc/prelu layers
%                     carry a 'weights' cell, max_pooling layers carry
%                     kernel_size_x/y and stride_x/y.
%
%   File layout: the layer count as a 4-byte unsigned integer, followed
%   for each layer by a 4-byte type code and the layer payload:
%     0 conv        - #input maps, #output kernels (4-byte unsigned each),
%                     the biases as float32, then every kernel via
%                     writeMatrixBin, with the input map as the outer loop
%     1 max_pooling - kernel_size_x, kernel_size_y, stride_x, stride_y
%     2 fc          - bias matrix, then weight matrix (writeMatrixBin)
%     3 prelu       - slope matrix (writeMatrixBin)
%
%   Raises an error if a layer has an unsupported type or if the file
%   cannot be opened. Types are validated before the file is created so
%   that a bad description never leaves a partial file behind.

    % Presumably provides writeMatrixBin - confirm the path is valid from
    % the directory this is run in
    addpath('../../../PDM_helpers/');

    num_layers = size(cnn.layers,2);

    % An unsupported layer would otherwise be skipped silently while still
    % being counted in num_layers, which corrupts the file for any reader
    known_types = {'conv', 'fc', 'max_pooling', 'prelu'};
    for layer_idx = 1:num_layers
        layer_type = cnn.layers{layer_idx}.type;
        if ~any(strcmp(layer_type, known_types))
            error('Write_CNN_to_binary:unknownLayerType', ...
                'Layer %d has an unsupported type ''%s''', layer_idx, layer_type);
        end
    end

    % use little-endian
    cnn_binary_file = fopen(location_binary, 'w', 'l');
    if cnn_binary_file == -1
        error('Write_CNN_to_binary:cannotOpenFile', ...
            'Could not open ''%s'' for writing', location_binary);
    end

    % Close the file on every exit path, including errors raised below
    file_closer = onCleanup(@() fclose(cnn_binary_file)); %#ok<NASGU>

    % Get the number of layers
    fwrite(cnn_binary_file, num_layers, 'uint'); % 4 bytes

    for layer_idx = 1:num_layers

        layer = cnn.layers{layer_idx};

        % write layer type: 0 - convolutional, 1 - max pooling, 2 -
        % fully connected, 3 - prelu, 4 - sigmoid (sigmoid is never written here)
        switch layer.type
            case 'conv'
                % write the type (convolutional)
                fwrite(cnn_binary_file, 0, 'uint'); % 4 bytes

                % write the number of input maps
                num_in_map = size(layer.weights{1},3);
                fwrite(cnn_binary_file, num_in_map, 'uint'); % 4 bytes

                % write the number of kernels for each output map
                num_out_kerns = size(layer.weights{1},4);
                fwrite(cnn_binary_file, num_out_kerns, 'uint'); % 4 bytes

                % Write output map bias terms, 4 bytes each
                fwrite(cnn_binary_file, layer.weights{2}(1:num_out_kerns), 'float32');

                for k = 1:num_in_map
                    for k2 = 1:num_out_kerns
                        % Write out the kernel; the trailing 5 is passed to
                        % writeMatrixBin unchanged (presumably a data-type
                        % code - confirm against writeMatrixBin)
                        W = squeeze(layer.weights{1}(:,:,k,k2));
                        writeMatrixBin(cnn_binary_file, W, 5);
                    end
                end

            case 'fc'
                % This is the fully connected layer
                fwrite(cnn_binary_file, 2, 'uint'); % 4 bytes

                % the bias term
                writeMatrixBin(cnn_binary_file, layer.weights{2}, 5);
                % the weights
                writeMatrixBin(cnn_binary_file, layer.weights{1}, 5);

            case 'max_pooling'
                fwrite(cnn_binary_file, 1, 'uint'); % 4 bytes, indicate max pooling layer
                % params kernel and stride size
                fwrite(cnn_binary_file, layer.kernel_size_x, 'uint'); % 4 bytes
                fwrite(cnn_binary_file, layer.kernel_size_y, 'uint'); % 4 bytes
                fwrite(cnn_binary_file, layer.stride_x, 'uint'); % 4 bytes
                fwrite(cnn_binary_file, layer.stride_y, 'uint'); % 4 bytes

            case 'prelu'
                fwrite(cnn_binary_file, 3, 'uint'); % 4 bytes, indicate a parametric relu layer
                writeMatrixBin(cnn_binary_file, layer.weights{1}, 5);
        end
    end

end
|
||||
@@ -0,0 +1,184 @@
|
||||
% First writing out PNet
load('../PNet_mlab.mat');

% Layer stack in execution order:
% conv -> prelu -> max pool (2x2, stride 2) -> conv -> prelu -> conv -> prelu -> fc
cnn = struct;
cnn.layers = cell(1,8);

% Note: cell-valued fields are wrapped in an extra {} so that struct()
% creates a scalar struct holding the cell instead of a struct array
cnn.layers{1} = struct('type', 'conv', ...
    'weights', {{PNet_mlab.weights_conv1, PNet_mlab.biases_conv1}});
cnn.layers{2} = struct('type', 'prelu', ...
    'weights', {{PNet_mlab.prelu_weights_1}});
cnn.layers{3} = struct('type', 'max_pooling', 'weights', {{}}, ...
    'stride_x', 2, 'stride_y', 2, 'kernel_size_x', 2, 'kernel_size_y', 2);

cnn.layers{4} = struct('type', 'conv', ...
    'weights', {{PNet_mlab.weights_conv2, PNet_mlab.biases_conv2}});
cnn.layers{5} = struct('type', 'prelu', ...
    'weights', {{PNet_mlab.prelu_weights_2}});

cnn.layers{6} = struct('type', 'conv', ...
    'weights', {{PNet_mlab.weights_conv3, PNet_mlab.biases_conv3}});
cnn.layers{7} = struct('type', 'prelu', ...
    'weights', {{PNet_mlab.prelu_weights_3}});

cnn.layers{8} = struct('type', 'fc', ...
    'weights', {{PNet_mlab.w, PNet_mlab.b}});

Write_CNN_to_binary('PNet.dat', cnn);
|
||||
|
||||
%% Next writing out the RNet
clear
load('../RNet_mlab.mat');

% Layer stack in execution order:
% (conv -> prelu -> max pool 3x3, stride 2) x2 -> conv -> prelu -> fc -> prelu -> fc
cnn = struct;
cnn.layers = cell(1,11);

% Note: cell-valued fields are wrapped in an extra {} so that struct()
% creates a scalar struct holding the cell instead of a struct array
cnn.layers{1} = struct('type', 'conv', ...
    'weights', {{RNet_mlab.weights_conv1, RNet_mlab.biases_conv1}});
cnn.layers{2} = struct('type', 'prelu', ...
    'weights', {{RNet_mlab.prelu_weights_1}});
cnn.layers{3} = struct('type', 'max_pooling', 'weights', {{}}, ...
    'stride_x', 2, 'stride_y', 2, 'kernel_size_x', 3, 'kernel_size_y', 3);

cnn.layers{4} = struct('type', 'conv', ...
    'weights', {{RNet_mlab.weights_conv2, RNet_mlab.biases_conv2}});
cnn.layers{5} = struct('type', 'prelu', ...
    'weights', {{RNet_mlab.prelu_weights_2}});
cnn.layers{6} = struct('type', 'max_pooling', 'weights', {{}}, ...
    'stride_x', 2, 'stride_y', 2, 'kernel_size_x', 3, 'kernel_size_y', 3);

cnn.layers{7} = struct('type', 'conv', ...
    'weights', {{RNet_mlab.weights_conv3, RNet_mlab.biases_conv3}});
cnn.layers{8} = struct('type', 'prelu', ...
    'weights', {{RNet_mlab.prelu_weights_3}});

cnn.layers{9} = struct('type', 'fc', ...
    'weights', {{RNet_mlab.w_fc1, RNet_mlab.b_fc1}});
cnn.layers{10} = struct('type', 'prelu', ...
    'weights', {{RNet_mlab.prelu_fc1}});
cnn.layers{11} = struct('type', 'fc', ...
    'weights', {{RNet_mlab.w_fc2, RNet_mlab.b_fc2}});

Write_CNN_to_binary('RNet.dat', cnn);
|
||||
|
||||
%% Next writing out the ONet
clear
load('../ONet_mlab.mat');

% Layer stack in execution order:
% (conv -> prelu -> max pool 3x3, stride 2) x2 -> conv -> prelu ->
% max pool 2x2, stride 2 -> conv -> prelu -> fc -> prelu -> fc
cnn = struct;
cnn.layers = cell(1,14);

% Note: cell-valued fields are wrapped in an extra {} so that struct()
% creates a scalar struct holding the cell instead of a struct array
cnn.layers{1} = struct('type', 'conv', ...
    'weights', {{ONet_mlab.weights_conv1, ONet_mlab.biases_conv1}});
cnn.layers{2} = struct('type', 'prelu', ...
    'weights', {{ONet_mlab.prelu_weights_1}});
cnn.layers{3} = struct('type', 'max_pooling', 'weights', {{}}, ...
    'stride_x', 2, 'stride_y', 2, 'kernel_size_x', 3, 'kernel_size_y', 3);

cnn.layers{4} = struct('type', 'conv', ...
    'weights', {{ONet_mlab.weights_conv2, ONet_mlab.biases_conv2}});
cnn.layers{5} = struct('type', 'prelu', ...
    'weights', {{ONet_mlab.prelu_weights_2}});
cnn.layers{6} = struct('type', 'max_pooling', 'weights', {{}}, ...
    'stride_x', 2, 'stride_y', 2, 'kernel_size_x', 3, 'kernel_size_y', 3);

cnn.layers{7} = struct('type', 'conv', ...
    'weights', {{ONet_mlab.weights_conv3, ONet_mlab.biases_conv3}});
cnn.layers{8} = struct('type', 'prelu', ...
    'weights', {{ONet_mlab.prelu_weights_3}});
cnn.layers{9} = struct('type', 'max_pooling', 'weights', {{}}, ...
    'stride_x', 2, 'stride_y', 2, 'kernel_size_x', 2, 'kernel_size_y', 2);

cnn.layers{10} = struct('type', 'conv', ...
    'weights', {{ONet_mlab.weights_conv4, ONet_mlab.biases_conv4}});
cnn.layers{11} = struct('type', 'prelu', ...
    'weights', {{ONet_mlab.prelu_weights_4}});

cnn.layers{12} = struct('type', 'fc', ...
    'weights', {{ONet_mlab.w_fc1, ONet_mlab.b_fc1}});
cnn.layers{13} = struct('type', 'prelu', ...
    'weights', {{ONet_mlab.prelu_fc1}});
cnn.layers{14} = struct('type', 'fc', ...
    'weights', {{ONet_mlab.w_fc2, ONet_mlab.b_fc2}});

Write_CNN_to_binary('ONet.dat', cnn);
|
||||
|
||||
% Write the manifest that maps each network name to its binary file
f = fopen('MTCNN_detector.txt', 'w');
if f == -1
    % Fail with a clear message instead of an invalid-identifier error
    % from the first fprintf
    error('Could not open MTCNN_detector.txt for writing');
end
fprintf(f, 'PNet PNet.dat\r\n');
fprintf(f, 'RNet RNet.dat\r\n');
fprintf(f, 'ONet ONet.dat\r\n');
fclose(f);
|
||||
@@ -0,0 +1,24 @@
|
||||
function [ output_maps ] = convolution( input_maps, kernels, biases )
%CONVOLUTION Multi-channel 'valid' convolution layer with per-filter bias.
%   output_maps = convolution(input_maps, kernels, biases)
%
%   input_maps - input volume (rows x cols x channels [x samples]).
%   kernels    - filter bank; size(kernels,4) is the number of filters and
%                size(kernels,3) the number of input channels.
%   biases     - one bias per filter.
%
%   Uses the MatConvNet MEX function vl_nnconv when it is available and
%   falls back to a (much slower) convn based implementation otherwise.
%   In the fallback, output map i is stacked along the third dimension.

    have_matconvnet = (exist('vl_nnconv', 'file') == 3);

    if have_matconvnet
        output_maps = vl_nnconv(single(input_maps), kernels, biases);
        return;
    end

    % convn flips its kernel along every dimension, so pre-flip the bank
    % along rows, columns and channels to cancel that out
    flipped = flip(flip(flip(kernels, 1), 2), 3);

    n_filters = size(kernels, 4);
    output_maps = [];
    for f = 1:n_filters
        response = convn(input_maps, flipped(:,:,:,f), 'valid') + biases(f);
        output_maps = cat(3, output_maps, response);
    end
end
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
function [ total_bboxes, to_keep ] = correct_bbox( total_bboxes, corrections, add1, rectangulate, round, type )
%CORRECT_BBOX Suppress overlapping boxes, then apply regression offsets.
%   [total_bboxes, to_keep] = correct_bbox(total_bboxes, corrections, add1, rectangulate, round, type)
%
%   total_bboxes - N-by-5 rows of [min_x, min_y, max_x, max_y, score].
%   corrections  - N-by-4 size-relative offsets for the four corners.
%   add1         - when true, width/height are (max - min + 1).
%   rectangulate - when true, the corrected corners are passed through rectify.
%   round        - when true, the corners are truncated towards zero with
%                  fix. Note that this parameter shadows the built-in
%                  round function inside this file.
%   type         - forwarded to non_maximum_supression as its third argument.
%
%   to_keep holds the indices returned by non_maximum_supression (called
%   with an overlap threshold of 0.7); the returned boxes are the
%   corrected versions of exactly those rows.

    % Non maximum supression accross bounding boxes
    to_keep = non_maximum_supression(total_bboxes, 0.7, type);
    kept = total_bboxes(to_keep, :);
    offsets = corrections(to_keep, :);

    box_w = kept(:,3) - kept(:,1);
    box_h = kept(:,4) - kept(:,2);

    % TODO is this needed?
    if(add1)
        box_w = box_w + 1;
        box_h = box_h + 1;
    end

    % Perform correction based on regression values; the score column is
    % carried over unchanged
    total_bboxes = [ kept(:,1) + offsets(:,1) .* box_w, ...
                     kept(:,2) + offsets(:,2) .* box_h, ...
                     kept(:,3) + offsets(:,3) .* box_w, ...
                     kept(:,4) + offsets(:,4) .* box_h, ...
                     kept(:,5) ];

    if(rectangulate)
        % Convert the bounding boxes to rectangles
        total_bboxes(:,1:4) = rectify(total_bboxes(:,1:4));
    end

    if(round)
        % Rounding to pixels
        total_bboxes(:,1:4) = fix(total_bboxes(:,1:4));
    end
end
|
||||
|
||||
10
pkg/OpenFace/matlab_version/face_detection/mtcnn/demo.m
Normal file
10
pkg/OpenFace/matlab_version/face_detection/mtcnn/demo.m
Normal file
@@ -0,0 +1,10 @@
|
||||
% Minimal example: run the MTCNN face detector on a single image
clear;

% Make sure we have the dependencies for convolution
od = cd('../../face_validation');
try
    setup;
catch setup_error
    % Do not leave the session stranded in the other directory on failure
    cd(od);
    rethrow(setup_error);
end
cd(od);

img = imread('test1.jpg');

[bboxes, lmarks, confidences] = detect_face_mtcnn(img);
|
||||
20
pkg/OpenFace/matlab_version/face_detection/mtcnn/demo_300W.m
Normal file
20
pkg/OpenFace/matlab_version/face_detection/mtcnn/demo_300W.m
Normal file
@@ -0,0 +1,20 @@
|
||||
% Run the MTCNN face detector over a folder of images and draw the
% detected boxes and landmarks on each one
clear;

% Make sure we have the dependencies for convolution
od = cd('../../face_validation');
setup;
cd(od);

% Location of the images; edit to match the local copy of the dataset
dataset_dir = 'D:\Datasets\300_W\AFW/';
imgs = dir([dataset_dir, '*.jpg']);

% A '*.jpg' listing contains no '.'/'..' entries, so start at the first
% entry; starting at 2 silently skipped one image
for i=1:numel(imgs)
    img = imread([dataset_dir, imgs(i).name]);
    [bboxes, lmarks, confidences] = detect_face_mtcnn(img, 60);

    hold off
    imshow(img);
    hold on;
    for d=1:size(bboxes,1)
        % bboxes rows are [min_x, min_y, max_x, max_y]
        rectangle('Position', [bboxes(d,1), bboxes(d,2), bboxes(d,3)-bboxes(d,1), bboxes(d,4) - bboxes(d,2)]);
        % landmark x coordinates are in columns 1:5, y in columns 6:10
        plot(lmarks(d,1:5), lmarks(d,6:10), '.r');
    end
    drawnow expose
end
|
||||
@@ -0,0 +1,227 @@
|
||||
function [total_bboxes, lmarks, confidence] = detect_face_mtcnn(img, min_face_size)
%DETECT_FACE_MTCNN Detect faces with a three stage cascade (PNet, RNet, ONet).
%   [total_bboxes, lmarks, confidence] = detect_face_mtcnn(img, min_face_size)
%
%   img           - grayscale or three channel image with 0..255 intensities
%                   (it is normalised as (img - 127.5) * 0.0078125).
%   min_face_size - optional, defaults to 30; sets the image pyramid scales.
%
%   total_bboxes - N-by-4 double, rows of [min_x, min_y, max_x, max_y],
%                  after a fixed linear adjustment of the detector boxes.
%   lmarks       - N-by-10 double, landmark x coordinates in columns 1:5
%                  and y coordinates in columns 6:10.
%   confidence   - N-by-1 ONet score of every returned box.
%
%   When no face survives the cascade all three outputs are empty
%   (0-by-4, 0-by-10 and 0-by-1) instead of the call failing.
%
%   PNet_mlab, RNet_mlab and ONet_mlab are loaded from .mat files that must
%   be on the MATLAB path.

    % Check if MatConvNet is installed
    if(exist('vl_nnconv', 'file') ~= 3)
        fprintf('Warning MatConvNet is not installed or not setup, face detection will be quite slow\n');
    end

    % Results used when a stage rejects every proposal
    lmarks = zeros(0, 10);
    confidence = zeros(0, 1);

    height_orig = size(img,1);
    width_orig = size(img,2);

    % Everything is done in floats
    img = single(img);

    % Deal with the image being grayscale
    if(size(img,3) == 1)
        img = cat(3, img, img, img);
    end

    % Minimum face size
    if(nargin < 2)
        min_face_size = 30;
    end

    % Image pyramid scaling factor
    factor = 0.709;

    % Thresholds for the PNet, RNet, and ONet stages (in that order)
    threshold=[0.6 0.7 0.7];

    min_dim = min([width_orig height_orig]);

    % Face support region is 12x12 px, so from that can work out the largest
    % scale (which is 12 / min), and work down from there to smallest scale (no smaller than
    % 12x12px). If the image is smaller than min_face_size, num_scales is
    % negative and no scale is evaluated.
    face_support = 12;
    num_scales = floor(log(min_face_size / min_dim) / log(factor));
    scales = (face_support / min_face_size)*factor.^(0:num_scales);

    load('PNet_mlab');
    load('RNet_mlab');
    load('ONet_mlab');

    total_bboxes = [];

    % First the PNet stage on image pyramid
    for s = scales
        h_pyr = ceil(height_orig * s);
        w_pyr = ceil(width_orig * s);

        % Resize the image and normalize to what MTCNN expects it to be
        im_data=(imresize(img, [h_pyr w_pyr],'bilinear','AntiAliasing',false)-127.5)*0.0078125;

        [ out_prob, out_correction ] = PNet( im_data, PNet_mlab );

        % Generate bounding boxes from the heatmap
        bboxes = generate_bounding_boxes(out_prob, out_correction, s, threshold(1), face_support);

        % TODO correct bboxes before running NMS?, as now lots of overlaping
        % boxes are present

        % Perform non maximum supression to remove reduntant bounding boxes
        pick = non_maximum_supression(bboxes, 0.5, 'Union');
        bboxes=bboxes(pick,:);
        if ~isempty(bboxes)
            total_bboxes = cat(1, total_bboxes, bboxes);
        end
    end

    if ~isempty(total_bboxes)
        % Non maximum supression accross bounding boxes, and their offset
        % correction
        corrections = total_bboxes(:,6:end);
        total_bboxes = total_bboxes(:,1:5);

        to_keep = non_maximum_supression(total_bboxes, 0.7, 'Union');
        total_bboxes = total_bboxes(to_keep, :);
        corrections = corrections(to_keep, :);

        total_bboxes = apply_correction(total_bboxes, corrections, false);

        % Making them into rectangles
        total_bboxes(:,1:4) = rectify(total_bboxes(:,1:4));

        % Rounding to pixels
        total_bboxes(:,1:4) = fix(total_bboxes(:,1:4));
    end
    num_bbox = size(total_bboxes,1);

    % RNet stage
    if num_bbox > 0

        % 24x24 normalised crops of every proposal
        proposal_imgs = extract_mtcnn_proposals(img, total_bboxes, 24);

        % Apply RNet to proposal faces
        [ score, out_correction ] = RNet( proposal_imgs, RNet_mlab );
        out_correction = out_correction';

        % Find faces above the threshold
        to_keep = find(score > threshold(2));

        total_bboxes = [total_bboxes(to_keep,1:4) score(to_keep)'];
        out_correction = out_correction(to_keep,:);

        if ~isempty(total_bboxes)
            % Non maximum supression accross bounding boxes, and their offset
            % correction
            to_keep = non_maximum_supression(total_bboxes, 0.7, 'Union');
            total_bboxes = total_bboxes(to_keep, :);
            out_correction = out_correction(to_keep, :);

            total_bboxes = apply_correction(total_bboxes, out_correction, true);

            % Making them into rectangles
            total_bboxes(:,1:4) = rectify(total_bboxes(:,1:4));

            % Rounding to pixels
            total_bboxes(:,1:4) = fix(total_bboxes(:,1:4));
        end
    end

    num_bbox = size(total_bboxes,1);

    % ONet stage
    if num_bbox > 0

        % 48x48 normalised crops of every proposal
        proposal_imgs = extract_mtcnn_proposals(img, total_bboxes, 48);

        % Apply ONet to proposal faces
        [ score, out_correction, lmarks ] = ONet( proposal_imgs, ONet_mlab );
        out_correction = out_correction';
        lmarks = lmarks';

        % Pick the final faces above the threshold
        to_keep = find(score > threshold(3));
        lmarks = lmarks(to_keep, :);
        out_correction = out_correction(to_keep, :);
        total_bboxes = [total_bboxes(to_keep,1:4) score(to_keep)'];

        % Correct for the landmarks: they are relative to the proposal box,
        % so scale by its size and shift by its corner
        bbw = total_bboxes(:,3) - total_bboxes(:,1) + 1;
        bbh = total_bboxes(:,4) - total_bboxes(:,2) + 1;

        lmarks(:, 1:5) = bbw .* lmarks(:,1:5) + total_bboxes(:,1) - 1;
        lmarks(:, 6:10) = bbh .* lmarks(:,6:10) + total_bboxes(:,2) - 1;

        % Correct the bounding boxes
        if size(total_bboxes,1)>0
            total_bboxes = apply_correction(total_bboxes, out_correction, true);
            to_keep = non_maximum_supression(total_bboxes, 0.7, 'Min');

            lmarks = lmarks(to_keep, :);
            confidence = total_bboxes(to_keep, 5);
            total_bboxes = total_bboxes(to_keep, 1:4);
        end

    end

    % Nothing detected: return consistently shaped empty outputs. Without
    % this the column indexing below fails on an empty array and lmarks /
    % confidence could be left unassigned.
    if isempty(total_bboxes)
        total_bboxes = zeros(0, 4);
        lmarks = zeros(0, 10);
        confidence = zeros(0, 1);
        return;
    end

    % Correct the bounding boxes to be around the 68 landmark points
    widths = total_bboxes(:,3) - total_bboxes(:,1);
    heights = total_bboxes(:,4) - total_bboxes(:,2);
    txs = total_bboxes(:,1);
    tys = total_bboxes(:,2);

    new_widths = widths * 1.0323;
    new_heights = heights * 0.7751;
    new_txs = widths * -0.0075 + txs;
    new_tys = heights * 0.2459 + tys;

    total_bboxes = [new_txs, new_tys, new_txs + new_widths, new_tys + new_heights];
    total_bboxes = double(total_bboxes);
    lmarks = double(lmarks);

end

function proposal_imgs = extract_mtcnn_proposals(img, bboxes, out_size)
%EXTRACT_MTCNN_PROPOSALS Crop, zero pad, resize and normalise every proposal box.
%   Returns an out_size x out_size x 3 x N array. Parts of a box that fall
%   outside the image stay zero before the resize.

    height_orig = size(img,1);
    width_orig = size(img,2);
    num_bbox = size(bboxes,1);

    proposal_imgs = zeros(out_size, out_size, 3, num_bbox);
    for k=1:num_bbox

        width_target = bboxes(k,3) - bboxes(k,1) + 1;
        height_target = bboxes(k,4) - bboxes(k,2) + 1;

        % Work out the start and end indices in the original image
        start_x_in = max(bboxes(k,1), 1);
        start_y_in = max(bboxes(k,2), 1);
        end_x_in = min(bboxes(k,3), width_orig);
        end_y_in = min(bboxes(k,4), height_orig);

        % Work out the start and end indices in the target image
        start_x_out = max(-bboxes(k,1)+2, 1);
        start_y_out = max(-bboxes(k,2)+2, 1);
        end_x_out = min(width_target - (bboxes(k,3)-width_orig), width_target);
        end_y_out = min(height_target - (bboxes(k,4)-height_orig), height_target);

        tmp = zeros(height_target, width_target, 3);

        tmp(start_y_out:end_y_out,start_x_out:end_x_out,:) = ...
            img(start_y_in:end_y_in, start_x_in:end_x_in,:);

        proposal_imgs(:,:,:,k) = imresize(tmp, [out_size out_size], 'bilinear','AntiAliasing',false);
    end

    % Normalize the proposal images
    proposal_imgs = (proposal_imgs - 127.5) * 0.0078125;
end
|
||||
@@ -0,0 +1,25 @@
|
||||
function [bboxes] = generate_bounding_boxes(heatmap, correction, scale, t, face_support)
%GENERATE_BOUNDING_BOXES Turn a thresholded heatmap into boxes in original image space.
%   bboxes = generate_bounding_boxes(heatmap, correction, scale, t, face_support)
%
%   heatmap      - 2-D score map.
%   correction   - volume with the same spatial size as heatmap whose
%                  first four channels are the offsets dx1, dy1, dx2, dy2.
%   scale        - scale at which the heatmap was computed; coordinates
%                  are divided by it to map back to the original image.
%   t            - score threshold (locations with heatmap >= t are kept).
%   face_support - side length of the region one heatmap cell corresponds to.
%
%   bboxes is K-by-9 with rows [x1, y1, x2, y2, score, dx1, dy1, dx2, dy2],
%   one row per heatmap location at or above the threshold, in
%   column-major order of the heatmap. It is 0-by-9 when nothing passes.

    % Correction for the pooling
    stride = 2;

    % Offsets for, x, y, width and height
    dx1=correction(:,:,1);
    dy1=correction(:,:,2);
    dx2=correction(:,:,3);
    dy2=correction(:,:,4);

    % Find the parts of a heatmap above the threshold (row, column, and indices)
    above = heatmap >= t;
    [rows, cols] = find(above);
    inds = find(above);

    % find and vector indexing return ROW vectors when the heatmap has a
    % single row; force columns so that every detection stays on its own
    % row of the result
    rows = rows(:);
    cols = cols(:);
    inds = inds(:);

    % Find the corresponding scores and bbox corrections
    score = reshape(heatmap(inds), [], 1);
    correction = [reshape(dx1(inds), [], 1), reshape(dy1(inds), [], 1), ...
                  reshape(dx2(inds), [], 1), reshape(dy2(inds), [], 1)];

    % Correcting for Matlab's format: zero based (x, y) = (column, row)
    bboxes=[cols - 1, rows - 1];
    bboxes=[fix((stride*(bboxes)+1)/scale) fix((stride*(bboxes)+face_support)/scale) score correction];
end
|
||||
|
||||
120
pkg/OpenFace/matlab_version/face_detection/mtcnn/im2col_inds.m
Normal file
120
pkg/OpenFace/matlab_version/face_detection/mtcnn/im2col_inds.m
Normal file
@@ -0,0 +1,120 @@
|
||||
function ttt=im2col_inds(a, block)
%IM2COL_INDS Linear indices of every sliding block of a matrix.
%   TTT = IM2COL_INDS(A, [M N]) returns an (M*N)-by-((MM-M+1)*(NN-N+1))
%   matrix of linear indices into the MM-by-NN matrix A. Every column of
%   TTT lists, in column-major order, the indices of one M-by-N
%   neighbourhood of A; the neighbourhoods themselves are ordered by
%   moving down the rows of A first and then across its columns. A(TTT)
%   therefore has one neighbourhood per column, which is the layout the
%   IM2COL documentation describes for the 'sliding' block type.
%
%   Only the size of A is used, never its values.
%
%   If the neighbourhood is larger than A in either dimension the result
%   is an empty (M*N)-by-0 matrix.
%
%   Example
%   -------
%       idx = im2col_inds(zeros(3,3), [2 2])
%       % idx = [1 2 4 5
%       %        2 3 5 6
%       %        4 5 7 8
%       %        5 6 8 9]
%
%   Adapted from IM2COL.
%
%   See also IM2COL, COL2IM.

%   Copyright 1993-2016 The MathWorks, Inc.

[ma,na] = size(a);
m = block(1); n = block(2);

if any([ma na] < [m n]) % if neighborhood is larger than image
   % Assign the declared output; assigning any other name here makes the
   % call fail with an unassigned output error
   ttt = zeros(m*n,0);
   return
end

% Create Hankel-like indexing sub matrix.
mc = block(1); nc = ma-m+1; nn = na-n+1;
cidx = (0:mc-1)'; ridx = 1:nc;
t = cidx(:,ones(nc,1)) + ridx(ones(mc,1),:);    % Hankel Subscripts

% Indices of all neighbourhoods that start in the first column of a:
% each further block column is one full column of a (ma entries) along
tt = zeros(mc*n,nc);
rows = 1:mc;
for i=0:n-1
    tt(i*mc+rows,:) = t+ma*i;
end

% Shift those indices one column of a at a time to cover every
% horizontal position of the neighbourhood
ttt = zeros(mc*n,nc*nn);
cols = 1:nc;
for j=0:nn-1
    ttt(:,j*nc+cols) = tt+ma*j;
end
|
||||
|
||||
|
||||
%%%
|
||||
%%% Function parse_inputs
|
||||
%%%
|
||||
function [a, block, kind, padval] = parse_inputs(varargin)
%PARSE_INPUTS Decode the im2col-style argument list.
%   Accepted call forms (2 to 4 arguments):
%       (A, [M N])
%       (A, [M N], KIND)
%       (A, 'indexed', [M N])
%       (A, 'indexed', [M N], KIND)
%   KIND must match 'sliding' or 'distinct' and defaults to 'sliding'.
%   PADVAL is 1 for the 'indexed' forms and 0 otherwise, and is always 0
%   when A is uint8 or uint16.

narginchk(2,4);

num_args = nargin;

% The four-argument form is always treated as the 'indexed' form; with
% fewer arguments the second argument decides.
indexed_form = (num_args == 4) || strcmp(varargin{2},'indexed');

if indexed_form && num_args == 2
    % 'indexed' was given without a block size
    error(message('images:im2col:tooFewInputs'))
end

a = varargin{1};

if indexed_form
    block = varargin{3};
    padval = 1;
    kind_pos = 4;
else
    block = varargin{2};
    padval = 0;
    kind_pos = 3;
end

if num_args >= kind_pos
    kind = validatestring(varargin{kind_pos},{'sliding','distinct'},mfilename,'kind',kind_pos);
else
    kind = 'sliding';
end

% Integer images are padded with zeros regardless of the call form
if (isa(a,'uint8') || isa(a, 'uint16'))
    padval = 0;
end
|
||||
127
pkg/OpenFace/matlab_version/face_detection/mtcnn/im2col_mine.m
Normal file
127
pkg/OpenFace/matlab_version/face_detection/mtcnn/im2col_mine.m
Normal file
@@ -0,0 +1,127 @@
|
||||
function b=im2col_mine(a, block)
%IM2COL_MINE Gather every sliding block of a matrix into a column.
%   B = IM2COL_MINE(A, [M N]) returns a matrix with M*N rows in which each
%   column holds one M-by-N neighbourhood of A, read in column-major order
%   (i.e. NHOOD(:)). No padding is applied. If A is MM-by-NN, B has
%   (MM-M+1)*(NN-N+1) columns, ordered so that moving down A varies
%   fastest; a per-column result can therefore be turned back into an image
%   with
%
%       C = reshape(sum(B), MM-M+1, NN-N+1);
%
%   Only this sliding mode is implemented: the function takes exactly the
%   two arguments above.
%
%   If A is smaller than the block along either dimension, B is an empty
%   (M*N)-by-0 matrix.

[img_rows, img_cols] = size(a);
blk_rows = block(1);
blk_cols = block(2);

% Block does not fit inside the image: there are no neighbourhoods
if img_rows < blk_rows || img_cols < blk_cols
    b = zeros(blk_rows*blk_cols, 0);
    return
end

% Number of block positions down and across the image
n_down = img_rows - blk_rows + 1;
n_across = img_cols - blk_cols + 1;

% Linear-index offsets of the elements inside one block, relative to the
% block's top-left element (column-major, so one column step is img_rows)
within_block = bsxfun(@plus, (0:blk_rows-1)', img_rows * (0:blk_cols-1));

% Linear index of the top-left element of every block position
block_start = bsxfun(@plus, (1:n_down)', img_rows * (0:n_across-1));

% Row k, column p holds the linear index of element k of block p
idx = bsxfun(@plus, within_block(:), block_start(:)');

% A row vector indexed by a vector keeps its row orientation, so switch to
% a column first; this matters when the block covers the whole row vector.
if ndims(a) == 2 && img_cols > 1 && img_rows == 1
    a = a(:);
end

b = a(idx);
|
||||
|
||||
|
||||
%%%
|
||||
%%% Function parse_inputs
|
||||
%%%
|
||||
function [a, block, kind, padval] = parse_inputs(varargin)
%PARSE_INPUTS Decode the im2col-style argument list.
%   Accepted call forms (2 to 4 arguments):
%       (A, [M N])
%       (A, [M N], KIND)
%       (A, 'indexed', [M N])
%       (A, 'indexed', [M N], KIND)
%   KIND must match 'sliding' or 'distinct' and defaults to 'sliding'.
%   PADVAL is 1 for the 'indexed' forms and 0 otherwise, and is always 0
%   when A is uint8 or uint16.

narginchk(2,4);

num_args = nargin;

% The four-argument form is always treated as the 'indexed' form; with
% fewer arguments the second argument decides.
indexed_form = (num_args == 4) || strcmp(varargin{2},'indexed');

if indexed_form && num_args == 2
    % 'indexed' was given without a block size
    error(message('images:im2col:tooFewInputs'))
end

a = varargin{1};

if indexed_form
    block = varargin{3};
    padval = 1;
    kind_pos = 4;
else
    block = varargin{2};
    padval = 0;
    kind_pos = 3;
end

if num_args >= kind_pos
    kind = validatestring(varargin{kind_pos},{'sliding','distinct'},mfilename,'kind',kind_pos);
else
    kind = 'sliding';
end

% Integer images are padded with zeros regardless of the call form
if (isa(a,'uint8') || isa(a, 'uint16'))
    padval = 0;
end
|
||||
@@ -0,0 +1,57 @@
|
||||
function [ output_maps ] = max_pooling( input_maps)
%MAX_POOLING 2x2 max pooling with stride 2 over a stack of maps.
%   OUTPUT_MAPS = MAX_POOLING(INPUT_MAPS) pools every slice
%   INPUT_MAPS(:,:,k) independently. The result has
%   ceil(rows/2)-by-ceil(cols/2) spatial size: when the number of rows or
%   columns is odd, the trailing row/column is pooled over the partial
%   (2x1, 1x2 or 1x1) windows that remain.
%
%   The pooling is done with strided slices and element-wise MAX, so no
%   toolbox function is needed and all slices are processed at once.

    orig_rows = size(input_maps,1);
    orig_cols = size(input_maps,2);
    maps = 1:size(input_maps,3);

    % Number of complete 2x2 windows along each dimension
    full_rows_out = floor(orig_rows / 2);
    full_cols_out = floor(orig_cols / 2);

    % Extent of the input covered by complete windows
    full_rows = 2 * full_rows_out;
    full_cols = 2 * full_cols_out;

    odd_rows = full_rows ~= orig_rows;
    odd_cols = full_cols ~= orig_cols;

    output_maps = zeros(ceil(orig_rows / 2), ceil(orig_cols / 2), numel(maps));

    % Row/column positions of the four elements of every complete window
    top = 1:2:full_rows;
    bottom = 2:2:full_rows;
    left = 1:2:full_cols;
    right = 2:2:full_cols;

    % Complete 2x2 windows: max over the four strided sub-grids
    output_maps(1:full_rows_out, 1:full_cols_out, :) = max( ...
        max(input_maps(top, left, maps), input_maps(bottom, left, maps)), ...
        max(input_maps(top, right, maps), input_maps(bottom, right, maps)));

    % Trailing column: 2x1 windows down the last input column
    if odd_cols
        output_maps(1:full_rows_out, end, :) = max( ...
            input_maps(top, orig_cols, maps), input_maps(bottom, orig_cols, maps));
    end

    % Trailing row: 1x2 windows along the last input row
    if odd_rows
        output_maps(end, 1:full_cols_out, :) = max( ...
            input_maps(orig_rows, left, maps), input_maps(orig_rows, right, maps));
    end

    % Bottom-right corner is a 1x1 window when both dimensions are odd
    if odd_cols && odd_rows
        output_maps(end, end, :) = input_maps(orig_rows, orig_cols, maps);
    end

end
|
||||
|
||||
118
pkg/OpenFace/matlab_version/face_detection/mtcnn/max_pooling2.m
Normal file
118
pkg/OpenFace/matlab_version/face_detection/mtcnn/max_pooling2.m
Normal file
@@ -0,0 +1,118 @@
|
||||
function [ output_maps ] = max_pooling2( input_maps, kernel_size, stride)
%MAX_POOLING2 Max pooling with a square kernel and a single stride.
%   OUTPUT_MAPS = MAX_POOLING2(INPUT_MAPS, KERNEL_SIZE, STRIDE) pools each
%   2-D slice INPUT_MAPS(:,:,i,m) with a KERNEL_SIZE-by-KERNEL_SIZE window
%   moved in steps of STRIDE.
%
%   The output has round((size - KERNEL_SIZE)/STRIDE) + 1 rows/columns, so
%   it can have one more row/column than fits with complete windows; that
%   extra row/column is pooled over the partial window that remains at the
%   bottom/right edge.
%
%   If the MatConvNet MEX function vl_nnpool is available it is used
%   (padding the input with -inf where a partial window is needed);
%   otherwise a native implementation based on im2col_inds is used.

    orig_rows = size(input_maps,1);
    orig_cols = size(input_maps,2);

    % Output size requested by this function (may include a partial window)
    pooled_rows = round((orig_rows - kernel_size)/stride) + 1;
    pooled_cols = round((orig_cols - kernel_size)/stride) + 1;

    if(exist('vl_nnpool', 'file') == 3)
        % Caffe and MatConvNet do pooling slightly differently, so need to
        % account for that

        % Output size vl_nnpool would give without padding
        pooled_cols_vl = floor((orig_cols - kernel_size)/stride) + 1;
        pooled_rows_vl = floor((orig_rows - kernel_size)/stride) + 1;

        if(pooled_rows_vl == pooled_rows && pooled_cols_vl == pooled_cols)
            output_maps = vl_nnpool(input_maps, [kernel_size, kernel_size], 'stride', stride);
        else
            % Else need to pad right and bottom with -inf so that the
            % padded values can never win the max. Find the smallest
            % padding x (columns) and y (rows) that yields the wanted size.
            for x=1:kernel_size
                pooled_cols_vl = floor((orig_cols + x - kernel_size)/stride) + 1;
                if(pooled_cols_vl == pooled_cols)
                    break;
                end
            end
            for y=1:kernel_size
                pooled_rows_vl = floor((orig_rows +y - kernel_size)/stride) + 1;
                if(pooled_rows_vl == pooled_rows)
                    break;
                end
            end

            input_maps_new = -inf * ones(size(input_maps,1)+y, size(input_maps,2)+x, size(input_maps,3), size(input_maps,4));
            input_maps_new(1:size(input_maps,1),1:size(input_maps,2),:,:) = input_maps;
            output_maps = vl_nnpool(input_maps_new, [kernel_size, kernel_size], 'stride', stride);
        end
    else

        % Number of complete windows along each dimension
        up_to_rows_out = floor((orig_rows - kernel_size)/stride) + 1;
        up_to_cols_out = floor((orig_cols - kernel_size)/stride) + 1;

        % Extent of the input covered by the complete windows
        up_to_cols = kernel_size + (up_to_cols_out-1) * stride;
        up_to_rows = kernel_size + (up_to_rows_out-1) * stride;

        % The output has an extra trailing row/column only when the
        % requested size exceeds the number of complete windows. Testing
        % this directly (rather than orig_* ~= up_to_*) keeps the edge
        % passes from overwriting a valid row/column when there are
        % leftover input rows/columns but no extra output.
        extra_col = pooled_cols > up_to_cols_out;
        extra_row = pooled_rows > up_to_rows_out;

        output_maps = zeros(pooled_rows, pooled_cols, size(input_maps,3), size(input_maps,4));

        % Pick only the striding elements: of all sliding window positions
        % keep those whose top-left corner is at 1, 1+stride, 1+2*stride...
        % (written with "- 1 ... == 0" so that stride == 1 keeps every
        % position; mod(p, 1) == 1 is never true).
        [col_pos, row_pos] = meshgrid(1:up_to_cols-kernel_size+1, 1:up_to_rows-kernel_size+1);
        to_keep = find(mod(col_pos - 1, stride) == 0 & mod(row_pos - 1, stride) == 0);

        % The indices depend only on the slice size, so compute them once
        inds_pooling = im2col_inds(input_maps(1:up_to_rows,1:up_to_cols,1,1), [kernel_size, kernel_size]);
        inds_pooling = inds_pooling(:, to_keep);

        for m=1:size(input_maps,4)
            for i=1:size(input_maps,3)
                temp = input_maps(1:up_to_rows,1:up_to_cols,i,m);
                temp = temp(inds_pooling);

                % One column per window, so max works column-wise
                max_val = max(temp);
                output_maps(1:up_to_rows_out,1:up_to_cols_out,i,m) = reshape(max_val, up_to_rows_out, up_to_cols_out);
            end
        end

        % Extra trailing column: partial windows that are kernel_size tall
        % and span the remaining columns
        if(extra_col)
            span = orig_cols - (up_to_cols - kernel_size + stride);
            inds_pooling = im2col_inds(input_maps(1:up_to_rows,end-span+1:end,1,1), [kernel_size, span]);
            inds_pooling = inds_pooling(:, 1:stride:end);
            for m=1:size(input_maps,4)
                for i=1:size(input_maps,3)
                    temp = input_maps(1:up_to_rows,end-span+1:end,i,m);
                    max_val = max(temp(inds_pooling));
                    output_maps(1:up_to_rows_out,end,i,m) = max_val;
                end
            end
        end

        % Extra trailing row: partial windows that span the remaining rows
        % and are kernel_size wide
        if(extra_row)
            span = orig_rows - (up_to_rows - kernel_size + stride);
            inds_pooling = im2col_inds(input_maps(end-span+1:end, 1:up_to_cols,1,1), [span, kernel_size]);
            inds_pooling = inds_pooling(:, 1:stride:end);

            for m=1:size(input_maps,4)
                for i=1:size(input_maps,3)
                    temp = input_maps(end-span+1:end, 1:up_to_cols,i,m);
                    max_val = max(temp(inds_pooling));

                    output_maps(end, 1:up_to_cols_out,i,m) = max_val;
                end
            end
        end

        % Bottom-right corner: max over whatever remains in both directions
        if(extra_col && extra_row)
            for m=1:size(input_maps,4)
                for i=1:size(input_maps,3)
                    tmp = input_maps(up_to_rows- kernel_size + stride + 1:end,up_to_cols - kernel_size + stride+1:end,i,m);
                    output_maps(end,end,i,m) = max(tmp(:));
                end
            end
        end

    end

end
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
function pick = non_maximum_supression(boxes, overlap_threshold,type)
%NON_MAXIMUM_SUPRESSION Greedy non-maximum suppression of scored boxes.
%   PICK = NON_MAXIMUM_SUPRESSION(BOXES, OVERLAP_THRESHOLD, TYPE) returns
%   the row indices of the boxes that survive suppression, ordered from
%   the highest score to the lowest.
%
%   BOXES has one box per row: [x1 y1 x2 y2 score ...]. Widths and heights
%   are computed as x2-x1+1 and y2-y1+1.
%
%   Repeatedly, the highest-scoring remaining box is kept and every other
%   remaining box whose overlap with it exceeds OVERLAP_THRESHOLD is
%   discarded. The overlap measure depends on TYPE:
%       'Min'       intersection / area of the smaller of the two boxes
%       otherwise   intersection / union
%   TYPE is optional; when omitted or empty, intersection / union is used.
%
%   An empty BOXES gives an empty PICK.

    if isempty(boxes)
        pick = [];
        return;
    end

    if nargin < 3 || isempty(type)
        type = 'Union';
    end
    use_min_area = strcmp(type,'Min');

    % Compute the corners of boxes and the area
    x1 = boxes(:,1);
    y1 = boxes(:,2);
    x2 = boxes(:,3);
    y2 = boxes(:,4);
    s = boxes(:,5);
    area = (x2-x1+1) .* (y2-y1+1);

    % Sorting based on confidence scores (ascending, so the best candidate
    % is always the last element)
    [~, order] = sort(s);

    pick = zeros(numel(s),1);
    num_picked = 0;

    while ~isempty(order)
        % Keep the best remaining box
        i = order(end);
        rest = order(1:end-1);
        num_picked = num_picked + 1;
        pick(num_picked) = i;

        % Intersection of the kept box with every other remaining box
        xx1 = max(x1(i), x1(rest));
        yy1 = max(y1(i), y1(rest));
        xx2 = min(x2(i), x2(rest));
        yy2 = min(y2(i), y2(rest));
        w = max(0.0, xx2-xx1+1);
        h = max(0.0, yy2-yy1+1);
        inter = w.*h;

        if use_min_area
            o = inter ./ min(area(i),area(rest));
        else
            o = inter ./ (area(i) + area(rest) - inter);
        end

        % Only boxes that do not overlap too much stay in the running
        order = rest(o<=overlap_threshold);
    end

    pick = pick(1:num_picked);
end
|
||||
@@ -0,0 +1,6 @@
|
||||
My re-implementation of the MTCNN face detector (https://github.com/kpzhang93/MTCNN_face_detection_alignment) using Matlab and MatConvNet.
|
||||
|
||||
It uses MatConvNet to speed up face detection, and is able to use GPU support. Alternatively, if MatConvNet is not installed the approach will use Matlab native functions for processing (much slower).
|
||||
|
||||
MatConvNet version used:
|
||||
- MatConvNet from http://www.vlfeat.org/matconvnet/ (tested with version 1.0-beta24); install it by following the instructions provided there
|
||||
15
pkg/OpenFace/matlab_version/face_detection/mtcnn/rectify.m
Normal file
15
pkg/OpenFace/matlab_version/face_detection/mtcnn/rectify.m
Normal file
@@ -0,0 +1,15 @@
|
||||
function [bbox_out] = rectify(bbox_in)
%RECTIFY Turn bounding boxes into squares that share the same centre.
%   BBOX_OUT = RECTIFY(BBOX_IN) takes one box per row, with the first four
%   columns being [min_x min_y max_x max_y], and returns an N-by-4 matrix
%   of square boxes in the same layout. Each square's side equals the
%   longer side of the input box, and the shorter dimension is grown
%   equally on both sides.

    left = bbox_in(:,1);
    top = bbox_in(:,2);

    box_w = bbox_in(:,3) - left;
    box_h = bbox_in(:,4) - top;

    % Side of the square: the longer of the two box sides
    side = max(box_w, box_h);

    % Shift the origin back by half of the growth in each dimension
    sq_left = left + 0.5 * (box_w - side);
    sq_top = top + 0.5 * (box_h - side);

    bbox_out = [sq_left, sq_top, sq_left + side, sq_top + side];
end
|
||||
|
||||
23
pkg/OpenFace/matlab_version/face_detection/mtcnn/setup.m
Normal file
23
pkg/OpenFace/matlab_version/face_detection/mtcnn/setup.m
Normal file
@@ -0,0 +1,23 @@
|
||||
function setup(varargin)
%SETUP Make sure MatConvNet is usable, compiling it if necessary.
%   SETUP() checks that vl_nnconv runs and, if it does not, attempts to
%   compile MatConvNet with vl_compilenn.
%
%   SETUP('useGpu', true) additionally checks that vl_nnconv runs on
%   gpuArray inputs and recompiles with GPU support if it does not.
%
%   SETUP('verbose', true) passes the verbose flag on to vl_compilenn.
%
%   Options are parsed with vl_argparse, so MatConvNet's MATLAB functions
%   must already be on the path.

% Developer-specific MatConvNet location. Only add it when it actually
% exists, so that other machines do not get a "not a directory" warning.
% NOTE(review): the README says the code was tested with 1.0-beta24 while
% this path points at beta25 -- confirm which one is intended.
matconvnet_examples = 'C:\matconvnet\matconvnet-1.0-beta25\examples';
if exist(matconvnet_examples, 'dir')
    addpath(matconvnet_examples);
end

opts.useGpu = false ;
opts.verbose = false ;
opts = vl_argparse(opts, varargin) ;

% Probe the CPU implementation with a trivial convolution
try
    vl_nnconv(single(1),single(1),[]) ;
catch
    warning('VL_NNCONV() does not seem to be compiled. Trying to compile it now.') ;
    vl_compilenn('enableGpu', opts.useGpu, 'verbose', opts.verbose) ;
end

if opts.useGpu
    % Probe the GPU implementation the same way
    try
        vl_nnconv(gpuArray(single(1)),gpuArray(single(1)),[]) ;
    catch
        % Warn first, as in the CPU case, so the message is shown even if
        % the compilation itself fails
        warning('GPU support does not seem to be compiled in MatConvNet. Trying to compile it now') ;
        vl_compilenn('enableGpu', opts.useGpu, 'verbose', opts.verbose) ;
    end
end
|
||||
BIN
pkg/OpenFace/matlab_version/face_detection/mtcnn/test1.jpg
Normal file
BIN
pkg/OpenFace/matlab_version/face_detection/mtcnn/test1.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 15 KiB |
Reference in New Issue
Block a user