open source pkg v1

2020-08-04 19:12:31 -04:00
parent bef213dba9
commit c389fc2c47
3708 changed files with 1624220 additions and 1 deletions
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/ONet.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/ONet.m
@@ -0,0 +1,36 @@
+function [ out_prob, out_correction, out_lmarks ] = ONet( im_data, ONet_mlab )
+%ONET Forward pass of the ONet stage of MTCNN.
+%   [out_prob, out_correction, out_lmarks] = ONET(im_data, ONet_mlab) sends
+%   im_data through four convolution + PReLU stages (max pooling follows the
+%   first three) and two fully connected layers. All weights are read from
+%   the fields of ONet_mlab. Each output has one column per slice along the
+%   fourth dimension of im_data:
+%     out_prob       - 1 ./ (1 + exp(r1 - r2)), r1 and r2 being rows 1 and 2 of
+%                      the last layer's output
+%     out_correction - rows 3:6 of the last layer's output
+%     out_lmarks     - rows 7:end of the last layer's output
+
+    % Convolution, activation and pooling stack
+    feat = convolution(im_data, ONet_mlab.weights_conv1, ONet_mlab.biases_conv1);
+    feat = max_pooling2(PReLU(feat, ONet_mlab.prelu_weights_1), 3, 2);
+    feat = convolution(feat, ONet_mlab.weights_conv2, ONet_mlab.biases_conv2);
+    feat = max_pooling2(PReLU(feat, ONet_mlab.prelu_weights_2), 3, 2);
+    feat = convolution(feat, ONet_mlab.weights_conv3, ONet_mlab.biases_conv3);
+    feat = max_pooling2(PReLU(feat, ONet_mlab.prelu_weights_3), 2, 2);
+    feat = convolution(feat, ONet_mlab.weights_conv4, ONet_mlab.biases_conv4);
+    feat = PReLU(feat, ONet_mlab.prelu_weights_4);
+
+    % Flatten each sample into one row of a double matrix
+    n_inputs = size(feat,1) * size(feat,2) * size(feat,3);
+    flat = double(reshape(feat, n_inputs, size(feat,4)))';
+
+    % The fully connected layers
+    hidden = PReLU(flat * ONet_mlab.w_fc1 + ONet_mlab.b_fc1', ONet_mlab.prelu_fc1);
+    final = (hidden * ONet_mlab.w_fc2 + ONet_mlab.b_fc2')';
+
+    % Probability, box correction and landmarks of each proposal
+    out_prob = 1./(1+exp(final(1,:)-final(2,:)));
+    out_correction = final(3:6,:);
+    out_lmarks = final(7:end,:);
+end
+
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/ONet_mlab.mat
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/ONet_mlab.mat
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/PNet.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/PNet.m
@@ -0,0 +1,31 @@
+function [ out_prob, out_correction ] = PNet( im_data, PNet_mlab )
+%PNET Forward pass of the PNet stage of MTCNN.
+%   [out_prob, out_correction] = PNET(im_data, PNet_mlab) applies three
+%   convolution + PReLU stages (max pooling follows the first) and then the
+%   same linear layer PNet_mlab.w / PNet_mlab.b at every spatial position.
+%     out_prob       - map 1 ./ (1 + exp(c1 - c2)), c1 and c2 being channels 1
+%                      and 2 of the linear layer's output
+%     out_correction - channels 3:end of the linear layer's output
+
+    % Convolution, activation and pooling stack
+    feat = convolution(im_data, PNet_mlab.weights_conv1, PNet_mlab.biases_conv1);
+    feat = max_pooling2(PReLU(feat, PNet_mlab.prelu_weights_1), 2, 2);
+    feat = convolution(feat, PNet_mlab.weights_conv2, PNet_mlab.biases_conv2);
+    feat = PReLU(feat, PNet_mlab.prelu_weights_2);
+    feat = convolution(feat, PNet_mlab.weights_conv3, PNet_mlab.biases_conv3);
+    feat = PReLU(feat, PNet_mlab.prelu_weights_3);
+
+    % Per-position linear layer: one row per position, one column per channel
+    map_h = size(feat,1);
+    map_w = size(feat,2);
+    per_pos = double(reshape(feat, map_h * map_w, size(feat,3)));
+    per_pos = per_pos * PNet_mlab.w + PNet_mlab.b';
+    scores = reshape(per_pos, map_h, map_w, size(per_pos,2));
+
+    % The alignment probabilities (face heat map)
+    out_prob = 1./(1+exp(scores(:,:,1)-scores(:,:,2)));
+
+    % The correction of the detection
+    out_correction = scores(:,:,3:end);
+end
+
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/PNet_mlab.mat
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/PNet_mlab.mat
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/PReLU.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/PReLU.m
@@ -0,0 +1,30 @@
+function [ out_map ] = PReLU( input_maps, PReLU_params )
+%PRELU Parametric ReLU with one slope per channel.
+%   out_map = PRELU(input_maps, PReLU_params) multiplies the negative entries
+%   of channel k by PReLU_params(k). Channels are the columns of a 2-D input
+%   and the slices along dimension 3 otherwise. In the N-D case positive
+%   entries are multiplied by PReLU_params(k) + (1 - PReLU_params(k)); in the
+%   2-D case they are copied unchanged. out_map is allocated with zeros(), so
+%   it is of class double.
+
+    out_map = zeros(size(input_maps));
+
+    if(ndims(input_maps) <= 2)
+        % 2-D input: scale the negative part of each column in place
+        for col=1:size(input_maps,2)
+            column = input_maps(:,col);
+            is_negative = column < 0;
+            column(is_negative) = column(is_negative) * PReLU_params(col);
+            out_map(:,col) = column;
+        end
+    else
+        % N-D input: multiply by a mask holding the slope where the input is
+        % not positive (the original author notes logical indexing is slower here)
+        for ch=1:size(input_maps,3)
+            slope = PReLU_params(ch);
+            maps = input_maps(:,:,ch,:);
+            out_map(:,:,ch,:) = maps .* (slope + (1 - slope) * (maps > 0));
+        end
+    end
+end
+
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/RNet.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/RNet.m
@@ -0,0 +1,32 @@
+function [ out_prob, out_correction ] = RNet( im_data, RNet_mlab )
+%RNET Forward pass of the RNet stage of MTCNN.
+%   [out_prob, out_correction] = RNET(im_data, RNet_mlab) sends im_data
+%   through three convolution + PReLU stages (max pooling follows the first
+%   two) and two fully connected layers. All weights are read from the fields
+%   of RNet_mlab. Each output has one column per slice along the fourth
+%   dimension of im_data:
+%     out_prob       - 1 ./ (1 + exp(r1 - r2)), r1 and r2 being rows 1 and 2 of
+%                      the last layer's output
+%     out_correction - rows 3:end of the last layer's output
+
+    % Convolution, activation and pooling stack
+    feat = convolution(im_data, RNet_mlab.weights_conv1, RNet_mlab.biases_conv1);
+    feat = max_pooling2(PReLU(feat, RNet_mlab.prelu_weights_1), 3, 2);
+    feat = convolution(feat, RNet_mlab.weights_conv2, RNet_mlab.biases_conv2);
+    feat = max_pooling2(PReLU(feat, RNet_mlab.prelu_weights_2), 3, 2);
+    feat = convolution(feat, RNet_mlab.weights_conv3, RNet_mlab.biases_conv3);
+    feat = PReLU(feat, RNet_mlab.prelu_weights_3);
+
+    % Flatten each sample into one row of a double matrix
+    n_inputs = size(feat,1) * size(feat,2) * size(feat,3);
+    flat = double(reshape(feat, n_inputs, size(feat,4)))';
+
+    % The fully connected layers
+    hidden = PReLU(flat * RNet_mlab.w_fc1 + RNet_mlab.b_fc1', RNet_mlab.prelu_fc1);
+    final = (hidden * RNet_mlab.w_fc2 + RNet_mlab.b_fc2')';
+
+    % Probability and box correction of each proposal
+    out_prob = 1./(1+exp(final(1,:)-final(2,:)));
+    out_correction = final(3:end,:);
+end
+
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/RNet_mlab.mat
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/RNet_mlab.mat
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/apply_correction.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/apply_correction.m
@@ -0,0 +1,26 @@
+function [ total_bboxes ] = apply_correction( total_bboxes, corrections, add1 )
+%APPLY_CORRECTION Shift box corners by regression offsets scaled with box size.
+%   total_bboxes = APPLY_CORRECTION(total_bboxes, corrections, add1)
+%     total_bboxes - N-by-5 rows [min_x min_y max_x max_y score]
+%     corrections  - N-by-4 offsets; columns 1 and 3 are multiplied by the box
+%                    width, columns 2 and 4 by the box height, and the products
+%                    are added to the matching corner coordinate
+%     add1         - when true the width and height are max - min + 1 instead
+%                    of max - min
+%   The score column is returned unchanged.
+
+    box_w = total_bboxes(:,3) - total_bboxes(:,1);
+    box_h = total_bboxes(:,4) - total_bboxes(:,2);
+
+    % TODO is this needed?
+    if(add1)
+        box_w = box_w + 1;
+        box_h = box_h + 1;
+    end
+
+    % One extent per corner coordinate, in column order x, y, x, y
+    extents = [box_w, box_h, box_w, box_h];
+    corners = total_bboxes(:,1:4) + corrections(:,1:4) .* extents;
+    total_bboxes = [corners, total_bboxes(:,5)];
+end
+
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/convert_to_cpp/MTCNN_detector.txt
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/convert_to_cpp/MTCNN_detector.txt
@@ -0,0 +1,3 @@
+PNet PNet.dat
+RNet RNet.dat
+ONet ONet.dat
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/convert_to_cpp/ONet.dat
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/convert_to_cpp/ONet.dat
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/convert_to_cpp/PNet.dat
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/convert_to_cpp/PNet.dat
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/convert_to_cpp/RNet.dat
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/convert_to_cpp/RNet.dat
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/convert_to_cpp/Write_CNN_to_binary.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/convert_to_cpp/Write_CNN_to_binary.m
@@ -0,0 +1,103 @@
+function Write_CNN_to_binary(location_binary, cnn)
+%WRITE_CNN_TO_BINARY Write a layer list to a little-endian binary file.
+%   Write_CNN_to_binary(location_binary, cnn) serialises cnn.layers, a 1-by-N
+%   cell array of structs with a 'type' field, to the file location_binary.
+%
+%   File layout: the layer count, then for every layer a type code followed
+%   by its payload (counts and codes are written with precision 'uint'):
+%     0 'conv'        - number of input maps, number of kernels, one float32
+%                       bias per kernel (weights{2}), then every kernel
+%                       weights{1}(:,:,in,out) with the input map as the outer
+%                       loop
+%     1 'max_pooling' - kernel_size_x, kernel_size_y, stride_x, stride_y
+%     2 'fc'          - bias weights{2}, then weights{1}
+%     3 'prelu'       - weights{1}
+%   Matrices are written by writeMatrixBin.
+%
+%   Errors are raised, before the file is created, for a layer of any other
+%   type, and when the file cannot be opened.
+
+    % writeMatrixBin is expected in PDM_helpers (relative to the current folder)
+    addpath('../../../PDM_helpers/');
+
+    num_layers = size(cnn.layers,2);
+
+    % Validate first: silently skipping an unknown layer would leave a file
+    % whose layer count disagrees with its contents.
+    known_types = {'conv', 'max_pooling', 'fc', 'prelu'};
+    for layers=1:num_layers
+        if(~any(strcmp(cnn.layers{layers}.type, known_types)))
+            error('Write_CNN_to_binary:unknownLayerType', ...
+                'Layer %d has an unsupported type.', layers);
+        end
+    end
+
+    % use little-endian
+    cnn_binary_file = fopen(location_binary, 'w', 'l');
+    if(cnn_binary_file == -1)
+        error('Write_CNN_to_binary:cannotOpen', ...
+            'Could not open %s for writing.', location_binary);
+    end
+    % Close the file on every exit path, including errors raised while writing
+    close_file = onCleanup(@() fclose(cnn_binary_file)); %#ok<NASGU>
+
+    % Get the number of layers
+    fwrite(cnn_binary_file, num_layers, 'uint'); % 4 bytes
+
+    for layers=1:num_layers
+
+        layer = cnn.layers{layers};
+
+        % write layer type: 0 - convolutional, 1 - max pooling, 2 -
+        % fully connected, 3 - prelu
+        if(strcmp(layer.type, 'conv'))
+
+            % write the type (convolutional)
+            fwrite(cnn_binary_file, 0, 'uint'); % 4 bytes
+
+            num_in_map = size(layer.weights{1},3);
+
+            % write the number of input maps
+            fwrite(cnn_binary_file, num_in_map, 'uint'); % 4 bytes
+
+            num_out_kerns = size(layer.weights{1},4);
+
+            % write the number of kernels for each output map
+            fwrite(cnn_binary_file, num_out_kerns, 'uint'); % 4 bytes
+
+            % Write output map bias terms
+            for k2=1:num_out_kerns
+                fwrite(cnn_binary_file, layer.weights{2}(k2), 'float32'); % 4 bytes
+            end
+
+            for k=1:num_in_map
+                for k2=1:num_out_kerns
+                    % Write out the kernel
+                    W = squeeze(layer.weights{1}(:,:,k,k2));
+                    writeMatrixBin(cnn_binary_file, W, 5);
+                end
+            end
+        elseif(strcmp(layer.type, 'fc'))
+
+            % This is the fully connected layer
+            fwrite(cnn_binary_file, 2, 'uint'); % 4 bytes
+
+            % the bias term
+            writeMatrixBin(cnn_binary_file, layer.weights{2}, 5);
+            % the weights
+            writeMatrixBin(cnn_binary_file, layer.weights{1}, 5);
+
+        elseif(strcmp(layer.type, 'max_pooling'))
+            fwrite(cnn_binary_file, 1, 'uint'); % 4 bytes, indicate max pooling layer
+            % params kernel and stride size
+            fwrite(cnn_binary_file, layer.kernel_size_x, 'uint'); % 4 bytes
+            fwrite(cnn_binary_file, layer.kernel_size_y, 'uint'); % 4 bytes
+            fwrite(cnn_binary_file, layer.stride_x, 'uint'); % 4 bytes
+            fwrite(cnn_binary_file, layer.stride_y, 'uint'); % 4 bytes
+
+        elseif(strcmp(layer.type, 'prelu'))
+            fwrite(cnn_binary_file, 3, 'uint'); % 4 bytes, indicate a parametric relu layer
+            writeMatrixBin(cnn_binary_file, layer.weights{1}, 5);
+        end
+    end
+end
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/convert_to_cpp/Write_out_mtcnn.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/convert_to_cpp/Write_out_mtcnn.m
@@ -0,0 +1,84 @@
+% WRITE_OUT_MTCNN Export the PNet, RNet and ONet models to binary .dat files.
+% Loads ../PNet_mlab.mat, ../RNet_mlab.mat and ../ONet_mlab.mat, lists the layers
+% of each network and writes PNet.dat, RNet.dat and ONet.dat through
+% Write_CNN_to_binary, followed by the index file MTCNN_detector.txt. Every
+% path is relative, so run the script from the folder that contains it.
+
+% Layer constructors. The doubled braces make struct() store the cell array
+% as a field value instead of expanding it into a struct array.
+conv_layer = @(w, b) struct('type', 'conv', 'weights', {{w, b}});
+prelu_layer = @(w) struct('type', 'prelu', 'weights', {{w}});
+fc_layer = @(w, b) struct('type', 'fc', 'weights', {{w, b}});
+pool_layer = @(k) struct('type', 'max_pooling', 'weights', {{}}, ...
+    'stride_x', 2, 'stride_y', 2, 'kernel_size_x', k, 'kernel_size_y', k);
+
+%% First writing out PNet
+load('../PNet_mlab.mat');
+
+cnn = struct;
+cnn.layers = { ...
+    conv_layer(PNet_mlab.weights_conv1, PNet_mlab.biases_conv1), ...
+    prelu_layer(PNet_mlab.prelu_weights_1), ...
+    pool_layer(2), ...
+    conv_layer(PNet_mlab.weights_conv2, PNet_mlab.biases_conv2), ...
+    prelu_layer(PNet_mlab.prelu_weights_2), ...
+    conv_layer(PNet_mlab.weights_conv3, PNet_mlab.biases_conv3), ...
+    prelu_layer(PNet_mlab.prelu_weights_3), ...
+    fc_layer(PNet_mlab.w, PNet_mlab.b)};
+
+Write_CNN_to_binary('PNet.dat', cnn);
+
+%% Next writing out the RNet
+% Drop only what this script created; a bare "clear" would also wipe every
+% other variable in the caller's workspace.
+clear cnn PNet_mlab
+load('../RNet_mlab.mat');
+
+cnn = struct;
+cnn.layers = { ...
+    conv_layer(RNet_mlab.weights_conv1, RNet_mlab.biases_conv1), ...
+    prelu_layer(RNet_mlab.prelu_weights_1), ...
+    pool_layer(3), ...
+    conv_layer(RNet_mlab.weights_conv2, RNet_mlab.biases_conv2), ...
+    prelu_layer(RNet_mlab.prelu_weights_2), ...
+    pool_layer(3), ...
+    conv_layer(RNet_mlab.weights_conv3, RNet_mlab.biases_conv3), ...
+    prelu_layer(RNet_mlab.prelu_weights_3), ...
+    fc_layer(RNet_mlab.w_fc1, RNet_mlab.b_fc1), ...
+    prelu_layer(RNet_mlab.prelu_fc1), ...
+    fc_layer(RNet_mlab.w_fc2, RNet_mlab.b_fc2)};
+
+Write_CNN_to_binary('RNet.dat', cnn);
+
+%% Next writing out the ONet
+clear cnn RNet_mlab
+load('../ONet_mlab.mat');
+
+cnn = struct;
+cnn.layers = { ...
+    conv_layer(ONet_mlab.weights_conv1, ONet_mlab.biases_conv1), ...
+    prelu_layer(ONet_mlab.prelu_weights_1), ...
+    pool_layer(3), ...
+    conv_layer(ONet_mlab.weights_conv2, ONet_mlab.biases_conv2), ...
+    prelu_layer(ONet_mlab.prelu_weights_2), ...
+    pool_layer(3), ...
+    conv_layer(ONet_mlab.weights_conv3, ONet_mlab.biases_conv3), ...
+    prelu_layer(ONet_mlab.prelu_weights_3), ...
+    pool_layer(2), ...
+    conv_layer(ONet_mlab.weights_conv4, ONet_mlab.biases_conv4), ...
+    prelu_layer(ONet_mlab.prelu_weights_4), ...
+    fc_layer(ONet_mlab.w_fc1, ONet_mlab.b_fc1), ...
+    prelu_layer(ONet_mlab.prelu_fc1), ...
+    fc_layer(ONet_mlab.w_fc2, ONet_mlab.b_fc2)};
+
+Write_CNN_to_binary('ONet.dat', cnn);
+
+%% Finally the index file listing the three networks
+f = fopen('MTCNN_detector.txt', 'w');
+if(f == -1)
+    error('Write_out_mtcnn:cannotOpen', 'Could not open MTCNN_detector.txt for writing.');
+end
+fprintf(f, 'PNet PNet.dat\r\n');
+fprintf(f, 'RNet RNet.dat\r\n');
+fprintf(f, 'ONet ONet.dat\r\n');
+fclose(f);
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/convolution.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/convolution.m
@@ -0,0 +1,25 @@
+function [ output_maps ] = convolution( input_maps, kernels, biases )
+%CONVOLUTION Apply a bank of filters and biases to a stack of feature maps.
+%   output_maps = CONVOLUTION(input_maps, kernels, biases) calls vl_nnconv
+%   (on a single-precision copy of input_maps) when it exists as a MEX file.
+%   Otherwise each filter kernels(:,:,:,k) is reversed along its first three
+%   dimensions, passed to convn(..., 'valid'), offset by biases(k), and the
+%   results are stacked along dimension 3.
+
+    % If MatConvNet is not installed use Matlab (much slower)
+    if(exist('vl_nnconv', 'file') == 3)
+        output_maps = vl_nnconv(single(input_maps), kernels, biases);
+        return
+    end
+
+    % convn flips its kernel, so reverse every filter beforehand to make the
+    % operation a sliding dot product
+    reversed = kernels(end:-1:1, end:-1:1, end:-1:1, :);
+
+    output_maps = [];
+    for k=1:size(kernels,4)
+        response = convn(input_maps, reversed(:,:,:,k), 'valid') + biases(k);
+        output_maps = cat(3, output_maps, response);
+    end
+end
+
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/correct_bbox.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/correct_bbox.m
@@ -0,0 +1,33 @@
+function [ total_bboxes, to_keep ] = correct_bbox( total_bboxes, corrections, add1, rectangulate, round, type )
+%CORRECT_BBOX Prune overlapping boxes, apply regression offsets, tidy up.
+%   [total_bboxes, to_keep] = CORRECT_BBOX(total_bboxes, corrections, add1,
+%   rectangulate, round, type)
+%     total_bboxes - N-by-5 rows [min_x min_y max_x max_y score]
+%     corrections  - N-by-4 offsets, one row per box
+%     add1         - forwarded to apply_correction (adds 1 to width/height)
+%     rectangulate - when true the corner columns are passed through rectify
+%     round        - when true the corner columns are truncated with fix
+%     type         - third argument forwarded to non_maximum_supression
+%   to_keep is the selection returned by non_maximum_supression (threshold
+%   0.7); the returned total_bboxes contains only those rows.
+
+    % Non-maximum suppression across bounding boxes
+    to_keep = non_maximum_supression(total_bboxes, 0.7, type);
+    total_bboxes = total_bboxes(to_keep, :);
+    corrections = corrections(to_keep, :);
+
+    % Perform correction based on regression values; delegated to
+    % apply_correction so the arithmetic lives in one place
+    total_bboxes = apply_correction(total_bboxes, corrections, add1);
+
+    if(rectangulate)
+        % Convert the bounding boxes to rectangles
+        total_bboxes(:,1:4) = rectify(total_bboxes(:,1:4));
+    end
+
+    if(round)
+        % Rounding to pixels (fix truncates towards zero)
+        total_bboxes(:,1:4) = fix(total_bboxes(:,1:4));
+    end
+end
+
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/demo.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/demo.m
@@ -0,0 +1,19 @@
+% DEMO Run the MTCNN face detector on test1.jpg.
+clear;
+
+% Make sure we have the dependencies for convolution. The starting folder is
+% restored even if setup fails, so an error does not leave the session in
+% face_validation.
+od = cd('../../face_validation');
+try
+    setup;
+catch setup_error
+    cd(od);
+    rethrow(setup_error);
+end
+cd(od);
+
+img = imread('test1.jpg');
+
+% Boxes, landmarks and scores are left in the workspace for inspection
+[bboxes, lmarks, confidences] = detect_face_mtcnn(img);
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/demo_300W.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/demo_300W.m
@@ -0,0 +1,35 @@
+% DEMO_300W Run the MTCNN face detector over the AFW images of 300-W and
+% display the detected boxes and landmarks.
+clear;
+
+% Make sure we have the dependencies for convolution; the starting folder is
+% restored even if setup fails.
+od = cd('../../face_validation');
+try
+    setup;
+catch setup_error
+    cd(od);
+    rethrow(setup_error);
+end
+cd(od);
+
+% Location of the images (edit to match the local copy of the dataset)
+dataset_dir = 'D:\Datasets\300_W\AFW/';
+
+imgs = dir([dataset_dir, '*.jpg']);
+% The wildcard listing contains image files only (no '.' or '..' entries), so
+% start at the first entry; starting at 2 would skip an image.
+for i=1:numel(imgs)
+    img = imread([dataset_dir, imgs(i).name]);
+    [bboxes, lmarks, confidences] = detect_face_mtcnn(img, 60);
+    hold off
+    imshow(img);
+    hold on;
+    for d=1:size(bboxes,1)
+        % rectangle expects [x y width height]
+        rectangle('Position', [bboxes(d,1), bboxes(d,2), bboxes(d,3)-bboxes(d,1), bboxes(d,4) - bboxes(d,2)]);
+        % Landmarks: columns 1:5 are x, columns 6:10 are y
+        plot(lmarks(d,1:5), lmarks(d,6:10), '.r');
+    end
+    drawnow expose
+end
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/detect_face_mtcnn.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/detect_face_mtcnn.m
@@ -0,0 +1,236 @@
+function [total_bboxes, lmarks, confidence] = detect_face_mtcnn(img, min_face_size)
+%DETECT_FACE_MTCNN Detect faces and five landmarks with the MTCNN cascade.
+%   [total_bboxes, lmarks, confidence] = DETECT_FACE_MTCNN(img, min_face_size)
+%   runs PNet over an image pyramid, then re-scores and refines the surviving
+%   boxes with RNet (24x24 crops) and ONet (48x48 crops).
+%
+%   Inputs
+%     img           - image array; a single-channel image is replicated to
+%                     three channels
+%     min_face_size - optional, default 30; the first pyramid scale is
+%                     12 / min_face_size
+%
+%   Outputs (N may be 0)
+%     total_bboxes - N-by-4 double, rows [min_x min_y max_x max_y]
+%     lmarks       - N-by-10 double, five x coordinates then five y coordinates
+%     confidence   - N-by-1 ONet score of every returned box
+%
+%   When nothing is detected all three outputs are empty (0 rows).
+
+% Check if MatConvNet is installed
+if(exist('vl_nnconv', 'file') ~= 3)
+    fprintf('Warning MatConvNet is not installed or not setup, face detection will be quite slow\n');
+end
+
+height_orig = size(img,1);
+width_orig = size(img,2);
+
+% Everything is done in floats
+img = single(img);
+
+% Deal with the image being grayscale
+if(size(img,3) == 1)
+    img = cat(3, img, img, img);
+end
+
+% Minimum face size
+if(nargin < 2)
+    min_face_size = 30;
+end
+
+% Results reported when no face survives the cascade
+lmarks = zeros(0, 10);
+confidence = zeros(0, 1);
+
+% Image pyramid scaling factor
+factor = 0.709;
+
+% Thresholds for PNet, RNet and ONet, in that order
+threshold=[0.6 0.7 0.7];
+
+min_dim = min([width_orig height_orig]);
+
+% Face support region is 12x12 px, so from that can work out the largest
+% scale (which is 12 / min), and work down from there to smallest scale (no smaller than
+% 12x12px)
+face_support = 12;
+num_scales = floor(log(min_face_size / min_dim) / log(factor));
+scales = (face_support / min_face_size)*factor.^(0:num_scales);
+
+load('PNet_mlab');
+load('RNet_mlab');
+load('ONet_mlab');
+
+total_bboxes = [];
+
+% First the PNet stage on image pyramid
+for s = scales
+    h_pyr = ceil(height_orig * s);
+    w_pyr = ceil(width_orig * s);
+
+    % Resize the image and normalize to what MTCNN expects it to be
+    im_data=(imresize(img, [h_pyr w_pyr],'bilinear','AntiAliasing',false)-127.5)*0.0078125;
+
+    [ out_prob, out_correction ] = PNet( im_data, PNet_mlab );
+
+    % Generate bounding boxes from the heatmap
+    bboxes = generate_bounding_boxes(out_prob, out_correction, s, threshold(1), face_support);
+
+    % TODO correct bboxes before running NMS?, as now lots of overlaping
+    % boxes are present
+
+    % Perform non maximum supression to remove reduntant bounding boxes
+    pick = non_maximum_supression(bboxes, 0.5, 'Union');
+    bboxes=bboxes(pick,:);
+    if ~isempty(bboxes)
+        total_bboxes = cat(1, total_bboxes, bboxes);
+    end
+end
+
+if ~isempty(total_bboxes)
+    % Non maximum supression accross bounding boxes, and their offset
+    % correction
+    corrections = total_bboxes(:,6:end);
+    total_bboxes = total_bboxes(:,1:5);
+
+    to_keep = non_maximum_supression(total_bboxes, 0.7, 'Union');
+    total_bboxes = total_bboxes(to_keep, :);
+    corrections = corrections(to_keep, :);
+
+    total_bboxes = apply_correction(total_bboxes, corrections, false);
+
+    % Making them into rectangles
+    total_bboxes(:,1:4) = rectify(total_bboxes(:,1:4));
+
+    % Rounding to pixels
+    total_bboxes(:,1:4) = fix(total_bboxes(:,1:4));
+end
+
+% RNet stage
+if size(total_bboxes,1) > 0
+
+    % Crop, resize and normalize the proposal images
+    proposal_imgs = (crop_proposals(img, total_bboxes, 24) - 127.5) * 0.0078125;
+
+    % Apply RNet to proposal faces
+    [ score, out_correction ] = RNet( proposal_imgs, RNet_mlab );
+    out_correction = out_correction';
+
+    % Find faces above the threshold
+    to_keep = find(score > threshold(2));
+
+    total_bboxes = [total_bboxes(to_keep,1:4) score(to_keep)'];
+    out_correction = out_correction(to_keep,:);
+
+    if ~isempty(total_bboxes)
+        % Non maximum supression accross bounding boxes, and their offset
+        % correction
+        to_keep = non_maximum_supression(total_bboxes, 0.7, 'Union');
+        total_bboxes = total_bboxes(to_keep, :);
+        out_correction = out_correction(to_keep, :);
+
+        total_bboxes = apply_correction(total_bboxes, out_correction, true);
+
+        % Making them into rectangles
+        total_bboxes(:,1:4) = rectify(total_bboxes(:,1:4));
+
+        % Rounding to pixels
+        total_bboxes(:,1:4) = fix(total_bboxes(:,1:4));
+    end
+end
+
+% ONet stage
+if size(total_bboxes,1) > 0
+
+    % Crop, resize and normalize the proposal images
+    proposal_imgs = (crop_proposals(img, total_bboxes, 48) - 127.5) * 0.0078125;
+
+    % Apply ONet to proposal faces
+    [ score, out_correction, lmarks ] = ONet( proposal_imgs, ONet_mlab );
+    out_correction = out_correction';
+    lmarks = lmarks';
+
+    % Pick the final faces above the threshold
+    to_keep = find(score > threshold(3));
+    lmarks = lmarks(to_keep, :);
+    out_correction = out_correction(to_keep, :);
+    total_bboxes = [total_bboxes(to_keep,1:4) score(to_keep)'];
+
+    % Correct for the landmarks
+    bbw = total_bboxes(:,3) - total_bboxes(:,1) + 1;
+    bbh = total_bboxes(:,4) - total_bboxes(:,2) + 1;
+
+    lmarks(:, 1:5) = bbw .* lmarks(:,1:5) + total_bboxes(:,1) - 1;
+    lmarks(:, 6:10) = bbh .* lmarks(:,6:10) + total_bboxes(:,2) - 1;
+
+    % Correct the bounding boxes
+    if size(total_bboxes,1)>0
+        total_bboxes = apply_correction(total_bboxes, out_correction, true);
+        to_keep = non_maximum_supression(total_bboxes, 0.7, 'Min');
+
+        lmarks = lmarks(to_keep, :);
+        confidence = total_bboxes(to_keep, 5);
+        total_bboxes = total_bboxes(to_keep, 1:4);
+    end
+
+end
+
+if isempty(total_bboxes)
+    % No detections: return empty outputs of the documented shapes
+    total_bboxes = zeros(0, 4);
+    lmarks = zeros(0, 10);
+    return;
+end
+
+% Correct the bounding boxes to be around the 68 landmark points
+widths = total_bboxes(:,3) - total_bboxes(:,1);
+heights = total_bboxes(:,4) - total_bboxes(:,2);
+txs = total_bboxes(:,1);
+tys = total_bboxes(:,2);
+
+new_widths = widths * 1.0323;
+new_heights = heights * 0.7751;
+new_txs = widths * -0.0075 + txs;
+new_tys = heights * 0.2459 + tys;
+
+total_bboxes = [new_txs, new_tys, new_txs + new_widths, new_tys + new_heights];
+total_bboxes = double(total_bboxes);
+lmarks = double(lmarks);
+
+end
+
+function proposal_imgs = crop_proposals(img, bboxes, out_size)
+%CROP_PROPOSALS Cut each box out of img and resize it to out_size-by-out_size.
+%   Returns an out_size-by-out_size-by-3-by-N array, N = size(bboxes,1). Parts
+%   of a box that lie outside the image stay zero.
+
+    height_orig = size(img,1);
+    width_orig = size(img,2);
+    num_bbox = size(bboxes,1);
+
+    proposal_imgs = zeros(out_size, out_size, 3, num_bbox);
+    for k=1:num_bbox
+
+        width_target = bboxes(k,3) - bboxes(k,1) + 1;
+        height_target = bboxes(k,4) - bboxes(k,2) + 1;
+
+        % Work out the start and end indices in the original image
+        start_x_in = max(bboxes(k,1), 1);
+        start_y_in = max(bboxes(k,2), 1);
+        end_x_in = min(bboxes(k,3), width_orig);
+        end_y_in = min(bboxes(k,4), height_orig);
+
+        % Work out the start and end indices in the target image
+        start_x_out = max(-bboxes(k,1)+2, 1);
+        start_y_out = max(-bboxes(k,2)+2, 1);
+        end_x_out = min(width_target - (bboxes(k,3)-width_orig), width_target);
+        end_y_out = min(height_target - (bboxes(k,4)-height_orig), height_target);
+
+        tmp = zeros(height_target, width_target, 3);
+
+        tmp(start_y_out:end_y_out,start_x_out:end_x_out,:) = ...
+            img(start_y_in:end_y_in, start_x_in:end_x_in,:);
+
+        proposal_imgs(:,:,:,k) = imresize(tmp, [out_size out_size], 'bilinear','AntiAliasing',false);
+    end
+end
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/generate_bounding_boxes.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/generate_bounding_boxes.m
@@ -0,0 +1,30 @@
+function [bboxes] = generate_bounding_boxes(heatmap, correction, scale, t, face_support)
+%GENERATE_BOUNDING_BOXES Turn a PNet heat map into boxes in original image space.
+%   bboxes = GENERATE_BOUNDING_BOXES(heatmap, correction, scale, t, face_support)
+%   returns one row per heat-map cell whose value is >= t:
+%       [min_x min_y max_x max_y score dx1 dy1 dx2 dy2]
+%   With p = [col-1, row-1] and stride = 2 the corners are
+%   fix((stride*p + 1) / scale) and fix((stride*p + face_support) / scale).
+%   score is the heat-map value and dx1..dy2 are correction(row, col, 1:4).
+%   The result is 0-by-9 when no cell reaches the threshold.
+
+    % Correction for the pooling
+    stride = 2;
+
+    % Work on column vectors throughout: find() and vector indexing return
+    % rows for a 1-by-W heat map, which would lay the detections side by side
+    % instead of one per row.
+    scores = heatmap(:);
+    inds = find(scores >= t);
+    [rows, cols] = ind2sub(size(heatmap), inds);
+
+    % Find the corresponding scores and bbox corrections
+    score = scores(inds);
+    offsets = reshape(correction(:,:,1:4), numel(scores), 4);
+    correction = offsets(inds, :);
+
+    % Correcting for Matlab's format (1-based to 0-based, x before y)
+    bboxes = [cols - 1, rows - 1];
+    bboxes = [fix((stride*bboxes + 1)/scale), fix((stride*bboxes + face_support)/scale), score, correction];
+end
+
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/im2col_inds.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/im2col_inds.m
@@ -0,0 +1,49 @@
+function ttt=im2col_inds(a, block)
+%IM2COL_INDS Linear indices of every sliding block of a matrix.
+%   TTT = IM2COL_INDS(A, [M N]) returns an (M*N)-by-((MM-M+1)*(NN-N+1))
+%   matrix of linear indices into the MM-by-NN matrix A. Each column lists the
+%   elements of one M-by-N neighbourhood in column-major order, and the
+%   neighbourhoods themselves are visited down the rows first, so A(TTT)
+%   rearranges the sliding blocks of A into columns. Only size(A) is used;
+%   the values of A are never read.
+%
+%   If the block is larger than A along either dimension, TTT is an empty
+%   (M*N)-by-0 matrix.
+%
+%   Example
+%   -------
+%       A = reshape(1:12, 3, 4);
+%       idx = im2col_inds(A, [2 2]);   % 4-by-6, first column is [1;2;4;5]
+%       B = A(idx);                    % every column is one 2-by-2 block
+%
+%   NOTE(review): the index construction looks like the 'sliding' branch of
+%   IM2COL, which the notice below appears to refer to - confirm provenance.
+
+%   Copyright 1993-2016 The MathWorks, Inc.
+
+[ma,na] = size(a);
+m = block(1); n = block(2);
+
+if any([ma na] < [m n]) % if neighborhood is larger than image
+    % Assign the declared output so that the early return is valid
+    ttt = zeros(m*n,0);
+    return
+end
+
+% Create Hankel-like indexing sub matrix.
+mc = block(1); nc = ma-m+1; nn = na-n+1;
+cidx = (0:mc-1)'; ridx = 1:nc;
+t = cidx(:,ones(nc,1)) + ridx(ones(mc,1),:);    % Hankel Subscripts
+% tt: indices of the blocks whose left edge is matrix column 1; each further
+% column inside the block is ma elements (one matrix column) further on
+tt = zeros(mc*n,nc);
+rows = 1:mc;
+for i=0:n-1
+    tt(i*mc+rows,:) = t+ma*i;
+end
+% Repeat that pattern for every horizontal block position
+ttt = zeros(mc*n,nc*nn);
+cols = 1:nc;
+for j=0:nn-1
+    ttt(:,j*nc+cols) = tt+ma*j;
+end
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/im2col_mine.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/im2col_mine.m
@@ -0,0 +1,127 @@
+function b=im2col_mine(a, block)
+%IM2COL_MINE Rearrange sliding image blocks into columns.
+%   B = IM2COL_MINE(A,[M N]) converts each sliding M-by-N block of A
+%   into a column of B, with no zero padding. Only this sliding
+%   behaviour is implemented here: the function takes exactly two
+%   inputs, so the 'distinct' and 'indexed' options of IM2COL are not
+%   available.
+%
+%   B has M*N rows and one column per M-by-N neighborhood of A. If the
+%   size of A is [MM NN], the size of B is
+%   (M*N)-by-((MM-M+1)*(NN-N+1)). Each column of B contains one
+%   neighborhood of A reshaped as NHOOD(:).
+%
+%   The columns of B are ordered so that a per-column result can be
+%   reshaped into a matrix in the normal way. For example, SUM(B)
+%   returns one value per neighborhood, and these values can be stored
+%   in a matrix of size (MM-M+1)-by-(NN-N+1) with:
+%
+%        B = im2col_mine(A,[M N]);
+%        C = reshape(sum(B),MM-M+1,NN-N+1);
+%
+%   If the neighborhood is larger than A in either dimension, B is an
+%   empty (M*N)-by-0 matrix.
+%
+%   Inputs
+%   ------
+%   A      matrix to take the blocks from.
+%   BLOCK  vector whose first two elements are the block size [M N].
+%
+%   Class Support
+%   -------------
+%   B is produced by indexing into A, so it has the same class as A
+%   (except for the empty case above, which returns a double).
+%
+%   Example
+%   -------
+%   Calculate the local mean using a [2 2] neighborhood.
+%
+%       A = reshape(linspace(0,1,16),[4 4])'
+%       B = im2col_mine(A,[2 2])
+%       M = mean(B)
+%       newA = reshape(M,[3 3])
+%
+%   See also IM2COL, COL2IM.
+
+%   Copyright 1993-2016 The MathWorks, Inc.
+
+[ma,na] = size(a);
+m = block(1); n = block(2);
+
+% A neighborhood larger than the image has no valid position.
+if any([ma na] < [m n])
+    b = zeros(m*n,0);
+    return
+end
+
+% Number of valid block positions down the rows and across the columns.
+n_row_pos = ma-m+1;
+n_col_pos = na-n+1;
+
+% Linear-index offset of each element inside one block, listed in
+% column-major order (row offset varies fastest).
+[in_r, in_c] = ndgrid(0:m-1, 0:n-1);
+within = in_r(:) + ma*in_c(:);
+
+% Linear index of the top-left element of each block position, with the
+% row position varying fastest.
+[pos_r, pos_c] = ndgrid(1:n_row_pos, 0:n_col_pos-1);
+origin = pos_r(:)' + ma*pos_c(:)';
+
+% idx(p,q) is the linear index into a of element p of block q.
+idx = bsxfun(@plus, within, origin);
+
+% If a is a row vector, change it to a column vector. This change is
+% necessary when A is a row vector and [M N] = size(A).
+if ndims(a) == 2 && na > 1 && ma == 1
+    a = a(:);
+end
+b = a(idx);
+    
+
+%%%
+%%% Function parse_inputs
+%%%
+function [a, block, kind, padval] = parse_inputs(varargin)
+%PARSE_INPUTS Unpack an IM2COL-style argument list (2 to 4 arguments).
+%   Call forms: (A,[M N]), (A,[M N],KIND), (A,'indexed',[M N]) and
+%   (A,'indexed',[M N],KIND). KIND is checked with validatestring against
+%   {'sliding','distinct'} and defaults to 'sliding'. PADVAL is 1 for the
+%   'indexed' forms, else 0, and is forced to 0 for uint8/uint16 A.
+
+narginchk(2,4);
+
+kind_options = {'sliding','distinct'};
+a = varargin{1};
+
+if nargin == 4
+    % (A, 'indexed', [M N], KIND): the second argument is not inspected.
+    block = varargin{3};
+    kind = validatestring(varargin{4},kind_options,mfilename,'kind',4);
+    padval = 1;
+elseif nargin == 3
+    if (strcmp(varargin{2},'indexed'))
+        % (A, 'indexed', [M N])
+        block = varargin{3};
+        kind = 'sliding';
+        padval = 1;
+    else
+        % (A, [M N], KIND)
+        block = varargin{2};
+        kind = validatestring(varargin{3},kind_options,mfilename,'kind',3);
+        padval = 0;
+    end
+else
+    % Two arguments: 'indexed' without a block size is rejected.
+    if (strcmp(varargin{2},'indexed'))
+        error(message('images:im2col:tooFewInputs'))
+    end
+    block = varargin{2};
+    kind = 'sliding';
+    padval = 0;
+end
+
+% uint8/uint16 inputs always use a pad value of 0.
+if (isa(a,'uint8') || isa(a, 'uint16'))
+    padval = 0;
+end
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/max_pooling.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/max_pooling.m
@@ -0,0 +1,57 @@
+function [ output_maps ] = max_pooling( input_maps)
+%MAX_POOLING Max pooling over 2x2 blocks with stride 2.
+%   OUTPUT_MAPS = MAX_POOLING(INPUT_MAPS) pools every slice
+%   INPUT_MAPS(:,:,k) over non-overlapping 2x2 blocks (IM2COL with the
+%   'distinct' option). For an odd number of rows or columns the
+%   leftover last row/column is pooled over the 1x2 / 2x1 pairs it
+%   contains, and the leftover corner element is copied as is.
+%
+%   INPUT_MAPS   array indexed as (row, col, map).
+%   OUTPUT_MAPS  double array of size
+%                ceil(rows/2)-by-ceil(cols/2)-by-maps.
+%
+%   Requires IM2COL (Image Processing Toolbox).
+
+    n_rows = size(input_maps,1);
+    n_cols = size(input_maps,2);
+    n_maps = size(input_maps,3);
+
+    odd_rows = mod(n_rows,2) ~= 0;
+    odd_cols = mod(n_cols,2) ~= 0;
+
+    % Number of complete 2x2 blocks per direction, and the part of the
+    % input that they cover.
+    blocks_down = floor(n_rows / 2);
+    blocks_across = floor(n_cols / 2);
+    covered_rows = 2 * blocks_down;
+    covered_cols = 2 * blocks_across;
+
+    % Partial blocks at the bottom/right edge add one extra output
+    % row/column, hence ceil().
+    output_maps = zeros(ceil(n_rows / 2), ceil(n_cols / 2), n_maps);
+
+    for k = 1:n_maps
+        % Complete 2x2 blocks: im2col returns one column per block.
+        blocks = im2col(input_maps(1:covered_rows,1:covered_cols,k), [2,2], 'distinct');
+        block_max = max(blocks);
+        output_maps(1:blocks_down,1:blocks_across,k) = reshape(block_max, blocks_down, blocks_across);
+
+        % Odd number of columns: pool vertical pairs of the last column.
+        if odd_cols
+            pairs = im2col(input_maps(1:covered_rows,end,k), [2,1], 'distinct');
+            output_maps(1:blocks_down,end,k) = max(pairs);
+        end
+
+        % Odd number of rows: pool horizontal pairs of the last row.
+        if odd_rows
+            pairs = im2col(input_maps(end,1:covered_cols,k), [1,2], 'distinct');
+            output_maps(end,1:blocks_across,k) = max(pairs);
+        end
+    end
+
+    % Both odd: the bottom-right element has no partner and is copied.
+    if odd_rows && odd_cols
+        output_maps(end,end,:) = input_maps(end,end,:);
+    end
+end
+
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/max_pooling2.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/max_pooling2.m
@@ -0,0 +1,118 @@
+function [ output_maps ] = max_pooling2( input_maps, kernel_size, stride)
+%MAX_POOLING2 Max pooling with a square kernel and a given stride.
+%   OUTPUT_MAPS = MAX_POOLING2(INPUT_MAPS, KERNEL_SIZE, STRIDE) slides a
+%   KERNEL_SIZE-by-KERNEL_SIZE window over the first two dimensions of
+%   INPUT_MAPS in steps of STRIDE and keeps the maximum of each window.
+%   Every slice INPUT_MAPS(:,:,i,m) is pooled on its own. Windows that
+%   overhang the bottom/right edge are pooled over the elements that
+%   exist.
+%
+%   When the MatConvNet MEX function vl_nnpool is found it does the
+%   pooling; otherwise a fallback based on im2col_inds is used.
+%
+%   NOTE(review): the output size is computed with round(). The edge
+%   handling below expects one extra output whenever the windows do not
+%   tile the input exactly, which round() only guarantees when the
+%   fractional part is 0.5 (e.g. stride 2) - confirm before using
+%   larger strides.
+
+    orig_rows = size(input_maps,1);
+    orig_cols = size(input_maps,2);
+
+    % Output size, counting a final partial window when there is one.
+    pooled_rows = round((orig_rows - kernel_size)/stride) + 1;
+    pooled_cols = round((orig_cols - kernel_size)/stride) + 1;
+
+    % Output size when only complete windows are counted.
+    full_rows_out = floor((orig_rows - kernel_size)/stride) + 1;
+    full_cols_out = floor((orig_cols - kernel_size)/stride) + 1;
+
+    if(exist('vl_nnpool', 'file') == 3)
+        % Caffe and MatConvNet do pooling slightly differently, so need to
+        % counter for that
+        if(full_rows_out == pooled_rows && full_cols_out == pooled_cols)
+            output_maps = vl_nnpool(input_maps, [kernel_size, kernel_size], 'stride', stride);
+        else
+            % Pad right and bottom with -inf until vl_nnpool yields the
+            % wanted number of outputs; -inf never wins a maximum.
+            for pad_cols=1:kernel_size
+                if(floor((orig_cols + pad_cols - kernel_size)/stride) + 1 == pooled_cols)
+                    break;
+                end
+            end
+            for pad_rows=1:kernel_size
+                if(floor((orig_rows + pad_rows - kernel_size)/stride) + 1 == pooled_rows)
+                    break;
+                end
+            end
+            padded = -inf * ones(orig_rows+pad_rows, orig_cols+pad_cols, size(input_maps,3), size(input_maps,4));
+            padded(1:orig_rows,1:orig_cols,:,:) = input_maps;
+            output_maps = vl_nnpool(padded, [kernel_size, kernel_size], 'stride', stride);
+        end
+    else
+        n_maps = size(input_maps,3);
+        n_imgs = size(input_maps,4);
+        % Rows/columns of the input covered by complete windows.
+        up_to_cols = kernel_size + (full_cols_out-1) * stride;
+        up_to_rows = kernel_size + (full_rows_out-1) * stride;
+        output_maps = zeros(pooled_rows, pooled_cols, n_maps, n_imgs);
+
+        % Of all sliding window positions keep those on the stride grid
+        % (1, 1+stride, ...). mod(pos-1,stride)==0 is used instead of
+        % mod(pos,stride)==1 so that stride 1 keeps every position.
+        [col_pos, row_pos] = meshgrid(1:up_to_cols-kernel_size+1, 1:up_to_rows-kernel_size+1);
+        to_keep = find(mod(col_pos-1, stride) == 0 & mod(row_pos-1, stride) == 0);
+
+        % Window indices are computed once on the first slice and reused
+        % for every other slice.
+        inds_pooling = im2col_inds(input_maps(1:up_to_rows,1:up_to_cols,1,1), [kernel_size, kernel_size]);
+        inds_pooling = inds_pooling(:, to_keep);
+        for m=1:n_imgs
+            for i=1:n_maps
+                temp = input_maps(1:up_to_rows,1:up_to_cols,i,m);
+                % One column per window; reduce along dimension 1.
+                max_val = max(temp(inds_pooling), [], 1);
+                output_maps(1:full_rows_out,1:full_cols_out,i,m) = reshape(max_val, full_rows_out, full_cols_out);
+            end
+        end
+
+        % Leftover columns on the right: pool the partial windows there.
+        if(orig_cols ~= up_to_cols)
+            span = orig_cols - (up_to_cols - kernel_size + stride);
+            % Index the first slice explicitly rather than relying on the
+            % loop variables left over from the loop above.
+            inds_pooling = im2col_inds(input_maps(1:up_to_rows,end-span+1:end,1,1), [kernel_size, span]);
+            inds_pooling = inds_pooling(:, 1:stride:end);
+            for m=1:n_imgs
+                for i=1:n_maps
+                    temp = input_maps(1:up_to_rows,end-span+1:end,i,m);
+                    output_maps(1:full_rows_out,end,i,m) = max(temp(inds_pooling), [], 1);
+                end
+            end
+        end
+
+        % Leftover rows at the bottom: same treatment.
+        if(orig_rows ~= up_to_rows)
+            span = orig_rows - (up_to_rows - kernel_size + stride);
+            inds_pooling = im2col_inds(input_maps(end-span+1:end, 1:up_to_cols,1,1), [span, kernel_size]);
+            inds_pooling = inds_pooling(:, 1:stride:end);
+            for m=1:n_imgs
+                for i=1:n_maps
+                    temp = input_maps(end-span+1:end, 1:up_to_cols,i,m);
+                    output_maps(end, 1:full_cols_out,i,m) = max(temp(inds_pooling), [], 1);
+                end
+            end
+        end
+
+        % Bottom-right corner: the overhanging window in both directions.
+        if(orig_cols ~= up_to_cols && orig_rows ~= up_to_rows)
+            for m=1:n_imgs
+                for i=1:n_maps
+                    tmp = input_maps(up_to_rows- kernel_size + stride + 1:end,up_to_cols - kernel_size + stride+1:end,i,m);
+                    output_maps(end,end,i,m) = max(tmp(:));
+                end
+            end
+        end
+    end
+end
+
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/non_maximum_supression.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/non_maximum_supression.m
@@ -0,0 +1,46 @@
+function pick = non_maximum_supression(boxes, overlap_threshold,type)
+%NON_MAXIMUM_SUPRESSION Greedy non-maximum suppression of scored boxes.
+%   PICK = NON_MAXIMUM_SUPRESSION(BOXES, OVERLAP_THRESHOLD, TYPE) returns
+%   the row indices of the kept boxes, highest score first ([] when BOXES
+%   is empty). Columns 1-5 of BOXES are read as [x1 y1 x2 y2 score].
+%   TYPE 'Min' divides the intersection by the smaller box area; any
+%   other value, or omitting TYPE, uses intersection over union.
+
+    if nargin < 3
+        type = 'Union';
+    end
+    if isempty(boxes)
+        pick = [];
+        return;
+    end
+    % Corners, scores and areas (the +1 treats coordinates as inclusive)
+    x1 = boxes(:,1);
+    y1 = boxes(:,2);
+    x2 = boxes(:,3);
+    y2 = boxes(:,4);
+    s = boxes(:,5);
+    area = (x2-x1+1) .* (y2-y1+1);
+    % Ascending sort: the best remaining candidate is always the last one
+    [~, I] = sort(s);
+    use_min = strcmp(type,'Min');
+    pick = zeros(numel(s),1);
+    counter = 0;
+    while ~isempty(I)
+        i = I(end);
+        rest = I(1:end-1);
+        counter = counter + 1;
+        pick(counter) = i;
+        % Overlap of the kept box with every remaining candidate
+        w = max(0.0, min(x2(i), x2(rest)) - max(x1(i), x1(rest)) + 1);
+        h = max(0.0, min(y2(i), y2(rest)) - max(y1(i), y1(rest)) + 1);
+        inter = w.*h;
+        if use_min
+            o = inter ./ min(area(i),area(rest));
+        else
+            o = inter ./ (area(i) + area(rest) - inter);
+        end
+        % Candidates overlapping the kept box too much are dropped
+        I = rest(o<=overlap_threshold);
+    end
+    pick = pick(1:counter);
+end
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/readme.txt
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/readme.txt
@@ -0,0 +1,6 @@
+My re-implementation of the MTCNN face detector (https://github.com/kpzhang93/MTCNN_face_detection_alignment) using Matlab and MatConvNet.
+
+It uses MatConvNet to speed up face detection and can use its GPU support. Alternatively, if MatConvNet is not installed, the code falls back to native Matlab functions for processing (much slower).
+
+MatConvNet version used:
+- MatConvNet from http://www.vlfeat.org/matconvnet/ (tested with version 1.0-beta24); install it following its installation instructions
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/rectify.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/rectify.m
@@ -0,0 +1,15 @@
+function [bbox_out] = rectify(bbox_in)
+%RECTIFY Turn each bounding box into a square around the same centre.
+%   BBOX_IN has one box per row as [min_x, min_y, max_x, max_y]. The
+%   longer side of each box becomes the side of its square.
+
+    side_x = bbox_in(:,3) - bbox_in(:,1);
+    side_y = bbox_in(:,4) - bbox_in(:,2);
+    square_side = max(side_x, side_y);
+
+    % Shift the minimum corner by half the size change so the centre stays
+    left = bbox_in(:,1) + 0.5 * (side_x - square_side);
+    top = bbox_in(:,2) + 0.5 * (side_y - square_side);
+    bbox_out = [left, top, left + square_side, top + square_side];
+end
+
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/setup.m
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/setup.m
@@ -0,0 +1,23 @@
+function setup(varargin)
+%SETUP Add the MatConvNet examples folder to the path and check its build.
+%   Options 'useGpu' and 'verbose' (default false) are parsed by vl_argparse.
+
+addpath('C:\matconvnet\matconvnet-1.0-beta25\examples');
+opts = struct('useGpu', false, 'verbose', false);
+opts = vl_argparse(opts, varargin) ;
+
+try  % probe the CPU convolution; compile if the call fails
+  vl_nnconv(single(1),single(1),[]) ;
+catch
+  warning('VL_NNCONV() does not seem to be compiled. Trying to compile it now.') ;
+  vl_compilenn('enableGpu', opts.useGpu, 'verbose', opts.verbose) ;
+end
+
+if opts.useGpu
+  try  % same probe with GPU arrays
+    vl_nnconv(gpuArray(single(1)),gpuArray(single(1)),[]) ;
+  catch
+    vl_compilenn('enableGpu', opts.useGpu, 'verbose', opts.verbose) ;
+    warning('GPU support does not seem to be compiled in MatConvNet. Trying to compile it now') ;
+  end
+end
--- a/pkg/OpenFace/matlab_version/face_detection/mtcnn/test1.jpg
+++ b/pkg/OpenFace/matlab_version/face_detection/mtcnn/test1.jpg