Files
open_dbm/pkg/OpenFace/model_training/CCNF/CCNF/lib/gradientCCNF.m
2020-08-04 19:12:31 -04:00

185 lines
6.4 KiB
Matlab

function [ gradientParams, SigmaInvs, CholDecomps, Sigmas, bs, allXresp] = gradientCCNF( params, num_alpha, numBeta, sizeTheta, lambda_a, lambda_b, lambda_th, Precalc_Bs, x, y, Precalc_yBys, Precalc_Bs_flat, constant, num_seqs)
%gradientCCNF Summary of this function goes here
% Detailed explanation goes here
% pick out the relevant terms (unpack)
alphas_init = params(1:num_alpha);
betasInit = params(num_alpha+1:num_alpha+numBeta);
thetasInit = reshape(params(num_alpha+numBeta+1:end), sizeTheta);
% Compute the response from the neural layers
allXresp = 1./(1 + exp(-thetasInit * x));
Xt = x;
bs = 2*alphas_init' * allXresp;
% This is precalculated for the next step and is basically the
% feedforward step of the neural net
Z_precalc = 2 * (allXresp .* (1-allXresp));
% These are the outputs weighted by the alphas (see eq TODO
db2_precalc = bsxfun(@times, Z_precalc, alphas_init);
num_feats = sizeTheta(2);
if(constant)
seq_length = size(x,2)/num_seqs;
% As the similarities are the same across all series we can reuse our
% Sigma and SigmaInv calculations
I = eye(seq_length);
[SigmaInv] = CalcSigmaCCNFflat(alphas_init, betasInit, seq_length, Precalc_Bs_flat{1}, I, zeros(seq_length));
CholDecomp=chol(SigmaInv);
% This is a faster way of inverting a symmetric matrix
Sigma=CholDecomp\(CholDecomp'\I);
Sigma_trace = trace(Sigma);
% mu values associated with each time step
mus = Sigma * reshape(bs, seq_length, num_seqs);
% difference between actual and prediction (error)
diff = (y - mus);
db_precalc_mult = bsxfun(@times, db2_precalc, diff(:)');
% Equation 46 from the appendix
gradientThetasT = Xt * db_precalc_mult';
% Reshape into the correct format
gradientThetasT = gradientThetasT(:)';
gradientThetasT = reshape(gradientThetasT, sizeTheta(2), sizeTheta(1))';
gradientThetasT = gradientThetasT(:);
% Some useful precalculations
% for every sequence get a dot product with itself
yy = dot(y,y);
% same goes for the mu
mumu = dot(mus,mus);
% calculating the derivative of L with respect to alpha_k (Equation 27)
% gradientAlphas = (-yq'*yq +(2*yq'*D')' -2 * D * mu + sum(mu.^2) + trace(Sigma));
% allXresp is D
gradient_alphas_add = -sum(yy) + sum(mumu) + num_seqs * Sigma_trace;
gradient_alphas = 2 * allXresp * (y(:) - mus(:)) + gradient_alphas_add;
gradient_betas = zeros(numBeta, 1);
% calculating the derivative of log(L) with respect to the betas
for k=1:numBeta
% From Equation 38 (and 39 for gamma)
% gradient = -yq'*B^(k)*yq + mu'*B^(k)*mu + Vec(Sigma)'*vec(B^(k)
% We precalculate B^(k) (equation 30), as it does not change
% over the course of optimisation
B_k = Precalc_Bs{1}{k};
% precalculated -yq'*B_k*yq can be used as well as it does not
% change (stored in Precalc_yBys)
yq_B_k_yq = sum(Precalc_yBys(:,k));
% A vectorised version of mu'*B^(k)*mu
B_k_mu = B_k*mus;
mu_B_k_mu = mus(:)' * B_k_mu(:);
% Vec(Sigma)*Vec(B^(k)) can be computed as follows:
partition_term = num_seqs * Sigma(:)'*B_k(:);
% Equation 38 and 39 basically
dLdb = yq_B_k_yq + mu_B_k_mu + partition_term;
gradient_betas(k) = dLdb;
end
gradientParams = [gradient_alphas;gradient_betas;gradientThetasT];
SigmaInvs = SigmaInv;
CholDecomps = CholDecomp;
Sigmas = Sigma;
else
SigmaInvs = cell(num_seqs, 1);
CholDecomps = cell(num_seqs, 1);
Sigmas = cell(num_seqs, 1);
gradients = zeros(num_seqs, numel(params));
a_precalc = zeros(sizeTheta(2)*sizeTheta(1), size(allXresp,2));
for i=1:size(db2_precalc,1)
a_precalc((i-1)*num_feats+1:i*num_feats,:) = bsxfun(@times, Xt, db2_precalc(i,:));
end
% y can either be in cell format (diff length seqs.) or in matrix
%, same length seqs
beg_ind = 1;
if(iscell(y))
end_ind = numel(y{1});
y_cell = true;
else
end_ind = size(y,1);
y_cell = false;
end
% Go through every sequence summing the gradients
for q = 1 : num_seqs
currResp = allXresp(:,beg_ind:end_ind);
currB = bs(beg_ind:end_ind)';
PrecalcB = Precalc_Bs{q};
PrecalcBFlat = Precalc_Bs_flat{q};
if(y_cell)
yq = y{q};
else
yq = y(:,q);
end
xq = x(2:end, beg_ind:end_ind);
% Used for equation 46 computation
a_precalc_curr = a_precalc(:,beg_ind:end_ind);
precalc_eye = eye(numel(yq));
precalc_zeros = zeros(numel(yq));
[ gradientsAlphas, gradientsBetas, gradientsThetas, SigmaInv, CholDecomp, Sigma ] = gradientCCNF_per_seq(alphas_init, betasInit, thetasInit, PrecalcB, xq, yq, currResp, currB, Precalc_yBys(q, :), PrecalcBFlat, a_precalc_curr, precalc_eye, precalc_zeros);
gradients(q,:) = [gradientsAlphas; gradientsBetas; gradientsThetas(:)];
SigmaInvs{q} = SigmaInv;
CholDecomps{q} = CholDecomp;
Sigmas{q} = Sigma;
% Update the references to sequence start/end
if(q ~= num_seqs)
beg_ind = end_ind + 1;
if(iscell(y))
end_ind = end_ind + numel(y{q+1});
else
end_ind = end_ind + size(y,1);
end
end
end
gradientParams = sum(gradients,1)';
end
% Add the regularisation term
regAlpha = alphas_init * lambda_a;
regBeta = betasInit * lambda_b;
regTheta = thetasInit * lambda_th;
gradientParams = gradientParams - [regAlpha; regBeta; regTheta(:)];
end