open source pkg v1

This commit is contained in:
Vijay Yadev
2020-08-04 19:12:31 -04:00
parent bef213dba9
commit c389fc2c47
3708 changed files with 1624220 additions and 1 deletions

View File

@@ -0,0 +1,57 @@
function [ alphas, betas, scaling, finalLikelihood] = CCRF_training_bfgs( num_seqs, thresholdX, thresholdFun, x, y, yUnnormed, alphas, betas, lambda_a, lambda_b, similarityFNs, Precalc_Bs, Precalc_Bs_flat, Precalc_yBys, varargin)
%CCRF_TRAINING_BFGS Fit the CCRF alpha and beta parameters with fmincon.
%   The parameters are stacked as [alphas; betas], constrained to be
%   non-negative, and optimised against the negated regularised
%   log-likelihood using the interior-point algorithm with a BFGS Hessian
%   approximation.
%
%   Inputs (as used below):
%     num_seqs        - number of sequences, forwarded to CalculateSimilarities
%     thresholdX      - used as the 'TolX' optimiser option
%     thresholdFun    - used as the 'TolFun' optimiser option
%     x, y            - inputs and targets forwarded to the helper functions
%     yUnnormed       - targets forwarded to getScaling2
%     alphas, betas   - initial parameters; concatenated vertically, so they
%                       are presumably column vectors (confirm with callers)
%     lambda_a/b      - regularisation weights for alphas / betas
%     similarityFNs   - similarity functions used when the B terms are not
%                       supplied through varargin
%     Precalc_Bs, Precalc_Bs_flat, Precalc_yBys
%                     - NOTE(review): these positional values are overwritten
%                       in both branches below and therefore never read
%     varargin        - name/value pairs: 'PrecalcBs', 'PrecalcBsFlat',
%                       'Precalc_yBys' (the spelling 'Precalc_yBy' is also
%                       accepted) and 'max_iter'
%
%   Outputs: the optimised alphas and betas, the output scaling from
%   getScaling2, and the final regularised log-likelihood.

    % The B^(k) components and the yB^(k)y terms do not change through the
    % iterations, so they may be supplied by the caller. All three have to
    % be present for the supplied values to be used.
    bsIdx     = find(strcmp(varargin, 'PrecalcBs'), 1);
    bsFlatIdx = find(strcmp(varargin, 'PrecalcBsFlat'), 1);
    % The original guard tested for 'Precalc_yBy' while the lookup used
    % 'Precalc_yBys'; accept both spellings so that either one works.
    yByIdx    = find(strcmp(varargin, 'Precalc_yBys') | strcmp(varargin, 'Precalc_yBy'), 1);

    if(~isempty(bsIdx) && ~isempty(bsFlatIdx) && ~isempty(yByIdx))
        Precalc_Bs      = varargin{bsIdx + 1};
        Precalc_Bs_flat = varargin{bsFlatIdx + 1};
        Precalc_yBys    = varargin{yByIdx + 1};
    else
        % if these are not provided calculate them
        [ ~, Precalc_Bs, Precalc_Bs_flat, Precalc_yBys ] = CalculateSimilarities( num_seqs, x, similarityFNs, y);
    end

    params = [alphas; betas];
    numAlphas = numel(alphas);

    objectiveFun = @(p)objectiveFunction(p, numAlphas, lambda_a, lambda_b, Precalc_Bs, x, y, Precalc_yBys, Precalc_Bs_flat);

    options = optimset('Algorithm','interior-point','GradObj','on', 'TolX', thresholdX, 'TolFun', thresholdFun, 'Hessian', 'bfgs', 'display','off', 'useParallel', 'Always');

    maxIterIdx = find(strcmp(varargin, 'max_iter'), 1);
    if(~isempty(maxIterIdx))
        options.MaxIter = varargin{maxIterIdx + 1};
    end

    % lower bound of zero on every parameter, no upper bound
    params = fmincon(objectiveFun, params, [], [],[],[], zeros(numel(params),1), Inf(numel(params), 1), [], options);

    alphas = params(1:numAlphas);
    betas = params(numAlphas+1:end);

    finalLikelihood = LogLikelihoodCCRF(y, x, alphas, betas, lambda_a, lambda_b, Precalc_Bs_flat);

    % establish the scaling
    scaling = getScaling2(alphas, betas, x, yUnnormed, Precalc_Bs);
end
function [loss, gradient] = objectiveFunction(params, numAlpha, lambda_a, lambda_b, PrecalcBs, x, y, Precalc_yBys, PrecalcBsFlat)
% Objective for the minimiser: the negated CCRF log-likelihood and the
% negated gradient. params holds the alphas in its first numAlpha entries
% followed by the betas.
    alphaPart = params(1:numAlpha);
    betaPart = params(numAlpha+1:end);

    % The gradient routine also returns the per-sequence SigmaInv, Cholesky
    % factor and Sigma, which are handed on to the likelihood computation.
    [ascentDirection, SigmaInvs, CholDecomps, Sigmas] = gradientCCRFFull(params, lambda_a, lambda_b, PrecalcBs, x, y, Precalc_yBys, PrecalcBsFlat);

    logL = LogLikelihoodCCRF(y, x, alphaPart, betaPart, lambda_a, lambda_b, PrecalcBsFlat, SigmaInvs, CholDecomps, Sigmas);

    % the optimiser minimises, so flip the sign of both results
    gradient = -ascentDirection;
    loss = -logL;
end

View File

@@ -0,0 +1,122 @@
function [ alphas, betas, scaling, finalLikelihood] = CCRF_training_gradient_descent( nIterations, nExamples, learningRate, threshold, x, y, yUnnormed, masks, alphas, betas, lambda_a, lambda_b, similarityFNs, useIndicators, verbose)
%CCRF_TRAINING_GRADIENT_DESCENT Stochastic gradient training of a CCRF.
%   The updates are made on log(alphas) and log(betas), one sequence at a
%   time, for at most nIterations passes over the nExamples sequences.
%   Training stops early when the relative parameter change over a pass, or
%   the norm of the last per-sequence gradient, drops below threshold.
%
%   x, y, masks are cell arrays indexed by sequence; yUnnormed is forwarded
%   to getScaling. similarityFNs is a cell array of functions called as
%   f(xq, mask). When verbose is true, progress is printed every 10
%   iterations and the parameter / likelihood traces are plotted.
%
%   NOTE(review): gradientCCRF, LogLikelihoodCCRF and getScaling are called
%   here with mask / useIndicators arguments; confirm that the versions on
%   the path accept this argument list.
%
%   Returns the trained alphas and betas, the output scaling and the final
%   log-likelihood.

    if(verbose)
        % one log-likelihood sample per 10 iterations
        logLikelihood = zeros(floor(nIterations/10), 1);
        nLogged = 0;
        alphaTrack = zeros(nIterations, numel(alphas));
        betaTrack = zeros(nIterations, numel(betas));
    end

    logAlphas = log(alphas);
    logBetas = log(betas);

    K = numel(similarityFNs);

    % calculate similarity measures for each of the sequences
    Similarities = cell(nExamples, 1);
    PrecalcQ2s = cell(nExamples,1);
    PrecalcQ2sFlat = cell(nExamples,1);
    PrecalcYqDs = zeros(nExamples, K);

    for q = 1 : nExamples
        yq = y{q};
        xq = x{q};
        mask = masks{q};

        n = size(yq, 1);
        Similarities{q} = zeros([n, n, K]);
        PrecalcQ2s{q} = cell(K,1);
        PrecalcQ2sFlat{q} = zeros((n*(n+1))/2,K);

        % go over all of the similarity metrics and construct the
        % similarity matrices
        for k=1:K
            Similarities{q}(:,:,k) = similarityFNs{k}(xq, mask);
            S = Similarities{q}(:,:,k);
            D = diag(sum(S));
            B = D - S;
            PrecalcQ2s{q}{k} = B;
            % keep only the lower triangle (including the diagonal)
            PrecalcQ2sFlat{q}(:,k) = B(logical(tril(ones(size(S)))));
            PrecalcYqDs(q,k) = -yq'*B*yq;
        end
    end

    % stochastic gradient descent
    for iter = 1 : nIterations

        prevAlphas = alphas;
        prevBetas = betas;

        for q = 1 : nExamples

            yq = y{q};
            xq = x{q};
            mask = masks{q};

            PrecalcQ2 = PrecalcQ2s{q};
            PrecalcQ2Flat = PrecalcQ2sFlat{q};

            [ logGradientsAlphas, logGradientsBetas] = gradientCCRF(alphas, betas, lambda_a, lambda_b, PrecalcQ2, xq, yq, mask, PrecalcYqDs(q, :), useIndicators, PrecalcQ2Flat);

            % update log alpha
            logAlphas = logAlphas + learningRate * logGradientsAlphas;
            alphas = exp(logAlphas);

            % update log beta
            logBetas = logBetas + learningRate * logGradientsBetas;
            betas = exp(logBetas);

            if(verbose)
                % record alpha and beta values for each iteration for debug purposes
                alphaTrack(iter,:) = alphas(:);
                betaTrack(iter,:) = betas;
            end
        end

        % check for convergence
        if (norm([prevAlphas;prevBetas] - [alphas;betas])/norm([prevAlphas;prevBetas]) < threshold || norm([logGradientsAlphas;logGradientsBetas]) < threshold)
            break;
        end

        if(verbose && mod(iter, 10)==0)
            % Count the samples actually written so that the plot below
            % shows exactly the recorded values (the previous indexing
            % left slot 1 unused and plotted it as a zero).
            nLogged = nLogged + 1;
            logLikelihood(nLogged) = LogLikelihoodCCRF(y, x, masks, alphas, betas, lambda_a, lambda_b, PrecalcQ2sFlat, useIndicators);
            fprintf('Iteration %d; logL %f\n', iter, logLikelihood(nLogged));
        end
    end

    % establish the scaling
    scaling = getScaling(alphas, betas, x, yUnnormed, masks, PrecalcQ2s, useIndicators);

    if(verbose)
        figure
        subplot(1,3,1)
        plot(betaTrack(1:iter,:));
        title('beta');
        subplot(1,3,2)
        plot(alphaTrack(1:iter,:))
        title('alpha');
        subplot(1,3,3)
        plot(logLikelihood(1:nLogged,:))
        title('log likelihood');
    end

    % the final likelihood is computed and reported regardless of verbosity
    finalLikelihood = LogLikelihoodCCRF(y, x, masks, alphas, betas, lambda_a, lambda_b, PrecalcQ2sFlat, useIndicators);
    fprintf('Final log likelihood at iteration %d; logL %f, learning rate %f\n', iter, finalLikelihood, learningRate);
end

View File

@@ -0,0 +1,50 @@
function [ SigmaInv] = CalcSigmaCCRF(alphas, betas, precalcBwithoutBeta )
%CALCSIGMACCRF Build SigmaInv = 2 * (A + B) from dense precomputed terms.
%   A is sum(alphas) times the identity. B is the sum over k of
%   betas(k) * precalcBwithoutBeta{k}, where precalcBwithoutBeta is a cell
%   array of square matrices that were computed without the beta factor.

    % the number of elements in the current sequence
    seqLen = size(precalcBwithoutBeta{1}, 1);

    % accumulate the beta-weighted precomputed matrices
    weightedB = zeros([seqLen, seqLen]);
    numTerms = numel(betas);
    for termIdx = 1:numTerms
        weightedB = weightedB + betas(termIdx) * precalcBwithoutBeta{termIdx};
    end

    alphaDiag = sum(alphas) * eye(seqLen);

    SigmaInv = 2 * (alphaDiag + weightedB);
end

View File

@@ -0,0 +1,26 @@
function [ SigmaInv] = CalcSigmaCCRFflat(alphas, betas, n, PrecalcB_flat)
%CALCSIGMACCRFFLAT Build SigmaInv = 2 * (A + B) from flattened B terms.
%   PrecalcB_flat has one column per beta; each column holds the
%   lower-triangular entries (diagonal included) of an n-by-n symmetric
%   matrix that was computed without the beta factor.

    lowerMask = tril(true(n,n));

    % beta-weighted combination of the packed lower triangles
    packedB = PrecalcB_flat * betas;

    % Unpack into a full symmetric matrix: fill the lower triangle,
    % transpose so those values land in the upper triangle, then fill the
    % lower triangle again.
    symB = zeros(n,n);
    symB(lowerMask) = packedB;
    symB = symB';
    symB(lowerMask) = packedB;

    alphaDiag = sum(alphas) * eye(n);

    % Combine A and B
    SigmaInv = 2 * (alphaDiag + symB);
end

View File

@@ -0,0 +1,14 @@
function b = CalcbCCRF( alpha, x)
%CALCBCCRF Linear term of the CCRF: b = 2 * x * alpha.
%   Row i of the result equals 2 * x(i,:) * alpha, computed for all rows
%   at once with a matrix product.
    doubledInputs = 2 * x;
    b = doubledInputs * alpha;
end

View File

@@ -0,0 +1,85 @@
function [ Similarities, PrecalcQ2s, PrecalcQ2sFlat, PrecalcYqDs ] = CalculateSimilarities( n_sequences, x, similarityFNs, y)
%CALCULATESIMILARITIES Precompute per-sequence similarity and B = D - S terms.
%   For every sequence q and similarity function k this stores
%     Similarities{q}(:,:,k)    the similarity matrix S returned by the function
%     PrecalcQ2s{q}{k}          B = diag(sum(S)) - S
%     PrecalcQ2sFlat{q}(:,k)    the lower triangle (with diagonal) of B
%     PrecalcYqDs(q,k)          -yq' * B * yq, only when y is supplied
%
%   x is either a cell array with one entry per sequence, or a matrix whose
%   columns are split evenly into n_sequences groups and whose first row is
%   dropped (the original comment calls it the bias term).

    numFns = numel(similarityFNs);
    haveTargets = nargin > 3;
    cellInput = iscell(x);

    Similarities = cell(n_sequences, 1);
    PrecalcQ2s = cell(n_sequences,1);
    PrecalcQ2sFlat = cell(n_sequences,1);
    PrecalcYqDs = zeros(n_sequences, numFns);

    if(~cellInput)
        sample_length = size(x,2)/n_sequences;
    end

    for q = 1 : n_sequences

        % pick out the data of the current sequence
        if(cellInput)
            xq = x{q};
            n = size(xq, 1);
        else
            firstCol = (q-1)*sample_length + 1;
            lastCol = q*sample_length;
            % don't take the bias term
            xq = x(2:end, firstCol:lastCol);
            n = sample_length;
        end

        Similarities{q} = zeros([n, n, numFns]);
        PrecalcQ2s{q} = cell(numFns,1);
        PrecalcQ2sFlat{q} = zeros((n*(n+1))/2,numFns);

        if(haveTargets)
            if(cellInput)
                yq = y{q};
            else
                yq = y(:,q);
            end
        end

        % go over all of the similarity metrics and construct the
        % similarity matrices
        for k = 1:numFns
            Similarities{q}(:,:,k) = similarityFNs{k}(xq);
            S = Similarities{q}(:,:,k);

            B = diag(sum(S)) - S;
            PrecalcQ2s{q}{k} = B;
            PrecalcQ2sFlat{q}(:,k) = B(logical(tril(ones(size(S)))));

            if(haveTargets)
                PrecalcYqDs(q,k) = -yq'*B*yq;
            end
        end
    end
end

View File

@@ -0,0 +1,173 @@
function [ Similarities, PrecalcQ2s, PrecalcQ2sFlat, PrecalcYqDs ] = CalculateSimilarities_sparsity( n_sequences, x, similarityFNs, sparsityFNs, y, const)
%CALCULATESIMILARITIES_SPARSITY Precompute similarity and sparsity B terms.
%   Terms 1..K come from similarityFNs and use B = D - S; terms K+1..K+K2
%   come from sparsityFNs and use B = D + S, where S is the matrix returned
%   by the function and D = diag(sum(S)).
%
%   Outputs:
%     Similarities{q}(:,:,t)   the matrix S of term t
%     PrecalcQ2s{q}{t}         the matrix B of term t
%     PrecalcQ2sFlat{q}(:,t)   lower triangle (with diagonal) of B
%     PrecalcYqDs(q,t)         -yq' * B * yq, only when y is supplied
%
%   Input layouts:
%     * x is a cell array: every sequence is processed on its own.
%     * x is a matrix and const is false: the columns of x are split evenly
%       into n_sequences groups and the first row is dropped; the term
%       functions are evaluated on the first sequence only and the result
%       is reused for every other sequence.
%     * x is a matrix and const is true: a single set of terms is built
%       from the first sequence and returned in 1-element cell arrays;
%       PrecalcYqDs is filled for all columns of y at once.
%
%   const may be omitted, in which case it is treated as false (it used to
%   be read unconditionally, which raised an error for matrix input).

    K = numel(similarityFNs);
    K2 = numel(sparsityFNs);
    numTerms = K + K2;
    haveTargets = nargin > 4;
    if(nargin < 6)
        const = false;
    end

    Similarities = cell(n_sequences, 1);
    PrecalcQ2s = cell(n_sequences,1);
    PrecalcQ2sFlat = cell(n_sequences,1);
    PrecalcYqDs = zeros(n_sequences, numTerms);

    if(iscell(x))

        for q = 1 : n_sequences
            xq = x{q};
            n = size(xq, 1);
            Similarities{q} = zeros([n, n, numTerms]);
            PrecalcQ2s{q} = cell(numTerms,1);
            PrecalcQ2sFlat{q} = zeros((n*(n+1))/2,numTerms);

            if(haveTargets)
                yq = y{q};
            end

            for t = 1:numTerms
                termFn = pickTermFn(similarityFNs, sparsityFNs, t);
                Similarities{q}(:,:,t) = termFn(xq);
                [B, Bflat] = buildBTerm(Similarities{q}(:,:,t), t > K);
                PrecalcQ2s{q}{t} = B;
                PrecalcQ2sFlat{q}(:,t) = Bflat;
                if(haveTargets)
                    PrecalcYqDs(q,t) = -yq'*B*yq;
                end
            end
        end

    elseif(~const)

        sample_length = size(x,2)/n_sequences;
        % results of the term functions, evaluated on the first sequence only
        cachedTerms = cell(numTerms, 1);

        for q = 1 : n_sequences
            beg_ind = (q-1)*sample_length + 1;
            end_ind = q*sample_length;

            % don't take the bias term
            xq = x(2:end, beg_ind:end_ind);

            Similarities{q} = zeros([sample_length, sample_length, numTerms]);
            PrecalcQ2s{q} = cell(numTerms,1);
            PrecalcQ2sFlat{q} = zeros((sample_length*(sample_length+1))/2,numTerms);

            if(haveTargets)
                yq = y(:,q);
            end

            for t = 1:numTerms
                if(q==1)
                    termFn = pickTermFn(similarityFNs, sparsityFNs, t);
                    cachedTerms{t} = termFn(xq);
                end
                Similarities{q}(:,:,t) = cachedTerms{t};
                [B, Bflat] = buildBTerm(Similarities{q}(:,:,t), t > K);
                PrecalcQ2s{q}{t} = B;
                PrecalcQ2sFlat{q}(:,t) = Bflat;
                if(haveTargets)
                    PrecalcYqDs(q,t) = -yq'*B*yq;
                end
            end
        end

    else

        sample_length = size(x,2)/n_sequences;

        % a single shared set of terms
        PrecalcQ2s = {cell(numTerms,1)};
        PrecalcQ2sFlat = {zeros((sample_length*(sample_length+1))/2,numTerms)};
        Similarities = {zeros([sample_length, sample_length, numTerms])};

        % don't take the bias term
        xq = x(2:end, 1:sample_length);

        for t = 1:numTerms
            termFn = pickTermFn(similarityFNs, sparsityFNs, t);
            Similarities{1}(:,:,t) = termFn(xq);
            [B, Bflat] = buildBTerm(Similarities{1}(:,:,t), t > K);
            PrecalcQ2s{1}{t} = B;
            % flatten the symmetric matrix to save space
            PrecalcQ2sFlat{1}(:,t) = Bflat;
            if(haveTargets)
                % the diagonal holds -yq'*B*yq for every column yq of y
                PrecalcYqDs(:,t) = diag(-y'*B*y);
            end
        end
    end
end

function termFn = pickTermFn(similarityFNs, sparsityFNs, t)
% Return the function of term t: similarity functions first, then sparsity.
    K = numel(similarityFNs);
    if(t <= K)
        termFn = similarityFNs{t};
    else
        termFn = sparsityFNs{t - K};
    end
end

function [B, Bflat] = buildBTerm(S, isSparsity)
% Build B = D + S (sparsity) or B = D - S (similarity) with D = diag(sum(S)),
% together with the lower-triangular entries of B (diagonal included).
    D = diag(sum(S));
    if(isSparsity)
        B = D + S;
    else
        B = D - S;
    end
    Bflat = B(logical(tril(ones(size(S)))));
end

View File

@@ -0,0 +1,54 @@
function [ PrecalcYqDs ] = CalculateYqDs( n_sequences, x, similarityFNs, sparsityFNs, y)
%CALCULATEYQDS Compute -yq' * B^(k) * yq for every sequence and every term.
%   The term functions are evaluated once on x. Similarity terms use
%   B = D - S and sparsity terms use B = D + S, with D = diag(sum(S)).
%   Each column of y is one sequence; the result has one row per sequence
%   and one column per term (similarity terms first).

    K = numel(similarityFNs);
    K2 = numel(sparsityFNs);
    numTerms = K + K2;

    PrecalcYqDs = zeros(n_sequences, numTerms);

    sample_length = size(y,1);

    Similarities = zeros([sample_length, sample_length, numTerms]);
    Bs = zeros([sample_length, sample_length, numTerms]);

    % build the B matrix of every term
    for t = 1:numTerms
        isSparsity = t > K;
        if(isSparsity)
            Similarities(:,:,t) = sparsityFNs{t - K}(x);
        else
            Similarities(:,:,t) = similarityFNs{t}(x);
        end

        S = Similarities(:,:,t);
        D = diag(sum(S));
        if(isSparsity)
            Bs(:,:,t) = D + S;
        else
            Bs(:,:,t) = D - S;
        end
    end

    % quadratic form of every sequence with every term
    for q = 1 : n_sequences
        yq = y(:,q);
        for t = 1:numTerms
            PrecalcYqDs(q,t) = -yq'*Bs(:,:,t)*yq;
        end
    end
end

View File

@@ -0,0 +1,48 @@
function logL = LogLikelihoodCCRF(y_coll, x_coll, alphas, betas,...
                                  lambda_a,lambda_b, PrecalcBsFlat,...
                                  SigmaInvs, ChDecomps, Sigmas)
%LOGLIKELIHOODCCRF Regularised log-likelihood of a CCRF over all sequences.
%   y_coll and x_coll are cell arrays with one entry per sequence.
%   PrecalcBsFlat{q} holds the flattened B terms used to build SigmaInv.
%
%   SigmaInvs, ChDecomps and Sigmas are optional per-sequence cell arrays
%   (precision matrix, its Cholesky factor and its inverse). When all three
%   are supplied they are reused instead of being recomputed.
%
%   The additive constant involving pi is left out of the result, and the
%   terms lambda_b * (betas'*betas)/2 and lambda_a * (alphas'*alphas)/2 are
%   subtracted as regularisation.

    % The function declares 10 parameters, so the precomputed values are
    % available exactly when nargin reaches 10. (The previous test,
    % nargin < 11, was always true and made the reuse branch unreachable.)
    recompute = nargin < 10;

    Q = numel(y_coll);
    logL = 0;
    for q=1:Q
        yq = y_coll{q};
        xq = x_coll{q};

        n = size(xq, 1);
        b = CalcbCCRF(alphas, xq);

        % constructing the sigma inverse
        if(recompute)
            [SigmaInv] = CalcSigmaCCRFflat(alphas, betas, n, PrecalcBsFlat{q});
            L = chol(SigmaInv);
            mu = SigmaInv \ b;
        else
            SigmaInv = SigmaInvs{q};
            L = ChDecomps{q};
            Sigma = Sigmas{q};
            mu = Sigma * b;
        end

        % log of the normalisation term, log(sqrt(det(SigmaInv))), obtained
        % from the Cholesky factor; no times 2 here as we need the square
        % root of the determinant
        normalisation2 = sum(log(diag(L)));

        % log of normalisation * exp(-0.5 * (yq - mu)'*SigmaInv*(yq - mu))
        logLq = normalisation2 + (-0.5 * (yq - mu)'*SigmaInv*(yq-mu));

        logL = logL + logLq;
    end

    % add regularisation term
    logL = logL -lambda_b * (betas'*betas)/2 - lambda_a * (alphas'*alphas)/2;

View File

@@ -0,0 +1,83 @@
function [ correlations, rms, meanCorr, meanRMS, longCorr, longRMS, predictions, gt ] = evaluateCCRFmodel( alphas, betas, x, xOffsets, y, similarityFNs, scaling, verbose, PrecalcBsFlat)
%EVALUATECCRFMODEL Predict every sequence with a CCRF and score the result.
%   For each sequence the prediction is (SigmaInv \ b) * scaling +
%   xOffsets(q). Outputs:
%     correlations, rms   - per-sequence correlation coefficient and RMS error
%     meanCorr, meanRMS   - their means over the sequences
%     longCorr, longRMS   - squared correlation and RMS error over all
%                           sequences concatenated
%     predictions, gt     - the concatenated predictions and targets
%
%   PrecalcBsFlat is optional; when it is omitted the flattened B terms are
%   computed from similarityFNs. With verbose set, the per-sequence and
%   concatenated predictions are plotted against the targets.

    num_x_plots = 8;
    num_y_plots = 10;
    total_plots = num_x_plots * num_y_plots;

    nExamples = numel(x);

    % PrecalcBsFlat is the 9th parameter. (The previous test, nargin < 11,
    % was always true, so a supplied value was recomputed and discarded.)
    if(nargin < 9)
        [ ~, ~, PrecalcBsFlat, ~ ] = CalculateSimilarities( nExamples, x, similarityFNs);
    end

    correlations = zeros(nExamples, 1);
    rms = zeros(nExamples, 1);

    % concatenated data for an alternative correlation
    y_predConcat = [];
    y_trueConcat = [];

    for q=1:nExamples

        X = x{q};
        nFrames = size(X,1);

        PrecalcBflat = PrecalcBsFlat{q};
        SigmaInv = CalcSigmaCCRFflat(alphas, betas, nFrames, PrecalcBflat);
        b = CalcbCCRF(alphas, x{q});
        y_est = SigmaInv \ b;
        y_est = y_est * scaling + xOffsets(q);

        R = corrcoef(y_est, y{q});
        correlations(q) = R(1,2);
        rms(q) = sqrt( (1/nFrames) * sum((y_est - y{q}).^2) );

        y_predConcat = cat(1, y_predConcat, y_est);
        y_trueConcat = cat(1, y_trueConcat, y{q});

        if(verbose)
            if(mod(q,total_plots) == 1)
                % start a new figure; the current sequence still has to be
                % drawn, so it counts towards the remaining plots
                figure;
                remainingPlots = nExamples - q + 1;
                if(remainingPlots < total_plots)
                    num_y_plots = ceil(remainingPlots / num_x_plots);
                end
            end
            subplot(num_y_plots,num_x_plots,mod(q-1,total_plots)+1);
            t = 1:nFrames;
            plot(t,y{q},'g',t,y_est,'b');
            title(sprintf('C %.2f, R %.2f', correlations(q), rms(q)));
            set(gca, 'XTick', [], 'YTick', []);
        end
    end

    meanCorr = mean(correlations);
    meanRMS = mean(rms);

    longCorr = corr(y_predConcat, y_trueConcat).^2;
    longRMS = sqrt( (1/numel(y_predConcat)) * sum((y_predConcat - y_trueConcat).^2) );

    predictions = y_predConcat;
    gt = y_trueConcat;

    if(verbose)
        figure
        plot([1:numel(y_trueConcat)],y_trueConcat,'g',[1:numel(y_trueConcat)],y_predConcat,'b');
        title(sprintf('C %.2f, R %.2f', longCorr, longRMS));
        set(gca, 'XTick', [], 'YTick', []);
    end
end

View File

@@ -0,0 +1,28 @@
function [ scaling ] = getScaling( alphas, betas, x, y, masks, PrecalcQ2s, useIndicator)
%GETSCALING Mean ratio between target and prediction standard deviations.
%   For every sequence the CCRF prediction SigmaInv \ b is computed and the
%   ratio std(y{q}) / std(prediction) is taken; the result is the mean of
%   these ratios over all sequences.
%
%   NOTE(review): CalcSigmaCCRF and CalcbCCRF are called here with the
%   extra mask / useIndicator arguments; confirm that the versions on the
%   path accept them.

    numSeqs = numel(x);
    ratios = zeros(1,numSeqs);

    for q=1:numSeqs
        seqMask = masks{q};

        precision = CalcSigmaCCRF(alphas, betas, PrecalcQ2s{q}, seqMask, useIndicator);
        linearTerm = CalcbCCRF(alphas, x{q}, seqMask, useIndicator);

        prediction = precision \ linearTerm;

        ratios(q) = std(y{q}) / std(prediction);
    end

    scaling = mean(ratios);
end

View File

@@ -0,0 +1,30 @@
function [ scaling ] = getScaling2( alphas, betas, x, y, PrecalcBs)
%GETSCALING2 Ratio of target to prediction standard deviation over all data.
%   The targets of every sequence are mean-centred per sequence and
%   concatenated; the CCRF predictions SigmaInv \ b are concatenated as
%   they are. The result is std(targets) / std(predictions).

    centredTargets = [];
    stackedPredictions = [];

    for q = 1:numel(x)
        precision = CalcSigmaCCRF(alphas, betas, PrecalcBs{q});
        linearTerm = CalcbCCRF(alphas, x{q});
        prediction = precision \ linearTerm;

        centredTargets = cat(1, centredTargets, y{q} - mean(y{q}));
        stackedPredictions = cat(1, stackedPredictions, prediction);
    end

    scaling = std(centredTargets) / std(stackedPredictions);
end

View File

@@ -0,0 +1,92 @@
function [ logGradientAlphas, logGradientBetas, SigmaInv, ChDecomp ] = gradientCCRF( alphas, betas, lambda_a, lambda_b, precalcQ2withoutBeta, xq, yq, mask, precalcYQ, useIndicator, PrecalcQ2Flat)
%GRADIENTCCRF Regularised gradient for one sequence w.r.t. log(alpha), log(beta).
%   Each entry is the derivative with respect to the parameter (including
%   the -lambda * parameter regularisation term) multiplied by the
%   parameter itself. Also returns SigmaInv and its Cholesky factor.
%
%   precalcQ2withoutBeta{k} is used as the derivative of SigmaInv terms
%   with respect to beta_k, precalcYQ(k) as the precomputed -yq'*dSdb*yq.
%
%   NOTE(review): CalcSigmaCCRFflat and CalcbCCRF are called with the extra
%   mask / useIndicator arguments; confirm that the versions on the path
%   accept them.

    seqLen = size(precalcQ2withoutBeta{1}, 1);

    % SigmaInv is assembled from the flattened lower-triangular terms
    SigmaInv = CalcSigmaCCRFflat(alphas, betas, seqLen, PrecalcQ2Flat, mask, useIndicator);

    % Invert through the Cholesky factor: with SigmaInv = R'*R,
    % Sigma = R \ (R' \ I)
    ChDecomp = chol(SigmaInv);
    Sigma = ChDecomp\(ChDecomp'\eye(size(SigmaInv)));

    b = CalcbCCRF(alphas, xq, mask, useIndicator);

    % Sigma is already available, so mu = SigmaInv \ b becomes a product
    mu = Sigma * b;

    logGradientAlphas = zeros(size(alphas));
    logGradientBetas = zeros(size(betas));

    % derivative with respect to every alpha_k
    for k = 1:numel(alphas)

        if(useIndicator)
            dQ1da = diag(mask(:,k));
            dbda = xq(:,k).*mask(:,k);
            gaussGradient = -yq'*dQ1da*yq +2*yq'*dbda -2 * dbda' * mu + mu'*dQ1da*mu;
            zGradient = Sigma(:)'*dQ1da(:);
        else
            % without masks the derivative of Q1 is the identity, which
            % simplifies both terms (trace(Sigma * I) = trace(Sigma))
            gaussGradient = -yq'*yq +2*yq'*xq(:,k) -2 * xq(:,k)' * mu + sum(mu.^2);
            zGradient = trace(Sigma);
        end

        % partition function part plus Gaussian part, then regularisation
        dLda = zGradient + gaussGradient;
        dLda = dLda - lambda_a * alphas(k);

        logGradientAlphas(k) = alphas(k) * dLda;
    end

    % derivative with respect to every beta_k
    for k = 1:numel(betas)

        dSdb = precalcQ2withoutBeta{k};

        % precalcYQ(k) stands in for -yq'*dSdb*yq
        gaussGradient = precalcYQ(k) + mu'*dSdb*mu;

        % same value as trace(Sigma*dSdb)
        zGradient = Sigma(:)'*dSdb(:);

        dLdb = gaussGradient + zGradient;
        dLdb = dLdb - lambda_b * betas(k);

        logGradientBetas(k) = betas(k) * dLdb;
    end
end

View File

@@ -0,0 +1,39 @@
function [ gradientParams, SigmaInvs, CholDecomps, Sigmas ] = gradientCCRFFull( params, lambda_a, lambda_b, PrecalcBs, x, y, Precalc_yBys, PrecalcBsFlat)
%GRADIENTCCRFFULL Regularised gradient of the CCRF objective over all sequences.
%   params stacks the alphas followed by the betas; the number of betas is
%   taken from the column count of PrecalcBsFlat{1}. The unregularised
%   per-sequence gradients are summed and lambda_a * alphas and
%   lambda_b * betas are subtracted.
%
%   SigmaInvs, CholDecomps and Sigmas collect the per-sequence matrices
%   returned by gradientCCRF_withoutReg so that callers can reuse them.

    numSeqs = numel(x);

    numBetas = size(PrecalcBsFlat{1},2);
    numAlphas = numel(params) - numBetas;

    alphaPart = params(1:numAlphas);
    betaPart = params(numAlphas+1:end);

    SigmaInvs = cell(numSeqs, 1);
    CholDecomps = cell(numSeqs, 1);
    Sigmas = cell(numSeqs, 1);

    % one row of gradient values per sequence
    perSequence = zeros(numSeqs, numel(params));

    for q = 1 : numSeqs
        [ gradAlphas, gradBetas, SigmaInvs{q}, CholDecomps{q}, Sigmas{q} ] = gradientCCRF_withoutReg(alphaPart, betaPart, PrecalcBs{q}, x{q}, y{q}, Precalc_yBys(q, :), PrecalcBsFlat{q});

        perSequence(q,:) = [gradAlphas; gradBetas];
    end

    summedGradient = sum(perSequence,1)';

    % regularisation is applied once, after summing over the sequences
    regularisation = [alphaPart * lambda_a; betaPart * lambda_b];
    gradientParams = summedGradient - regularisation;
end

View File

@@ -0,0 +1,76 @@
function [ logGradientAlphas, logGradientBetas, SigmaInv, CholDecomp, Sigma ] = gradientCCRF_withoutReg( alphas, betas, precalcQ2withoutBeta, xq, yq, Precalc_yBy, PrecalcB_flat)
%GRADIENTCCRF_WITHOUTREG Unregularised CCRF gradient for a single sequence.
%   Returns the derivatives with respect to every alpha and beta (despite
%   the output names they are not scaled by the parameter values), together
%   with SigmaInv, its Cholesky factor and Sigma.
%
%   precalcQ2withoutBeta{k} is used as the derivative of SigmaInv terms
%   with respect to beta_k, Precalc_yBy(k) as the precomputed -yq'*dSdb*yq.

    seqLen = size(xq, 1);

    % SigmaInv is assembled from the flattened lower-triangular terms
    SigmaInv = CalcSigmaCCRFflat(alphas, betas, seqLen, PrecalcB_flat);

    % Invert through the Cholesky factor: with SigmaInv = R'*R,
    % Sigma = R \ (R' \ I)
    CholDecomp = chol(SigmaInv);
    Sigma = CholDecomp\(CholDecomp'\eye(size(SigmaInv)));

    b = CalcbCCRF(alphas, xq);

    % Sigma is already available, so mu = SigmaInv \ b becomes a product
    mu = Sigma * b;

    logGradientAlphas = zeros(size(alphas));
    logGradientBetas = zeros(size(betas));

    % quantities shared by every alpha derivative
    negTargetEnergy = -yq'*yq;
    meanEnergy = sum(mu.^2);
    % partition function part: trace(Sigma * I) = trace(Sigma)
    alphaZGradient = trace(Sigma);

    for k = 1:numel(alphas)
        gaussGradient = negTargetEnergy +2*yq'*xq(:,k) -2 * xq(:,k)' * mu + meanEnergy;
        logGradientAlphas(k) = alphaZGradient + gaussGradient;
    end

    for k = 1:numel(betas)
        dSdb = precalcQ2withoutBeta{k};

        % Precalc_yBy(k) stands in for -yq'*dSdb*yq
        gaussGradient = Precalc_yBy(k) + mu'*dSdb*mu;

        % same value as trace(Sigma*dSdb)
        zGradient = Sigma(:)'*dSdb(:);

        logGradientBetas(k) = gaussGradient + zGradient;
    end
end

View File

@@ -0,0 +1,35 @@
function W = randInitializeWeights(L_in, L_out)
%RANDINITIALIZEWEIGHTS Random initial weights for one network layer.
%   W = RANDINITIALIZEWEIGHTS(L_in, L_out) returns a matrix of size
%   (L_out, 1 + L_in) for a layer with L_in incoming and L_out outgoing
%   connections; the extra column is for the "bias" terms.
%
%   The entries are uniform random values scaled into the interval
%   [-epsilon_init, epsilon_init] with epsilon_init = 1/sqrt(L_in), so that
%   the symmetry between units is broken at the start of training.

    epsilon_init = 1/sqrt(L_in);

    unitUniform = rand(L_out, 1 + L_in);
    W = unitUniform * 2 * epsilon_init - epsilon_init;
end

View File

@@ -0,0 +1,8 @@
function SimilarityMatrix = similarityEuclidean(x)
%SIMILARITYEUCLIDEAN Similarity from inverse square-rooted pairwise distance.
%   Off-diagonal entries are 1 / sqrt(d + 3e-6), where d is the pairwise
%   distance returned by pdist for the rows of x; the diagonal is 1.

    pairwiseDist = pdist(x);

    % the small offset keeps the value finite when two rows coincide
    invRootDist = sqrt(pairwiseDist+3e-6).^-1;

    SimilarityMatrix = squareform(invRootDist) + eye(size(x, 1));
end

View File

@@ -0,0 +1,25 @@
function SimilarityMatrix = similarityGauss(x, sigma, range, mask)
%SIMILARITYGAUSS Similarity matrix based on exponential decay of distance.
%   Off-diagonal entries are exp(-d / sigma), where d is the pairwise
%   distance returned by pdist for the rows of x; the diagonal is 1.
%
%   range  - optional column indices of x to measure the distance on; when
%            empty or omitted all columns are used
%   mask   - optional matrix with one row per row of x; a row whose mask
%            entries in the columns of range sum to less than numel(range)
%            gets similarity 0 to every other row. When empty or omitted no
%            rows are invalidated.
%
%   range and mask used to be read unconditionally, so calling with fewer
%   than four arguments raised an error; they now default to empty.

    if(nargin < 3)
        range = [];
    end
    if(nargin < 4)
        mask = [];
    end

    % get the distance for each pair
    if(numel(range) > 0)
        Distances = exp(-pdist(x(:,range))/sigma);
    else
        Distances = exp(-pdist(x)/sigma);
    end

    SimilarityMatrix = squareform(Distances);

    % invalidate the illegal values from the mask (if at least one element
    % is not present in the mask set similarity to 0)
    if(numel(mask) ~= 0)
        invalidInds = sum(mask(:,range),2) < numel(range);

        SimilarityMatrix(invalidInds,:) = 0;
        SimilarityMatrix(:,invalidInds) = 0;
    end

    % every element is fully similar to itself
    SimilarityMatrix = SimilarityMatrix + eye(size(x, 1));
end

View File

@@ -0,0 +1,25 @@
function [ SimilarityMatrix ] = similarityNeighbor( x, n, range)
%SIMILARITYNEIGHBOR Connect every element to the elements n steps away.
%   Returns a size(x,1)-by-size(x,1) matrix with ones on the main diagonal
%   and on the n-th sub- and super-diagonal, zeros elsewhere. The range
%   argument is not used by the code below.

    numRows = size(x,1);
    identity = eye(numRows);

    SimilarityMatrix = identity;

    % mark the pairs (i+n, i) and (i, i+n)
    nearIdx = 1:numRows-n;
    farIdx = nearIdx + n;
    SimilarityMatrix(sub2ind([numRows, numRows], farIdx, nearIdx)) = 1;
    SimilarityMatrix(sub2ind([numRows, numRows], nearIdx, farIdx)) = 1;

    % clear the diagonal and put the identity back
    offDiagonal = ones(numRows) - identity;
    SimilarityMatrix = SimilarityMatrix .* offDiagonal;
    SimilarityMatrix = SimilarityMatrix + identity;
end