% Function addGhostPointsMinorityClass
% Suzan Koknar-Tezel June 2009
%
% This function takes a training-by-training matrix of distance scores, and
% the testing-by-training matrix of distance scores, and augments them by
% adding ghost points to the minority class. 
%
% Subfunction:
%   ghostDist(abDist,axDist,bxDist)
%
% Input:
%   trainScores - the n x n training score matrix
%   testScores - the m x n testing score matrix
%   trainLabels - the n x 1 vector containing the labels
%   numGP - the number of ghost points to add per minority-class training
%           example
%   minorityClass - the class label of the minority class
%   knnScores - the score matrix to use to find the knn of each example
%   sortFlag - should be set to 'ascend' if knnScores are distances and to
%              'descend' if knnScores are similarities
%
% Output:
%   trainScoresGP - the training score matrix augmented with ghost points
%   testScoresGP - the testing score matrix augmented with ghost points
%   trainLabelsGP - the training labels augmented with the labels of the
%      ghost point examples

function [trainScoresGP, testScoresGP, trainLabelsGP] = addGhostPointsMinorityClass(trainScores,testScores,trainLabels,numGP,minorityClass,knnScores,sortFlag)
% Augment the train-vs-train and test-vs-train score matrices with ghost
% points generated for the minority class only.
%
% For every training example labelled minorityClass, its numGP nearest
% minority-class neighbours (ranked with knnScores / sortFlag) are looked
% up, and one ghost point is created per (example, neighbour) pair. The
% score from each ghost point to every other point is obtained from
% ghostDist(abDist,axDist,bxDist), which is defined outside this function.
%
% Input:
%   trainScores   - n x n training score matrix
%   testScores    - m x n testing score matrix
%   trainLabels   - vector of n labels (row or column)
%   numGP         - number of ghost points to add per minority-class example
%   minorityClass - label value identifying the minority class
%   knnScores     - score matrix used to rank neighbours (indexed like
%                   trainScores)
%   sortFlag      - 'ascend' if knnScores are distances, 'descend' if they
%                   are similarities (passed straight to sort)
%
% Output:
%   trainScoresGP - (n+g) x (n+g) training scores, g = numGP * #minority
%   testScoresGP  - m x (n+g) testing scores
%   trainLabelsGP - (n+g) x 1 labels; each ghost point inherits the label
%                   of the example it was generated from
%
% Errors:
%   addGhostPointsMinorityClass:tooFewNeighbors - numGP is larger than the
%   number of other minority-class examples available as neighbours.

numTrains = length(trainLabels);
numTests = size(testScores,1);

% Force a column so that the transposed vector used by the for-loop below
% is always a row, whatever the orientation of trainLabels.
idxOfMinorityClass = find(trainLabels == minorityClass);
idxOfMinorityClass = idxOfMinorityClass(:);
numInMinorityClass = length(idxOfMinorityClass);

% Each example needs numGP distinct neighbours other than itself. An empty
% minority class is let through: the outputs are then simply un-augmented.
if numInMinorityClass > 0 && numGP > numInMinorityClass - 1
    error('addGhostPointsMinorityClass:tooFewNeighbors', ...
        'numGP (%d) exceeds the number of other minority-class examples (%d).', ...
        numGP, numInMinorityClass - 1);
end

numNewPoints = numGP * numInMinorityClass;

% Per ghost point: [index of a, index of b, score between a and b]. Kept so
% the test-matrix pass below can reuse the same end points.
endPoints = zeros(numNewPoints,3);

% Initialize the matrix that will hold the augmented training scores and
% the augmented training labels
trainScoresGP = zeros(numTrains+numNewPoints,numTrains+numNewPoints);
trainScoresGP(1:numTrains,1:numTrains) = trainScores;
trainLabelsGP = zeros(numTrains+numNewPoints,1);
trainLabelsGP(1:numTrains,1) = trainLabels;

newPointIdx = numTrains + 1;
endPointsIdx = 1;

% Add GP to each minority-class training example
for i = idxOfMinorityClass'
    aPoint = i;

    % Exclude the example itself explicitly rather than assuming it sorts
    % first: with tied scores (duplicate points) or a similarity matrix
    % whose diagonal is not the row maximum, skipping "rank 1" would drop a
    % real neighbour and could pair the example with itself.
    candidates = idxOfMinorityClass(idxOfMinorityClass ~= aPoint);
    scores = knnScores(aPoint,candidates);
    [nnScores, nnIdx] = sort(scores,sortFlag); %#ok<ASGLU> nnScores is not used

    % Add numGP ghost points to this example, one per nearest neighbour
    for pt = 1:numGP
        bPoint = candidates(nnIdx(pt));
        abDist = trainScores(aPoint,bPoint);

        % save the indices of the end points and the distance between
        endPoints(endPointsIdx,1) = aPoint;
        endPoints(endPointsIdx,2) = bPoint;
        endPoints(endPointsIdx,3) = abDist;

        % Calculate the score from the new ghost point to every point
        % already in the augmented matrix (original examples and earlier
        % ghost points), using ghostDist (formula from KAIS 2010 paper).
        % NOTE(review): for j == newPointIdx the entries read below are
        % still the zeros from initialization, so the ghost point's
        % self-score is ghostDist(abDist,0,0) - confirm this is intended.
        newScores = zeros(1,newPointIdx);
        for j = 1:newPointIdx
            axDist = trainScoresGP(aPoint,j);
            bxDist = trainScoresGP(bPoint,j);
            newScores(1,j) = ghostDist(abDist,axDist,bxDist);
        end

        % Add the new row of scores
        trainScoresGP(newPointIdx,1:newPointIdx) = newScores;
        % Add the new col of scores
        trainScoresGP(1:newPointIdx,newPointIdx) = newScores';
        % Add the label of this ghost point
        trainLabelsGP(newPointIdx,1) = trainLabels(aPoint);

        % Increment the number of training examples
        newPointIdx = newPointIdx + 1;
        % Increment the endPoints index
        endPointsIdx = endPointsIdx + 1;
    end % pt loop
end % i loop


%---------------------------------------------------------------------
% Now add the ghost point distances to the testing matrix

% Initialize the matrix to hold the distances from each test example to the
% ghost points. Later, this will be concatenated to the test scores
newTestScores = zeros(numTests,numNewPoints);

for i = 1:numTests
    for j = 1:numNewPoints
        % a is one of the original training points
        % b is the other original training point
        % x is the testing point
        % The ghost point lies between a and b and we need to calculate
        % the distance from it to x (the testing point) given the
        % distances from a to b, a to x, and b to x
        aPoint = endPoints(j,1);
        bPoint = endPoints(j,2);
        xPoint = i;
        abDist = endPoints(j,3);
        axDist = testScores(xPoint,aPoint);
        bxDist = testScores(xPoint,bPoint);

        newTestScores(i,j) = ghostDist(abDist,axDist,bxDist);
    end
end

% Now concatenate these GP scores with the original TestVsTrain scores
testScoresGP = [testScores newTestScores];

return


