-
Notifications
You must be signed in to change notification settings - Fork 1
/
ksrc_spam_rbf.m
142 lines (97 loc) · 2.96 KB
/
ksrc_spam_rbf.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
% ---- Load the spreadsheet and build the bag-of-words features ----
% xlsread returns the numeric cells in num_data and the text cells in text_data.
[num_data, text_data] = xlsread('spam.xlsx');
sentences = tokenizedDocument(text_data(:, 1));
bag = bagOfWords(sentences);

% Running count of misclassified test samples (incremented in the test loop).
error_count = 0;

% Dense word-count matrix, transposed so that column j is indexed in step
% with row j of num_data in the split below.
Data = full(bag.Counts).';

% ---- Fixed train/test split ----
% Columns 1..50 and 5523..5572 are held out for testing; 51..5522 are used
% for training.
test_idx  = [1:50, 5523:5572];
train_idx = 51:5522;

% Matrix of all test samples and their labels (first column of num_data).
TestData   = Data(:, test_idx);
TestLabels = num_data(test_idx, 1);

% Matrix of all training samples and their labels.
X      = Data(:, train_idx);
labels = num_data(train_idx, 1);

% Distinct class labels and how many there are.
uniqlabels = unique(labels);
c = numel(uniqlabels);

% m = dimensionality of the training data, n = number of training samples.
m = size(X, 1);
n = size(X, 2);

% ---- Preallocate per-test-sample outputs ----
testdata_n  = size(TestData, 2);
Predictions = zeros(testdata_n, 1);
% One residual score per (test sample, class) pair.
scores      = zeros(testdata_n, c);

% Noise threshold: upper bound on the residual norm in the solver constraint.
epsilon = 1e-3;
% ---- RBF kernel width ----
% gamma is the median, over the training samples, of 1 / ||x_i - mean(X)||^2.
% The distances are taken column by column: norm(M, 2) applied to a whole
% matrix returns a single scalar (its largest singular value), which would
% leave median() with only one number to work on.
mean_x = mean(X, 2);
sqdist_to_mean = sum((X - mean_x).^2, 1);
% No trailing semicolon: the chosen width is echoed to the console.
% The variable keeps the name "gamma" because the test loop reads it, even
% though it hides MATLAB's gamma() function while it is in the workspace.
gamma = median(1 ./ sqdist_to_mean)

% ---- RBF Gram matrix of the training set ----
% ||x_i - x_j||^2 = ||x_i||^2 + ||x_j||^2 - 2 * x_i' * x_j
n1sq = sum(X.^2, 1);
n1 = size(X, 2);
temp = n1sq' + n1sq - 2 * (X' * X);
% Squared distances cannot be negative; clamp floating-point round-off.
temp = max(temp, 0);
K = exp(-gamma .* temp);

% ---- Pseudo-transformation matrix via kernel PCA ----
% eig() does not promise any particular ordering, so the eigenpairs are
% always sorted explicitly. After this step D is a column vector of
% eigenvalues in descending order and V holds the matching eigenvectors,
% regardless of the order eig() happened to return.
[V, D] = eig(K);
[D, I] = sort(diag(D), 'descend');
V = V(:, I);

% Keep the leading eigenvectors and scale each one by 1/sqrt(eigenvalue).
% Only the retained components are scaled, so tiny or negative trailing
% eigenvalues (numerical noise) never reach sqrt() or the division.
num_components = 10;
lead_eigvals = D(1:num_components);
if any(lead_eigvals <= 0)
    error('ksrc_spam_rbf:nonPositiveEigenvalue', ...
        'The %d leading eigenvalues of the Gram matrix must be positive.', ...
        num_components);
end
B = V(:, 1:num_components) ./ sqrt(lead_eigvals)';
% ---- Classify every test sample with kernel sparse representation ----
% For each test column: evaluate the RBF kernel against all training samples,
% find the sparsest coefficient vector whose projected reconstruction error is
% within epsilon, then assign the class whose coefficients alone give the
% smallest projected residual.

% The projected training Gram matrix does not depend on the test sample,
% so it is computed once instead of once per solve and once per class.
BK_train = B' * K;

for j = 1:testdata_n
    test = TestData(:, j);

    % RBF kernel between the test sample and every training sample:
    % ||x_i - t||^2 = ||x_i||^2 + ||t||^2 - 2 * x_i' * t
    n2sq = sum(test.^2, 1);
    temp = n1sq' + n2sq - 2 * (X' * test);
    temp = max(temp, 0);          % clamp floating-point round-off
    k = exp(-gamma .* temp);

    % Test kernel vector projected onto the KPCA basis.
    Bk_test = B' * k;

    % L1-minimal coefficient vector subject to the residual-norm bound.
    cvx_begin
        cvx_quiet(true);
        variable a(n,1);
        minimize( norm(a, 1) );
        subject to
            norm(Bk_test - BK_train * a, 2) <= epsilon
    cvx_end

    % A failed solve leaves NaN in a, which would make every score NaN and
    % silently select the first class; make that visible.
    if isempty(strfind(cvx_status, 'Solved'))
        warning('ksrc_spam_rbf:cvxNotSolved', ...
            'CVX returned status "%s" for test sample %d.', cvx_status, j);
    end

    % Class-wise residuals: keep only the coefficients of class i.
    for i = 1:c
        in_class = (labels == uniqlabels(i));
        delta_i = zeros(n, 1);
        delta_i(in_class) = a(in_class);
        Residual_i = Bk_test - BK_train * delta_i;
        scores(j, i) = norm(Residual_i, 2);
    end

    % Predict the class with the smallest residual.
    [~, index] = min(scores(j, :));
    Predictions(j, 1) = uniqlabels(index);

    if Predictions(j, 1) ~= TestLabels(j, 1)
        error_count = error_count + 1;
        fprintf('Should be %f, but was %f.\n', TestLabels(j, 1), Predictions(j, 1));
    end
end
%{
%src with noise tolerance
%computations for coefficient vector using cvx
cvx_begin
%cvx_quiet(true);
%coefficient vector to be found
variable a(n,1);
minimize norm(a,1);
subject to
norm(test - X*a, 2) <= epsilon
cvx_end
%{
for i=1:c
R=test-a()*Traindata(find(Trainlabels==uniqlabels(i)),:);
src_scores(:,i)=sqrt(sum(R.*R,2));
end
%}
Residual_1 = test - X(:,n1)*a(n1,1)
score1 = sqrt(sum(Residual_1.*Residual_1,2))
Residual_2 = test - X(:,n2)*a(n2,1)
score2 = sqrt(sum(Residual_2.*Residual_2,2))
%}
%[predictions,src_scores]=src(X,labels,Y,0.3)