-
Notifications
You must be signed in to change notification settings - Fork 0
/
NMI.m
66 lines (61 loc) · 1.49 KB
/
NMI.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
function [ nmi ] = NMI( x, y )
% NMI  Normalized mutual information between two clusterings.
%
% Measures how well a clustering result agrees with a reference
% partition:
%
%     nmi = 2 * I(X;Y) / ( H(X) + H(Y) )
%
% INPUT:
%   x, y: The cluster result and the ground truth. Each is a matrix
%         with (at least) two columns. The first column is the index of
%         each node. The second column is the corresponding cluster
%         label. Labels may be any numeric values (they do not have to
%         be the consecutive integers 1..K); every distinct value is
%         treated as one cluster. Rows may be in any order.
%
% OUTPUT:
%   nmi: the NMI score (0~1). Only nodes that occur in both x and y
%        contribute. Special cases:
%          * no node is shared by x and y          -> NaN (undefined)
%          * both partitions are a single cluster  -> 1
%
% Author: Peizhuo Wang ([email protected])
% Sep. 2016

    if size(x, 2) < 2 || size(y, 2) < 2
        error('NMI:invalidInput', ...
              'x and y must each have two columns: [node, label].');
    end

    % Map the raw labels of each partition onto 1..K. Doing this for
    % both inputs means gaps in the label range (or labels such as 0)
    % never produce empty clusters, which would otherwise give
    % 0*log(0) = NaN further down.
    [~, ~, idx_x] = unique(x(:, 2));
    [~, ~, idx_y] = unique(y(:, 2));
    NCLUSTER_x = max([idx_x(:); 0]);
    NCLUSTER_y = max([idx_y(:); 0]);

    % Node members of every ground-truth cluster, gathered once.
    cluster_y = cell(1, NCLUSTER_y);
    for j = 1:NCLUSTER_y
        cluster_y{j} = y(idx_y == j, 1);
    end

    % Compute the confusion matrix: c(i,j) is the number of distinct
    % nodes that lie in cluster i of x and in cluster j of y.
    c = zeros(NCLUSTER_x, NCLUSTER_y);
    for i = 1:NCLUSTER_x
        nodes_i = x(idx_x == i, 1);
        for j = 1:NCLUSTER_y
            c(i, j) = numel(intersect(nodes_i, cluster_y{j}));
        end
    end

    % Normalise by the number of co-occurrences actually counted, not
    % by size(x,1): the two differ when x and y do not list exactly the
    % same nodes, and the probabilities must sum to one.
    N = sum(c(:));
    if N == 0
        nmi = NaN;      % no shared nodes: the score is undefined
        return;
    end

    ci = sum(c, 2);     % NCLUSTER_x-by-1 row totals
    cj = sum(c, 1);     % 1-by-NCLUSTER_y column totals

    % Entropies of the two partitions. Zero marginals (clusters without
    % any shared node) are skipped, following the convention
    % 0*log(0) = 0.
    px = ci(ci > 0) / N;
    py = cj(cj > 0) / N;
    H_x = -sum(px .* log(px));
    H_y = -sum(py .* log(py));

    % Mutual information, summed over the non-empty cells only. A
    % non-empty cell always has non-zero row and column totals, so the
    % ratio below is well defined.
    expected = (ci * cj) / N;   % counts expected under independence
    nz = c > 0;
    MI = sum(c(nz) .* log(c(nz) ./ expected(nz))) / N;

    if (H_x + H_y) == 0
        % Both partitions put every shared node in one single cluster;
        % they are identical, so report perfect agreement instead of
        % the 0/0 the formula would give.
        nmi = 1;
    else
        nmi = 2 * MI / (H_x + H_y);
        % Remove floating-point round-off that could push the score
        % marginally outside its mathematical range [0, 1].
        nmi = max(0, min(1, nmi));
    end
end