-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdice_test_nim.nim
37 lines (29 loc) · 1 KB
/
dice_test_nim.nim
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import strutils, tables, parseopt2, system, algorithm, future
)
proc sortedBigrams(s: string): seq[string] =
  ## Returns every two-byte substring (bigram) of `s`, sorted ascending.
  ## A string shorter than two bytes yields an empty sequence.
  result = newSeq[string](max(len(s) - 1, 0))
  for i in 0 .. high(result):
    result[i] = s[i .. i + 1]
  # `system.cmp` (not `cmp[string]`) so the specialised string overload is used.
  sort(result, system.cmp)

proc dice_coefficient(a:string, b:string): float =
  ## Returns the Dice coefficient of `a` and `b` computed over their
  ## bigrams: `2 * shared / (bigrams(a) + bigrams(b))`, a value in 0.0 .. 1.0.
  ##
  ## Bigrams are taken byte-wise and compared as multisets, so a bigram that
  ## occurs k times in both strings counts as k shared bigrams.
  ##
  ## Special cases:
  ## * either string empty -> 0.0
  ## * identical strings -> 1.0
  ## * differing strings where either is a single character -> 0.0
  if len(a) == 0 or len(b) == 0: return 0.0
  # quick case for true duplicates
  if a == b: return 1.0
  # if a != b, and a or b are single chars, then they can't possibly match
  if len(a) == 1 or len(b) == 1: return 0.0

  # Built with a plain loop instead of the `lc[...]` comprehension macro,
  # which is not available in current standard libraries.
  let
    aBigrams = sortedBigrams(a)
    bBigrams = sortedBigrams(b)
  var
    shared = 0
    i = 0
    j = 0
  # Merge-style walk over both sorted lists: equal heads are a shared bigram
  # and consume one entry from each side; otherwise advance the smaller head.
  while i < len(aBigrams) and j < len(bBigrams):
    if aBigrams[i] == bBigrams[j]:
      shared += 1
      i += 1
      j += 1
    elif aBigrams[i] < bBigrams[j]:
      i += 1
    else:
      j += 1
  # `/` on two ints yields a float.
  return (2 * shared) / (len(aBigrams) + len(bBigrams))
# Print the Dice coefficient of the values stored under keys "a" and "b"
# of `args` (`args` is defined outside this excerpt).
echo args["a"].dice_coefficient(args["b"])