-
-
Notifications
You must be signed in to change notification settings - Fork 24
/
qgram.go
56 lines (46 loc) · 1.51 KB
/
qgram.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
package edlib
import (
"math"
)
// QgramDistance compute the q-gram similarity between two strings
// Takes two strings as parameters, a split length which defines the k-gram shingle length
func QgramDistance(str1, str2 string, splitLength int) int {
splittedStr1 := Shingle(str1, splitLength)
splittedStr2 := Shingle(str2, splitLength)
union := make(map[string]int)
for i := range splittedStr1 {
union[i] = 0
}
for i := range splittedStr2 {
union[i] = 0
}
res := 0
for i := range union {
res += int(math.Abs(float64(splittedStr1[i] - splittedStr2[i])))
}
return res
}
// QgramDistanceCustomNgram compute the q-gram similarity between two custom set of individuals
// Takes two n-gram map as parameters
func QgramDistanceCustomNgram(splittedStr1, splittedStr2 map[string]int) int {
union := make(map[string]int)
for i := range splittedStr1 {
union[i] = 0
}
for i := range splittedStr2 {
union[i] = 0
}
res := 0
for i := range union {
res += int(math.Abs(float64(splittedStr1[i] - splittedStr2[i])))
}
return res
}
// QgramSimilarity compute a similarity index (between 0 and 1) between two strings from a Qgram distance
// Takes two strings as parameters, a split length which defines the k-gram shingle length
func QgramSimilarity(str1, str2 string, splitLength int) float32 {
splittedStr1 := Shingle(str1, splitLength)
splittedStr2 := Shingle(str2, splitLength)
res := float32(QgramDistanceCustomNgram(splittedStr1, splittedStr2))
return 1 - (res / float32(len(splittedStr1)+len(splittedStr2)))
}