Skip to content

Commit

Permalink
Fuzzy search between string and list with optional threshold
Browse files Browse the repository at this point in the history
  • Loading branch information
hbollon committed Sep 15, 2020
1 parent 744e55c commit f2611c4
Showing 1 changed file with 47 additions and 1 deletion.
48 changes: 47 additions & 1 deletion string-analysis.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package edlib

import "errors"
import (
"errors"
"log"
)

// AlgorithMethod is an unsigned 8-bit integer type whose values identify
// edit distance algorithms.
type AlgorithMethod uint8
Expand Down Expand Up @@ -50,3 +53,46 @@ func matchingIndex(str1 string, str2 string, distance int) float32 {
}
return float32(len(str2)-distance) / float32(len(str2))
}

// FuzzySearch performs an approximate search of str within strList and
// returns the entry that is the most similar to str, using the algorithm
// selected by algo to compute similarity.
//
// An entry whose similarity is exactly 1.0 is returned immediately. Otherwise
// the entry with the highest similarity strictly greater than zero is
// returned; on ties the earliest entry wins. The empty string is returned
// when no entry qualifies, which includes an empty strList.
//
// NOTE: if StringsSimilarity reports an error, it is passed to log.Fatal,
// which terminates the program.
func FuzzySearch(str string, strList []string, algo AlgorithMethod) string {
	var (
		best      string
		bestScore float32
	)

	for _, candidate := range strList {
		score, err := StringsSimilarity(str, candidate, algo)
		if err != nil {
			log.Fatal(err)
		}

		switch {
		case score == 1.0:
			// Perfect match: nothing later in the list can beat it.
			return candidate
		case score > bestScore:
			best, bestScore = candidate, score
		}
	}

	return best
}

// FuzzySearchThreshold performs an approximate search of str within strList
// and returns the entry that is the most similar to str, using the algorithm
// selected by algo to compute similarity. It takes a similarity threshold,
// minSim, as a parameter.
//
// An entry whose similarity is exactly 1.0 is returned immediately, without
// consulting minSim. Otherwise only entries whose similarity is at least
// minSim and strictly greater than the best one seen so far (initially zero)
// are retained; on ties the earliest entry wins. The empty string is returned
// when no entry qualifies, which includes an empty strList.
//
// NOTE: if StringsSimilarity reports an error, it is passed to log.Fatal,
// which terminates the program.
func FuzzySearchThreshold(str string, strList []string, minSim float32, algo AlgorithMethod) string {
	var (
		best      string
		bestScore float32
	)

	for _, candidate := range strList {
		score, err := StringsSimilarity(str, candidate, algo)
		if err != nil {
			log.Fatal(err)
		}

		switch {
		case score == 1.0:
			// Perfect match: nothing later in the list can beat it.
			return candidate
		case score >= minSim && score > bestScore:
			best, bestScore = candidate, score
		}
	}

	return best
}

0 comments on commit f2611c4

Please sign in to comment.