From f2611c400f9bee18ceba196daa75fd89f29edccd Mon Sep 17 00:00:00 2001
From: Hugo
Date: Tue, 15 Sep 2020 10:03:51 +0200
Subject: [PATCH] Fuzzy search between string and list with optional threshold

---
 string-analysis.go | 48 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/string-analysis.go b/string-analysis.go
index 7b17102..521ea8c 100644
--- a/string-analysis.go
+++ b/string-analysis.go
@@ -1,6 +1,9 @@
 package edlib
 
-import "errors"
+import (
+	"errors"
+	"log"
+)
 
 // AlgorithMethod is an Integer type used to identify edit distance algorithms
 type AlgorithMethod uint8
@@ -50,3 +53,46 @@ func matchingIndex(str1 string, str2 string, distance int) float32 {
 	}
 	return float32(len(str2)-distance) / float32(len(str2))
 }
+
+// FuzzySearch performs an approximate search of str in strList and returns the
+// closest candidate according to the similarity measure selected by algo.
+//
+// An exact match (similarity of 1.0) is returned as soon as it is found. The
+// empty string is returned when strList is empty, when no candidate has a
+// strictly positive similarity, or when the similarity cannot be computed (the
+// error is logged rather than terminating the program).
+func FuzzySearch(str string, strList []string, algo AlgorithMethod) string {
+	// With a threshold of 0 only the "strictly better than the best so far"
+	// rule applies, which is the plain closest-match search.
+	return FuzzySearchThreshold(str, strList, 0, algo)
+}
+
+// FuzzySearchThreshold performs an approximate search of str in strList and
+// returns the closest candidate whose similarity is at least minSim.
+//
+// An exact match (similarity of 1.0) is returned as soon as it is found. The
+// empty string is returned when no candidate has a similarity that is both
+// strictly positive and at least minSim, or when the similarity cannot be
+// computed (the error is logged rather than terminating the program).
+func FuzzySearchThreshold(str string, strList []string, minSim float32, algo AlgorithMethod) string {
+	var bestSim float32
+	var bestMatch string
+	for _, candidate := range strList {
+		sim, err := StringsSimilarity(str, candidate, algo)
+		if err != nil {
+			// A library must not exit the process on behalf of its caller:
+			// report the problem and signal "no match".
+			log.Printf("edlib: fuzzy search aborted: %v", err)
+			return ""
+		}
+
+		switch {
+		case sim == 1.0:
+			return candidate
+		case sim >= minSim && sim > bestSim:
+			bestSim, bestMatch = sim, candidate
+		}
+	}
+
+	return bestMatch
+}