From 1acfbbe83c0cb0b9498a7e4d2256317781a469a7 Mon Sep 17 00:00:00 2001
From: Hugo
Date: Tue, 15 Sep 2020 12:04:25 +0200
Subject: [PATCH] Fuzzy search returning set of matching strings (optional threshold) + tests

---
Review note: patch hand-amended (debug fmt.Printf and its "fmt" import
dropped, non-positive quantity guarded, matching test cases added). The blob
ids on the "index" lines below were not regenerated.

 string-analysis.go            |  67 ++++++++++++++++++++-
 tests/string-analysis_test.go | 110 ++++++++++++++++++++++++++++++++++
 2 files changed, 176 insertions(+), 1 deletion(-)

diff --git a/string-analysis.go b/string-analysis.go
index 521ea8c..39b9ab3 100644
--- a/string-analysis.go
+++ b/string-analysis.go
@@ -2,7 +2,9 @@ package edlib
 
 import (
 	"errors"
 	"log"
+
+	"github.com/hbollon/go-edlib/internal/orderedmap"
 )
 
 // AlgorithMethod is an Integer type used to identify edit distance algorithms
@@ -77,7 +79,7 @@ func FuzzySearch(str string, strList []string, algo AlgorithMethod) string {
 }
 
 // FuzzySearchThreshold realize an approximate search on a string list and return the closest one compared
-// to the string input. Take an similarity threshold in parameter.
+// to the string input. Takes a similarity threshold in parameter.
 func FuzzySearchThreshold(str string, strList []string, minSim float32, algo AlgorithMethod) string {
 	var higherMatchPercent float32
 	var tmpStr string
@@ -96,3 +98,66 @@ func FuzzySearchThreshold(str string, strList []string, minSim float32, algo Alg
 	}
 	return tmpStr
 }
+
+// FuzzySearchSet realizes an approximate search on a string list and returns the strings closest
+// to the input string, sorted by similarity with it. The quantity parameter defines the number of
+// output strings desired (for example 3 in the case of the Google Keyboard word suggestion); slots
+// left unfilled are returned as empty strings. A non-positive quantity returns an empty slice.
+func FuzzySearchSet(str string, strList []string, quantity int, algo AlgorithMethod) []string {
+	// No slot to fill: return early instead of panicking in make() (negative
+	// quantity) or on the sortedMap[-1] index below (zero quantity).
+	if quantity <= 0 {
+		return []string{}
+	}
+
+	sortedMap := make(orderedmap.OrderedMap, quantity)
+	last := sortedMap.Len() - 1
+	for _, strToCmp := range strList {
+		sim, err := StringsSimilarity(str, strToCmp, algo)
+		if err != nil {
+			log.Fatal(err)
+		}
+
+		// The last slot is treated as the weakest retained match: overwrite it
+		// with any better candidate, then re-sort to restore that ordering.
+		if sim > sortedMap[last].Value {
+			sortedMap[last].Key = strToCmp
+			sortedMap[last].Value = sim
+			sortedMap.SortByValues()
+		}
+	}
+
+	return sortedMap.ToArray()
+}
+
+// FuzzySearchSetThreshold realizes an approximate search on a string list and returns the strings closest
+// to the input string, sorted by similarity with it. The quantity parameter defines the number of
+// output strings desired (for example 3 in the case of the Google Keyboard word suggestion) and only
+// strings whose similarity is at least minSim are retained; slots left unfilled are returned as
+// empty strings. A non-positive quantity returns an empty slice.
+func FuzzySearchSetThreshold(str string, strList []string, quantity int, minSim float32, algo AlgorithMethod) []string {
+	// No slot to fill: return early instead of panicking in make() (negative
+	// quantity) or on the sortedMap[-1] index below (zero quantity).
+	if quantity <= 0 {
+		return []string{}
+	}
+
+	sortedMap := make(orderedmap.OrderedMap, quantity)
+	last := sortedMap.Len() - 1
+	for _, strToCmp := range strList {
+		sim, err := StringsSimilarity(str, strToCmp, algo)
+		if err != nil {
+			log.Fatal(err)
+		}
+
+		// Same replacement scheme as FuzzySearchSet, restricted to candidates
+		// that reach the minSim threshold.
+		if sim >= minSim && sim > sortedMap[last].Value {
+			sortedMap[last].Key = strToCmp
+			sortedMap[last].Value = sim
+			sortedMap.SortByValues()
+		}
+	}
+
+	return sortedMap.ToArray()
+}
diff --git a/tests/string-analysis_test.go b/tests/string-analysis_test.go
index c2243df..e99d4cd 100644
--- a/tests/string-analysis_test.go
+++ b/tests/string-analysis_test.go
@@ -1,11 +1,26 @@
 package edlib
 
 import (
+	"reflect"
 	"testing"
 
 	"github.com/hbollon/go-edlib"
 )
 
+var strList []string
+
+func init() {
+	strList = []string{
+		"test",
+		"tester",
+		"tests",
+		"testers",
+		"testing",
+		"tsting",
+		"sting",
+	}
+}
+
 func TestStringsSimilarity(t *testing.T) {
 	type args struct {
 		str1 string
@@ -118,3 +133,98 @@ func TestStringsSimilarity(t *testing.T) {
 		})
 	}
 }
+
+func TestFuzzySearch(t *testing.T) {
+	type args struct {
+		str     string
+		strList []string
+		algo    edlib.AlgorithMethod
+	}
+	tests := []struct {
+		name string
+		args args
+		want string
+	}{
+		{"FuzzySearch 'testing'", args{"testnig", strList, edlib.Levenshtein}, "testing"},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := edlib.FuzzySearch(tt.args.str, tt.args.strList, tt.args.algo); got != tt.want {
+				t.Errorf("FuzzySearch() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestFuzzySearchThreshold(t *testing.T) {
+	type args struct {
+		str     string
+		strList []string
+		minSim  float32
+		algo    edlib.AlgorithMethod
+	}
+	tests := []struct {
+		name string
+		args args
+		want string
+	}{
+		{"FuzzySearch 'testing'", args{"testnig", strList, 0.7, edlib.Levenshtein}, "testing"},
+		{"FuzzySearch 'testing'", args{"hello", strList, 0.7, edlib.Levenshtein}, ""},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := edlib.FuzzySearchThreshold(tt.args.str, tt.args.strList, tt.args.minSim, tt.args.algo); got != tt.want {
+				t.Errorf("FuzzySearchThreshold() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestFuzzySearchSet(t *testing.T) {
+	type args struct {
+		str      string
+		strList  []string
+		quantity int
+		algo     edlib.AlgorithMethod
+	}
+	tests := []struct {
+		name string
+		args args
+		want []string
+	}{
+		{"FuzzySearch 'testing'", args{"testnig", strList, 3, edlib.Levenshtein}, []string{"testing", "test", "tester"}},
+		{"FuzzySearchSet zero quantity", args{"testnig", strList, 0, edlib.Levenshtein}, []string{}},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := edlib.FuzzySearchSet(tt.args.str, tt.args.strList, tt.args.quantity, tt.args.algo); !reflect.DeepEqual(got, tt.want) {
+				t.Errorf("FuzzySearchSet() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestFuzzySearchSetThreshold(t *testing.T) {
+	type args struct {
+		str      string
+		strList  []string
+		quantity int
+		minSim   float32
+		algo     edlib.AlgorithMethod
+	}
+	tests := []struct {
+		name string
+		args args
+		want []string
+	}{
+		{"FuzzySearch 'testing'", args{"testnig", strList, 3, 0.7, edlib.Levenshtein}, []string{"testing", "", ""}},
+		{"FuzzySearchSetThreshold zero quantity", args{"testnig", strList, 0, 0.7, edlib.Levenshtein}, []string{}},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := edlib.FuzzySearchSetThreshold(tt.args.str, tt.args.strList, tt.args.quantity, tt.args.minSim, tt.args.algo); !reflect.DeepEqual(got, tt.want) {
+				t.Errorf("FuzzySearchSetThreshold() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}