Skip to content

Commit

Permalink
Fuzzy search returning a set of matching strings (optional threshold) + tests
Browse files Browse the repository at this point in the history
  • Loading branch information
hbollon committed Sep 15, 2020
1 parent b471431 commit 1acfbbe
Show file tree
Hide file tree
Showing 2 changed files with 155 additions and 1 deletion.
48 changes: 47 additions & 1 deletion string-analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ package edlib

import (
"errors"
"fmt"
"log"

"github.com/hbollon/go-edlib/internal/orderedmap"
)

// AlgorithMethod is an Integer type used to identify edit distance algorithms
Expand Down Expand Up @@ -77,7 +80,7 @@ func FuzzySearch(str string, strList []string, algo AlgorithMethod) string {
}

// FuzzySearchThreshold realize an approximate search on a string list and return the closest one compared
// to the string input. Take an similarity threshold in parameter.
// to the string input. Takes a similarity threshold in parameter.
func FuzzySearchThreshold(str string, strList []string, minSim float32, algo AlgorithMethod) string {
var higherMatchPercent float32
var tmpStr string
Expand All @@ -96,3 +99,46 @@ func FuzzySearchThreshold(str string, strList []string, minSim float32, algo Alg
}
return tmpStr
}

// FuzzySearchSet performs an approximate search of str in strList and returns
// the best candidates found, as produced by OrderedMap.ToArray.
//
// quantity is the number of result slots wanted (for example 3 for a
// keyboard-style word suggestion bar). A candidate only takes a slot when its
// similarity is strictly greater than the value currently held in the last
// slot. A quantity of zero or less yields an empty result.
//
// NOTE: an error reported by StringsSimilarity terminates the process through
// log.Fatal, because this signature has no error return.
func FuzzySearchSet(str string, strList []string, quantity int, algo AlgorithMethod) []string {
	// Without at least one slot there is nothing to fill, and indexing the
	// last slot below would be out of range.
	if quantity <= 0 {
		return []string{}
	}

	sortedMap := make(orderedmap.OrderedMap, quantity)
	for _, strToCmp := range strList {
		sim, err := StringsSimilarity(str, strToCmp, algo)
		if err != nil {
			log.Fatal(fmt.Errorf("edlib: comparing %q with %q: %w", str, strToCmp, err))
		}

		// NOTE(review): presumably SortByValues orders entries by descending
		// similarity, leaving the weakest one in the last slot — confirm
		// against the orderedmap package.
		last := sortedMap.Len() - 1
		if sim > sortedMap[last].Value {
			sortedMap[last].Key = strToCmp
			sortedMap[last].Value = sim
			sortedMap.SortByValues()
		}
	}

	return sortedMap.ToArray()
}

// FuzzySearchSetThreshold performs an approximate search of str in strList and
// returns the best candidates found, as produced by OrderedMap.ToArray.
//
// quantity is the number of result slots wanted (for example 3 for a
// keyboard-style word suggestion bar). minSim is the minimum similarity a
// candidate must reach to be considered at all; on top of that, a candidate
// only takes a slot when its similarity is strictly greater than the value
// currently held in the last slot. The package tests expect slots that no
// candidate qualified for to come back as empty strings. A quantity of zero or
// less yields an empty result.
//
// NOTE: an error reported by StringsSimilarity terminates the process through
// log.Fatal, because this signature has no error return.
func FuzzySearchSetThreshold(str string, strList []string, quantity int, minSim float32, algo AlgorithMethod) []string {
	// Without at least one slot there is nothing to fill, and indexing the
	// last slot below would be out of range.
	if quantity <= 0 {
		return []string{}
	}

	sortedMap := make(orderedmap.OrderedMap, quantity)
	for _, strToCmp := range strList {
		sim, err := StringsSimilarity(str, strToCmp, algo)
		if err != nil {
			log.Fatal(fmt.Errorf("edlib: comparing %q with %q: %w", str, strToCmp, err))
		}

		// Candidates under the threshold are never stored, whatever the
		// current content of the slots.
		if sim < minSim {
			continue
		}

		// NOTE(review): presumably SortByValues orders entries by descending
		// similarity, leaving the weakest one in the last slot — confirm
		// against the orderedmap package.
		last := sortedMap.Len() - 1
		if sim > sortedMap[last].Value {
			sortedMap[last].Key = strToCmp
			sortedMap[last].Value = sim
			sortedMap.SortByValues()
		}
	}

	return sortedMap.ToArray()
}
108 changes: 108 additions & 0 deletions tests/string-analysis_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,26 @@
package edlib

import (
"reflect"
"testing"

"github.com/hbollon/go-edlib"
)

var strList []string

func init() {
strList = []string{
"test",
"tester",
"tests",
"testers",
"testing",
"tsting",
"sting",
}
}

func TestStringsSimilarity(t *testing.T) {
type args struct {
str1 string
Expand Down Expand Up @@ -118,3 +133,96 @@ func TestStringsSimilarity(t *testing.T) {
})
}
}

// TestFuzzySearch checks that edlib.FuzzySearch returns the expected entry of
// the shared strList for a misspelled input.
func TestFuzzySearch(t *testing.T) {
	type args struct {
		str     string
		strList []string
		algo    edlib.AlgorithMethod
	}
	cases := []struct {
		name string
		args args
		want string
	}{
		{
			name: "FuzzySearch 'testing'",
			args: args{str: "testnig", strList: strList, algo: edlib.Levenshtein},
			want: "testing",
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := edlib.FuzzySearch(tc.args.str, tc.args.strList, tc.args.algo)
			if got != tc.want {
				t.Errorf("FuzzySearch() = %v, want %v", got, tc.want)
			}
		})
	}
}

// TestFuzzySearchThreshold checks edlib.FuzzySearchThreshold against the shared
// strList: a misspelled word must resolve to its closest entry, and an
// unrelated word must produce the empty string because no entry reaches the
// minimum similarity.
func TestFuzzySearchThreshold(t *testing.T) {
	type args struct {
		str     string
		strList []string
		minSim  float32
		algo    edlib.AlgorithMethod
	}
	tests := []struct {
		name string
		args args
		want string
	}{
		// Each case gets its own name so a failure report identifies the
		// input that failed.
		{"FuzzySearchThreshold 'testnig' matches 'testing'", args{"testnig", strList, 0.7, edlib.Levenshtein}, "testing"},
		{"FuzzySearchThreshold 'hello' has no match", args{"hello", strList, 0.7, edlib.Levenshtein}, ""},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := edlib.FuzzySearchThreshold(tt.args.str, tt.args.strList, tt.args.minSim, tt.args.algo); got != tt.want {
				t.Errorf("FuzzySearchThreshold() = %v, want %v", got, tt.want)
			}
		})
	}
}

// TestFuzzySearchSet checks that edlib.FuzzySearchSet returns the expected
// three entries of the shared strList, in the expected order, for a misspelled
// input.
func TestFuzzySearchSet(t *testing.T) {
	type args struct {
		str      string
		strList  []string
		quantity int
		algo     edlib.AlgorithMethod
	}
	cases := []struct {
		name string
		args args
		want []string
	}{
		{
			name: "FuzzySearch 'testing'",
			args: args{str: "testnig", strList: strList, quantity: 3, algo: edlib.Levenshtein},
			want: []string{"testing", "test", "tester"},
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := edlib.FuzzySearchSet(tc.args.str, tc.args.strList, tc.args.quantity, tc.args.algo)
			if !reflect.DeepEqual(got, tc.want) {
				t.Errorf("FuzzySearchSet() = %v, want %v", got, tc.want)
			}
		})
	}
}

// TestFuzzySearchSetThreshold checks that edlib.FuzzySearchSetThreshold keeps
// only the entries of the shared strList that reach the minimum similarity;
// the expected result pads the remaining slots with empty strings.
func TestFuzzySearchSetThreshold(t *testing.T) {
	type args struct {
		str      string
		strList  []string
		quantity int
		minSim   float32
		algo     edlib.AlgorithMethod
	}
	cases := []struct {
		name string
		args args
		want []string
	}{
		{
			name: "FuzzySearch 'testing'",
			args: args{str: "testnig", strList: strList, quantity: 3, minSim: 0.7, algo: edlib.Levenshtein},
			want: []string{"testing", "", ""},
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := edlib.FuzzySearchSetThreshold(tc.args.str, tc.args.strList, tc.args.quantity, tc.args.minSim, tc.args.algo)
			if !reflect.DeepEqual(got, tc.want) {
				t.Errorf("FuzzySearchSetThreshold() = %v, want %v", got, tc.want)
			}
		})
	}
}

0 comments on commit 1acfbbe

Please sign in to comment.