Skip to content

Commit

Permalink
Reworked project structure
Browse files Browse the repository at this point in the history
  • Loading branch information
hbollon committed Sep 22, 2020
1 parent 195adbf commit f5c314e
Show file tree
Hide file tree
Showing 12 changed files with 140 additions and 127 deletions.
20 changes: 12 additions & 8 deletions string-analysis.go → pkg/analysis/string-analysis.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
package edlib
package analysis

import (
"errors"

"github.com/hbollon/go-edlib/internal/orderedmap"
"github.com/hbollon/go-edlib/pkg/hamming"
"github.com/hbollon/go-edlib/pkg/jaro"
"github.com/hbollon/go-edlib/pkg/lcs"
"github.com/hbollon/go-edlib/pkg/levenshtein"
)

// AlgorithMethod is an Integer type used to identify edit distance algorithms
Expand All @@ -24,23 +28,23 @@ const (
func StringsSimilarity(str1 string, str2 string, algo AlgorithMethod) (float32, error) {
switch algo {
case Levenshtein:
return matchingIndex(str1, str2, LevenshteinDistance(str1, str2)), nil
return matchingIndex(str1, str2, levenshtein.Distance(str1, str2)), nil
case DamerauLevenshtein:
return matchingIndex(str1, str2, DamerauLevenshteinDistance(str1, str2)), nil
return matchingIndex(str1, str2, levenshtein.DamerauLevenshteinDistance(str1, str2)), nil
case OSADamerauLevenshtein:
return matchingIndex(str1, str2, OSADamerauLevenshteinDistance(str1, str2)), nil
return matchingIndex(str1, str2, levenshtein.OSADamerauLevenshteinDistance(str1, str2)), nil
case Lcs:
return matchingIndex(str1, str2, LCSEditDistance(str1, str2)), nil
return matchingIndex(str1, str2, lcs.Distance(str1, str2)), nil
case Hamming:
distance, err := HammingDistance(str1, str2)
distance, err := hamming.Distance(str1, str2)
if err == nil {
return matchingIndex(str1, str2, distance), nil
}
return 0.0, err
case Jaro:
return JaroSimilarity(str1, str2), nil
return jaro.Similarity(str1, str2), nil
case JaroWinkler:
return JaroWinklerSimilarity(str1, str2), nil
return jaro.WinklerSimilarity(str1, str2), nil
default:
return 0.0, errors.New("Illegal argument for algorithm method")
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package edlib
package analysis

import (
"reflect"
Expand Down
10 changes: 6 additions & 4 deletions hamming.go → pkg/hamming/hamming.go
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
package edlib
package hamming

import (
"errors"

"github.com/hbollon/go-edlib/pkg/utils"
)

// HammingDistance calculate the edit distance between two given strings using only substitutions
// Distance calculates the edit distance between two given strings using only substitutions.
// It returns the edit distance as an integer, or an error when the strings have unequal lengths.
func HammingDistance(str1, str2 string) (int, error) {
func Distance(str1, str2 string) (int, error) {
// Convert strings to rune arrays to handle non-ASCII characters
runeStr1 := []rune(str1)
runeStr2 := []rune(str2)

if len(runeStr1) != len(runeStr2) {
return 0, errors.New("Undefined for strings of unequal length")
} else if equal(runeStr1, runeStr2) {
} else if utils.Equal(runeStr1, runeStr2) {
return 0, nil
}

Expand Down
4 changes: 2 additions & 2 deletions hamming_test.go → pkg/hamming/hamming_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package edlib
package hamming

import (
"testing"
Expand All @@ -24,7 +24,7 @@ func TestHammingDistance(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := HammingDistance(tt.args.str1, tt.args.str2)
got, err := Distance(tt.args.str1, tt.args.str2)
if (err != nil) != tt.wantErr {
t.Errorf("HammingDistance() error = %v, wantErr %v", err, tt.wantErr)
return
Expand Down
24 changes: 13 additions & 11 deletions jaro.go → pkg/jaro/jaro.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package edlib
package jaro

// JaroSimilarity return a similarity index (between 0 and 1)
import "github.com/hbollon/go-edlib/pkg/utils"

// Similarity returns a similarity index (between 0 and 1).
// It uses the Jaro distance algorithm and allows only transposition operations.
func JaroSimilarity(str1, str2 string) float32 {
func Similarity(str1, str2 string) float32 {
// Convert string parameters to rune arrays to be compatible with non-ASCII
runeStr1 := []rune(str1)
runeStr2 := []rune(str2)
Expand All @@ -12,20 +14,20 @@ func JaroSimilarity(str1, str2 string) float32 {
runeStr2len := len(runeStr2)
if runeStr1len == 0 || runeStr2len == 0 {
return 0.0
} else if equal(runeStr1, runeStr2) {
} else if utils.Equal(runeStr1, runeStr2) {
return 1.0
}

var match int
// Maximum matching distance allowed
maxDist := max(runeStr1len, runeStr2len)/2 - 1
maxDist := utils.Max(runeStr1len, runeStr2len)/2 - 1
// Correspondence tables (1 for matching and 0 if it's not the case)
str1Table := make([]int, runeStr1len)
str2Table := make([]int, runeStr2len)

// Check for matching characters in both strings
for i := 0; i < runeStr1len; i++ {
for j := max(0, i-maxDist); j < min(runeStr2len, i+maxDist+1); j++ {
for j := utils.Max(0, i-maxDist); j < utils.Min(runeStr2len, i+maxDist+1); j++ {
if runeStr1[i] == runeStr2[j] && str2Table[j] == 0 {
str1Table[i] = 1
str2Table[j] = 1
Expand Down Expand Up @@ -59,11 +61,11 @@ func JaroSimilarity(str1, str2 string) float32 {
(float32(match)-t)/float32(match)) / 3.0
}

// JaroWinklerSimilarity return a similarity index (between 0 and 1)
// WinklerSimilarity returns a similarity index (between 0 and 1).
// It uses the Jaro similarity and then looks for a common prefix (length <= 4).
func JaroWinklerSimilarity(str1, str2 string) float32 {
func WinklerSimilarity(str1, str2 string) float32 {
// Get Jaro similarity index between str1 and str2
jaroSim := JaroSimilarity(str1, str2)
jaroSim := Similarity(str1, str2)

if jaroSim != 0.0 && jaroSim != 1.0 {
// Convert string parameters to rune arrays to be compatible with non-ASCII
Expand All @@ -77,7 +79,7 @@ func JaroWinklerSimilarity(str1, str2 string) float32 {
var prefix int

// Find length of the common prefix
for i := 0; i < min(runeStr1len, runeStr2len); i++ {
for i := 0; i < utils.Min(runeStr1len, runeStr2len); i++ {
if runeStr1[i] == runeStr2[i] {
prefix++
} else {
Expand All @@ -87,7 +89,7 @@ func JaroWinklerSimilarity(str1, str2 string) float32 {

// Normalized prefix count with Winkler's constraint
// (prefix length must be less than or equal to 4)
prefix = min(prefix, 4)
prefix = utils.Min(prefix, 4)

// Return calculated Jaro-Winkler similarity index
return jaroSim + 0.1*float32(prefix)*(1-jaroSim)
Expand Down
6 changes: 3 additions & 3 deletions jaro_test.go → pkg/jaro/jaro_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package edlib
package jaro

import (
"testing"
Expand All @@ -25,7 +25,7 @@ func TestJaroSimilarity(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := JaroSimilarity(tt.args.str1, tt.args.str2); got != tt.want {
if got := Similarity(tt.args.str1, tt.args.str2); got != tt.want {
t.Errorf("JaroSimilarity() = %v, want %v", got, tt.want)
}
})
Expand Down Expand Up @@ -53,7 +53,7 @@ func TestJaroWinklerSimilarity(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := JaroWinklerSimilarity(tt.args.str1, tt.args.str2); got != tt.want {
if got := WinklerSimilarity(tt.args.str1, tt.args.str2); got != tt.want {
t.Errorf("JaroWinklerSimilarity() = %v, want %v", got, tt.want)
}
})
Expand Down
46 changes: 24 additions & 22 deletions lcs.go → pkg/lcs/lcs.go
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
package edlib
package lcs

import (
"errors"
"fmt"

"github.com/hbollon/go-edlib/pkg/utils"
)

// LCS takes two strings and compute their LCS(Longuest Subsequence Problem)
func LCS(str1, str2 string) int {
// ComputeLCS takes two strings and computes the length of their LCS (Longest Common Subsequence)
func ComputeLCS(str1, str2 string) int {
// Convert strings to rune arrays to handle non-ASCII characters
runeStr1 := []rune(str1)
runeStr2 := []rune(str2)

if len(runeStr1) == 0 || len(runeStr2) == 0 {
return 0
} else if equal(runeStr1, runeStr2) {
} else if utils.Equal(runeStr1, runeStr2) {
return len(runeStr1)
}

Expand All @@ -37,22 +39,22 @@ func lcsProcess(runeStr1, runeStr2 []rune) [][]int {
if runeStr1[i-1] == runeStr2[j-1] {
lcsMatrix[i][j] = lcsMatrix[i-1][j-1] + 1
} else {
lcsMatrix[i][j] = max(lcsMatrix[i][j-1], lcsMatrix[i-1][j])
lcsMatrix[i][j] = utils.Max(lcsMatrix[i][j-1], lcsMatrix[i-1][j])
}
}
}

return lcsMatrix
}

// LCSBacktrack returns all choices taken during LCS process
func LCSBacktrack(str1, str2 string) (string, error) {
// Backtrack returns all choices taken during LCS process
func Backtrack(str1, str2 string) (string, error) {
runeStr1 := []rune(str1)
runeStr2 := []rune(str2)

if len(runeStr1) == 0 || len(runeStr2) == 0 {
return "", errors.New("Can't process and backtrack any LCS with empty string")
} else if equal(runeStr1, runeStr2) {
} else if utils.Equal(runeStr1, runeStr2) {
return str1, nil
}

Expand All @@ -75,27 +77,27 @@ func processLCSBacktrack(str1 string, str2 string, lcsMatrix [][]int, m, n int)
return processLCSBacktrack(str1, str2, lcsMatrix, m-1, n)
}

// LCSBacktrackAll returns an array containing all common substrings between str1 and str2
func LCSBacktrackAll(str1, str2 string) ([]string, error) {
// BacktrackAll returns an array containing all common substrings between str1 and str2
func BacktrackAll(str1, str2 string) ([]string, error) {
runeStr1 := []rune(str1)
runeStr2 := []rune(str2)

if len(runeStr1) == 0 || len(runeStr2) == 0 {
return nil, errors.New("Can't process and backtrack any LCS with empty string")
} else if equal(runeStr1, runeStr2) {
} else if utils.Equal(runeStr1, runeStr2) {
return []string{str1}, nil
}

return processLCSBacktrackAll(str1, str2, lcsProcess(runeStr1, runeStr2), len(str1), len(str2)).toArray(), nil
return processLCSBacktrackAll(str1, str2, lcsProcess(runeStr1, runeStr2), len(str1), len(str2)).ToArray(), nil
}

func processLCSBacktrackAll(str1 string, str2 string, lcsMatrix [][]int, m, n int) StringHashMap {
func processLCSBacktrackAll(str1 string, str2 string, lcsMatrix [][]int, m, n int) utils.StringHashMap {
// Convert strings to rune arrays to handle non-ASCII characters
runeStr1 := []rune(str1)
runeStr2 := []rune(str2)

// Map containing all common substrings (hash set built from a map)
substrings := make(StringHashMap)
substrings := make(utils.StringHashMap)

if m == 0 || n == 0 {
substrings[""] = struct{}{}
Expand All @@ -105,24 +107,24 @@ func processLCSBacktrackAll(str1 string, str2 string, lcsMatrix [][]int, m, n in
}
} else {
if lcsMatrix[m-1][n] >= lcsMatrix[m][n-1] {
substrings.addAll(processLCSBacktrackAll(str1, str2, lcsMatrix, m-1, n))
substrings.AddAll(processLCSBacktrackAll(str1, str2, lcsMatrix, m-1, n))
}
if lcsMatrix[m][n-1] >= lcsMatrix[m-1][n] {
substrings.addAll(processLCSBacktrackAll(str1, str2, lcsMatrix, m, n-1))
substrings.AddAll(processLCSBacktrackAll(str1, str2, lcsMatrix, m, n-1))
}
}

return substrings
}

// LCSDiff will backtrack through the lcs matrix and return the diff between the two sequences
func LCSDiff(str1, str2 string) ([]string, error) {
// Diff will backtrack through the lcs matrix and return the diff between the two sequences
func Diff(str1, str2 string) ([]string, error) {
runeStr1 := []rune(str1)
runeStr2 := []rune(str2)

if len(runeStr1) == 0 || len(runeStr2) == 0 {
return nil, errors.New("Can't process LCS diff with empty string")
} else if equal(runeStr1, runeStr2) {
} else if utils.Equal(runeStr1, runeStr2) {
return []string{str1}, nil
}

Expand Down Expand Up @@ -158,9 +160,9 @@ func processLCSDiff(str1 string, str2 string, lcsMatrix [][]int, m, n int) []str
return diff
}

// LCSEditDistance determines the edit distance between two strings using LCS function
// Distance determines the edit distance between two strings using LCS function
// (allow only insert and delete operations)
func LCSEditDistance(str1, str2 string) int {
func Distance(str1, str2 string) int {
if len(str1) == 0 {
return len(str2)
} else if len(str2) == 0 {
Expand All @@ -169,6 +171,6 @@ func LCSEditDistance(str1, str2 string) int {
return 0
}

lcs := LCS(str1, str2)
lcs := ComputeLCS(str1, str2)
return (len(str1) - lcs) + (len(str2) - lcs)
}
12 changes: 6 additions & 6 deletions lcs_test.go → pkg/lcs/lcs_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package edlib
package lcs

import (
"reflect"
Expand Down Expand Up @@ -26,7 +26,7 @@ func TestLCS(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := LCS(tt.args.str1, tt.args.str2); got != tt.want {
if got := ComputeLCS(tt.args.str1, tt.args.str2); got != tt.want {
t.Errorf("LCS() = %v, want %v", got, tt.want)
}
})
Expand Down Expand Up @@ -54,7 +54,7 @@ func TestLCSBacktrack(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := LCSBacktrack(tt.args.str1, tt.args.str2)
got, err := Backtrack(tt.args.str1, tt.args.str2)
if (err != nil) != tt.wantErr {
t.Errorf("LCSBacktrack() error = %v, wantErr %v", err, tt.wantErr)
return
Expand Down Expand Up @@ -89,7 +89,7 @@ func TestLCSBacktrackAll(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := LCSBacktrackAll(tt.args.str1, tt.args.str2)
got, err := BacktrackAll(tt.args.str1, tt.args.str2)
if (err != nil) != tt.wantErr {
t.Errorf("LCSBacktrackAll() error = %v, wantErr %v", err, tt.wantErr)
return
Expand Down Expand Up @@ -121,7 +121,7 @@ func TestLCSDiff(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := LCSDiff(tt.args.str1, tt.args.str2)
got, err := Diff(tt.args.str1, tt.args.str2)
if (err != nil) != tt.wantErr {
t.Errorf("LCSDiff() error = %v, wantErr %v", err, tt.wantErr)
return
Expand Down Expand Up @@ -157,7 +157,7 @@ func TestLCSEditDistance(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := LCSEditDistance(tt.args.str1, tt.args.str2); got != tt.want {
if got := Distance(tt.args.str1, tt.args.str2); got != tt.want {
t.Errorf("LCSEditDistance() = %v, want %v", got, tt.want)
}
})
Expand Down
Loading

0 comments on commit f5c314e

Please sign in to comment.