diff --git a/hw03_frequency_analysis/top.go b/hw03_frequency_analysis/top.go
index 5a6b2d8..647f193 100644
--- a/hw03_frequency_analysis/top.go
+++ b/hw03_frequency_analysis/top.go
@@ -4,10 +4,10 @@ import (
 	"regexp"
 	"sort"
 	"strings"
+	"unicode"
 )
 
-var SplitFilter = regexp.MustCompile(`[\s?!.;,]`)
-var IgnoredSymbols = regexp.MustCompile(`[-]`)
+var ignoredSymbols = regexp.MustCompile(`[-]`)
 
 type WordFrequency struct {
 	word string
@@ -20,8 +20,8 @@ func Top10(inputLine string) []string {
 	// prepare map with counter
 	freqMap := make(map[string]int)
 
-	for _, word := range SplitFilter.Split(inputLine, -1) {
-		word = IgnoredSymbols.ReplaceAllString(word, "")
+	inputLine = ignoredSymbols.ReplaceAllString(inputLine, "")
+	for _, word := range strings.FieldsFunc(inputLine, splitFunc) {
 		if len(word) > 0 {
 			freqMap[strings.ToLower(word)]++
 		}
@@ -46,3 +46,7 @@ func Top10(inputLine string) []string {
 
 	return mostFrequentWords
 }
+
+func splitFunc(char rune) bool {
+	return unicode.IsPunct(char) || unicode.IsSpace(char)
+}
diff --git a/hw03_frequency_analysis/top_test.go b/hw03_frequency_analysis/top_test.go
index 74c13df..03830c4 100644
--- a/hw03_frequency_analysis/top_test.go
+++ b/hw03_frequency_analysis/top_test.go
@@ -75,3 +75,9 @@ func TestExtendedTop10(t *testing.T) {
 		assert.Equal(t, result, testCase.expected)
 	}
 }
+
+func BenchmarkTop(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		Top10(text)
+	}
+}