diff --git a/parse/capitalize.go b/parse/capitalize.go index e2ce7d4..605a6f2 100644 --- a/parse/capitalize.go +++ b/parse/capitalize.go @@ -109,6 +109,9 @@ func Capitalize(str string) string { str = breakRegex.ReplaceAllStringFunc(str, func(word string) string { return strings.Title(word) }) + str = abbreviationRegexp.ReplaceAllStringFunc(str, func(abbr string) string { + return strings.ToUpper(abbr) + }) str = strings.Replace(str, "'S", "'s", -1) return str } diff --git a/parse/capitalize_test.go b/parse/capitalize_test.go index 6f6c6df..19ce32b 100644 --- a/parse/capitalize_test.go +++ b/parse/capitalize_test.go @@ -8,37 +8,7 @@ import ( ) func TestCapitalize(t *testing.T) { - for _, v := range []string{ - "Gone with the Wind", - "The Shawshank Redemption", - "The Godfather: Part II", - "Schindler's List", - "The Lord of the Rings: The Return of the King", - "The Good, the Bad and the Ugly", - "12 Angry Men", - "Avengers: Infinity War", - "The Lord of the Rings: The Fellowship of the Ring", - "Star Wars: Episode V - The Empire Strikes Back", - "One Flew Over the Cuckoo's Nest", - "The Silence of the Lambs", - "Léon: The Professional", - "Se7en", - "Star Wars: Episode IV - A New Hope", - "City of God", - "Life Is Beautiful", - "Once Upon a Time in America", - "21 and Over", - "2001: A Space Odyssey", - "To Kill a Mockingbird", - "Monty Python and the Holy Grail", - "L.A. Confidential", - "Lock, Stock and Two Smoking Barrels", - "Mr. Smith Goes to Washington", - "V for Vendetta", - "Kill Bill: Vol. 1", - "Agents of S.H.I.E.L.D.", - "Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb", - } { + for _, v := range testMovieTitles { assert.Equal(t, v, Capitalize(strings.ToLower(v))) } } diff --git a/parse/parse.go b/parse/parse.go index 5defb46..13a73a9 100644 --- a/parse/parse.go +++ b/parse/parse.go @@ -64,8 +64,25 @@ func Filename(filename string) string { return strings.TrimSuffix(f, filepath.Ext(f)) } -var abbreviationRegexp = regexp.MustCompile(`\s[A-Z]\s[A-Z](\s[A-Z])*\s`) -var illegalcharsRegexp = regexp.MustCompile(`[^\p{L}0-9\s&'_\(\)-]`) +var abbreviationList = []string{ + "mr", + "mrs", + "dr", + "vol", +} + +func isAbbreviation(str string) bool { + lower := strings.ToLower(str) + for _, v := range abbreviationList { + if lower == v { + return true + } + } + return false +} + +var abbreviationRegexp = regexp.MustCompile(`\b[A-Za-z]([\s\.][A-Za-z])+\b`) +var illegalcharsRegexp = regexp.MustCompile(`[^\p{L}0-9\s&'_\(\)\-,:]`) var spaceReplaceRegexp = regexp.MustCompile(`[\.\s_]+`) // CleanName returns the media name cleaned from punctuation @@ -74,7 +91,14 @@ func CleanName(name string) string { name = illegalcharsRegexp.ReplaceAllString(name, "") name = abbreviationRegexp.ReplaceAllStringFunc(name, func(match string) string { - return " " + strings.Replace(match, " ", "", -1) + " " + return strings.Replace(match, " ", ".", -1) + "." + }) + + name = wordRegex.ReplaceAllStringFunc(name, func(match string) string { + if isAbbreviation(match) { + return match + "." + } + return match }) name = Capitalize(name) diff --git a/parse/parse_test.go b/parse/parse_test.go index 587db33..2b6e750 100644 --- a/parse/parse_test.go +++ b/parse/parse_test.go @@ -1,11 +1,56 @@ package parse import ( + "regexp" + "strings" "testing" "github.com/stretchr/testify/assert" ) +var testMovieTitles = []string{ + "Gone with the Wind", + "The Shawshank Redemption", + "The Godfather: Part II", + "Schindler's List", + "The Lord of the Rings: The Return of the King", + "The Good, the Bad and the Ugly", + "12 Angry Men", + "Avengers: Infinity War", + "The Lord of the Rings: The Fellowship of the Ring", + "Star Wars: Episode V - The Empire Strikes Back", + "One Flew Over the Cuckoo's Nest", + "The Silence of the Lambs", + "Léon: The Professional", + "Se7en", + "Star Wars: Episode IV - A New Hope", + "City of God", + "Life Is Beautiful", + "Once Upon a Time in America", + "21 and Over", + "2001: A Space Odyssey", + "To Kill a Mockingbird", + "Monty Python and the Holy Grail", + "L.A. Confidential", + "Lock, Stock and Two Smoking Barrels", + "Mr. Smith Goes to Washington", + "V for Vendetta", + "Kill Bill: Vol. 1", + "Agents of S.H.I.E.L.D.", + "Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb", + "X-Men Origins: Wolverine", + "Mr. & Mrs. Smith", +} + +func TestCleanNameMovieTitles(t *testing.T) { + space := regexp.MustCompile(`[\s\.]+`) + + for _, v := range testMovieTitles { + s := space.ReplaceAllString(v, ".") + assert.Equal(t, v, CleanName(strings.ToLower(s))) + } +} + func TestIdentity(t *testing.T) { assert.Equal(t, "thisisatest", Identity("thìs is â tést")) assert.Equal(t, "vyzkousejtetentoretezec", Identity("vyzkoušejte tento řetězec")) @@ -15,9 +60,9 @@ func TestIdentity(t *testing.T) { func TestCleanName(t *testing.T) { assert.Equal(t, "This Is a Test", CleanName("this.is.a.test")) - assert.Equal(t, "This Is a (Test)", CleanName("this?_=is#.,a_(test)")) - assert.Equal(t, "Abc ABC Abc", CleanName("abc.A.B.C.abc")) - assert.Equal(t, "Abc ABC Abc", CleanName("abc A B C abc")) + assert.Equal(t, "This Is a (Test)", CleanName("this?_=is#.a_(test)")) + assert.Equal(t, "Abc A.B.C. Abc", CleanName("abc.A.B.C.abc")) + assert.Equal(t, "Abc A.B.C. Abc", CleanName("abc A B C abc")) assert.Equal(t, "A Good Day to Die Hard", CleanName("A.Good.Day.To.Die.Hard")) assert.Equal(t, "This Is a Test", CleanName("This.Is.A.Test")) } diff --git a/provider/subscene.go b/provider/subscene.go index 3db4dce..9bbf3c1 100644 --- a/provider/subscene.go +++ b/provider/subscene.go @@ -38,6 +38,8 @@ const subsceneDelay = 500 * time.Millisecond var subsceneLock = new(sync.Mutex) +var subsceneIllegal = regexp.MustCompile(`[^\p{L}0-9\s]`) + // lockSubscene is used to limit the number of calls to subscene to prevent spamming func lockSubscene() { subsceneLock.Lock() @@ -59,12 +61,13 @@ func (s *subscene) ResolveSubtitle(l types.Linker) (types.Downloadable, error) { } func (s *subscene) searchTerm(m types.Media) string { + var term string if movie, ok := m.TypeMovie(); ok { - return s.searchTermMovie(movie) + term = s.searchTermMovie(movie) } else if episode, ok := m.TypeEpisode(); ok { - return s.searchTermEpisode(episode) + term = s.searchTermEpisode(episode) } - return "" + return subsceneIllegal.ReplaceAllString(term, "") } func (s *subscene) searchTermMovie(movie types.Movie) string {