Skip to content

Commit

Permalink
fix: clean name respects special characters
Browse files Browse the repository at this point in the history
  • Loading branch information
tympanix committed Jun 24, 2018
1 parent b9455ef commit ae25223
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 40 deletions.
3 changes: 3 additions & 0 deletions parse/capitalize.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ func Capitalize(str string) string {
str = breakRegex.ReplaceAllStringFunc(str, func(word string) string {
return strings.Title(word)
})
str = abbreviationRegexp.ReplaceAllStringFunc(str, func(abbr string) string {
return strings.ToUpper(abbr)
})
str = strings.Replace(str, "'S", "'s", -1)
return str
}
32 changes: 1 addition & 31 deletions parse/capitalize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,37 +8,7 @@ import (
)

// TestCapitalize lowercases each title and checks that Capitalize restores
// the original casing exactly.
func TestCapitalize(t *testing.T) {
// NOTE(review): two loop headers are present in this block — one ranging over
// the inline slice below and one ranging over testMovieTitles further down.
// This looks like the removed and added sides of a diff merged together;
// confirm which header is current (only one can remain for the braces to
// balance).
for _, v := range []string{
"Gone with the Wind",
"The Shawshank Redemption",
"The Godfather: Part II",
"Schindler's List",
"The Lord of the Rings: The Return of the King",
"The Good, the Bad and the Ugly",
"12 Angry Men",
"Avengers: Infinity War",
"The Lord of the Rings: The Fellowship of the Ring",
"Star Wars: Episode V - The Empire Strikes Back",
"One Flew Over the Cuckoo's Nest",
"The Silence of the Lambs",
"Léon: The Professional",
"Se7en",
"Star Wars: Episode IV - A New Hope",
"City of God",
"Life Is Beautiful",
"Once Upon a Time in America",
"21 and Over",
"2001: A Space Odyssey",
"To Kill a Mockingbird",
"Monty Python and the Holy Grail",
"L.A. Confidential",
"Lock, Stock and Two Smoking Barrels",
"Mr. Smith Goes to Washington",
"V for Vendetta",
"Kill Bill: Vol. 1",
"Agents of S.H.I.E.L.D.",
"Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb",
} {
for _, v := range testMovieTitles {
// The title itself is the expected value: lowercasing and re-capitalizing
// must be a round trip.
assert.Equal(t, v, Capitalize(strings.ToLower(v)))
}
}
Expand Down
30 changes: 27 additions & 3 deletions parse/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,25 @@ func Filename(filename string) string {
return strings.TrimSuffix(f, filepath.Ext(f))
}

var abbreviationRegexp = regexp.MustCompile(`\s[A-Z]\s[A-Z](\s[A-Z])*\s`)
var illegalcharsRegexp = regexp.MustCompile(`[^\p{L}0-9\s&'_\(\)-]`)
// abbreviationList holds, in lowercase, the words that isAbbreviation
// recognises as abbreviations.
var abbreviationList = []string{"mr", "mrs", "dr", "vol"}

// isAbbreviation reports whether str, once lowercased with strings.ToLower,
// is exactly equal to one of the entries in abbreviationList.
func isAbbreviation(str string) bool {
	needle := strings.ToLower(str)
	for i := 0; i < len(abbreviationList); i++ {
		if abbreviationList[i] == needle {
			return true
		}
	}
	return false
}

// abbreviationRegexp matches a run of two or more single ASCII letters, each
// separated from the next by exactly one whitespace character or dot, with a
// word boundary on both sides (e.g. "a b c" or "a.b.c").
var abbreviationRegexp = regexp.MustCompile(`\b[A-Za-z]([\s\.][A-Za-z])+\b`)
// illegalcharsRegexp matches any single character that is not a Unicode
// letter, an ASCII digit, whitespace, or one of & ' _ ( ) - , :
var illegalcharsRegexp = regexp.MustCompile(`[^\p{L}0-9\s&'_\(\)\-,:]`)
// spaceReplaceRegexp matches one or more consecutive dots, whitespace
// characters or underscores.
var spaceReplaceRegexp = regexp.MustCompile(`[\.\s_]+`)

// CleanName returns the media name cleaned from punctuation
Expand All @@ -74,7 +91,14 @@ func CleanName(name string) string {
name = illegalcharsRegexp.ReplaceAllString(name, "")

name = abbreviationRegexp.ReplaceAllStringFunc(name, func(match string) string {
return " " + strings.Replace(match, " ", "", -1) + " "
return strings.Replace(match, " ", ".", -1) + "."
})

name = wordRegex.ReplaceAllStringFunc(name, func(match string) string {
if isAbbreviation(match) {
return match + "."
}
return match
})

name = Capitalize(name)
Expand Down
51 changes: 48 additions & 3 deletions parse/parse_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,56 @@
package parse

import (
"regexp"
"strings"
"testing"

"github.com/stretchr/testify/assert"
)

// testMovieTitles is a fixture of movie titles written in their expected final
// form (casing and punctuation). Tests derive their input from each entry and
// use the entry itself as the expected output.
var testMovieTitles = []string{
"Gone with the Wind",
"The Shawshank Redemption",
"The Godfather: Part II",
"Schindler's List",
"The Lord of the Rings: The Return of the King",
"The Good, the Bad and the Ugly",
"12 Angry Men",
"Avengers: Infinity War",
"The Lord of the Rings: The Fellowship of the Ring",
"Star Wars: Episode V - The Empire Strikes Back",
"One Flew Over the Cuckoo's Nest",
"The Silence of the Lambs",
"Léon: The Professional",
"Se7en",
"Star Wars: Episode IV - A New Hope",
"City of God",
"Life Is Beautiful",
"Once Upon a Time in America",
"21 and Over",
"2001: A Space Odyssey",
"To Kill a Mockingbird",
"Monty Python and the Holy Grail",
"L.A. Confidential",
"Lock, Stock and Two Smoking Barrels",
"Mr. Smith Goes to Washington",
"V for Vendetta",
"Kill Bill: Vol. 1",
"Agents of S.H.I.E.L.D.",
"Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb",
"X-Men Origins: Wolverine",
"Mr. & Mrs. Smith",
}

// TestCleanNameMovieTitles turns every fixture title into a lowercase,
// dot-separated string and checks that CleanName reproduces the original
// title from it.
func TestCleanNameMovieTitles(t *testing.T) {
	separators := regexp.MustCompile(`[\s\.]+`)

	for i := range testMovieTitles {
		want := testMovieTitles[i]
		// Collapse each run of whitespace and dots into a single dot.
		dotted := separators.ReplaceAllString(want, ".")
		got := CleanName(strings.ToLower(dotted))
		assert.Equal(t, want, got)
	}
}

func TestIdentity(t *testing.T) {
assert.Equal(t, "thisisatest", Identity("thìs is â tést"))
assert.Equal(t, "vyzkousejtetentoretezec", Identity("vyzkoušejte tento řetězec"))
Expand All @@ -15,9 +60,9 @@ func TestIdentity(t *testing.T) {

// TestCleanName checks CleanName against a handful of dot-, space- and
// symbol-separated inputs.
func TestCleanName(t *testing.T) {
assert.Equal(t, "This Is a Test", CleanName("this.is.a.test"))
// NOTE(review): the next three assertions and the three that follow them look
// like the old and new expectations of one diff merged together. The "abc"
// inputs are identical in both groups but expect "Abc ABC Abc" here and
// "Abc A.B.C. Abc" below, so both groups cannot pass at once — confirm which
// group is current.
assert.Equal(t, "This Is a (Test)", CleanName("this?_=is#.,a_(test)"))
assert.Equal(t, "Abc ABC Abc", CleanName("abc.A.B.C.abc"))
assert.Equal(t, "Abc ABC Abc", CleanName("abc A B C abc"))
assert.Equal(t, "This Is a (Test)", CleanName("this?_=is#.a_(test)"))
assert.Equal(t, "Abc A.B.C. Abc", CleanName("abc.A.B.C.abc"))
assert.Equal(t, "Abc A.B.C. Abc", CleanName("abc A B C abc"))
// Input that is already capitalized: "To" and "A" are expected in lowercase.
assert.Equal(t, "A Good Day to Die Hard", CleanName("A.Good.Day.To.Die.Hard"))
assert.Equal(t, "This Is a Test", CleanName("This.Is.A.Test"))
}
Expand Down
9 changes: 6 additions & 3 deletions provider/subscene.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ const subsceneDelay = 500 * time.Millisecond

// subsceneLock is the mutex that lockSubscene acquires.
var subsceneLock = new(sync.Mutex)

// subsceneIllegal matches any single character that is not a Unicode letter,
// an ASCII digit or whitespace.
var subsceneIllegal = regexp.MustCompile(`[^\p{L}0-9\s]`)

// lockSubscene is used to limit the number of calls to subscene to prevent spamming
func lockSubscene() {
subsceneLock.Lock()
Expand All @@ -59,12 +61,13 @@ func (s *subscene) ResolveSubtitle(l types.Linker) (types.Downloadable, error) {
}

// searchTerm builds the subscene search query for m.
//
// For a movie it uses searchTermMovie, for an episode searchTermEpisode; media
// that is neither yields the empty string. Every character matched by
// subsceneIllegal is stripped from the result before it is returned.
func (s *subscene) searchTerm(m types.Media) string {
	var term string
	// Assign rather than return in each branch: returning early here would
	// bypass the subsceneIllegal filter below and leave it as dead code.
	if movie, ok := m.TypeMovie(); ok {
		term = s.searchTermMovie(movie)
	} else if episode, ok := m.TypeEpisode(); ok {
		term = s.searchTermEpisode(episode)
	}
	return subsceneIllegal.ReplaceAllString(term, "")
}

func (s *subscene) searchTermMovie(movie types.Movie) string {
Expand Down

0 comments on commit ae25223

Please sign in to comment.