Skip to content

Commit

Permalink
Better support for emoji with zero width connectors (#17)
Browse files Browse the repository at this point in the history
The [zero-width connector](https://emojipedia.org/zero-width-joiner#technical) is a
special character used to combine multiple emoji into a single emoji
character on screen. For example 👩‍👩‍👧‍👧

This patches the code to skip specific instances of the zero-width
connector to avoid parsing parts of the emoji as unicode
characters/breaking into multiple emoji.

This is based on
#12 (comment)
  • Loading branch information
tmdvs authored Oct 19, 2024
1 parent 3ebfd53 commit 971af81
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 3 deletions.
2 changes: 1 addition & 1 deletion emoji.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ func RemoveAll(input string) string {
matches := FindAll(input)

for _, item := range matches {
emo := item.Match.(Emoji)
emo := item.Match
rs := []rune(emo.Value)
for _, r := range rs {
input = strings.ReplaceAll(input, string([]rune{r}), "")
Expand Down
17 changes: 15 additions & 2 deletions search.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (

// SearchResult - Occurrence of an emoji in a string
type SearchResult struct {
Match interface{}
Match Emoji
Occurrences int
Locations [][]int
}
Expand Down Expand Up @@ -43,7 +43,7 @@ func Find(emojiString string, input string) (result SearchResult, err error) {
// Loop through emoji present in input and if any match the
// emoji we're looking for we'll return the result
for _, r := range allEmoji {
if r.Match.(Emoji).Key == emoji.Key {
if r.Match.Key == emoji.Key {
result = r
return
}
Expand Down Expand Up @@ -71,6 +71,15 @@ func FindAll(input string) (detectedEmojis SearchResults) {
continue
}

// If the previous rune was a zero width joiner we'll skip this one
// [Github issue](https://github.com/tmdvs/Go-Emoji-Utils/issues/12#issuecomment-1362747872)
if index >= 1 {
previousRune := []rune{runes[index-1]}
if isRuneZeroWidthJoiner(previousRune) {
continue
}
}

// Grab the initial hex value of this run
hexKey := utils.RunesToHexKey([]rune{r})

Expand Down Expand Up @@ -162,3 +171,7 @@ func findEmoji(term string, list map[string]Emoji) (results map[string]Emoji) {
}
return
}

// isRuneZeroWidthJoiner reports whether r is exactly one rune and that rune
// is U+200D (ZERO WIDTH JOINER). Empty and multi-rune slices yield false.
func isRuneZeroWidthJoiner(r []rune) bool {
	// Compare the code point directly rather than formatting it to a hex
	// string first; this check runs for every rune visited while searching.
	const zeroWidthJoiner rune = 0x200D
	return len(r) == 1 && r[0] == zeroWidthJoiner
}
14 changes: 14 additions & 0 deletions tests/search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ func TestRemoveAllEmoji(t *testing.T) {
totalUniqueEmoji := len(matches)

assert.Equal(t, totalUniqueEmoji, 6, "There should be six different emoji, found: %v", matches)
assert.Equal(t, matches[0].Match.Value, "๐Ÿ˜„", "The first emoji should be ๐Ÿ˜„")
assert.Equal(t, matches[1].Match.Value, "๐Ÿท", "The second emoji should be ๐Ÿท")
assert.Equal(t, matches[5].Match.Value, "๐Ÿฅฐ", "The second emoji should be ๐Ÿฅฐ")

emojiRemoved := emoji.RemoveAll(str)
assert.Equal(t, "This is a string with some emoji!", emojiRemoved, "There should be no emoji")
Expand All @@ -57,6 +60,14 @@ func TestNumericalKeycaps(t *testing.T) {
assert.Equal(t, 11, totalUniqueEmoji, "There should be 11 unique emoji")
}

// TestFamilyEmoji checks that a family emoji, built from four person emoji
// joined by zero width joiners (U+200D), is counted by FindAll as one emoji.
func TestFamilyEmoji(t *testing.T) {
	// Man, woman, boy, boy joined by U+200D. Escapes are used so the
	// invisible joiners stay explicit and survive re-encoding of this file.
	str := "\U0001F468\u200D\U0001F469\u200D\U0001F466\u200D\U0001F466family emoji"
	matches := emoji.FindAll(str)
	totalUniqueEmoji := len(matches)

	assert.Equal(t, 1, totalUniqueEmoji, "There should be 1 unique emoji")
}

func TestRemoveAllEmojiChinese(t *testing.T) {

str := "่ตทๅŽ็‰นๅœจ๐Ÿ‡ซ๐Ÿ‡ท้˜Ÿ็š„ไฝœ็”จๆ›ด ๅ“ˆๅ“ˆๅ“ˆ"
Expand All @@ -65,6 +76,7 @@ func TestRemoveAllEmojiChinese(t *testing.T) {
totalUniqueEmoji := len(matches)

assert.Equal(t, totalUniqueEmoji, 1, "There should be one emoji")
assert.Equal(t, matches[0].Match.Value, "๐Ÿ‡ซ๐Ÿ‡ท", "The emoji should be ๐Ÿ‡ซ๐Ÿ‡ท")

emojiRemoved := emoji.RemoveAll(str)
assert.Equal(t, "่ตทๅŽ็‰นๅœจ้˜Ÿ็š„ไฝœ็”จๆ›ด ๅ“ˆๅ“ˆๅ“ˆ", emojiRemoved, "There should be no emoji")
Expand All @@ -79,6 +91,8 @@ func TestRemoveAllEmojiChineseEnglishMixed(t *testing.T) {
totalUniqueEmoji := len(matches)

assert.Equal(t, totalUniqueEmoji, 8, "There should be one emoji")
assert.Equal(t, matches[0].Match.Value, "๐Ÿคฎ", "The first emoji should be ๐Ÿคฎ")
assert.Equal(t, matches[4].Match.Value, "๐Ÿค ", "The fifth emoji should be ๐Ÿค ")

emojiRemoved := emoji.RemoveAll(str)
assert.Equal(t, "woๆญฆๆ–Œello aๆญฆๆ–Œ g ood peoello", emojiRemoved, "There should be no emoji")
Expand Down

0 comments on commit 971af81

Please sign in to comment.