From c3a182056de89273b3761e703f10e5b2131ecdfa Mon Sep 17 00:00:00 2001 From: Maxime Brunet Date: Sun, 17 Oct 2021 19:09:35 -0700 Subject: [PATCH] Remove accents in Emoji names (#20) Co-authored-by: Eduardo Soares --- CHANGELOG.md | 6 +++++- poetry.lock | 14 +++++++++++++- pyproject.toml | 1 + src/mmemoji/emoji.py | 3 +++ "tests/emojis/accentu\303\251.png" | Bin 0 -> 2173 bytes tests/test_emoji.py | 21 +++++++++++++++++++++ tests/utils.py | 4 ++++ 7 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 "tests/emojis/accentu\303\251.png" create mode 100644 tests/test_emoji.py diff --git a/CHANGELOG.md b/CHANGELOG.md index aa81022b..d54a18b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Switch from `setuptools` to `poetry` ([#11]) - Switch from Travis CI to Github Actions ([#13]) -- Rename `Emoji.emoji` property to `Emoji.metadata` (#16) +- Rename `Emoji.emoji` property to `Emoji.metadata` ([#16]) + +### Fixed +- Remove accents in Emoji names instead of replacing characters by underscores ([#20]) ### Removed - Drop support for Python 3.5 ([#10]) @@ -36,6 +39,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [0.2.0]: https://github.com/maxbrunet/mmemoji/compare/v0.1.0...v0.2.0 [0.1.0]: https://github.com/maxbrunet/mmemoji/releases/tag/v0.1.0 +[#20]: https://github.com/maxbrunet/mmemoji/issues/20 [#16]: https://github.com/maxbrunet/mmemoji/issues/16 [#13]: https://github.com/maxbrunet/mmemoji/issues/13 [#12]: https://github.com/maxbrunet/mmemoji/issues/12 diff --git a/poetry.lock b/poetry.lock index 4e602e1b..022b935b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -503,6 +503,14 @@ category = "main" optional = false python-versions = "*" +[[package]] +name = "unidecode" +version = "1.3.2" +description = "ASCII transliterations of Unicode text" +category = "main" +optional = false +python-versions = ">=3.5" + [[package]] name = "urllib3" version = "1.26.7" @@ -539,7 +547,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes [metadata] lock-version = "1.1" python-versions = ">=3.6.2,<4.0" -content-hash = "1c584baa55677a05ab7a8e3c7fcba8c64f6f60483c381a2ef74408a43803a44d" +content-hash = "47dda862a0c082b789c7b0112e258b6b8d5e096a058bc15d3a5423a28e102c33" [metadata.files] atomicwrites = [ @@ -838,6 +846,10 @@ typing-extensions = [ {file = "typing_extensions-3.10.0.2-py3-none-any.whl", hash = "sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34"}, {file = "typing_extensions-3.10.0.2.tar.gz", hash = "sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e"}, ] +unidecode = [ + {file = "Unidecode-1.3.2-py3-none-any.whl", hash = "sha256:215fe33c9d1c889fa823ccb66df91b02524eb8cc8c9c80f9c5b8129754d27829"}, + {file = "Unidecode-1.3.2.tar.gz", hash = "sha256:669898c1528912bcf07f9819dc60df18d057f7528271e31f8ec28cc88ef27504"}, +] urllib3 = [ {file = "urllib3-1.26.7-py2.py3-none-any.whl", hash = "sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844"}, {file = "urllib3-1.26.7.tar.gz", hash = "sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece"}, diff --git a/pyproject.toml b/pyproject.toml index 2e730b7a..372a94da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,7 @@ mattermostdriver = ">=6.1.2" requests = "" tablib = "" tabulate = "" +Unidecode = "" [tool.poetry.dev-dependencies] black = "==21.9b0" diff --git a/src/mmemoji/emoji.py b/src/mmemoji/emoji.py index 8902cfef..286c73f3 100644 --- a/src/mmemoji/emoji.py +++ b/src/mmemoji/emoji.py @@ -11,6 +11,7 @@ from typing import IO, Any, Dict, List, cast from mattermostdriver.exceptions import ResourceNotFound +from unidecode import unidecode from mmemoji.exceptions import EmojiAlreadyExists, EmojiNotFound @@ -49,6 +50,8 @@ def sanitize_name(filepath: str) -> str: """ # Extract filename without extension name = basename(filepath).split(".")[0] + # Transliterate Unicode to ASCII (remove accents) + name = unidecode(name) # Remove parentheses name = re.sub(r"[()[\]{}]", "", name) # Replace forbidden characters by underscores diff --git "a/tests/emojis/accentu\303\251.png" "b/tests/emojis/accentu\303\251.png" new file mode 100644 index 0000000000000000000000000000000000000000..d375a5124a5713e51f03e726add8fef82903d2db GIT binary patch literal 2173 zcmV-@2!i*CP)EX>4Tx04R}tkv&MmP!xqvQ>9WWf_4yb$WS|35EXIMDionYs1;guFnQ@8G%+M8 zE{=k0!NH%!s)LKOt`4q(Aov5~=H{g6A|>9J6k5c1;qgAsyXWxUeSpxYGR^8512o+> zGpVGQ%dd!`SA@}z03wLX%ra&rDGlHHx~ER6y9Cej@B6cQ)q=%z@3Dk%GRw#C2N3NMQkskRU=q4HZ;jBSE`PiiHfFCw=@wu3sXTLaquJ zIp(ne4YKP8|AXJNwTcttUQ#3pbiX*x#|RMG1)6oo`95}><_Qpd2CnqBf1?3Rf0ABr zYq2AsZyUI{ZfnXOaJd5vJ{htpyHb#rP%HxPXY@^ZVBi)AuX=N9o#XTY$kMFRH^9Lm z5HC^on$Np?I_LIpPiuZZG7)l(h(F&900006VoOIv0RI600RN!9r;`8x010qNS#tmY zE+YT{E+YYWr9XB6000McNlirueSad^gZEa<4bO1wgWnpw> zWFU8GbZ8()Nlj2!fese{00vb_L_t(|+U=cNh?P|o$A4$eIHqG4&1@RAnWiEmFdK`a zrtp>&^_0GZMr05MVL?$*58a^X@)F%3EGxT-y%mH}lrdclyI)L8tnA|GsVviSE@ygJ zduWg|x%W5c@}2$tfAGD1hqKxL_3g{rYpo4J2qA3+tPkfua*KFdJg?X;1P)*7mGYs{YC6tK;lP48_?sq;JZiS$Hf@n4G;1Afg7~oi;ufJ z=Ka0cnSh9lPN4T7;{SpII-=!#Mn35)0`CCFs+)umLI@#*5JE&wR^6RVz%-x*I~pE` zxr+B=@qPu&G4wN$=lNyD08R$x16{x@;CTEJKlfrGV;^Ag|LcHV8aL{LHsB86Ymy~; zj&xA89#{rUP#33@S-@KS!@ZI|1>jEL2z7ZnXvK2$`YSB_flm*x9K{aQbRBaU@N1QY zKkz{5O`u(JRF`I~B1f@?;tzjzVu>cA3LN8B=YuJG{&9;$Q9;^)O(_(A_-^4U$y6Cy zfUi<4{?OA4ES4jv{B!>6z*z}@Uj#k^-U2pY$ss=if4ObrI4obV9hd=}gGCcfP1sj| z0CRzl_1%`oSf5O|;M)z{3v?v--v(eF@M^;U{!C7z@{pv0M?dfcFu6=^ScugE4Ba9= zAo)hebizxw{SI7GfwoKr-X^?++-Z_A`Ov>Q!G8So3btti7D*h^rdN=+FcDpZ`St%C z#Ue3;scr!oG97hE&hQ0TNa?l#%q?6k787>%uok791#w^MH3dNJ)wLG#Wr7a7MuU>r>M1Z`}Ou z!&odgK?YEQDfkCh4gjB~q}@HPJ-%y(=`w)QoD?ws_hF%7X`m1=0_0=>BQqnQUAwTu zpENME4fN&6w9L0@BV=yu11=5t@+wIaVIICoGJw*A42>PY({<|W&@Nzv_Oy@jRqGJyD|fb9YAPeC#E(d`1Z1-#n=93mOx zGJwqizJReP$s#%f_!5it_aP6QDjDN4fbX$HPakqvAtw=$0rX;NVZO{k=Lb<4z#vwG z$CnOG192GuSkZ+(&vzSvC6Y2O8nOC%zGnhEFsG0RB&|-AA{IA47e7y5Ggg(eTav~@ z8&*@tcNEw~ZA!$53Xua|B&-(SOQ;rKRCKy+VMv*{73#KZj{^IFMe4eEH~|Y13B9#r zX-W-RB;XdpOSSg_cax5&q1nLWz@w5f8BM^uB)iS)J;ExEl#A)Wjld_vjCZx9OwNgf zmy91?N$mLw8$u44=|-y!l)l?Q1~5u4a`XFW?)Tg1M4HLy9AVACnZPw}q1xjyO{LBN z>TyxB%+06e@M7+9L-bbQ2ki36ea^^+($vNShhYUBTHMgyj+KiK^*KCCb*q_2%hgo= zZs1BhsT%?6)ob<`aJldM|85jb zBfVI}v#20rfGdD4q~k{Mq7(gApjGlzk34WO@G|~o=gTSjCg48I1uSaJBsVIo@pRTC z5&S#14p~l}C8}iYbSj5cBJBiD!A=_6fTOS*LC3pkq`?W>>zqP+u#mF?7Js-MOJUm# ze1SzH2_b|KLI@#*5JCtcgb+dqA%qY@q=J6|=w!!FC4T}z00000NkvXXu0mjf)IsYS literal 0 HcmV?d00001 diff --git a/tests/test_emoji.py b/tests/test_emoji.py new file mode 100644 index 00000000..773c9317 --- /dev/null +++ b/tests/test_emoji.py @@ -0,0 +1,21 @@ +from mmemoji import Emoji + + +def test_emoji_sanitize_simple_name() -> None: + name = Emoji.sanitize_name("emoji_1") + assert name == "emoji_1" + + +def test_emoji_sanitize_name_with_parentheses() -> None: + name = Emoji.sanitize_name("{[(parentheses)]}") + assert name == "parentheses" + + +def test_emoji_sanitize_name_with_spaces() -> None: + name = Emoji.sanitize_name("spa a ace") + assert name == "spa_a_ace" + + +def test_emoji_sanitize_name_with_accents() -> None: + name = Emoji.sanitize_name("àéêöhelloĐıł") + assert name == "aeeohelloDil" diff --git a/tests/utils.py b/tests/utils.py index 79804687..033dd894 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -23,6 +23,10 @@ "path": "tests/emojis/parentheses (spaced).png", "sha256": "195645113194074832ac56af71de520f1e2e87e52e4c8268b675832b91bab003", # noqa: E501 }, + "accentue": { + "path": "tests/emojis/accentué.png", + "sha256": "508c3f7dd47fdc0e879748cb0286e9de82d8945aab3a580c58a4a6682df6ab8f", # noqa: E501 + }, } USERS = { "sysadmin": {