From 3e54a5b9de9f53b8cbe462dd284e1160ed7ce1ee Mon Sep 17 00:00:00 2001 From: xxyzz Date: Mon, 23 Sep 2024 22:55:12 +0800 Subject: [PATCH] Add Hindi, Indonesian, Malay language data --- data/deps.json | 2 +- data/languages.json | 67 ++++++++++----------------------------------- docs/index.rst | 63 ++++++++++++++++++++++-------------------- metadata.py | 18 ++++++------ utils.py | 2 +- 5 files changed, 59 insertions(+), 93 deletions(-) diff --git a/data/deps.json b/data/deps.json index 86ae8e6..49a7cc1 100644 --- a/data/deps.json +++ b/data/deps.json @@ -1,6 +1,6 @@ { "lxml": "5.3.0", - "rapidfuzz": "3.9.7", + "rapidfuzz": "3.10.0", "spacy": "3.7.6", "spacy_cpu_model": "3.7.0", "en_spacy_cpu_model": "3.7.1", diff --git a/data/languages.json b/data/languages.json index b93199f..f6604d1 100644 --- a/data/languages.json +++ b/data/languages.json @@ -1,36 +1,26 @@ { "ca": { - "639-2": "cat", "gloss_source": "", - "has_trf": true, "name": "Catalan", "spacy": "ca_core_news_" }, "cs": { - "639-2": "ces", "gloss_source": "", - "has_trf": false, "name": "Czech", "spacy": "" }, "da": { - "639-2": "dan", "gloss_source": "", - "has_trf": true, "name": "Danish", "spacy": "da_core_news_" }, "de": { - "639-2": "deu", "gloss_source": "kaikki", - "has_trf": false, "name": "German", "spacy": "de_core_news_" }, "el": { - "639-2": "ell", "gloss_source": "dbnary", - "has_trf": false, "lemma_languages": [ "el", "en", @@ -40,23 +30,17 @@ "spacy": "el_core_news_" }, "en": { - "639-2": "eng", "gloss_source": "kaikki", - "has_trf": true, "name": "English", "spacy": "en_core_web_" }, "es": { - "639-2": "spa", "gloss_source": "kaikki", - "has_trf": false, "name": "Spanish", "spacy": "es_core_news_" }, "fi": { - "639-2": "fin", "gloss_source": "dbnary", - "has_trf": false, "lemma_languages": [ "fi" ], @@ -64,36 +48,38 @@ "spacy": "fi_core_news_" }, "fr": { - "639-2": "fra", "gloss_source": "kaikki", - "has_trf": false, "name": "French", "spacy": "fr_core_news_" }, "he": { - "639-2": "heb", "gloss_source": "kaikki", - "has_trf": false, "lemma_languages": [ "en" ], "name": "Hebrew", "spacy": "" }, + "hi": { + "gloss_source": "", + "name": "Hindi", + "spacy": "" + }, "hr": { - "639-2": "hrv", "gloss_source": "dbnary", - "has_trf": false, "lemma_languages": [ "hr" ], "name": "Serbo-Croatian", "spacy": "hr_core_news_" }, + "id": { + "gloss_source": "", + "name": "Indonesian", + "spacy": "" + }, "it": { - "639-2": "ita", "gloss_source": "dbnary", - "has_trf": false, "lemma_languages": [ "en", "it" @@ -102,23 +88,17 @@ "spacy": "it_core_news_" }, "ja": { - "639-2": "jpn", "gloss_source": "kaikki", - "has_trf": true, "name": "Japanese", "spacy": "ja_core_news_" }, "ko": { - "639-2": "kor", "gloss_source": "", - "has_trf": false, "name": "Korean", "spacy": "ko_core_news_" }, "lt": { - "639-2": "lit", "gloss_source": "dbnary", - "has_trf": false, "lemma_languages": [ "lt" ], @@ -126,16 +106,17 @@ "spacy": "lt_core_news_" }, "mk": { - "639-2": "mkd", "gloss_source": "", - "has_trf": false, "name": "Macedonian", "spacy": "mk_core_news_" }, + "ms": { + "gloss_source": "", + "name": "Malay", + "spacy": "" + }, "nl": { - "639-2": "nld", "gloss_source": "dbnary", - "has_trf": false, "lemma_languages": [ "nl" ], @@ -143,9 +124,7 @@ "spacy": "nl_core_news_" }, "nb": { - "639-2": "nob", "gloss_source": "dbnary", - "has_trf": false, "lemma_languages": [ "no" ], @@ -153,16 +132,12 @@ "spacy": "nb_core_news_" }, "pl": { - "639-2": "pol", "gloss_source": "kaikki", - "has_trf": false, "name": "Polish", "spacy": "pl_core_news_" }, "pt": { - "639-2": "por", "gloss_source": "dbnary", - "has_trf": false, "lemma_languages": [ "pt" ], @@ -170,30 +145,22 @@ "spacy": "pt_core_news_" }, "ro": { - "639-2": "ron", "gloss_source": "", - "has_trf": false, "name": "Romanian", "spacy": "ro_core_news_" }, "ru": { - "639-2": "rus", "gloss_source": "kaikki", - "has_trf": false, "name": "Russian", "spacy": "ru_core_news_" }, "sl": { - "639-2": "slv", "gloss_source": "", - "has_trf": true, "name": "Slovene", "spacy": "sl_core_news_" }, "sv": { - "639-2": "swe", "gloss_source": "dbnary", - "has_trf": false, "lemma_languages": [ "sv" ], @@ -201,16 +168,12 @@ "spacy": "sv_core_news_" }, "uk": { - "639-2": "ukr", "gloss_source": "", - "has_trf": true, "name": "Ukrainian", "spacy": "uk_core_news_" }, "zh": { - "639-2": "zho", "gloss_source": "kaikki", - "has_trf": true, "name": "Chinese", "spacy": "zh_core_web_" } diff --git a/docs/index.rst b/docs/index.rst index de7bb38..3abb244 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -42,36 +42,39 @@ Supported languages Supported Word Wise definition languages are listed in the plugin's customize window. -============= ===== -Book language X-Ray -============= ===== -Bokmål ✅ -Català ✅ -Dansk ✅ -Deutsch ✅ -English ✅ -Español ✅ -Français ✅ -Hrvatski ✅ -Italiano ✅ -Lietuvių ✅ -Nederlands ✅ -Polski ✅ -Português ✅ -Română ✅ -Slovenščina ✅ -Suomi ✅ -Svenska ✅ -čeština ❌ -Ελληνικά ✅ -Македонски ✅ -Русский ✅ -Українська ✅ -עִבְֿרִית‎ ❌ -中文 ✅ -日本語 ✅ -한국어 ✅ -============= ===== +================ ===== +Book language X-Ray +================ ===== +Bahasa Indonesia ❌ +Bahasa Melayu ❌ +Bokmål ✅ +Català ✅ +Dansk ✅ +Deutsch ✅ +English ✅ +Español ✅ +Français ✅ +Hrvatski ✅ +Italiano ✅ +Lietuvių ✅ +Nederlands ✅ +Polski ✅ +Português ✅ +Română ✅ +Slovenščina ✅ +Suomi ✅ +Svenska ✅ +čeština ❌ +Ελληνικά ✅ +Македонски ✅ +Русский ✅ +Українська ✅ +עִבְֿרִית‎ ❌ +हिन्दी ❌ +中文 ✅ +日本語 ✅ +한국어 ✅ +================ ===== Community --------- diff --git a/metadata.py b/metadata.py index 4ef6482..6e2d655 100644 --- a/metadata.py +++ b/metadata.py @@ -33,17 +33,18 @@ def is_ww_supported(book_lang: str, gloss_lang: str) -> bool: def check_metadata(gui: Any, book_id: int, custom_x_ray: bool) -> MetaDataResult | None: + from calibre.utils.localization import lang_as_iso639_1 + from .config import prefs from .error_dialogs import unsupported_format_dialog, unsupported_language_dialog from .utils import get_plugin_path, load_languages_data db = gui.current_db.new_api lang_dict = load_languages_data(get_plugin_path(), False) - supported_languages = {v["639-2"]: k for k, v in lang_dict.items()} mi = db.get_metadata(book_id, get_cover=True) # https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes - calibre_book_lang = mi.get("language") - if calibre_book_lang not in supported_languages: + book_lang = lang_as_iso639_1(mi.get("language")) + if book_lang not in lang_dict: unsupported_language_dialog(mi.get("title")) return None @@ -63,7 +64,6 @@ def check_metadata(gui: Any, book_id: int, custom_x_ray: bool) -> MetaDataResult if not prefs["use_all_formats"]: supported_fmts = [supported_fmts[0]] - book_lang = supported_languages[calibre_book_lang] support_ww_list = [] for fmt in supported_fmts: gloss_lang = prefs["gloss_lang"] @@ -81,11 +81,11 @@ def check_metadata(gui: Any, book_id: int, custom_x_ray: bool) -> MetaDataResult def cli_check_metadata(book_path_str: str, log: Any) -> MetaDataResult | None: + from calibre.utils.localization import lang_as_iso639_1 + from .config import prefs from .utils import get_plugin_path, load_languages_data - lang_dict = load_languages_data(get_plugin_path(), False) - supported_languages = {v["639-2"]: k for k, v in lang_dict.items()} book_path = Path(book_path_str) book_fmt = book_path.suffix.upper()[1:] mi = None @@ -112,14 +112,14 @@ def cli_check_metadata(book_path_str: str, log: Any) -> MetaDataResult | None: mi = get_metadata(f) if mi is not None: - calibre_book_lang = mi.get("language") - if calibre_book_lang not in supported_languages: + lang_dict = load_languages_data(get_plugin_path(), False) + book_lang = lang_as_iso639_1(mi.get("language")) + if book_lang not in lang_dict: log.prints( log.WARN, f"The language of the book {mi.get('title')} is not supported.", ) return None - book_lang = supported_languages[calibre_book_lang] gloss_lang = prefs["gloss_lang"] return MetaDataResult( book_fmts=[book_fmt], diff --git a/utils.py b/utils.py index 1e971ce..a2855a3 100644 --- a/utils.py +++ b/utils.py @@ -9,7 +9,7 @@ from typing import Any, TypedDict CJK_LANGS = ["zh", "ja", "ko"] -PROFICIENCY_VERSION = "0.5.21" +PROFICIENCY_VERSION = "0.5.22" PROFICIENCY_RELEASE_URL = ( f"https://github.com/xxyzz/Proficiency/releases/download/v{PROFICIENCY_VERSION}" )