Skip to content

Commit

Permalink
Add Hindi, Indonesian, Malay language data
Browse files Browse the repository at this point in the history
  • Loading branch information
xxyzz committed Sep 23, 2024
1 parent ac15023 commit 3e54a5b
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 93 deletions.
2 changes: 1 addition & 1 deletion data/deps.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"lxml": "5.3.0",
"rapidfuzz": "3.9.7",
"rapidfuzz": "3.10.0",
"spacy": "3.7.6",
"spacy_cpu_model": "3.7.0",
"en_spacy_cpu_model": "3.7.1",
Expand Down
67 changes: 15 additions & 52 deletions data/languages.json
Original file line number Diff line number Diff line change
@@ -1,36 +1,26 @@
{
"ca": {
"639-2": "cat",
"gloss_source": "",
"has_trf": true,
"name": "Catalan",
"spacy": "ca_core_news_"
},
"cs": {
"639-2": "ces",
"gloss_source": "",
"has_trf": false,
"name": "Czech",
"spacy": ""
},
"da": {
"639-2": "dan",
"gloss_source": "",
"has_trf": true,
"name": "Danish",
"spacy": "da_core_news_"
},
"de": {
"639-2": "deu",
"gloss_source": "kaikki",
"has_trf": false,
"name": "German",
"spacy": "de_core_news_"
},
"el": {
"639-2": "ell",
"gloss_source": "dbnary",
"has_trf": false,
"lemma_languages": [
"el",
"en",
Expand All @@ -40,60 +30,56 @@
"spacy": "el_core_news_"
},
"en": {
"639-2": "eng",
"gloss_source": "kaikki",
"has_trf": true,
"name": "English",
"spacy": "en_core_web_"
},
"es": {
"639-2": "spa",
"gloss_source": "kaikki",
"has_trf": false,
"name": "Spanish",
"spacy": "es_core_news_"
},
"fi": {
"639-2": "fin",
"gloss_source": "dbnary",
"has_trf": false,
"lemma_languages": [
"fi"
],
"name": "Finnish",
"spacy": "fi_core_news_"
},
"fr": {
"639-2": "fra",
"gloss_source": "kaikki",
"has_trf": false,
"name": "French",
"spacy": "fr_core_news_"
},
"he": {
"639-2": "heb",
"gloss_source": "kaikki",
"has_trf": false,
"lemma_languages": [
"en"
],
"name": "Hebrew",
"spacy": ""
},
"hi": {
"gloss_source": "",
"name": "Hindi",
"spacy": ""
},
"hr": {
"639-2": "hrv",
"gloss_source": "dbnary",
"has_trf": false,
"lemma_languages": [
"hr"
],
"name": "Serbo-Croatian",
"spacy": "hr_core_news_"
},
"id": {
"gloss_source": "",
"name": "Indonesian",
"spacy": ""
},
"it": {
"639-2": "ita",
"gloss_source": "dbnary",
"has_trf": false,
"lemma_languages": [
"en",
"it"
Expand All @@ -102,115 +88,92 @@
"spacy": "it_core_news_"
},
"ja": {
"639-2": "jpn",
"gloss_source": "kaikki",
"has_trf": true,
"name": "Japanese",
"spacy": "ja_core_news_"
},
"ko": {
"639-2": "kor",
"gloss_source": "",
"has_trf": false,
"name": "Korean",
"spacy": "ko_core_news_"
},
"lt": {
"639-2": "lit",
"gloss_source": "dbnary",
"has_trf": false,
"lemma_languages": [
"lt"
],
"name": "Lithuanian",
"spacy": "lt_core_news_"
},
"mk": {
"639-2": "mkd",
"gloss_source": "",
"has_trf": false,
"name": "Macedonian",
"spacy": "mk_core_news_"
},
"ms": {
"gloss_source": "",
"name": "Malay",
"spacy": ""
},
"nl": {
"639-2": "nld",
"gloss_source": "dbnary",
"has_trf": false,
"lemma_languages": [
"nl"
],
"name": "Dutch",
"spacy": "nl_core_news_"
},
"nb": {
"639-2": "nob",
"gloss_source": "dbnary",
"has_trf": false,
"lemma_languages": [
"no"
],
"name": "Norwegian Bokmål",
"spacy": "nb_core_news_"
},
"pl": {
"639-2": "pol",
"gloss_source": "kaikki",
"has_trf": false,
"name": "Polish",
"spacy": "pl_core_news_"
},
"pt": {
"639-2": "por",
"gloss_source": "dbnary",
"has_trf": false,
"lemma_languages": [
"pt"
],
"name": "Portuguese",
"spacy": "pt_core_news_"
},
"ro": {
"639-2": "ron",
"gloss_source": "",
"has_trf": false,
"name": "Romanian",
"spacy": "ro_core_news_"
},
"ru": {
"639-2": "rus",
"gloss_source": "kaikki",
"has_trf": false,
"name": "Russian",
"spacy": "ru_core_news_"
},
"sl": {
"639-2": "slv",
"gloss_source": "",
"has_trf": true,
"name": "Slovene",
"spacy": "sl_core_news_"
},
"sv": {
"639-2": "swe",
"gloss_source": "dbnary",
"has_trf": false,
"lemma_languages": [
"sv"
],
"name": "Swedish",
"spacy": "sv_core_news_"
},
"uk": {
"639-2": "ukr",
"gloss_source": "",
"has_trf": true,
"name": "Ukrainian",
"spacy": "uk_core_news_"
},
"zh": {
"639-2": "zho",
"gloss_source": "kaikki",
"has_trf": true,
"name": "Chinese",
"spacy": "zh_core_web_"
}
Expand Down
63 changes: 33 additions & 30 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,36 +42,39 @@ Supported languages

Supported Word Wise definition languages are listed in the plugin's customize window.

============= =====
Book language X-Ray
============= =====
Bokmål ✅
Català ✅
Dansk ✅
Deutsch ✅
English ✅
Español ✅
Français ✅
Hrvatski ✅
Italiano ✅
Lietuvių ✅
Nederlands ✅
Polski ✅
Português ✅
Română ✅
Slovenščina ✅
Suomi ✅
Svenska ✅
čeština ❌
Ελληνικά ✅
Македонски ✅
Русский ✅
Українська ✅
עִבְֿרִית‎ ❌
中文 ✅
日本語 ✅
한국어 ✅
============= =====
================ =====
Book language X-Ray
================ =====
Bahasa Indonesia ❌
Bahasa Melayu ❌
Bokmål ✅
Català ✅
Dansk ✅
Deutsch ✅
English ✅
Español ✅
Français ✅
Hrvatski ✅
Italiano ✅
Lietuvių ✅
Nederlands ✅
Polski ✅
Português ✅
Română ✅
Slovenščina ✅
Suomi ✅
Svenska ✅
čeština ❌
Ελληνικά ✅
Македонски ✅
Русский ✅
Українська ✅
עִבְֿרִית‎ ❌
हिन्दी ❌
中文 ✅
日本語 ✅
한국어 ✅
================ =====

Community
---------
Expand Down
18 changes: 9 additions & 9 deletions metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,18 @@ def is_ww_supported(book_lang: str, gloss_lang: str) -> bool:


def check_metadata(gui: Any, book_id: int, custom_x_ray: bool) -> MetaDataResult | None:
from calibre.utils.localization import lang_as_iso639_1

from .config import prefs
from .error_dialogs import unsupported_format_dialog, unsupported_language_dialog
from .utils import get_plugin_path, load_languages_data

db = gui.current_db.new_api
lang_dict = load_languages_data(get_plugin_path(), False)
supported_languages = {v["639-2"]: k for k, v in lang_dict.items()}
mi = db.get_metadata(book_id, get_cover=True)
# https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
calibre_book_lang = mi.get("language")
if calibre_book_lang not in supported_languages:
book_lang = lang_as_iso639_1(mi.get("language"))
if book_lang not in lang_dict:
unsupported_language_dialog(mi.get("title"))
return None

Expand All @@ -63,7 +64,6 @@ def check_metadata(gui: Any, book_id: int, custom_x_ray: bool) -> MetaDataResult
if not prefs["use_all_formats"]:
supported_fmts = [supported_fmts[0]]

book_lang = supported_languages[calibre_book_lang]
support_ww_list = []
for fmt in supported_fmts:
gloss_lang = prefs["gloss_lang"]
Expand All @@ -81,11 +81,11 @@ def check_metadata(gui: Any, book_id: int, custom_x_ray: bool) -> MetaDataResult


def cli_check_metadata(book_path_str: str, log: Any) -> MetaDataResult | None:
from calibre.utils.localization import lang_as_iso639_1

from .config import prefs
from .utils import get_plugin_path, load_languages_data

lang_dict = load_languages_data(get_plugin_path(), False)
supported_languages = {v["639-2"]: k for k, v in lang_dict.items()}
book_path = Path(book_path_str)
book_fmt = book_path.suffix.upper()[1:]
mi = None
Expand All @@ -112,14 +112,14 @@ def cli_check_metadata(book_path_str: str, log: Any) -> MetaDataResult | None:
mi = get_metadata(f)

if mi is not None:
calibre_book_lang = mi.get("language")
if calibre_book_lang not in supported_languages:
lang_dict = load_languages_data(get_plugin_path(), False)
book_lang = lang_as_iso639_1(mi.get("language"))
if book_lang not in lang_dict:
log.prints(
log.WARN,
f"The language of the book {mi.get('title')} is not supported.",
)
return None
book_lang = supported_languages[calibre_book_lang]
gloss_lang = prefs["gloss_lang"]
return MetaDataResult(
book_fmts=[book_fmt],
Expand Down
2 changes: 1 addition & 1 deletion utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from typing import Any, TypedDict

CJK_LANGS = ["zh", "ja", "ko"]
PROFICIENCY_VERSION = "0.5.21"
PROFICIENCY_VERSION = "0.5.22"
PROFICIENCY_RELEASE_URL = (
f"https://github.com/xxyzz/Proficiency/releases/download/v{PROFICIENCY_VERSION}"
)
Expand Down

0 comments on commit 3e54a5b

Please sign in to comment.