From d687fdca0b312efc146c33c23485c9d91f1e2cbf Mon Sep 17 00:00:00 2001 From: hellofinch Date: Mon, 30 Dec 2024 11:34:40 +0800 Subject: [PATCH 1/8] feat (translator): add translators and update doc. --- docs/ADVANCED.md | 3 ++ docs/README_ja-JP.md | 3 ++ docs/README_zh-CN.md | 3 ++ pdf2zh/gui.py | 6 ++++ pdf2zh/translator.py | 82 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 97 insertions(+) diff --git a/docs/ADVANCED.md b/docs/ADVANCED.md index 942bcddf..e58936f5 100644 --- a/docs/ADVANCED.md +++ b/docs/ADVANCED.md @@ -66,6 +66,9 @@ We've provided a detailed table on the required [environment variables](https:// | **Dify** | `dify` | `DIFY_API_URL`, `DIFY_API_KEY` | `[Your DIFY URL]`, `[Your Key]` | See [Dify](https://github.com/langgenius/dify),Three variables, lang_out, lang_in, and text, need to be defined in Dify's workflow input. | | **AnythingLLM** | `anythingllm` | `AnythingLLM_URL`, `AnythingLLM_APIKEY` | `[Your AnythingLLM URL]`, `[Your Key]` | See [anything-llm](https://github.com/Mintplex-Labs/anything-llm) | |**Argos Translate**|`argos`| | |See [argos-translate](https://github.com/argosopentech/argos-translate)| +|**Grok**|`grok`| `GORK_API_KEY`, `GORK_MODEL` | `[Your GORK_API_KEY]`, `grok-2-1212` |See [Grok](https://docs.x.ai/docs/overview)| +|**DeepSeek**|`deepseek`| `DEEPSEEK_API_KEY`, `DEEPSEEK_MODEL` | `[Your DEEPSEEK_API_KEY]`, `deepseek-chat` |See [DeepSeek](https://www.deepseek.com/)| +|**OpenAI-Liked**|`openailiked`| `OPENAILIKED_BASE_URL`, `OPENAILIKED_API_KEY`, `OPENAILIKED_MODEL` | `url`, `[Your Key]`, `model name` | None | For large language models that are compatible with the OpenAI API but not listed in the table above, you can set environment variables using the same method outlined for OpenAI in the table. 
diff --git a/docs/README_ja-JP.md b/docs/README_ja-JP.md index 202c186c..0e4d4248 100644 --- a/docs/README_ja-JP.md +++ b/docs/README_ja-JP.md @@ -224,6 +224,9 @@ pdf2zh example.pdf -li en -lo ja |**Dify**|`dify`|`DIFY_API_URL`, `DIFY_API_KEY`|`[Your DIFY URL]`, `[Your Key]`|See [Dify](https://github.com/langgenius/dify),Three variables, lang_out, lang_in, and text, need to be defined in Dify's workflow input.| |**AnythingLLM**|`anythingllm`|`AnythingLLM_URL`, `AnythingLLM_APIKEY`|`[Your AnythingLLM URL]`, `[Your Key]`|See [anything-llm](https://github.com/Mintplex-Labs/anything-llm)| |**Argos Translate**|`argos`| | |See [argos-translate](https://github.com/argosopentech/argos-translate)| +|**Grok**|`grok`| `GORK_API_KEY`, `GORK_MODEL` | `[Your GORK_API_KEY]`, `grok-2-1212` |See [Grok](https://docs.x.ai/docs/overview)| +|**DeepSeek**|`deepseek`| `DEEPSEEK_API_KEY`, `DEEPSEEK_MODEL` | `[Your DEEPSEEK_API_KEY]`, `deepseek-chat` |See [DeepSeek](https://www.deepseek.com/)| +|**OpenAI-Liked**|`openailiked`| `OPENAILIKED_BASE_URL`, `OPENAILIKED_API_KEY`, `OPENAILIKED_MODEL` | `url`, `[Your Key]`, `model name` | None | (need Japenese translation) For large language models that are compatible with the OpenAI API but not listed in the table above, you can set environment variables using the same method outlined for OpenAI in the table. 
diff --git a/docs/README_zh-CN.md b/docs/README_zh-CN.md index a29d813e..31334d63 100644 --- a/docs/README_zh-CN.md +++ b/docs/README_zh-CN.md @@ -226,6 +226,9 @@ pdf2zh example.pdf -li en -lo ja |**Dify**|`dify`|`DIFY_API_URL`, `DIFY_API_KEY`|`[Your DIFY URL]`, `[Your Key]`|See [Dify](https://github.com/langgenius/dify),Three variables, lang_out, lang_in, and text, need to be defined in Dify's workflow input.| |**AnythingLLM**|`anythingllm`|`AnythingLLM_URL`, `AnythingLLM_APIKEY`|`[Your AnythingLLM URL]`, `[Your Key]`|See [anything-llm](https://github.com/Mintplex-Labs/anything-llm)| |**Argos Translate**|`argos`| | |See [argos-translate](https://github.com/argosopentech/argos-translate)| +|**Grok**|`grok`| `GORK_API_KEY`, `GORK_MODEL` | `[Your GORK_API_KEY]`, `grok-2-1212` |See [Grok](https://docs.x.ai/docs/overview)| +|**DeepSeek**|`deepseek`| `DEEPSEEK_API_KEY`, `DEEPSEEK_MODEL` | `[Your DEEPSEEK_API_KEY]`, `deepseek-chat` |See [DeepSeek](https://www.deepseek.com/)| +|**OpenAI-Liked**|`openailiked`| `OPENAILIKED_BASE_URL`, `OPENAILIKED_API_KEY`, `OPENAILIKED_MODEL` | `url`, `[Your Key]`, `model name` | None | 对于未在上述表格中的,并且兼容 OpenAI api 的大语言模型,可使用表格中的 OpenAI 的方式进行环境变量的设置。 diff --git a/pdf2zh/gui.py b/pdf2zh/gui.py index 42626662..ef93d59d 100644 --- a/pdf2zh/gui.py +++ b/pdf2zh/gui.py @@ -33,6 +33,9 @@ TencentTranslator, XinferenceTranslator, ZhipuTranslator, + GorkTranslator, + DeepseekTranslator, + OpenAIlikeTranslator, ) # The following variables associate strings with translators @@ -54,6 +57,9 @@ "Dify": DifyTranslator, "AnythingLLM": AnythingLLMTranslator, "Argos Translate": ArgosTranslator, + "Gork": GorkTranslator, + "DeepSeek": DeepseekTranslator, + "OpenAI-liked": OpenAIlikeTranslator, } # The following variables associate strings with specific languages diff --git a/pdf2zh/translator.py b/pdf2zh/translator.py index ca1f71db..9ac56015 100644 --- a/pdf2zh/translator.py +++ b/pdf2zh/translator.py @@ -701,3 +701,85 @@ def translate(self, text): translation = 
from_lang.get_translation(to_lang) translatedText = translation.translate(text) return translatedText + + +class GorkTranslator(OpenAITranslator): + # https://docs.x.ai/docs/overview#getting-started + name = "grok" + envs = { + "GORK_API_KEY": None, + "GORK_MODEL": "grok-2-1212", + } + CustomPrompt = True + + def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): + self.set_envs(envs) + base_url = "https://api.x.ai/v1" + api_key = self.envs["GORK_API_KEY"] + if not model: + model = self.envs["GORK_MODEL"] + super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key) + self.prompttext = prompt + if prompt: + self.add_cache_impact_parameters("prompt", prompt) + + +class DeepseekTranslator(OpenAITranslator): + name = "deepseek" + envs = { + "DEEPSEEK_API_KEY": None, + "DEEPSEEK_MODEL": "deepseek-chat", + } + CustomPrompt = True + + def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): + self.set_envs(envs) + base_url = "https://api.deepseek.com/v1" + api_key = self.envs["DEEPSEEK_API_KEY"] + if not model: + model = self.envs["DEEPSEEK_MODEL"] + super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key) + self.prompttext = prompt + if prompt: + self.add_cache_impact_parameters("prompt", prompt) + + +class OpenAIlikeTranslator(BaseTranslator): + # https://github.com/openai/openai-python + name = "openai-liked" + envs = { + "OPENAILIKE_BASE_URL": None, + "OPENAILIKE_API_KEY": None, + "OPENAILIKE_MODEL": None, + } + CustomPrompt = True + + def __init__( + self, + lang_in, + lang_out, + model, + base_url=None, + api_key=None, + envs=None, + prompt=None, + ): + self.set_envs(envs) + if not model: + model = self.envs["OPENAILIKE_MODEL"] + super().__init__(lang_in, lang_out, model) + self.options = {"temperature": 0} # 随机采样可能会打断公式标记 + if ( + self.envs["OPENAILIKE_BASE_URL"] == None + or self.envs["OPENAILIKE_API_KEY"] == None + or self.envs["OPENAILIKE_MODEL"] == None + ): + raise ValueError("The variables 
are invalid.") + self.client = openai.OpenAI( + base_url=base_url or self.envs["OPENAILIKE_BASE_URL"], + api_key=api_key or self.envs["OPENAILIKE_API_KEY"], + ) + self.prompttext = prompt + self.add_cache_impact_parameters("temperature", self.options["temperature"]) + if prompt: + self.add_cache_impact_parameters("prompt", prompt) From 71107a4ea84f9a69727a778b730c592819c2730b Mon Sep 17 00:00:00 2001 From: hellofinch Date: Mon, 30 Dec 2024 15:45:23 +0800 Subject: [PATCH 2/8] fix(main) : envs missing --- pdf2zh/converter.py | 6 +++++- pdf2zh/gui.py | 4 ++-- pdf2zh/high_level.py | 4 ++-- pdf2zh/translator.py | 43 +++++++++++++------------------------------ 4 files changed, 22 insertions(+), 35 deletions(-) diff --git a/pdf2zh/converter.py b/pdf2zh/converter.py index d79fe413..977d2f3c 100644 --- a/pdf2zh/converter.py +++ b/pdf2zh/converter.py @@ -36,6 +36,9 @@ AnythingLLMTranslator, XinferenceTranslator, ArgosTranslator, + GorkTranslator, + DeepseekTranslator, + OpenAIlikedTranslator, ) from pymupdf import Font @@ -151,7 +154,8 @@ def __init__( service_name = param[0] service_model = param[1] if len(param) > 1 else None for translator in [GoogleTranslator, BingTranslator, DeepLTranslator, DeepLXTranslator, OllamaTranslator, XinferenceTranslator, AzureOpenAITranslator, - OpenAITranslator, ZhipuTranslator, ModelScopeTranslator, SiliconTranslator, GeminiTranslator, AzureTranslator, TencentTranslator, DifyTranslator, AnythingLLMTranslator, ArgosTranslator]: + OpenAIlikedTranslator, + OpenAITranslator, ZhipuTranslator, ModelScopeTranslator, SiliconTranslator, GeminiTranslator, AzureTranslator, TencentTranslator, DifyTranslator, AnythingLLMTranslator, ArgosTranslator, GorkTranslator, DeepseekTranslator,OpenAIlikedTranslator,]: if service_name == translator.name: self.translator = translator(lang_in, lang_out, service_model, envs=envs, prompt=prompt) if not self.translator: diff --git a/pdf2zh/gui.py b/pdf2zh/gui.py index ef93d59d..0f6eb6c3 100644 --- a/pdf2zh/gui.py +++ 
b/pdf2zh/gui.py @@ -35,7 +35,7 @@ ZhipuTranslator, GorkTranslator, DeepseekTranslator, - OpenAIlikeTranslator, + OpenAIlikedTranslator, ) # The following variables associate strings with translators @@ -59,7 +59,7 @@ "Argos Translate": ArgosTranslator, "Gork": GorkTranslator, "DeepSeek": DeepseekTranslator, - "OpenAI-liked": OpenAIlikeTranslator, + "OpenAI-liked": OpenAIlikedTranslator, } # The following variables associate strings with specific languages diff --git a/pdf2zh/high_level.py b/pdf2zh/high_level.py index d4d7d4c9..d1b4d3fe 100644 --- a/pdf2zh/high_level.py +++ b/pdf2zh/high_level.py @@ -102,8 +102,8 @@ def translate_patch( service, resfont, noto, - kwarg.get("envs", {}), - kwarg.get("prompt", []), + kwarg["kwarg"].get("envs", {}), + kwarg["kwarg"].get("prompt", []), ) assert device is not None diff --git a/pdf2zh/translator.py b/pdf2zh/translator.py index 9ac56015..f5194181 100644 --- a/pdf2zh/translator.py +++ b/pdf2zh/translator.py @@ -744,42 +744,25 @@ def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): self.add_cache_impact_parameters("prompt", prompt) -class OpenAIlikeTranslator(BaseTranslator): - # https://github.com/openai/openai-python - name = "openai-liked" +class OpenAIlikedTranslator(OpenAITranslator): + name = "openailiked" envs = { - "OPENAILIKE_BASE_URL": None, - "OPENAILIKE_API_KEY": None, - "OPENAILIKE_MODEL": None, + "OPENAILIKED_BASE_URL": None, + "OPENAILIKED_API_KEY": None, + "OPENAILIKED_MODEL": None, } CustomPrompt = True - def __init__( - self, - lang_in, - lang_out, - model, - base_url=None, - api_key=None, - envs=None, - prompt=None, - ): + def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): self.set_envs(envs) + base_url = self.envs["OPENAILIKED_BASE_URL"] if not model: - model = self.envs["OPENAILIKE_MODEL"] - super().__init__(lang_in, lang_out, model) - self.options = {"temperature": 0} # 随机采样可能会打断公式标记 - if ( - self.envs["OPENAILIKE_BASE_URL"] == None - or self.envs["OPENAILIKE_API_KEY"] 
== None - or self.envs["OPENAILIKE_MODEL"] == None - ): - raise ValueError("The variables are invalid.") - self.client = openai.OpenAI( - base_url=base_url or self.envs["OPENAILIKE_BASE_URL"], - api_key=api_key or self.envs["OPENAILIKE_API_KEY"], - ) + model = self.envs["OPENAILIKED_MODEL"] + if self.envs["OPENAILIKED_API_KEY"] is None: + api_key = "openailiked" + else: + api_key = self.envs["OPENAILIKED_API_KEY"] + super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key) self.prompttext = prompt - self.add_cache_impact_parameters("temperature", self.options["temperature"]) if prompt: self.add_cache_impact_parameters("prompt", prompt) From 538bd04300b1f9bee150818f9bc4e4a50fc6c459 Mon Sep 17 00:00:00 2001 From: hellofinch Date: Mon, 30 Dec 2024 15:59:07 +0800 Subject: [PATCH 3/8] add unit test for openailiked translator. --- pdf2zh/translator.py | 10 +++++++-- test/test_translator.py | 50 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/pdf2zh/translator.py b/pdf2zh/translator.py index f5194181..23f5466b 100644 --- a/pdf2zh/translator.py +++ b/pdf2zh/translator.py @@ -755,9 +755,15 @@ class OpenAIlikedTranslator(OpenAITranslator): def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): self.set_envs(envs) - base_url = self.envs["OPENAILIKED_BASE_URL"] + if self.envs["OPENAILIKED_BASE_URL"]: + base_url = self.envs["OPENAILIKED_BASE_URL"] + else: + raise ValueError("The OPENAILIKED_BASE_URL is missing.") if not model: - model = self.envs["OPENAILIKED_MODEL"] + if self.envs["OPENAILIKED_MODEL"]: + model = self.envs["OPENAILIKED_MODEL"] + else: + raise ValueError("The OPENAILIKED_MODEL is missing.") if self.envs["OPENAILIKED_API_KEY"] is None: api_key = "openailiked" else: diff --git a/test/test_translator.py b/test/test_translator.py index ea9706c7..8b993157 100644 --- a/test/test_translator.py +++ b/test/test_translator.py @@ -1,5 +1,6 @@ import unittest from pdf2zh.translator 
import BaseTranslator +from pdf2zh.translator import OpenAIlikedTranslator from pdf2zh import cache @@ -72,6 +73,55 @@ def test_base_translator_throw(self): with self.assertRaises(NotImplementedError): translator.translate("Hello World") +class TestOpenAIlikedTranslator(unittest.TestCase): + def setUp(self) -> None: + self.default_envs = { + "OPENAILIKED_BASE_URL": "https://api.openailiked.com", + "OPENAILIKED_API_KEY": "test_api_key", + "OPENAILIKED_MODEL": "test_model", + } + def test_missing_base_url_raises_error(self): + """测试缺失 OPENAILIKED_BASE_URL 时抛出异常""" + with self.assertRaises(ValueError) as context: + OpenAIlikedTranslator(lang_in="en", lang_out="zh", model="test_model", envs={}) + self.assertIn("The OPENAILIKED_BASE_URL is missing.", str(context.exception)) + + def test_missing_model_raises_error(self): + """测试缺失 OPENAILIKED_MODEL 时抛出异常""" + envs_without_model = { + "OPENAILIKED_BASE_URL": "https://api.openailiked.com", + "OPENAILIKED_API_KEY": "test_api_key", + } + with self.assertRaises(ValueError) as context: + OpenAIlikedTranslator(lang_in="en", lang_out="zh", model=None, envs=envs_without_model) + self.assertIn("The OPENAILIKED_MODEL is missing.", str(context.exception)) + + def test_initialization_with_valid_envs(self): + """测试使用有效的环境变量初始化""" + translator = OpenAIlikedTranslator( + lang_in="en", + lang_out="zh", + model=None, + envs=self.default_envs, + ) + self.assertEqual(translator.envs["OPENAILIKED_BASE_URL"], self.default_envs["OPENAILIKED_BASE_URL"]) + self.assertEqual(translator.envs["OPENAILIKED_API_KEY"], self.default_envs["OPENAILIKED_API_KEY"]) + self.assertEqual(translator.model, self.default_envs["OPENAILIKED_MODEL"]) + + def test_default_api_key_fallback(self): + """测试当 OPENAILIKED_API_KEY 为空时使用默认值""" + envs_without_key = { + "OPENAILIKED_BASE_URL": "https://api.openailiked.com", + "OPENAILIKED_MODEL": "test_model", + } + translator = OpenAIlikedTranslator( + lang_in="en", + lang_out="zh", + model=None, + envs=envs_without_key, + ) + 
self.assertEqual(translator.envs["OPENAILIKED_BASE_URL"], self.default_envs["OPENAILIKED_BASE_URL"]) + self.assertEqual(translator.envs["OPENAILIKED_API_KEY"], None) if __name__ == "__main__": unittest.main() From a0a832276d7bb715ea0556abf2cd6c16bed19d32 Mon Sep 17 00:00:00 2001 From: hellofinch Date: Mon, 30 Dec 2024 16:09:20 +0800 Subject: [PATCH 4/8] chore : format --- test/test_translator.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/test/test_translator.py b/test/test_translator.py index 8b993157..ee0c55f0 100644 --- a/test/test_translator.py +++ b/test/test_translator.py @@ -73,6 +73,7 @@ def test_base_translator_throw(self): with self.assertRaises(NotImplementedError): translator.translate("Hello World") + class TestOpenAIlikedTranslator(unittest.TestCase): def setUp(self) -> None: self.default_envs = { @@ -80,10 +81,13 @@ def setUp(self) -> None: "OPENAILIKED_API_KEY": "test_api_key", "OPENAILIKED_MODEL": "test_model", } + def test_missing_base_url_raises_error(self): """测试缺失 OPENAILIKED_BASE_URL 时抛出异常""" with self.assertRaises(ValueError) as context: - OpenAIlikedTranslator(lang_in="en", lang_out="zh", model="test_model", envs={}) + OpenAIlikedTranslator( + lang_in="en", lang_out="zh", model="test_model", envs={} + ) self.assertIn("The OPENAILIKED_BASE_URL is missing.", str(context.exception)) def test_missing_model_raises_error(self): @@ -93,7 +97,9 @@ def test_missing_model_raises_error(self): "OPENAILIKED_API_KEY": "test_api_key", } with self.assertRaises(ValueError) as context: - OpenAIlikedTranslator(lang_in="en", lang_out="zh", model=None, envs=envs_without_model) + OpenAIlikedTranslator( + lang_in="en", lang_out="zh", model=None, envs=envs_without_model + ) self.assertIn("The OPENAILIKED_MODEL is missing.", str(context.exception)) def test_initialization_with_valid_envs(self): @@ -104,8 +110,14 @@ def test_initialization_with_valid_envs(self): model=None, envs=self.default_envs, ) - 
self.assertEqual(translator.envs["OPENAILIKED_BASE_URL"], self.default_envs["OPENAILIKED_BASE_URL"]) - self.assertEqual(translator.envs["OPENAILIKED_API_KEY"], self.default_envs["OPENAILIKED_API_KEY"]) + self.assertEqual( + translator.envs["OPENAILIKED_BASE_URL"], + self.default_envs["OPENAILIKED_BASE_URL"], + ) + self.assertEqual( + translator.envs["OPENAILIKED_API_KEY"], + self.default_envs["OPENAILIKED_API_KEY"], + ) self.assertEqual(translator.model, self.default_envs["OPENAILIKED_MODEL"]) def test_default_api_key_fallback(self): @@ -120,8 +132,12 @@ def test_default_api_key_fallback(self): model=None, envs=envs_without_key, ) - self.assertEqual(translator.envs["OPENAILIKED_BASE_URL"], self.default_envs["OPENAILIKED_BASE_URL"]) + self.assertEqual( + translator.envs["OPENAILIKED_BASE_URL"], + self.default_envs["OPENAILIKED_BASE_URL"], + ) self.assertEqual(translator.envs["OPENAILIKED_API_KEY"], None) + if __name__ == "__main__": unittest.main() From 8bdc253feac15576f90e534ef06f6b00aa5d3c5e Mon Sep 17 00:00:00 2001 From: hellofinch Date: Mon, 30 Dec 2024 16:12:40 +0800 Subject: [PATCH 5/8] format --- pdf2zh/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pdf2zh/converter.py b/pdf2zh/converter.py index 977d2f3c..2dcf96ab 100644 --- a/pdf2zh/converter.py +++ b/pdf2zh/converter.py @@ -155,7 +155,7 @@ def __init__( service_model = param[1] if len(param) > 1 else None for translator in [GoogleTranslator, BingTranslator, DeepLTranslator, DeepLXTranslator, OllamaTranslator, XinferenceTranslator, AzureOpenAITranslator, OpenAIlikedTranslator, - OpenAITranslator, ZhipuTranslator, ModelScopeTranslator, SiliconTranslator, GeminiTranslator, AzureTranslator, TencentTranslator, DifyTranslator, AnythingLLMTranslator, ArgosTranslator, GorkTranslator, DeepseekTranslator,OpenAIlikedTranslator,]: + OpenAITranslator, ZhipuTranslator, ModelScopeTranslator, SiliconTranslator, GeminiTranslator, AzureTranslator, TencentTranslator, DifyTranslator, 
AnythingLLMTranslator, ArgosTranslator, GorkTranslator, DeepseekTranslator, OpenAIlikedTranslator,]: if service_name == translator.name: self.translator = translator(lang_in, lang_out, service_model, envs=envs, prompt=prompt) if not self.translator: From 46de2bb93f71e4e2415460c821223e79a8f44991 Mon Sep 17 00:00:00 2001 From: hellofinch Date: Mon, 30 Dec 2024 16:18:59 +0800 Subject: [PATCH 6/8] format --- pdf2zh/converter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pdf2zh/converter.py b/pdf2zh/converter.py index 2dcf96ab..81d32d41 100644 --- a/pdf2zh/converter.py +++ b/pdf2zh/converter.py @@ -154,7 +154,6 @@ def __init__( service_name = param[0] service_model = param[1] if len(param) > 1 else None for translator in [GoogleTranslator, BingTranslator, DeepLTranslator, DeepLXTranslator, OllamaTranslator, XinferenceTranslator, AzureOpenAITranslator, - OpenAIlikedTranslator, OpenAITranslator, ZhipuTranslator, ModelScopeTranslator, SiliconTranslator, GeminiTranslator, AzureTranslator, TencentTranslator, DifyTranslator, AnythingLLMTranslator, ArgosTranslator, GorkTranslator, DeepseekTranslator, OpenAIlikedTranslator,]: if service_name == translator.name: self.translator = translator(lang_in, lang_out, service_model, envs=envs, prompt=prompt) From d057e1d97c0a9454b230ecbec7918e3564e62719 Mon Sep 17 00:00:00 2001 From: hellofinch Date: Mon, 30 Dec 2024 16:46:08 +0800 Subject: [PATCH 7/8] fix : kwarg get removed. 
--- pdf2zh/converter.py | 4 ++++ pdf2zh/high_level.py | 14 ++++++++----- pdf2zh/translator.py | 48 ++++++++++++++++++++++---------------------- 3 files changed, 37 insertions(+), 29 deletions(-) diff --git a/pdf2zh/converter.py b/pdf2zh/converter.py index 81d32d41..c4e2acb4 100644 --- a/pdf2zh/converter.py +++ b/pdf2zh/converter.py @@ -153,6 +153,10 @@ def __init__( param = service.split(":", 1) service_name = param[0] service_model = param[1] if len(param) > 1 else None + if not envs: + envs = {} + if not prompt: + prompt = [] for translator in [GoogleTranslator, BingTranslator, DeepLTranslator, DeepLXTranslator, OllamaTranslator, XinferenceTranslator, AzureOpenAITranslator, OpenAITranslator, ZhipuTranslator, ModelScopeTranslator, SiliconTranslator, GeminiTranslator, AzureTranslator, TencentTranslator, DifyTranslator, AnythingLLMTranslator, ArgosTranslator, GorkTranslator, DeepseekTranslator, OpenAIlikedTranslator,]: if service_name == translator.name: diff --git a/pdf2zh/high_level.py b/pdf2zh/high_level.py index d1b4d3fe..204c9e54 100644 --- a/pdf2zh/high_level.py +++ b/pdf2zh/high_level.py @@ -8,7 +8,7 @@ import urllib.request from asyncio import CancelledError from pathlib import Path -from typing import Any, BinaryIO, List, Optional +from typing import Any, BinaryIO, List, Optional, Dict import numpy as np import requests @@ -87,6 +87,8 @@ def translate_patch( callback: object = None, cancellation_event: asyncio.Event = None, model: OnnxModel = None, + envs: Dict = None, + prompt: List = None, **kwarg: Any, ) -> None: rsrcmgr = PDFResourceManager() @@ -102,8 +104,8 @@ def translate_patch( service, resfont, noto, - kwarg["kwarg"].get("envs", {}), - kwarg["kwarg"].get("prompt", []), + envs, + prompt, ) assert device is not None @@ -179,6 +181,8 @@ def translate_stream( callback: object = None, cancellation_event: asyncio.Event = None, model: OnnxModel = None, + envs: Dict = None, + prompt: List = None, **kwarg: Any, ): font_list = [("tiro", None)] @@ -313,6 
+317,8 @@ def translate( compatible: bool = False, cancellation_event: asyncio.Event = None, model: OnnxModel = None, + envs: Dict = None, + prompt: List = None, **kwarg: Any, ): if not files: @@ -367,8 +373,6 @@ def translate( os.unlink(file) s_mono, s_dual = translate_stream( s_raw, - envs=kwarg.get("envs", {}), - prompt=kwarg.get("prompt", []), **locals(), ) file_mono = Path(output) / f"{filename}-mono.pdf" diff --git a/pdf2zh/translator.py b/pdf2zh/translator.py index 23f5466b..4c919971 100644 --- a/pdf2zh/translator.py +++ b/pdf2zh/translator.py @@ -254,8 +254,8 @@ def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): self.client = ollama.Client() self.prompttext = prompt self.add_cache_impact_parameters("temperature", self.options["temperature"]) - if prompt: - self.add_cache_impact_parameters("prompt", prompt) + # if prompt: + # self.add_cache_impact_parameters("prompt", prompt) def do_translate(self, text): maxlen = max(2000, len(text) * 5) @@ -297,8 +297,8 @@ def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): self.client = xinference_client.RESTfulClient(self.envs["XINFERENCE_HOST"]) self.prompttext = prompt self.add_cache_impact_parameters("temperature", self.options["temperature"]) - if prompt: - self.add_cache_impact_parameters("prompt", prompt) + # if prompt: + # self.add_cache_impact_parameters("prompt", prompt) def do_translate(self, text): maxlen = max(2000, len(text) * 5) @@ -361,8 +361,8 @@ def __init__( ) self.prompttext = prompt self.add_cache_impact_parameters("temperature", self.options["temperature"]) - if prompt: - self.add_cache_impact_parameters("prompt", prompt) + # if prompt: + # self.add_cache_impact_parameters("prompt", prompt) def do_translate(self, text) -> str: response = self.client.chat.completions.create( @@ -406,8 +406,8 @@ def __init__( ) self.prompttext = prompt self.add_cache_impact_parameters("temperature", self.options["temperature"]) - if prompt: - 
self.add_cache_impact_parameters("prompt", prompt) + # if prompt: + # self.add_cache_impact_parameters("prompt", prompt) def do_translate(self, text) -> str: response = self.client.chat.completions.create( @@ -444,8 +444,8 @@ def __init__( model = self.envs["MODELSCOPE_MODEL"] super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key) self.prompttext = prompt - if prompt: - self.add_cache_impact_parameters("prompt", prompt) + # if prompt: + # self.add_cache_impact_parameters("prompt", prompt) class ZhipuTranslator(OpenAITranslator): @@ -465,8 +465,8 @@ def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): model = self.envs["ZHIPU_MODEL"] super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key) self.prompttext = prompt - if prompt: - self.add_cache_impact_parameters("prompt", prompt) + # if prompt: + # self.add_cache_impact_parameters("prompt", prompt) def do_translate(self, text) -> str: try: @@ -502,8 +502,8 @@ def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): model = self.envs["SILICON_MODEL"] super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key) self.prompttext = prompt - if prompt: - self.add_cache_impact_parameters("prompt", prompt) + # if prompt: + # self.add_cache_impact_parameters("prompt", prompt) class GeminiTranslator(OpenAITranslator): @@ -523,8 +523,8 @@ def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): model = self.envs["GEMINI_MODEL"] super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key) self.prompttext = prompt - if prompt: - self.add_cache_impact_parameters("prompt", prompt) + # if prompt: + # self.add_cache_impact_parameters("prompt", prompt) class AzureTranslator(BaseTranslator): @@ -602,8 +602,8 @@ def __init__(self, lang_out, lang_in, model, envs=None, prompt=None): "Content-Type": "application/json", } self.prompttext = prompt - if prompt: - self.add_cache_impact_parameters("prompt", prompt) + # if 
prompt: + # self.add_cache_impact_parameters("prompt", prompt) def do_translate(self, text): messages = self.prompt(text, self.prompttext) @@ -720,8 +720,8 @@ def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): model = self.envs["GORK_MODEL"] super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key) self.prompttext = prompt - if prompt: - self.add_cache_impact_parameters("prompt", prompt) + # if prompt: + # self.add_cache_impact_parameters("prompt", prompt) class DeepseekTranslator(OpenAITranslator): @@ -740,8 +740,8 @@ def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): model = self.envs["DEEPSEEK_MODEL"] super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key) self.prompttext = prompt - if prompt: - self.add_cache_impact_parameters("prompt", prompt) + # if prompt: + # self.add_cache_impact_parameters("prompt", prompt) class OpenAIlikedTranslator(OpenAITranslator): @@ -770,5 +770,5 @@ def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): api_key = self.envs["OPENAILIKED_API_KEY"] super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key) self.prompttext = prompt - if prompt: - self.add_cache_impact_parameters("prompt", prompt) + # if prompt: + # self.add_cache_impact_parameters("prompt", prompt) From 69cd4dad3ae41351009c13f5e20d8f487c6b8d62 Mon Sep 17 00:00:00 2001 From: hellofinch Date: Mon, 30 Dec 2024 17:10:57 +0800 Subject: [PATCH 8/8] fix cache --- pdf2zh/translator.py | 48 ++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/pdf2zh/translator.py b/pdf2zh/translator.py index 4c919971..bb6d0013 100644 --- a/pdf2zh/translator.py +++ b/pdf2zh/translator.py @@ -254,8 +254,8 @@ def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): self.client = ollama.Client() self.prompttext = prompt self.add_cache_impact_parameters("temperature", self.options["temperature"]) - # if prompt: - # 
self.add_cache_impact_parameters("prompt", prompt) + if prompt: + self.add_cache_impact_parameters("prompt", prompt.template) def do_translate(self, text): maxlen = max(2000, len(text) * 5) @@ -297,8 +297,8 @@ def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): self.client = xinference_client.RESTfulClient(self.envs["XINFERENCE_HOST"]) self.prompttext = prompt self.add_cache_impact_parameters("temperature", self.options["temperature"]) - # if prompt: - # self.add_cache_impact_parameters("prompt", prompt) + if prompt: + self.add_cache_impact_parameters("prompt", prompt.template) def do_translate(self, text): maxlen = max(2000, len(text) * 5) @@ -361,8 +361,8 @@ def __init__( ) self.prompttext = prompt self.add_cache_impact_parameters("temperature", self.options["temperature"]) - # if prompt: - # self.add_cache_impact_parameters("prompt", prompt) + if prompt: + self.add_cache_impact_parameters("prompt", prompt.template) def do_translate(self, text) -> str: response = self.client.chat.completions.create( @@ -406,8 +406,8 @@ def __init__( ) self.prompttext = prompt self.add_cache_impact_parameters("temperature", self.options["temperature"]) - # if prompt: - # self.add_cache_impact_parameters("prompt", prompt) + if prompt: + self.add_cache_impact_parameters("prompt", prompt.template) def do_translate(self, text) -> str: response = self.client.chat.completions.create( @@ -444,8 +444,8 @@ def __init__( model = self.envs["MODELSCOPE_MODEL"] super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key) self.prompttext = prompt - # if prompt: - # self.add_cache_impact_parameters("prompt", prompt) + if prompt: + self.add_cache_impact_parameters("prompt", prompt.template) class ZhipuTranslator(OpenAITranslator): @@ -465,8 +465,8 @@ def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): model = self.envs["ZHIPU_MODEL"] super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key) self.prompttext = prompt - # if 
prompt: - # self.add_cache_impact_parameters("prompt", prompt) + if prompt: + self.add_cache_impact_parameters("prompt", prompt.template) def do_translate(self, text) -> str: try: @@ -502,8 +502,8 @@ def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): model = self.envs["SILICON_MODEL"] super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key) self.prompttext = prompt - # if prompt: - # self.add_cache_impact_parameters("prompt", prompt) + if prompt: + self.add_cache_impact_parameters("prompt", prompt.template) class GeminiTranslator(OpenAITranslator): @@ -523,8 +523,8 @@ def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): model = self.envs["GEMINI_MODEL"] super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key) self.prompttext = prompt - # if prompt: - # self.add_cache_impact_parameters("prompt", prompt) + if prompt: + self.add_cache_impact_parameters("prompt", prompt.template) class AzureTranslator(BaseTranslator): @@ -602,8 +602,8 @@ def __init__(self, lang_out, lang_in, model, envs=None, prompt=None): "Content-Type": "application/json", } self.prompttext = prompt - # if prompt: - # self.add_cache_impact_parameters("prompt", prompt) + if prompt: + self.add_cache_impact_parameters("prompt", prompt.template) def do_translate(self, text): messages = self.prompt(text, self.prompttext) @@ -720,8 +720,8 @@ def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): model = self.envs["GORK_MODEL"] super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key) self.prompttext = prompt - # if prompt: - # self.add_cache_impact_parameters("prompt", prompt) + if prompt: + self.add_cache_impact_parameters("prompt", prompt.template) class DeepseekTranslator(OpenAITranslator): @@ -740,8 +740,8 @@ def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): model = self.envs["DEEPSEEK_MODEL"] super().__init__(lang_in, lang_out, model, base_url=base_url, 
api_key=api_key) self.prompttext = prompt - # if prompt: - # self.add_cache_impact_parameters("prompt", prompt) + if prompt: + self.add_cache_impact_parameters("prompt", prompt.template) class OpenAIlikedTranslator(OpenAITranslator): @@ -770,5 +770,5 @@ def __init__(self, lang_in, lang_out, model, envs=None, prompt=None): api_key = self.envs["OPENAILIKED_API_KEY"] super().__init__(lang_in, lang_out, model, base_url=base_url, api_key=api_key) self.prompttext = prompt - # if prompt: - # self.add_cache_impact_parameters("prompt", prompt) + if prompt: + self.add_cache_impact_parameters("prompt", prompt.template)