From ab874c97713e4634b9d7a9f55ecade69d99b51cd Mon Sep 17 00:00:00 2001
From: A0nameless0man <1395943920@qq.com>
Date: Sun, 25 Feb 2024 05:52:04 +0000
Subject: [PATCH] =?UTF-8?q?feat:=20=E5=A2=9E=E5=8A=A0=20Sakura=20=E6=A8=A1?=
 =?UTF-8?q?=E5=9E=8B=E6=94=AF=E6=8C=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                                 |  2 +-
 README_CN.md                              |  2 +-
 manga_translator/server/web_main.py       |  1 +
 manga_translator/translators/__init__.py  |  3 +-
 manga_translator/translators/chatgpt.py   | 80 +++++++++++++++++++++++-
 manga_translator/translators/keys.py      |  3 +
 6 files changed, 85 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 154d5e23..c284be32 100644
--- a/README.md
+++ b/README.md
@@ -389,7 +389,7 @@ Colorizer: **mc2**
 --upscale-ratio UPSCALE_RATIO
                       Image upscale ratio applied before detection. Can improve text detection.
 --colorizer {mc2}     Colorization model to use.
---translator {google,youdao,baidu,deepl,papago,caiyun,gpt3,gpt3.5,gpt4,none,original,offline,nllb,nllb_big,sugoi,jparacrawl,jparacrawl_big,m2m100,m2m100_big}
+--translator {google,youdao,baidu,deepl,papago,caiyun,gpt3,gpt3.5,gpt4,none,original,offline,nllb,nllb_big,sugoi,jparacrawl,jparacrawl_big,m2m100,m2m100_big,sakura}
                       Language translator to use
 --translator-chain TRANSLATOR_CHAIN
                       Output of one translator goes in another. Example: --translator-chain "google:JPN;sugoi:ENG".
diff --git a/README_CN.md b/README_CN.md
index 596249a4..825a2587 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -140,7 +140,7 @@ IND: Indonesian
 --upscale-ratio UPSCALE_RATIO
                       Image upscale ratio applied before detection. Can improve text detection.
 --colorizer {mc2}     Colorization model to use.
---translator {google,youdao,baidu,deepl,papago,caiyun,gpt3,gpt3.5,gpt4,none,original,offline,nllb,nllb_big,sugoi,jparacrawl,jparacrawl_big,m2m100,m2m100_big}
+--translator {google,youdao,baidu,deepl,papago,caiyun,gpt3,gpt3.5,gpt4,none,original,offline,nllb,nllb_big,sugoi,jparacrawl,jparacrawl_big,m2m100,m2m100_big,sakura}
                       Language translator to use
 --translator-chain TRANSLATOR_CHAIN
                       Output of one translator goes in another. Example: --translator-chain "google:JPN;sugoi:ENG".
diff --git a/manga_translator/server/web_main.py b/manga_translator/server/web_main.py
index c0b83a01..858bd2b1 100644
--- a/manga_translator/server/web_main.py
+++ b/manga_translator/server/web_main.py
@@ -58,6 +58,7 @@
     'jparacrawl_big',
     'm2m100',
     'm2m100_big',
+    'sakura',
     'none',
     'original',
 ]
diff --git a/manga_translator/translators/__init__.py b/manga_translator/translators/__init__.py
index 783a91b1..b1186025 100644
--- a/manga_translator/translators/__init__.py
+++ b/manga_translator/translators/__init__.py
@@ -7,7 +7,7 @@
 from .deepl import DeeplTranslator
 from .papago import PapagoTranslator
 from .caiyun import CaiyunTranslator
-from .chatgpt import GPT3Translator, GPT35TurboTranslator, GPT4Translator
+from .chatgpt import GPT3Translator, GPT35TurboTranslator, GPT4Translator, SakuraTranslator
 from .nllb import NLLBTranslator, NLLBBigTranslator
 from .sugoi import JparacrawlTranslator, JparacrawlBigTranslator, SugoiTranslator
 from .m2m100 import M2M100Translator, M2M100BigTranslator
@@ -24,6 +24,7 @@
     'jparacrawl_big': JparacrawlBigTranslator,
     'm2m100': M2M100Translator,
     'm2m100_big': M2M100BigTranslator,
+    'sakura': SakuraTranslator,
 }
 
 TRANSLATORS = {
diff --git a/manga_translator/translators/chatgpt.py b/manga_translator/translators/chatgpt.py
index 04546931..0e811781 100644
--- a/manga_translator/translators/chatgpt.py
+++ b/manga_translator/translators/chatgpt.py
@@ -9,7 +9,7 @@
 from typing import List, Dict
 
 from .common import CommonTranslator, MissingAPIKeyException
-from .keys import OPENAI_API_KEY, OPENAI_HTTP_PROXY, OPENAI_API_BASE
+from .keys import OPENAI_API_KEY, OPENAI_HTTP_PROXY, OPENAI_API_BASE, SAKURA_API_BASE, SAKURA_API_KEY
 
 CONFIG = None
 
@@ -54,11 +54,11 @@ class GPT3Translator(CommonTranslator):
     _INCLUDE_TEMPLATE = True
     _PROMPT_TEMPLATE = 'Please help me to translate the following text from a manga to {to_lang} (if it\'s already in {to_lang} or looks like gibberish you have to output it as it is instead):\n'
 
-    def __init__(self):
+    def __init__(self, check_openai_key = True):
         super().__init__()
         openai.api_key = openai.api_key or OPENAI_API_KEY
         openai.api_base = OPENAI_API_BASE
-        if not openai.api_key:
+        if not openai.api_key and check_openai_key:
             raise MissingAPIKeyException('Please set the OPENAI_API_KEY environment variable before using the chatgpt translator.')
         if OPENAI_HTTP_PROXY:
             proxies = {
@@ -320,3 +320,77 @@ async def _request_translation(self, to_lang: str, prompt: str) -> str:
 
         # If no response with text is found, return the first response's content (which may be empty)
         return response.choices[0].message.content
+
+
+class SakuraTranslator(GPT3Translator):
+    _CONFIG_KEY = 'sakura'
+    _MAX_REQUESTS_PER_MINUTE = 200
+    _RETRY_ATTEMPTS = 5
+    _MAX_TOKENS = 8192
+    _CHAT_SYSTEM_TEMPLATE = (
+        '你是一个轻小说翻译模型,可以流畅通顺地以日本轻小说的风格将日文翻译成简体中文,并联系上下文正确使用人称代词,不擅自添加原文中没有的代词。'
+    )
+    def __init__(self):
+        super().__init__(check_openai_key=False)
+
+    async def _request_translation(self, to_lang: str, prompt: str) -> str:
+        messages = [
+            {'role': 'system', 'content': self._CHAT_SYSTEM_TEMPLATE},
+            {'role': 'user', 'content': '将下面的日文文本翻译成中文:'+prompt},
+        ]
+
+        response = await openai.ChatCompletion.acreate(
+            model='gpt-4-0613',
+            messages=messages,
+            max_tokens=self._MAX_TOKENS // 2,
+            temperature=self.temperature,
+            top_p=self.top_p,
+            api_key=SAKURA_API_KEY,
+            api_base=SAKURA_API_BASE,
+        )
+
+        self.token_count += response.usage['total_tokens']
+        self.token_count_last = response.usage['total_tokens']
+        for choice in response.choices:
+            if 'text' in choice:
+                return choice.text
+
+        # If no response with text is found, return the first response's content (which may be empty)
+        return response.choices[0].message.content
+    async def _translate(self, from_lang: str, to_lang: str, queries: List[str]) -> List[str]:
+        translations = []
+        self.logger.debug(f'Temperature: {self.temperature}, TopP: {self.top_p}')
+
+        for query in queries:
+
+            ratelimit_attempt = 0
+            server_error_attempt = 0
+            timeout_attempt = 0
+            while True:
+                request_task = asyncio.create_task(self._request_translation(to_lang, query))
+                try:
+                    response = await request_task
+                    break
+                except openai.error.RateLimitError: # Server returned ratelimit response
+                    ratelimit_attempt += 1
+                    if ratelimit_attempt >= self._RATELIMIT_RETRY_ATTEMPTS:
+                        raise
+                    self.logger.warn(f'Restarting request due to ratelimiting by openai servers. Attempt: {ratelimit_attempt}')
+                    await asyncio.sleep(2)
+                except openai.error.APIError: # Server returned 500 error (probably server load)
+                    server_error_attempt += 1
+                    if server_error_attempt >= self._RETRY_ATTEMPTS:
+                        self.logger.error('Sakura encountered a server error, possibly due to high server load. Use a different translator or try again later.')
+                        raise
+                    self.logger.warn(f'Restarting request due to a server error. Attempt: {server_error_attempt}')
+                    await asyncio.sleep(1)
+
+            self.logger.debug('-- Sakura Response --\n' + response)
+
+            translations.extend([response])
+
+        self.logger.debug(translations)
+        if self.token_count_last:
+            self.logger.info(f'Used {self.token_count_last} tokens (Total: {self.token_count})')
+
+        return translations
\ No newline at end of file
diff --git a/manga_translator/translators/keys.py b/manga_translator/translators/keys.py
index 1b5f9c15..d90b267b 100644
--- a/manga_translator/translators/keys.py
+++ b/manga_translator/translators/keys.py
@@ -17,3 +17,6 @@
 OPENAI_API_BASE = os.getenv('OPENAI_API_BASE', 'https://api.openai.com/v1') #使用api-for-open-llm例子 http://127.0.0.1:8000/v1
 
 CAIYUN_TOKEN = os.getenv('CAIYUN_TOKEN', '') # 彩云小译API访问令牌
+
+SAKURA_API_KEY = os.getenv('SAKURA_API_KEY', '')
+SAKURA_API_BASE = os.getenv('SAKURA_API_BASE', 'http://127.0.0.1:5000/v1')
\ No newline at end of file
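
For reviewers who want to poke at a Sakura backend outside the full pipeline: the new translator just issues an OpenAI-compatible chat completion against SAKURA_API_BASE with the system prompt shown above. Below is a minimal standalone sketch of that request, assuming the legacy openai<1.0 SDK already used by chatgpt.py, a server listening on the keys.py default endpoint, and a placeholder API key (local Sakura servers are assumed to ignore both the key and the model name).

    # Illustrative sketch only -- not part of this patch.
    # Mirrors the request shape SakuraTranslator._request_translation sends.
    import asyncio
    import openai

    SAKURA_API_BASE = 'http://127.0.0.1:5000/v1'  # default from keys.py
    SAKURA_API_KEY = 'sk-placeholder'             # assumed; local servers typically ignore it

    async def main():
        response = await openai.ChatCompletion.acreate(
            model='gpt-4-0613',  # passed through verbatim by the translator; assumed ignored by the local server
            messages=[
                {'role': 'system', 'content': '你是一个轻小说翻译模型,可以流畅通顺地以日本轻小说的风格将日文翻译成简体中文,并联系上下文正确使用人称代词,不擅自添加原文中没有的代词。'},
                {'role': 'user', 'content': '将下面的日文文本翻译成中文:今日はいい天気ですね。'},
            ],
            max_tokens=4096,    # _MAX_TOKENS // 2 in the translator
            temperature=0.3,    # the translator forwards its configured sampling settings
            top_p=1.0,
            api_key=SAKURA_API_KEY,
            api_base=SAKURA_API_BASE,
        )
        print(response.choices[0].message.content)

    asyncio.run(main())

End to end, the same path is reachable from the existing CLI by exporting SAKURA_API_BASE (and SAKURA_API_KEY if the server requires one) and passing --translator sakura.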