From 8bd1465e09b44d5eb34b54c21466e09e9f81e9fe Mon Sep 17 00:00:00 2001 From: op200 <112297583+op200@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:04:18 +0800 Subject: [PATCH] v0.5 --- src/TEF.py | 31 +++++++++---------- src/TEF_log.py | 1 + src/TEF_sub.py | 83 ++++++++++++++++++++++++++++++++++++++++++++++++-- src/TEF_tr.py | 83 +++++++++++++++++++------------------------------- 4 files changed, 128 insertions(+), 70 deletions(-) diff --git a/src/TEF.py b/src/TEF.py index d05eb94..f246dd3 100644 --- a/src/TEF.py +++ b/src/TEF.py @@ -8,7 +8,7 @@ PROJECT_NAME = "Translate Enhanced Framework" -PROJECT_VERSION = "0.4.1" +PROJECT_VERSION = "0.5" PROJECT_URL = "https://github.com/ziyii01/use_zhconvert" @@ -69,9 +69,12 @@ def run_command(command: str): Tr.current_sub_list = get_all_sub_path(Tr.dir_path) elif command == "tr all" or command == "translate all": - for pathname in (sub.pathname for sub in Tr.current_sub_list - if sub.lang == "zh-Hans" and sub.suffix == "ass"): - Tr.translate(pathname, 'Traditional') + prograss = 1 + for sub in (sub for sub in Tr.current_sub_list + if sub.lang == "zh-Hans" and sub.suffix == "ass"): + log.info(f"Translate all zh-Hans ass: {prograss} / {len(Tr.current_sub_list)} ...") + Tr.tr_and_overwrite(sub, 'Traditional') + prograss = prograss+1 elif command[:2] == "cd": new_path = command.replace("cd","").strip() @@ -95,30 +98,26 @@ def run_command(command: str): print(repr(e)) else: - pathname = command + ".zh-Hans.ass" - if os.path.exists(pathname): - Tr.translate(pathname, 'Traditional') + sub = list(filter( + lambda sub: sub.prefix == command and sub.lang == "zh-Hans" and sub.suffix == "ass", + Tr.current_sub_list + ))[0] + if os.path.exists(sub.pathname): + log.info(f'Translate "{sub.pathname}"') + Tr.tr_and_overwrite(sub, 'Traditional') else: - log.warning(f'Can not find the file "{pathname}", cancel the translation') + log.warning(f'Can not find the file "{sub.pathname}", cancel the translation') if __name__ == "__main__": Tr.dir_path = os.getcwd() Tr.current_sub_list = get_all_sub_path(Tr.dir_path) - - # def get_zh_hans_ass_list(): - # all_ass_list = get_all_ass_path(dir_path) - # zh_hans_ass_list = match_suffix_in_list(all_ass_list, ".zh-Hans.ass") - # return all_ass_list, zh_hans_ass_list def show_list(list: list[Sub] | tuple[Sub], list_name: str): print(f'{list_name}:') for sub in list: print(f' {sub.pathname}') - - - # all_ass_list, zh_hans_ass_list = get_zh_hans_ass_list() for argv in sys.argv[1:]: run_command(argv) diff --git a/src/TEF_log.py b/src/TEF_log.py index 1139f01..0b24daf 100644 --- a/src/TEF_log.py +++ b/src/TEF_log.py @@ -1,5 +1,6 @@ import sys from loguru import logger as log + log.remove() log.add(sys.stderr, format="{time:YYYY.MM.DD HH:mm:ss.SS} [{level}] {message}") \ No newline at end of file diff --git a/src/TEF_sub.py b/src/TEF_sub.py index b0860c0..3a2d7f0 100644 --- a/src/TEF_sub.py +++ b/src/TEF_sub.py @@ -1,16 +1,35 @@ import os import sys +import locale +import codecs + +import chardet from TEF_log import log class Sub: + # must exist + path: str + prefix: str + lang: str | None + suffix: str + + # generate + pathname: str + + encoding: str | None + text: str + + + def splice_pathname(self): self.pathname = '.'.join([ os.path.join(self.path, self.prefix), self.lang, self.suffix ]) - + + def split_pathname(self): self.path, filename = os.path.split(self.pathname) @@ -30,6 +49,41 @@ def split_pathname(self): self.lang = None + def get_encoding(self): + with open(self.pathname, "rb") as file: + text_data = file.read() + encoding = chardet.detect(text_data)["encoding"] + + if encoding == None: + log.warning(f'Can not find the encoding from "{self.pathname}", auto use UTF-8') + return 'utf8' + + if encoding == 'GB2312' or encoding == 'cp936': + encoding = 'gb18030-2000' + + elif not encoding.startswith("UTF"): + locale_encoding = locale.getencoding() + log.warning(f'Auto find the encoding is {encoding}, it may be {locale_encoding} ({codecs.lookup(locale_encoding).name}), has been auto changed it') + encoding = locale_encoding + + log.info(f"Auto find the encoding is {encoding} ({codecs.lookup(encoding).name})") + + self.encoding = encoding + + + def get_text(self): + if not self.encoding: + raise Exception("A Sub obj is missing the member var 'encoding': Sub.get_text") + + if os.path.exists(self.pathname): + with open(self.pathname, 'rt', encoding = self.encoding) as file: + self.text = file.read() + else: + log.error(f'The input file "{self.pathname}" is not exist') + self.text = "" + + + def __init__(self, **vars): if 'pathname' in vars: @@ -44,8 +98,21 @@ def __init__(self, **vars): self.splice_pathname() else: - log.error("The class Sub init error") - sys.exit() + raise Exception("This Sub obj init error, obj has been corrupted") + + log.info(f"Creating new Sub obj: pathname: {self.pathname}") + + if 'encoding' in vars: + self.encoding = vars['encoding'] + else: + self.get_encoding() + + if 'text' in vars: + self.text = vars['text'] + else: + self.get_text() + + # == def __eq__(self, other): @@ -74,3 +141,13 @@ def __ge__(self, other): # != def __ne__(self, other): return not self == other + + + + def overwrite_file(self) -> None: + + if os.path.exists(self.pathname): + log.warning(f'The output file "{self.pathname}" already exists, auto overwrite it') + + with open(self.pathname, "wt", encoding = self.encoding) as file: + file.write(self.text) \ No newline at end of file diff --git a/src/TEF_tr.py b/src/TEF_tr.py index f55abb4..f4d4882 100644 --- a/src/TEF_tr.py +++ b/src/TEF_tr.py @@ -1,46 +1,20 @@ -import os -import locale -import codecs +import copy import httpx -import chardet from TEF_log import log from TEF_sub import Sub - -def get_encoding_by_text(pathname: str) -> str | None: - with open(pathname, "rb") as file: - text_data = file.read() - encoding = chardet.detect(text_data)["encoding"] - - if encoding == None: - log.warning(f'Can not find the encoding from "{pathname}", auto use UTF-8') - return 'utf8' - - if encoding == 'GB2312' or encoding == 'cp936': - encoding = 'gb18030-2000' - - if not encoding.startswith("UTF") and not encoding == 'gb18030-2000': - locale_encoding = locale.getencoding() - log.warning(f'Auto find the encoding is {encoding}, it may be {locale_encoding} ({codecs.lookup(locale_encoding).name}), has been auto changed it') - encoding = locale_encoding - - log.info(f"Auto find the encoding is {encoding} ({codecs.lookup(encoding).name})") - return encoding - - - FHJ_API_URL = "https://api.zhconvert.org" class Tr: - dir_path:str + dir_path: str current_sub_list: list[Sub] @staticmethod - def convert( + def fhj_convert( text: str, converter: str, apiKey = "", @@ -69,7 +43,8 @@ def convert( log.error(f"Tr.convert Timeout, trying to reconnect. Times of reconnect: {time}. Remaining reconnect times: {max_timeout_times-time}.") return response - + + @staticmethod def find_langTag_by_converter(converter: str) -> str: lang_tag: str = "Unknow" @@ -87,34 +62,40 @@ def find_langTag_by_converter(converter: str) -> str: return lang_tag + @staticmethod - def translate(input_pathname: str, converter: str) -> bool: - if not os.path.exists(input_pathname): - log.error(f'The input file "{input_pathname}" is not exist') - return False - - encoding = get_encoding_by_text(input_pathname) - - with open(input_pathname, "rt", encoding = encoding) as file: - text_data = file.read() - response = Tr.convert(text_data, converter) + def translate(sub: Sub, converter: str) -> Sub | None: + + response = Tr.fhj_convert(sub.text, converter) if response.status_code != 200: log.error(f"网址请求失败: {response.text}") - return False + return None + response_json_data = response.json() if response_json_data["code"] != 0: log.error(f"繁化姬接口失败: {response_json_data['msg']}") - return False + return None + else: - last_dot_index = input_pathname.rfind('.') - second_last_dot_index = input_pathname.rindex('.', 0, last_dot_index) - output_pathname = f'{input_pathname[:second_last_dot_index]}.{Tr.find_langTag_by_converter(converter)}.ass' + new_sub = copy.deepcopy(sub) - if os.path.exists(output_pathname): - log.warning(f'The output file "{output_pathname}" already exists, auto overwrite it') + new_sub.lang = Tr.find_langTag_by_converter(converter) + new_sub.splice_pathname() - with open(output_pathname, "wt", encoding = 'utf_8_sig') as file: - file.write(response_json_data["data"]["text"]) + new_sub.text = response_json_data["data"]["text"] + - log.info(f'Translate the file "{input_pathname}" to "{output_pathname}" success') - return True + + return new_sub + + + @staticmethod + def tr_and_overwrite(sub: Sub, converter: str, encoding: str = 'utf_8_sig') -> None: + + output_sub = Tr.translate(sub, converter) + + output_sub.encoding = encoding + output_sub.overwrite_file() + + log.info(f'Translate the file "{sub.pathname}" to "{output_sub.pathname}" success') +