From 8bd1465e09b44d5eb34b54c21466e09e9f81e9fe Mon Sep 17 00:00:00 2001
From: op200 <112297583+op200@users.noreply.github.com>
Date: Thu, 5 Dec 2024 16:04:18 +0800
Subject: [PATCH] v0.5
---
src/TEF.py | 31 +++++++++----------
src/TEF_log.py | 1 +
src/TEF_sub.py | 83 ++++++++++++++++++++++++++++++++++++++++++++++++--
src/TEF_tr.py | 83 +++++++++++++++++++-------------------------------
4 files changed, 128 insertions(+), 70 deletions(-)
diff --git a/src/TEF.py b/src/TEF.py
index d05eb94..f246dd3 100644
--- a/src/TEF.py
+++ b/src/TEF.py
@@ -8,7 +8,7 @@
PROJECT_NAME = "Translate Enhanced Framework"
-PROJECT_VERSION = "0.4.1"
+PROJECT_VERSION = "0.5"
PROJECT_URL = "https://github.com/ziyii01/use_zhconvert"
@@ -69,9 +69,12 @@ def run_command(command: str):
Tr.current_sub_list = get_all_sub_path(Tr.dir_path)
elif command == "tr all" or command == "translate all":
- for pathname in (sub.pathname for sub in Tr.current_sub_list
- if sub.lang == "zh-Hans" and sub.suffix == "ass"):
- Tr.translate(pathname, 'Traditional')
+        # Collect the subtitles that will actually be translated first, so the
+        # progress total matches the number of iterations instead of the size
+        # of the whole (unfiltered) subtitle list.
+        targets = [sub for sub in Tr.current_sub_list
+                   if sub.lang == "zh-Hans" and sub.suffix == "ass"]
+        for progress, sub in enumerate(targets, start=1):
+            log.info(f"Translate all zh-Hans ass: {progress} / {len(targets)} ...")
+            Tr.tr_and_overwrite(sub, 'Traditional')
elif command[:2] == "cd":
new_path = command.replace("cd","").strip()
@@ -95,30 +98,26 @@ def run_command(command: str):
print(repr(e))
else:
- pathname = command + ".zh-Hans.ass"
- if os.path.exists(pathname):
- Tr.translate(pathname, 'Traditional')
+        # Look up the first zh-Hans .ass subtitle whose prefix equals the
+        # command. next() with a default is used because indexing [0] on an
+        # empty match list raised IndexError for every unknown name.
+        sub = next(
+            (sub for sub in Tr.current_sub_list
+             if sub.prefix == command and sub.lang == "zh-Hans" and sub.suffix == "ass"),
+            None
+        )
+        if sub is None:
+            log.warning(f'Can not find the file "{command}.zh-Hans.ass", cancel the translation')
+        elif os.path.exists(sub.pathname):
+            log.info(f'Translate "{sub.pathname}"')
+            Tr.tr_and_overwrite(sub, 'Traditional')
else:
- log.warning(f'Can not find the file "{pathname}", cancel the translation')
+ log.warning(f'Can not find the file "{sub.pathname}", cancel the translation')
if __name__ == "__main__":
Tr.dir_path = os.getcwd()
Tr.current_sub_list = get_all_sub_path(Tr.dir_path)
-
- # def get_zh_hans_ass_list():
- # all_ass_list = get_all_ass_path(dir_path)
- # zh_hans_ass_list = match_suffix_in_list(all_ass_list, ".zh-Hans.ass")
- # return all_ass_list, zh_hans_ass_list
def show_list(list: list[Sub] | tuple[Sub], list_name: str):
print(f'{list_name}:')
for sub in list:
print(f' {sub.pathname}')
-
-
- # all_ass_list, zh_hans_ass_list = get_zh_hans_ass_list()
for argv in sys.argv[1:]:
run_command(argv)
diff --git a/src/TEF_log.py b/src/TEF_log.py
index 1139f01..0b24daf 100644
--- a/src/TEF_log.py
+++ b/src/TEF_log.py
@@ -1,5 +1,6 @@
import sys
from loguru import logger as log
+
log.remove()
log.add(sys.stderr, format="{time:YYYY.MM.DD HH:mm:ss.SS} [{level}] {message}")
\ No newline at end of file
diff --git a/src/TEF_sub.py b/src/TEF_sub.py
index b0860c0..3a2d7f0 100644
--- a/src/TEF_sub.py
+++ b/src/TEF_sub.py
@@ -1,16 +1,35 @@
import os
import sys
+import locale
+import codecs
+
+import chardet
from TEF_log import log
class Sub:
+ # must exist
+ path: str
+ prefix: str
+ lang: str | None
+ suffix: str
+
+ # generate
+ pathname: str
+
+ encoding: str | None
+ text: str
+
+
+
def splice_pathname(self):
self.pathname = '.'.join([
os.path.join(self.path, self.prefix),
self.lang, self.suffix
])
-
+
+
def split_pathname(self):
self.path, filename = os.path.split(self.pathname)
@@ -30,6 +49,41 @@ def split_pathname(self):
self.lang = None
+    def get_encoding(self):
+        """Detect the encoding of the file at ``self.pathname`` and store it in ``self.encoding``.
+
+        The raw bytes are passed to ``chardet.detect``. The result is then adjusted:
+
+        * no detection result -> ``'utf8'`` (a warning is logged);
+        * ``GB2312`` / ``cp936`` -> ``'gb18030-2000'``;
+        * any other name that does not start with ``UTF`` -> ``locale.getencoding()``
+          (a warning is logged).
+        """
+        with open(self.pathname, "rb") as file:
+            text_data = file.read()
+        encoding = chardet.detect(text_data)["encoding"]
+
+        if encoding is None:
+            log.warning(f'Can not find the encoding from "{self.pathname}", auto use UTF-8')
+            # Store the fallback on the instance: __init__ ignores the return
+            # value, so merely returning it left self.encoding unset.
+            self.encoding = 'utf8'
+            return
+
+        # Compare case-insensitively so that a lower-case name such as "utf-8"
+        # is not mistaken for a non-UTF encoding and replaced by the locale one.
+        detected = encoding.upper()
+
+        if detected in ('GB2312', 'CP936'):
+            encoding = 'gb18030-2000'
+
+        elif not detected.startswith("UTF"):
+            locale_encoding = locale.getencoding()
+            log.warning(f'Auto find the encoding is {encoding}, it may be {locale_encoding} ({codecs.lookup(locale_encoding).name}), has been auto changed it')
+            encoding = locale_encoding
+
+        log.info(f"Auto find the encoding is {encoding} ({codecs.lookup(encoding).name})")
+
+        self.encoding = encoding
+
+
+    def get_text(self):
+        """Read the file at ``self.pathname`` into ``self.text`` using ``self.encoding``.
+
+        Raises ``Exception`` when ``self.encoding`` is empty or ``None``. When
+        the file does not exist, an error is logged and ``self.text`` is set
+        to the empty string.
+        """
+        if not self.encoding:
+            raise Exception("A Sub obj is missing the member var 'encoding': Sub.get_text")
+
+        # Guard clause: a missing file yields empty text rather than an error.
+        if not os.path.exists(self.pathname):
+            log.error(f'The input file "{self.pathname}" is not exist')
+            self.text = ""
+            return
+
+        with open(self.pathname, 'rt', encoding = self.encoding) as source:
+            self.text = source.read()
+
+
+
def __init__(self, **vars):
if 'pathname' in vars:
@@ -44,8 +98,21 @@ def __init__(self, **vars):
self.splice_pathname()
else:
- log.error("The class Sub init error")
- sys.exit()
+ raise Exception("This Sub obj init error, obj has been corrupted")
+
+ log.info(f"Creating new Sub obj: pathname: {self.pathname}")
+
+ if 'encoding' in vars:
+ self.encoding = vars['encoding']
+ else:
+ self.get_encoding()
+
+ if 'text' in vars:
+ self.text = vars['text']
+ else:
+ self.get_text()
+
+
# ==
def __eq__(self, other):
@@ -74,3 +141,13 @@ def __ge__(self, other):
# !=
def __ne__(self, other):
return not self == other
+
+
+
+    def overwrite_file(self) -> None:
+        """Write ``self.text`` to ``self.pathname`` encoded with ``self.encoding``.
+
+        An existing file at that path is replaced; a warning is logged first.
+        """
+        already_present = os.path.exists(self.pathname)
+        if already_present:
+            log.warning(f'The output file "{self.pathname}" already exists, auto overwrite it')
+
+        with open(self.pathname, "wt", encoding = self.encoding) as target:
+            target.write(self.text)
\ No newline at end of file
diff --git a/src/TEF_tr.py b/src/TEF_tr.py
index f55abb4..f4d4882 100644
--- a/src/TEF_tr.py
+++ b/src/TEF_tr.py
@@ -1,46 +1,20 @@
-import os
-import locale
-import codecs
+import copy
import httpx
-import chardet
from TEF_log import log
from TEF_sub import Sub
-
-def get_encoding_by_text(pathname: str) -> str | None:
- with open(pathname, "rb") as file:
- text_data = file.read()
- encoding = chardet.detect(text_data)["encoding"]
-
- if encoding == None:
- log.warning(f'Can not find the encoding from "{pathname}", auto use UTF-8')
- return 'utf8'
-
- if encoding == 'GB2312' or encoding == 'cp936':
- encoding = 'gb18030-2000'
-
- if not encoding.startswith("UTF") and not encoding == 'gb18030-2000':
- locale_encoding = locale.getencoding()
- log.warning(f'Auto find the encoding is {encoding}, it may be {locale_encoding} ({codecs.lookup(locale_encoding).name}), has been auto changed it')
- encoding = locale_encoding
-
- log.info(f"Auto find the encoding is {encoding} ({codecs.lookup(encoding).name})")
- return encoding
-
-
-
FHJ_API_URL = "https://api.zhconvert.org"
class Tr:
- dir_path:str
+ dir_path: str
current_sub_list: list[Sub]
@staticmethod
- def convert(
+ def fhj_convert(
text: str,
converter: str,
apiKey = "",
@@ -69,7 +43,8 @@ def convert(
log.error(f"Tr.convert Timeout, trying to reconnect. Times of reconnect: {time}. Remaining reconnect times: {max_timeout_times-time}.")
return response
-
+
+
@staticmethod
def find_langTag_by_converter(converter: str) -> str:
lang_tag: str = "Unknow"
@@ -87,34 +62,40 @@ def find_langTag_by_converter(converter: str) -> str:
return lang_tag
+
@staticmethod
- def translate(input_pathname: str, converter: str) -> bool:
- if not os.path.exists(input_pathname):
- log.error(f'The input file "{input_pathname}" is not exist')
- return False
-
- encoding = get_encoding_by_text(input_pathname)
-
- with open(input_pathname, "rt", encoding = encoding) as file:
- text_data = file.read()
- response = Tr.convert(text_data, converter)
+ def translate(sub: Sub, converter: str) -> Sub | None:
+
+ response = Tr.fhj_convert(sub.text, converter)
if response.status_code != 200:
log.error(f"网址请求失败: {response.text}")
- return False
+ return None
+
response_json_data = response.json()
if response_json_data["code"] != 0:
log.error(f"繁化姬接口失败: {response_json_data['msg']}")
- return False
+ return None
+
else:
- last_dot_index = input_pathname.rfind('.')
- second_last_dot_index = input_pathname.rindex('.', 0, last_dot_index)
- output_pathname = f'{input_pathname[:second_last_dot_index]}.{Tr.find_langTag_by_converter(converter)}.ass'
+ new_sub = copy.deepcopy(sub)
- if os.path.exists(output_pathname):
- log.warning(f'The output file "{output_pathname}" already exists, auto overwrite it')
+ new_sub.lang = Tr.find_langTag_by_converter(converter)
+ new_sub.splice_pathname()
- with open(output_pathname, "wt", encoding = 'utf_8_sig') as file:
- file.write(response_json_data["data"]["text"])
+ new_sub.text = response_json_data["data"]["text"]
+
- log.info(f'Translate the file "{input_pathname}" to "{output_pathname}" success')
- return True
+
+ return new_sub
+
+
+    @staticmethod
+    def tr_and_overwrite(sub: Sub, converter: str, encoding: str = 'utf_8_sig') -> None:
+        """Translate ``sub`` with ``converter`` and write the result to disk.
+
+        The translated Sub returned by ``Tr.translate`` gets ``encoding`` as its
+        output encoding and is written with ``Sub.overwrite_file``. Nothing is
+        written when the translation fails.
+        """
+        output_sub = Tr.translate(sub, converter)
+
+        # Tr.translate returns None when the HTTP request or the API call
+        # failed (it has already logged the reason), so there is no result
+        # to write and dereferencing it would raise AttributeError.
+        if output_sub is None:
+            log.error(f'Translate the file "{sub.pathname}" failed, cancel the overwrite')
+            return
+
+        output_sub.encoding = encoding
+        output_sub.overwrite_file()
+
+        log.info(f'Translate the file "{sub.pathname}" to "{output_sub.pathname}" success')
+