Skip to content

Commit

Permalink
v0.5
Browse files Browse the repository at this point in the history
  • Loading branch information
op200 authored Dec 5, 2024
1 parent f2d661d commit 8bd1465
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 70 deletions.
31 changes: 15 additions & 16 deletions src/TEF.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


PROJECT_NAME = "Translate Enhanced Framework"
PROJECT_VERSION = "0.4.1"
PROJECT_VERSION = "0.5"
PROJECT_URL = "https://github.com/ziyii01/use_zhconvert"


Expand Down Expand Up @@ -69,9 +69,12 @@ def run_command(command: str):
Tr.current_sub_list = get_all_sub_path(Tr.dir_path)

elif command == "tr all" or command == "translate all":
for pathname in (sub.pathname for sub in Tr.current_sub_list
if sub.lang == "zh-Hans" and sub.suffix == "ass"):
Tr.translate(pathname, 'Traditional')
prograss = 1
for sub in (sub for sub in Tr.current_sub_list
if sub.lang == "zh-Hans" and sub.suffix == "ass"):
log.info(f"Translate all zh-Hans ass: {prograss} / {len(Tr.current_sub_list)} ...")
Tr.tr_and_overwrite(sub, 'Traditional')
prograss = prograss+1

elif command[:2] == "cd":
new_path = command.replace("cd","").strip()
Expand All @@ -95,30 +98,26 @@ def run_command(command: str):
print(repr(e))

else:
pathname = command + ".zh-Hans.ass"
if os.path.exists(pathname):
Tr.translate(pathname, 'Traditional')
sub = list(filter(
lambda sub: sub.prefix == command and sub.lang == "zh-Hans" and sub.suffix == "ass",
Tr.current_sub_list
))[0]
if os.path.exists(sub.pathname):
log.info(f'Translate "{sub.pathname}"')
Tr.tr_and_overwrite(sub, 'Traditional')
else:
log.warning(f'Can not find the file "{pathname}", cancel the translation')
log.warning(f'Can not find the file "{sub.pathname}", cancel the translation')


if __name__ == "__main__":

Tr.dir_path = os.getcwd()
Tr.current_sub_list = get_all_sub_path(Tr.dir_path)

# def get_zh_hans_ass_list():
# all_ass_list = get_all_ass_path(dir_path)
# zh_hans_ass_list = match_suffix_in_list(all_ass_list, ".zh-Hans.ass")
# return all_ass_list, zh_hans_ass_list

def show_list(list: list[Sub] | tuple[Sub], list_name: str):
    """Print ``list_name`` as a heading, then one line per subtitle pathname.

    Args:
        list: The Sub objects to display (only ``pathname`` is read).
        list_name: Label printed before the entries, followed by a colon.
    """
    print(f'{list_name}:')
    pathnames = (entry.pathname for entry in list)
    for pathname in pathnames:
        print(f' {pathname}')


# all_ass_list, zh_hans_ass_list = get_zh_hans_ass_list()

for argv in sys.argv[1:]:
run_command(argv)
Expand Down
1 change: 1 addition & 0 deletions src/TEF_log.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import sys

from loguru import logger as log

# Drop the handlers registered so far so that the sink added below is the
# only one writing records (presumably this removes loguru's default stderr
# handler -- confirm against the loguru documentation).
log.remove()
# Send every record to stderr using the project's own line format:
# a green timestamp followed by "[LEVEL] message" wrapped in blue/level tags.
log.add(sys.stderr, format="<green>{time:YYYY.MM.DD HH:mm:ss.SS}</green><blue><level> [{level}] {message}</level></blue>")
83 changes: 80 additions & 3 deletions src/TEF_sub.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,35 @@
import os
import sys
import locale
import codecs

import chardet

from TEF_log import log

class Sub:

# must exist
path: str
prefix: str
lang: str | None
suffix: str

# generate
pathname: str

encoding: str | None
text: str



def splice_pathname(self):
self.pathname = '.'.join([
os.path.join(self.path, self.prefix),
self.lang, self.suffix
])



def split_pathname(self):
self.path, filename = os.path.split(self.pathname)

Expand All @@ -30,6 +49,41 @@ def split_pathname(self):
self.lang = None


def get_encoding(self):
    """Detect the encoding of the file at ``self.pathname`` and store it.

    The raw bytes are passed to ``chardet.detect``. The detected name is
    then normalised:

    * no result        -> ``'utf8'`` (a warning is logged);
    * GB2312 / cp936   -> ``'gb18030-2000'``;
    * any other name that is not a UTF variant -> the locale encoding
      (a warning is logged).

    The outcome is always written to ``self.encoding``. Previously the
    "no result" path returned without assigning it, which left the object
    without an ``encoding`` attribute for ``get_text`` to use.

    Returns:
        ``'utf8'`` when detection fails (kept for backward compatibility),
        otherwise ``None``.
    """
    with open(self.pathname, "rb") as file:
        text_data = file.read()
    encoding = chardet.detect(text_data)["encoding"]

    if encoding is None:
        log.warning(f'Can not find the encoding from "{self.pathname}", auto use UTF-8')
        self.encoding = 'utf8'
        return 'utf8'

    if encoding in ('GB2312', 'cp936'):
        encoding = 'gb18030-2000'

    # Compare case-insensitively: the detector may report lowercase names
    # such as "utf-8", which must not be replaced by the locale encoding.
    elif not encoding.upper().startswith("UTF"):
        locale_encoding = locale.getencoding()
        log.warning(f'Auto find the encoding is {encoding}, it may be {locale_encoding} ({codecs.lookup(locale_encoding).name}), has been auto changed it')
        encoding = locale_encoding

    log.info(f"Auto find the encoding is {encoding} ({codecs.lookup(encoding).name})")

    self.encoding = encoding


def get_text(self):
if not self.encoding:
raise Exception("A Sub obj is missing the member var 'encoding': Sub.get_text")

if os.path.exists(self.pathname):
with open(self.pathname, 'rt', encoding = self.encoding) as file:
self.text = file.read()
else:
log.error(f'The input file "{self.pathname}" is not exist')
self.text = ""




def __init__(self, **vars):
if 'pathname' in vars:
Expand All @@ -44,8 +98,21 @@ def __init__(self, **vars):
self.splice_pathname()

else:
log.error("The class Sub init error")
sys.exit()
raise Exception("This Sub obj init error, obj has been corrupted")

log.info(f"Creating new Sub obj: pathname: {self.pathname}")

if 'encoding' in vars:
self.encoding = vars['encoding']
else:
self.get_encoding()

if 'text' in vars:
self.text = vars['text']
else:
self.get_text()



# ==
def __eq__(self, other):
Expand Down Expand Up @@ -74,3 +141,13 @@ def __ge__(self, other):
# !=
def __ne__(self, other):
return not self == other



def overwrite_file(self) -> None:
    """Write ``self.text`` to ``self.pathname`` encoded as ``self.encoding``.

    An existing file at that path is replaced; a warning is logged first so
    the overwrite is visible in the log.
    """
    target_exists = os.path.exists(self.pathname)
    if target_exists:
        log.warning(f'The output file "{self.pathname}" already exists, auto overwrite it')

    with open(self.pathname, mode="wt", encoding=self.encoding) as out:
        out.write(self.text)
83 changes: 32 additions & 51 deletions src/TEF_tr.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,20 @@
import os
import locale
import codecs
import copy

import httpx
import chardet

from TEF_log import log
from TEF_sub import Sub



def get_encoding_by_text(pathname: str) -> str | None:
    """Detect the text encoding of the file at ``pathname``.

    The raw bytes are passed to ``chardet.detect`` and the reported name is
    normalised before being returned:

    * no result        -> ``'utf8'`` (a warning is logged);
    * GB2312 / cp936   -> ``'gb18030-2000'``;
    * any other name that is not a UTF variant -> the locale encoding
      (a warning is logged).

    Args:
        pathname: Path of the file to inspect.

    Returns:
        The encoding name to use when opening the file as text.
    """
    with open(pathname, "rb") as file:
        text_data = file.read()
    encoding = chardet.detect(text_data)["encoding"]

    if encoding is None:
        log.warning(f'Can not find the encoding from "{pathname}", auto use UTF-8')
        return 'utf8'

    if encoding in ('GB2312', 'cp936'):
        encoding = 'gb18030-2000'

    # Compare case-insensitively: the detector may report lowercase names
    # such as "utf-8", which must not be replaced by the locale encoding.
    if not encoding.upper().startswith("UTF") and encoding != 'gb18030-2000':
        locale_encoding = locale.getencoding()
        log.warning(f'Auto find the encoding is {encoding}, it may be {locale_encoding} ({codecs.lookup(locale_encoding).name}), has been auto changed it')
        encoding = locale_encoding

    log.info(f"Auto find the encoding is {encoding} ({codecs.lookup(encoding).name})")
    return encoding



FHJ_API_URL = "https://api.zhconvert.org"

class Tr:

dir_path:str
dir_path: str
current_sub_list: list[Sub]

@staticmethod
def convert(
def fhj_convert(
text: str,
converter: str,
apiKey = "",
Expand Down Expand Up @@ -69,7 +43,8 @@ def convert(
log.error(f"Tr.convert Timeout, trying to reconnect. Times of reconnect: {time}. Remaining reconnect times: {max_timeout_times-time}.")

return response



@staticmethod
def find_langTag_by_converter(converter: str) -> str:
lang_tag: str = "Unknow"
Expand All @@ -87,34 +62,40 @@ def find_langTag_by_converter(converter: str) -> str:

return lang_tag


# NOTE(review): this span is a diff hunk without +/- markers, so two revisions
# of `translate` are interleaved line by line. The first signature takes a
# pathname and returns bool; the second takes a Sub and returns `Sub | None`.
# The `return False` / `return None` pairs below appear to be the same
# statement in each revision -- confirm against the repository before editing.
@staticmethod
def translate(input_pathname: str, converter: str) -> bool:
# Pathname-based revision: check the file, detect its encoding, read it and
# send the text to Tr.convert.
if not os.path.exists(input_pathname):
log.error(f'The input file "{input_pathname}" is not exist')
return False

encoding = get_encoding_by_text(input_pathname)

with open(input_pathname, "rt", encoding = encoding) as file:
text_data = file.read()
response = Tr.convert(text_data, converter)
# Sub-based revision: the text is already loaded on the Sub object, so it is
# sent to Tr.fhj_convert directly.
def translate(sub: Sub, converter: str) -> Sub | None:

response = Tr.fhj_convert(sub.text, converter)
# Any HTTP status other than 200 is logged and treated as a failure.
if response.status_code != 200:
log.error(f"网址请求失败: {response.text}")
return False
return None

# The JSON body carries its own status: a non-zero "code" is logged together
# with the service's "msg" and treated as a failure.
response_json_data = response.json()
if response_json_data["code"] != 0:
log.error(f"繁化姬接口失败: {response_json_data['msg']}")
return False
return None

else:
# Pathname-based revision: replace the segment between the last two dots with
# the converter's language tag to build the output pathname.
last_dot_index = input_pathname.rfind('.')
second_last_dot_index = input_pathname.rindex('.', 0, last_dot_index)
output_pathname = f'{input_pathname[:second_last_dot_index]}.{Tr.find_langTag_by_converter(converter)}.ass'
# Sub-based revision: work on a deep copy so the input Sub is left untouched.
new_sub = copy.deepcopy(sub)

if os.path.exists(output_pathname):
log.warning(f'The output file "{output_pathname}" already exists, auto overwrite it')
# Retarget the copy: new language tag, then rebuild its pathname from the parts.
new_sub.lang = Tr.find_langTag_by_converter(converter)
new_sub.splice_pathname()

with open(output_pathname, "wt", encoding = 'utf_8_sig') as file:
file.write(response_json_data["data"]["text"])
# The converted text is read from data.text of the response JSON.
new_sub.text = response_json_data["data"]["text"]


log.info(f'Translate the file "{input_pathname}" to "{output_pathname}" success')
return True

return new_sub


@staticmethod
def tr_and_overwrite(sub: Sub, converter: str, encoding: str = 'utf_8_sig') -> None:
    """Translate ``sub`` and write the result to the translated Sub's pathname.

    Args:
        sub: The subtitle to translate.
        converter: Converter name passed through to ``Tr.translate``.
        encoding: Encoding used for the output file.

    If ``Tr.translate`` returns ``None`` (the request or the conversion
    failed) nothing is written; an error is logged and the method returns,
    so a batch run can continue with the next subtitle.
    """
    output_sub = Tr.translate(sub, converter)

    # translate() signals failure with None; dereferencing it would raise
    # AttributeError and abort the caller.
    if output_sub is None:
        log.error(f'Translate the file "{sub.pathname}" failed, nothing has been written')
        return

    output_sub.encoding = encoding
    output_sub.overwrite_file()

    log.info(f'Translate the file "{sub.pathname}" to "{output_sub.pathname}" success')

0 comments on commit 8bd1465

Please sign in to comment.