-
Notifications
You must be signed in to change notification settings - Fork 13
/
GUI.py
621 lines (558 loc) · 31.3 KB
/
GUI.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
# Printed before the imports below run; presumably so the user sees feedback while the
# heavier libraries are being imported (NOTE(review): confirm intent before moving it).
print("正在加载 (Loading...)")
import logging
from datetime import datetime
import scripts.Renderer as Renderer
import scripts.PDF2ImagePlusRenderer as PDFHandler
import scripts.PDFMerger as PDFMerger
import scripts.TempCleaner as TempCleaner
from transformers import AutoModel, AutoTokenizer
import gradio as gr
import os
import glob
import json
from time import sleep
##########################
# 加载配置文件 (Load configuration file)
# Read the application settings from Configs/Config.json into the module-level `config` dict.
config_path = os.path.join("Configs", "Config.json")
try:
    with open(config_path, 'r', encoding='utf-8') as file:
        config = json.load(file)
except FileNotFoundError:
    # Nothing else can run without the configuration, so tell the user and stop.
    print("配置文件未找到 (The configuration file was not found)")
    print("程序将在3秒后退出")
    # Short pause so the message stays visible before the process ends.
    sleep(3)
    # `exit()` is injected by the `site` module for interactive use and may be missing
    # (e.g. `python -S`, frozen executables); raising SystemExit always works.
    raise SystemExit(1) from None
##########################
# 日志记录器 (Logger)
# Set up logging: one timestamped file under Logs/ plus WARNING-and-above on the console.
current_time = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
logger = logging.getLogger(__name__)
# basicConfig opens the log file immediately and fails if the directory does not exist yet.
os.makedirs("Logs", exist_ok=True)
logging.basicConfig(
    filename=os.path.join("Logs", f"{current_time}.log"),
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    encoding='utf-8',
)
# Accepted values of the `logger_level` config entry (matched case-insensitively).
_level_by_name = {
    'debug': logging.DEBUG,
    'info': logging.INFO,
    'warning': logging.WARNING,
    'error': logging.ERROR,
    'critical': logging.CRITICAL,
}
try:
    lvl = config['logger_level']
except KeyError:
    logger.warning("配置文件中未找到日志级别，回滚到 INFO 级别 (The log level was not found in the configuration file, rolling back to INFO level)")
    logger.warning("请检查配置文件 (Please check the configuration file)")
    logger.setLevel(logging.INFO)
else:
    # str() keeps a non-string value (e.g. a number in the JSON) from crashing on .lower();
    # it simply falls through to the "invalid level" path below.
    _level = _level_by_name.get(str(lvl).lower())
    if _level is None:
        logger.warning("无效的日志级别，回滚到 INFO 级别 (Invalid log level, rolling back to INFO level)")
        logger.warning("请检查配置文件 (Please check the configuration file)")
        _level = logging.INFO
    logger.setLevel(_level)
# Console handler: only WARNING and above are echoed to the terminal.
console = logging.StreamHandler()
console.setLevel(logging.WARNING)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
console.setFormatter(formatter)
logger.addHandler(console)
logger.info("日志记录器已初始化 (The logger has been initialized)")
##########################
# 加载语言设置 (Load language settings)
# Pick the GUI language from Locales/gui/config.json, then load that language's string table
# into the module-level `local` dict used for every label and message below.
try:
    with open(os.path.join("Locales", "gui", "config.json"), 'r', encoding='utf-8') as file:
        lang_config = json.load(file)
        lang = lang_config['language']
except FileNotFoundError:
    logger.warning(
        "语言配置文件未找到，回滚到简体中文 (The language configuration file was not found, rolling back to Simplified Chinese)")
    lang = 'zh_CN'
try:
    with open(os.path.join("Locales", "gui", f"{lang}.json"), 'r', encoding='utf-8') as file:
        local = json.load(file)
    logger.info(f"语言文件已加载 (The language file has been loaded): {lang}")
except FileNotFoundError:
    # Without a string table no widget can be labelled, so stop here.
    logger.critical(f"语言文件未找到 (The language file was not found): {lang}")
    print("程序将在3秒后退出")
    # Short pause so the message stays visible before the process ends.
    sleep(3)
    # `exit()` comes from the `site` module and is meant for interactive sessions only;
    # raising SystemExit is the portable equivalent.
    raise SystemExit(1) from None
##########################
# Module-level handles for the OCR model and its tokenizer; both stay None until load_model() runs.
model, tokenizer = None, None
##########################
# 加载模型函数 (Loading model function)
def load_model():
    """
    Load the tokenizer and the model from the local ``models`` directory into the module globals.

    Both globals are reset to None first, then the tokenizer is loaded, then the model
    (with ``device_map='cuda'``), which is finally switched to eval mode and moved to CUDA.

    :return: The localized "model already loaded" status text.
    """
    global model, tokenizer
    logger.info("[load_model] 正在加载模型 (Loading model)")
    # Drop the references to anything loaded earlier before loading again.
    model = tokenizer = None
    tokenizer = AutoTokenizer.from_pretrained('models', trust_remote_code=True)
    # The tokenizer must exist first: its EOS token id is reused as the pad token id.
    model_options = {
        'trust_remote_code': True,
        'low_cpu_mem_usage': True,
        'device_map': 'cuda',
        'use_safetensors': True,
        'pad_token_id': tokenizer.eos_token_id,
    }
    model = AutoModel.from_pretrained('models', **model_options)
    model = model.eval().cuda()
    logger.info("[load_model] 模型加载完成 (Model loading completed)")
    return local["info_model_already_loaded"]
##########################
# 卸载模型函数 (Unloading model function)
def unload_model():
    """
    Drop the module-level references to the model and tokenizer.

    :return: The localized "model not loaded" status text.
    """
    global model, tokenizer
    model, tokenizer = None, None
    logger.info("[unload_model] 模型已卸载 (The model has been unloaded)")
    return local["info_model_not_loaded"]
##########################
# 决定是否加载模型 (Deciding whether to load the model)
# Honour the `load_model_on_start` setting: load right away, or only log that loading was skipped.
if not config["load_model_on_start"]:
    logger.warning("由于你的设置，模型加载已跳过 (The model loading has been skipped due to your settings)")
else:
    load_model()
##########################
# 主题 (Theme)
# Overrides applied on top of the Ocean base theme.
_theme_overrides = {
    'body_background_fill': '*neutral_50',
    'body_background_fill_dark': '*neutral_950',
    'body_text_color': '*neutral_950',
    'body_text_color_dark': '*neutral_200',
    'background_fill_secondary': '*neutral_100',
    'button_transform_active': 'scale(0.98)',
    'button_large_radius': '*radius_sm',
    'button_small_radius': '*radius_sm',
}
try:
    _ocean = gr.themes.Ocean(primary_hue="indigo", secondary_hue="violet", radius_size="sm")
    theme = _ocean.set(**_theme_overrides)
    logger.info("主题加载成功 (Successfully loaded theme)")
except AttributeError:
    # Presumably raised when the installed Gradio has no Ocean theme (NOTE(review): confirm);
    # fall back to the stock theme in that case.
    logger.warning("主题加载失败，回滚到默认主题 (Theme loading failed, rolling back to default theme")
    theme = gr.themes.Default()
##########################
# 更新图片名称 (Updating image name)
def update_img_name(image_uploaded):
    """
    Refresh the image-name textbox whenever the uploaded image changes.

    :param image_uploaded: File path of the uploaded image, or None/empty when the image was cleared.
    :return: A ``gr.Textbox`` carrying the image's file name, or ``"ocr"`` (the textbox's
             initial value) when no image is present.
    """
    # The change event also fires when the image is removed; os.path.basename(None) would
    # raise TypeError, so fall back to the textbox's initial value in that case.
    image_name_with_extension = os.path.basename(image_uploaded) if image_uploaded else "ocr"
    logger.debug(f"[update_img_name] 图片名称已更新 (The image name has been updated): {image_name_with_extension}")
    return gr.Textbox(label=local["label_img_name"], value=image_name_with_extension)
##########################
# 更新 PDF 名称 (Updating PDF name)
def update_pdf_name(pdf_uploaded):
    """
    Refresh the read-only PDF-name textbox whenever the uploaded PDF changes.

    :param pdf_uploaded: File path of the uploaded PDF, or None/empty when the file was removed.
    :return: A ``gr.Textbox`` carrying the PDF's file name, or ``"input.pdf"`` (the textbox's
             initial value) when no file is present.
    """
    # The change event also fires when the file is removed; os.path.basename(None) would
    # raise TypeError, so fall back to the textbox's initial value in that case.
    pdf_name_with_extension = os.path.basename(pdf_uploaded) if pdf_uploaded else "input.pdf"
    logger.debug(f"[update_pdf_name] PDF 名称已更新 (The PDF name has been updated): {pdf_name_with_extension}")
    # Use the name box's own label key; "label_pdf_file" belongs to the upload widget and
    # would relabel this textbox after the first upload.
    return gr.Textbox(label=local["label_pdf_file_name"], value=pdf_name_with_extension)
##########################
# 更新保存 PDF 勾选框可见性(PDF 标签页)/ Updating visibility of save as PDF checkbox (PDF tab)
def update_pdf_pdf_convert_confirm_visibility(pdf_ocr_mode):
    """
    Show the "save as PDF" checkbox on the PDF tab only in ``render`` mode.

    :param pdf_ocr_mode: Currently selected PDF OCR mode.
    :return: A ``gr.Checkbox`` that is visible in render mode; otherwise hidden and unchecked.
    """
    if pdf_ocr_mode != "render":
        logger.debug(
            "[update_pdf_pdf_convert_confirm_visibility] PDF 标签页的保存 PDF 勾选框已禁用 (The save PDF checkbox on the PDF tab has been disabled)")
        # Hiding also unchecks the box.
        return gr.Checkbox(label=local["label_save_as_pdf"], interactive=True, visible=False, value=False)
    logger.debug(
        "[update_pdf_pdf_convert_confirm_visibility] PDF 标签页的保存 PDF 勾选框已启用 (The save PDF checkbox on the PDF tab has been enabled)")
    return gr.Checkbox(label=local["label_save_as_pdf"], interactive=True, visible=True)
##########################
# 更新合并 PDF 勾选框可见性(PDF 标签页) (Updating visibility of merge PDF checkbox (PDF tab))
def update_pdf_pdf_merge_confirm_visibility(pdf_convert_confirm):
    """
    Show the "merge PDF" checkbox on the PDF tab only while "save as PDF" is checked.

    :param pdf_convert_confirm: Current state of the "save as PDF" checkbox.
    :return: A ``gr.Checkbox`` that is visible when saving as PDF; otherwise hidden and unchecked.
    """
    if not pdf_convert_confirm:
        logger.debug(
            "[update_pdf_pdf_merge_confirm_visibility] PDF 标签页的合并 PDF 勾选框已禁用 (The merge PDF checkbox on the PDF tab has been disabled)")
        # Hiding also unchecks the box.
        return gr.Checkbox(label=local["label_merge_pdf"], interactive=True, visible=False, value=False)
    logger.debug(
        "[update_pdf_pdf_merge_confirm_visibility] PDF 标签页的合并 PDF 勾选框已启用 (The merge PDF checkbox on the PDF tab has been enabled)")
    return gr.Checkbox(label=local["label_merge_pdf"], interactive=True, visible=True)
##########################
# 更新目标 DPI 输入框可见性(PDF 标签页) (Updating visibility of target DPI input box (PDF tab))
def update_pdf_pdf_dpi_visibility(pdf_ocr_mode):
    """
    Hide the target-DPI input on the PDF tab in ``merge`` mode and show it in every other mode.

    :param pdf_ocr_mode: Currently selected PDF OCR mode.
    :return: A ``gr.Number`` (72-300, step 1, value 150) with the matching visibility.
    """
    show_dpi = pdf_ocr_mode != "merge"
    if show_dpi:
        logger.debug(
            "[update_pdf_pdf_dpi_visibility] PDF 标签页的目标 DPI 输入框已启用 (The target DPI input box on the PDF tab has been enabled)")
    else:
        logger.debug(
            "[update_pdf_pdf_dpi_visibility] PDF 标签页的目标 DPI 输入框已禁用 (The target DPI input box on the PDF tab has been disabled)")
    return gr.Number(label=local["label_target_dpi"], minimum=72, maximum=300, step=1, value=150,
                     visible=show_dpi)
##########################
# 提取prefix (Extracting prefix)
def extract_pdf_pattern(filename):
    """
    Extract the prefix from a file name of the form ``<string>_0.pdf``.

    Only the last underscore separates the prefix from the page suffix, so the prefix may
    itself contain underscores (``my_doc_0.pdf`` -> ``my_doc``).

    :param filename: File name to inspect.
    :return: Everything before the trailing ``_0.pdf``.
    :raises ValueError: If the file name does not end in ``_0.pdf``.
    """
    # maxsplit=1 makes rsplit cut at the LAST underscore only; without it every underscore
    # splits and any prefix containing "_" was wrongly rejected.
    parts = filename.rsplit('_', 1)
    logger.debug(f"[extract_pdf_pattern] 文件名分割结果 (The result of splitting the filename): {parts}")
    # The part after the last underscore must be exactly '0.pdf'.
    if len(parts) == 2 and parts[1] == '0.pdf':
        return parts[0]
    logger.error(
        "[extract_pdf_pattern] 文件名不满足格式 <string>_0.pdf (Filename does not meet the format <string>_0.pdf)")
    raise ValueError("输入不满足格式：<string>_0.pdf (Input does not meet the format: <string>_0.pdf)")
##########################
# 进行 OCR 识别 (Performing OCR recognition)
def ocr(image_uploaded, fine_grained_box_x1, fine_grained_box_y1, fine_grained_box_x2,
        fine_grained_box_y2, OCR_type, fine_grained_color, pdf_convert_confirm, clean_temp):
    """
    Run OCR on a single uploaded image in the selected mode.

    :param image_uploaded: File path of the uploaded image.
    :param fine_grained_box_x1: Box coordinate used by the fine-grained box modes.
    :param fine_grained_box_y1: Box coordinate used by the fine-grained box modes.
    :param fine_grained_box_x2: Box coordinate used by the fine-grained box modes.
    :param fine_grained_box_y2: Box coordinate used by the fine-grained box modes.
    :param OCR_type: Mode value chosen in the mode dropdown (``ocr``, ``format``,
                     ``fine-grained-*``, ``multi-crop-*`` or ``render``).
    :param fine_grained_color: Color passed to the model in the fine-grained color modes.
    :param pdf_convert_confirm: In render mode, whether the render is also converted to PDF.
    :param clean_temp: In render mode, whether intermediate HTML files in ``result`` are removed.
    :return: Text for the result box: the model output, a localized status message, or the
             text of the exception that occurred.
    """
    # Shown when OCR_type matches none of the known modes.
    res = local["error_ocr_mode_none"]
    gr.Info(message=local["info_ocr_started"])
    # Make sure the output folder exists.
    if not os.path.exists("result"):
        os.makedirs("result")
        logger.info("[ocr] result 文件夹不存在，已创建 (Result folder doesn't exists, created)")
    try:
        logger.info("[ocr] 正在执行 OCR (Performing OCR)")
        if OCR_type == "ocr":
            logger.debug("[ocr] 当前 OCR 模式：ocr (Current ocr mode: ocr)")
            res = model.chat(tokenizer, image_uploaded, ocr_type='ocr')
        elif OCR_type == "format":
            logger.debug("[ocr] 当前 OCR 模式：format (Current ocr mode: format)")
            res = model.chat(tokenizer, image_uploaded, ocr_type='format')
        elif OCR_type == "fine-grained-ocr":
            logger.debug("[ocr] 当前 OCR 模式：fine-grained-ocr (Current ocr mode: fine-grained-ocr)")
            # The box is handed to the model as the string "[x1, y1, x2, y2]".
            box = f"[{fine_grained_box_x1}, {fine_grained_box_y1}, {fine_grained_box_x2}, {fine_grained_box_y2}]"
            logger.debug(f"[ocr] 当前 OCR 框 (Current ocr box): {box}")
            res = model.chat(tokenizer, image_uploaded, ocr_type='ocr', ocr_box=box)
        elif OCR_type == "fine-grained-format":
            logger.debug("[ocr] 当前 OCR 模式：fine-grained-format (Current ocr mode: fine-grained-format)")
            box = f"[{fine_grained_box_x1}, {fine_grained_box_y1}, {fine_grained_box_x2}, {fine_grained_box_y2}]"
            logger.debug(f"[ocr] 当前 OCR 框 (Current ocr box): {box}")
            res = model.chat(tokenizer, image_uploaded, ocr_type='format', ocr_box=box)
        elif OCR_type == "fine-grained-color-ocr":
            logger.debug("[ocr] 当前 OCR 模式：fine-grained-color-ocr (Current ocr mode: fine-grained-color-ocr)")
            res = model.chat(tokenizer, image_uploaded, ocr_type='ocr', ocr_color=fine_grained_color)
        elif OCR_type == "fine-grained-color-format":
            logger.debug("[ocr] 当前 OCR 模式：fine-grained-color-format (Current ocr mode: fine-grained-color-format)")
            res = model.chat(tokenizer, image_uploaded, ocr_type='format', ocr_color=fine_grained_color)
        elif OCR_type == "multi-crop-ocr":
            logger.debug("[ocr] 当前 OCR 模式：multi-crop-ocr (Current ocr mode: multi-crop-ocr)")
            res = model.chat_crop(tokenizer, image_uploaded, ocr_type='ocr')
        elif OCR_type == "multi-crop-format":
            logger.debug("[ocr] 当前 OCR 模式：multi-crop-format (Current ocr mode: multi-crop-format)")
            res = model.chat_crop(tokenizer, image_uploaded, ocr_type='format')
        elif OCR_type == "render":
            logger.debug("[ocr] 当前 OCR 模式：render (Current ocr mode: render)")
            success = Renderer.render(model=model, tokenizer=tokenizer, image_path=image_uploaded,
                                      convert_to_pdf=pdf_convert_confirm, wait=config["pdf_render_wait"],
                                      time=config["pdf_render_wait_time"])
            image_name_with_extension = os.path.basename(image_uploaded)
            logger.debug(f"[ocr] 获取到图像名称 (Got image name): {image_name_with_extension}")
            # renderer() in this file treats the return value as a status code (1 = ok,
            # 2 = no model/image, 3 = failed). A plain truthiness test would report codes 2
            # and 3 as success; `== 1` is also correct if a bool is ever returned.
            if success == 1:
                res = local["info_render_success"].format(img_file=image_name_with_extension)
                logger.info("[ocr] 渲染已完成 (Render completed)")
                stem = os.path.splitext(image_name_with_extension)[0]
                if clean_temp and pdf_convert_confirm:
                    # With PDF conversion, all three intermediate HTML files are removed.
                    logger.info("[ocr] 正在清理临时文件 (Cleaning temporary files)")
                    TempCleaner.cleaner(["result"],
                                        [f"{stem}-gb2312.html",
                                         f"{stem}-utf8.html",
                                         f"{stem}-utf8-local.html"])
                elif clean_temp:
                    # Without PDF conversion, only the gb2312 HTML file is removed.
                    logger.info("[ocr] 正在清理临时文件 (Cleaning temporary files)")
                    TempCleaner.cleaner(["result"], [f"{stem}-gb2312.html"])
                else:
                    # elif/else chain: "skipped" is logged only when nothing was cleaned.
                    logger.info("[ocr] 跳过临时文件清理 (Skip cleaning temporary files)")
            elif success == 2:
                logger.error(
                    "[ocr] 你看起来没有加载模型，或没有上传图片 (You seem to have not loaded the model or uploaded an image)")
                res = local["error_no_model_or_img"]
            else:
                res = local["error_render_fail"].format(img_file=image_name_with_extension)
        logger.info("[ocr] OCR 已完成 (OCR completed)")
        return res
    except AttributeError:
        # Calling .chat / .chat_crop on model=None (model not loaded) raises AttributeError.
        logger.error(
            "[ocr] 你看起来没有加载模型，或没有上传图片 (You seem to have not loaded the model or uploaded an image)")
        return local["error_no_model_or_img"]
    except Exception as e:
        # UI boundary: show the error text in the result box instead of crashing the handler.
        logger.error(f"[ocr] OCR 失败 (OCR failed): {e}")
        return str(e)
##########################
# 执行PDF OCR (Performing PDF OCR)
def pdf_ocr(mode, pdf_file, target_dpi, pdf_convert, pdf_merge, clean_temp):
    """
    Process an uploaded PDF in one of three modes.

    * ``split-to-image``: split the PDF into images under ``imgs``.
    * ``render``: render the PDF, then optionally merge the per-page PDFs and clean them up.
    * ``merge``: merge existing per-page PDFs that share the uploaded file's prefix.

    Progress is reported through ``gr.Info`` toasts; nothing is returned.

    :param mode: One of ``"split-to-image"``, ``"render"``, ``"merge"``.
    :param pdf_file: Path of the uploaded PDF (falsy when nothing was uploaded).
    :param target_dpi: DPI passed on to the split / render step.
    :param pdf_convert: In render mode, whether rendered pages are saved as PDF.
    :param pdf_merge: In render mode, whether the per-page PDFs are merged afterwards.
    :param clean_temp: Whether the per-page PDFs are removed after a successful merge.
    :raises gr.Error: When no PDF was uploaded or a split / render / merge step reports failure.
    :raises ValueError: From ``extract_pdf_pattern`` when a merge is requested and the file
                        name is not of the form ``<string>_0.pdf``.
    """
    logger.info("[pdf_ocr] 开始执行 PDF OCR (Starting PDF OCR)")
    if not pdf_file:
        logger.error("[pdf_ocr] PDF 文件未上传 (PDF file not uploaded)")
        raise gr.Error(duration=0, message=local["error_no_pdf"])
    pdf_name = os.path.basename(pdf_file)
    logger.debug(f"[pdf_ocr] 获取到 PDF 名称 (Got PDF name): {pdf_name}")
    # ---------------------------------- #
    # Split mode
    if mode == "split-to-image":
        logger.debug("[pdf_ocr] 当前模式：split-to-image (Current mode: split-to-image)")
        logger.info("[pdf_ocr] 开始分割 PDF 文件 (Starting to split PDF file)")
        success = PDFHandler.split_pdf(pdf_path=pdf_file, img_path="imgs", target_dpi=target_dpi)
        if success:
            logger.info("[pdf_ocr] PDF 文件分割成功 (PDF file split successfully)")
            gr.Info(message=local["info_pdf_split_success"].format(pdf_file=pdf_name))
        else:
            logger.error("[pdf_ocr] PDF 文件分割失败 (PDF file split failed)")
            raise gr.Error(duration=0, message=local["error_pdf_split_fail"].format(pdf_file=pdf_name))
    # ---------------------------------- #
    # Rendering mode
    elif mode == "render":
        logger.debug("[pdf_ocr] 当前模式：render (Current mode: render)")
        logger.debug(f"[pdf_ocr] 开始渲染 PDF 文件 (Starting to render PDF file)：{pdf_name}")
        gr.Info(message=local["info_pdf_render_start"].format(pdf_file=pdf_name))
        success = PDFHandler.pdf_renderer(model=model, tokenizer=tokenizer, pdf_path=pdf_file, target_dpi=target_dpi,
                                          pdf_convert=pdf_convert, wait=config["pdf_render_wait"],
                                          time=config["pdf_render_wait_time"])
        if not success:
            logger.error(f"[pdf_ocr] PDF 文件渲染失败 (PDF file render failed)：{pdf_name}")
            raise gr.Error(duration=0, message=local["error_pdf_render_fail"].format(pdf_file=pdf_name))
        logger.info(f"[pdf_ocr] PDF 文件渲染成功 (PDF file rendered successfully)：{pdf_name}")
        gr.Info(message=local["info_pdf_render_success"].format(pdf_file=pdf_name))
        if not pdf_merge:
            logger.info(f"[pdf_ocr] 跳过合并 PDF 文件 (Skipping merging PDF file)：{pdf_name}")
            gr.Info(message=local["info_pdf_merge_skip"].format(pdf_file=pdf_name))
            return
        # Rendering succeeded and a merge was requested.
        logger.debug(f"[pdf_ocr] 开始合并 PDF 文件 (Starting to merge PDF file)：{pdf_name}")
        gr.Info(message=local["info_pdf_merge_start"].format(pdf_file=pdf_name))
        # Computed once and reused below instead of re-parsing the name for every use.
        # NOTE(review): this requires the uploaded file to be named <string>_0.pdf even in
        # render mode — confirm that matches what PDFHandler.pdf_renderer writes.
        prefix = extract_pdf_pattern(pdf_name)
        success = PDFMerger.merge_pdfs(prefix=prefix)
        if not success:
            logger.error(f"[pdf_ocr] PDF 文件合并失败 (PDF file merge failed)：{pdf_name}")
            raise gr.Error(duration=0, message=local["error_pdf_merge_fail"].format(pdf_file=pdf_name))
        logger.info(f"[pdf_ocr] PDF 文件合并成功 (PDF file merged successfully)：{pdf_name}")
        gr.Info(message=local["info_pdf_merge_success"].format(pdf_file=pdf_name))
        if clean_temp:
            # Merge succeeded: remove the per-page PDFs.
            logger.info("[pdf_ocr] 开始清理临时文件 (Starting to clean up temporary files)")
            # Raw f-string: "\d" in a normal string literal is an invalid escape sequence.
            # The resulting text is unchanged.
            temp_pattern = rf"{prefix}_\d+.pdf"
            logger.debug(f"获取到临时文件特征 (Got temp file pattern)：{temp_pattern}")
            TempCleaner.cleaner(["result"], [temp_pattern])
    # ---------------------------------- #
    # Merging mode
    elif mode == "merge":
        logger.debug("[pdf_ocr] 当前模式：merge (Current mode: merge)")
        gr.Info(message=local["info_pdf_merge_start"].format(pdf_file=pdf_name))
        prefix = extract_pdf_pattern(pdf_name)
        success = PDFMerger.merge_pdfs(prefix=prefix)
        if not success:
            logger.error(f"[pdf_ocr] PDF 文件合并失败 (PDF file merge failed)：{pdf_name}")
            raise gr.Error(duration=0, message=local["error_pdf_merge_fail"].format(pdf_file=pdf_name))
        logger.info(f"[pdf_ocr] PDF 文件合并成功 (PDF file merged successfully)：{pdf_name}")
        gr.Info(message=local["info_pdf_merge_success"].format(pdf_file=pdf_name))
        if clean_temp:
            # Merge succeeded: remove the per-page PDFs.
            logger.info("[pdf_ocr] 开始清理临时文件 (Starting to clean up temporary files)")
            temp_pattern = rf"{prefix}_\d+.pdf"
            logger.debug(f"获取到临时文件特征 (Got temp file pattern)：{temp_pattern}")
            TempCleaner.cleaner(["result"], [temp_pattern])
        else:
            logger.info("[Info-GUI] 跳过清理临时文件 (Skipping cleaning up temporary files)")
##########################
# 渲染器 (Renderer)
def renderer(imgs_path, pdf_convert_confirm, clean_temp):
    """
    Render every ``*.jpg`` and ``*.png`` image found directly inside a folder.

    Images are rendered one at a time; the first one that fails aborts the batch.

    :param imgs_path: Folder that is searched for images.
    :param pdf_convert_confirm: Whether each render is also converted to PDF.
    :param clean_temp: Whether intermediate HTML files in ``result`` are removed after a
                       successful render.
    :raises gr.Error: When ``Renderer.render`` returns 2 (model not loaded / no image) or
                      3 (render failed).
    """
    # Collect the image files.
    image_files = glob.glob(os.path.join(imgs_path, '*.jpg')) + glob.glob(os.path.join(imgs_path, '*.png'))
    logger.debug(f"[renderer] 获取到图片文件列表 (Got image file list)：{image_files}")
    # Hand the images to Renderer.render one by one.
    for image_path in image_files:
        logger.info(f"[renderer] 开始渲染：{image_path}")
        success = Renderer.render(model=model, tokenizer=tokenizer, image_path=image_path,
                                  convert_to_pdf=pdf_convert_confirm, wait=config["pdf_render_wait"],
                                  time=config["pdf_render_wait_time"])
        if success == 1:
            logger.info(f"[renderer] 渲染成功：{image_path}")
            # Base name without extension; the temp files are named after it.
            stem = os.path.splitext(os.path.basename(image_path))[0]
            if clean_temp and pdf_convert_confirm:
                # With PDF conversion, all three intermediate HTML files are removed.
                logger.info("[renderer] 开始清理临时文件 (Starting to clean up temporary files)")
                TempCleaner.cleaner(["result"],
                                    [f"{stem}-gb2312.html",
                                     f"{stem}-utf8.html",
                                     f"{stem}-utf8-local.html"])
            elif clean_temp:
                # Without PDF conversion, only the gb2312 HTML file is removed.
                logger.info("[renderer] 开始清理临时文件 (Starting to clean up temporary files)")
                TempCleaner.cleaner(["result"], [f"{stem}-gb2312.html"])
            else:
                # elif/else chain: "skipped" is logged only when nothing was cleaned
                # (two independent ifs logged it right after a clean-up).
                logger.info("[renderer] 跳过清理临时文件 (Skipping cleaning up temporary files)")
        elif success == 2:
            logger.error(
                "[renderer] 你看起来没有加载模型，或没有上传图片 (You seem to have not loaded the model or uploaded an image)")
            raise gr.Error(duration=0, message=local["error_no_model_or_img"])
        elif success == 3:
            logger.error(f"[renderer] 渲染失败：{image_path}")
            raise gr.Error(duration=0, message=local["error_render_fail"].format(img_file=image_path))
##########################
# Gradio GUI
# Builds the whole Gradio interface (model panel, OCR tab, batch renderer tab, PDF tab,
# instructions tab), wires each widget event to the handler functions defined above, and
# finally launches the app.
# NOTE(review): this view of the file has lost its indentation, so the nesting of the
# layout containers is not visible here; the statements are kept exactly as found.
with gr.Blocks(theme=theme) as demo:
# ---------------------------------- #
# Model panel
with gr.Row(variant="panel", equal_height=True):
# The initial status text follows the load_model_on_start setting
if config["load_model_on_start"]:
model_status = gr.Textbox(local["info_model_already_loaded"], show_label=False)
else:
model_status = gr.Textbox(local["info_model_not_loaded"], show_label=False)
# Model buttons
unload_model_btn = gr.Button(local["btn_unload_model"], variant="secondary")
load_model_btn = gr.Button(local["btn_load_model"], variant="primary")
# ---------------------------------- #
# OCR tab
with gr.Tab(local["tab_ocr"]):
# Settings for the special modes
with gr.Row():
# Fine-grained settings
with gr.Column():
# Fine-grained settings: box coordinates and color
gr.Markdown(local["label_fine_grained_settings"])
with gr.Row():
fine_grained_box_x1 = gr.Number(label=local["label_fine_grained_box_x1"], value=0)
fine_grained_box_y1 = gr.Number(label=local["label_fine_grained_box_y1"], value=0)
fine_grained_box_x2 = gr.Number(label=local["label_fine_grained_box_x2"], value=0)
fine_grained_box_y2 = gr.Number(label=local["label_fine_grained_box_y2"], value=0)
fine_grained_color = gr.Dropdown(choices=["red", "green", "blue"],
label=local["label_fine_grained_color"], value="red")
# Rendering settings
with gr.Column():
gr.Markdown(local["label_render_settings"])
img_name = gr.Textbox(label=local["label_img_name"], value="ocr")
with gr.Row(equal_height=True):
pdf_convert_confirm = gr.Checkbox(label=local["label_save_as_pdf"])
clean_temp_render = gr.Checkbox(label=local["label_clean_temp"])
# OCR settings
gr.Markdown(local["label_ocr_settings"])
with gr.Row():
# Image upload; type="filepath" means handlers receive a file path
upload_img = gr.Image(type="filepath", label=local["label_upload_img"])
# Other components
with gr.Column():
# Mode dropdown: (localized label, mode value passed to ocr()) pairs
ocr_mode = gr.Dropdown(
choices=[(local["mode_ocr"], "ocr"), (local["mode_format"], "format"),
(local["mode_fine-grained-ocr"], "fine-grained-ocr"),
(local["mode_fine-grained-format"], "fine-grained-format"),
(local["mode_fine-grained-color-ocr"], "fine-grained-color-ocr"),
(local["mode_fine-grained-color-format"], "fine-grained-color-format"),
(local["mode_multi-crop-ocr"], "multi-crop-ocr"),
(local["mode_multi-crop-format"], "multi-crop-format"), (local["mode_render"], "render")],
label=local["label_ocr_mode"], value="ocr")
# OCR button and result box
do_ocr = gr.Button(local["btn_do_ocr"], variant="primary")
result = gr.Textbox(label=local["label_result"])
# ---------------------------------- #
# Renderer tab (batch rendering of a folder of images)
with gr.Tab(local["tab_renderer"]):
# Input folder path
input_folder_path = gr.Textbox(label=local["label_input_folder_path"], value="imgs", interactive=True)
with gr.Row(equal_height=True):
# Save-as-PDF setting
batch_pdf_convert_confirm = gr.Checkbox(label=local["label_save_as_pdf"], value=True, interactive=True)
# Clean temporary files
clean_temp_renderer = gr.Checkbox(label=local["label_clean_temp"], value=True, interactive=True)
# Render button
batch_render_btn = gr.Button(local["btn_render"], variant="primary", scale=2)
# PDF tab
with gr.Tab("PDF"):
with gr.Row():
# PDF file: read-only name box plus the upload widget
with gr.Column():
pdf_file_name = gr.Textbox(value="input.pdf", interactive=False, label=local["label_pdf_file_name"])
pdf_file = gr.File(label=local["label_pdf_file"], file_count="single", file_types=[".pdf"])
# OCR settings
with gr.Column():
# Mode and DPI
with gr.Group():
pdf_ocr_mode = gr.Dropdown(
choices=["split-to-image", "render", "merge"],
label=local["label_ocr_mode"], value="split-to-image", interactive=True)
dpi = gr.Number(label=local["label_target_dpi"], minimum=72, maximum=300, step=1, value=150)
# PDF conversion settings (both start hidden; the change handlers below reveal them)
with gr.Row():
# Save the render result as PDF
pdf_pdf_convert_confirm = gr.Checkbox(label=local["label_save_as_pdf"], interactive=True,
visible=False)
# Merge the per-page PDFs
pdf_pdf_merge_confirm = gr.Checkbox(label=local["label_merge_pdf"], interactive=True,
visible=False)
# Buttons
with gr.Row(equal_height=True):
pdf_ocr_btn = gr.Button(local["btn_pdf_ocr"], variant="primary", scale=2)
clean_temp = gr.Checkbox(label=local["label_clean_temp"], value=True, interactive=True)
# Instructions tab
with gr.Tab(local["tab_instructions"]):
# Load the instructions from the markdown file of the selected language
with open(os.path.join('Locales', 'gui', 'instructions', f'{lang}.md'), 'r', encoding='utf-8') as file:
instructions = file.read()
gr.Markdown(instructions)
# ---------------------------------- #
# Click to run OCR on the uploaded image
do_ocr.click(
fn=ocr,
inputs=[upload_img, fine_grained_box_x1, fine_grained_box_y1, fine_grained_box_x2,
fine_grained_box_y2, ocr_mode, fine_grained_color, pdf_convert_confirm, clean_temp_render],
outputs=result
)
# ---------------------------------- #
# Click to batch-render the input folder
batch_render_btn.click(
fn=renderer,
inputs=[input_folder_path, batch_pdf_convert_confirm, clean_temp_renderer],
outputs=None
)
# ---------------------------------- #
# Update the image name box when the uploaded image changes
upload_img.change(
fn=update_img_name,
inputs=upload_img,
outputs=img_name
)
# ---------------------------------- #
# Update the save-as-PDF checkbox of the PDF tab when the mode changes
pdf_ocr_mode.change(
fn=update_pdf_pdf_convert_confirm_visibility,
inputs=pdf_ocr_mode,
outputs=pdf_pdf_convert_confirm
)
# ----------------------------------- #
# Update the target DPI input of the PDF tab when the mode changes
pdf_ocr_mode.change(
fn=update_pdf_pdf_dpi_visibility,
inputs=pdf_ocr_mode,
outputs=dpi
)
# ----------------------------------- #
# Update the merge-PDF checkbox of the PDF tab when save-as-PDF changes
pdf_pdf_convert_confirm.change(
fn=update_pdf_pdf_merge_confirm_visibility,
inputs=pdf_pdf_convert_confirm,
outputs=pdf_pdf_merge_confirm
)
# ----------------------------------- #
# Click to run PDF OCR
pdf_ocr_btn.click(
fn=pdf_ocr,
inputs=[pdf_ocr_mode, pdf_file, dpi, pdf_pdf_convert_confirm, pdf_pdf_merge_confirm, clean_temp],
outputs=None
)
# ----------------------------------- #
# Update the PDF name box when the uploaded PDF changes
pdf_file.change(
fn=update_pdf_name,
inputs=pdf_file,
outputs=pdf_file_name
)
# ----------------------------------- #
# Load the model and show the returned status text
load_model_btn.click(
fn=load_model,
inputs=None,
outputs=model_status
)
# ----------------------------------- #
# Unload the model and show the returned status text
unload_model_btn.click(
fn=unload_model,
inputs=None,
outputs=model_status
)
# ----------------------------------- #
##########################
# Start the Gradio interface
demo.launch()