diff --git a/Preprocess/data_process/read_pdf_noocr.py b/Preprocess/data_process/read_pdf_noocr.py index 4fe931e..ca89a53 100644 --- a/Preprocess/data_process/read_pdf_noocr.py +++ b/Preprocess/data_process/read_pdf_noocr.py @@ -6,7 +6,7 @@ def read_pdf(pdf_loc): - """ 讀取單個PDF文件並返回其文本內容 """ + """讀取單個PDF文件並返回其文本內容""" pdf = pdfplumber.open(pdf_loc) pdf_text = '' for page in pdf.pages: @@ -18,7 +18,7 @@ def read_pdf(pdf_loc): def load_data_by_category(source_path, category): - """ 從指定資料夾載入PDF文件,並根據資料夾名稱設定category """ + """從指定資料夾載入PDF文件,並根據資料夾名稱設定category""" pdf_files = [f for f in os.listdir(source_path) if f.endswith('.pdf')] data = [] for file in tqdm(pdf_files): @@ -29,7 +29,7 @@ def load_data_by_category(source_path, category): def generate_json(output_path): - """ Gen JSON 主程式 """ + """Gen JSON 主程式""" all_data = [] # 載入不同類別的PDF資料