diff --git a/Preprocess/insert_data.py b/Preprocess/insert_data.py index 6cb5a24..61de115 100644 --- a/Preprocess/insert_data.py +++ b/Preprocess/insert_data.py @@ -124,7 +124,7 @@ def split_and_insert(self, pid, content, category): elif not result: # 如果失敗且非長度問題 failed_records.append({'pid': pid, 'category': category}) - # 將失敗的資料寫入 JSON 檔案 + # 將失敗的資料寫入 JSON 檔案,之後有需要可以利用此 JSON 檔案重新匯入 if failed_records: with open('failed_imports.json', 'w', encoding='utf-8') as f: json.dump(failed_records, f, ensure_ascii=False, indent=4) diff --git a/README.md b/README.md index 3a2a2d6..4f44136 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,11 @@ To set up the development environment, follow these steps: ``` 4. Manually add your `secret key` to the `config.ini`. - (需展開解釋 config.ini 內的每一項 key) + ``` + [OpenAI] 的 api_key 可以在 OpenAI 官網註冊取得 + [VoyageAI] 的 api_key 可以在 VoyageAI 官網註冊取得 + [Api_docs] 的 password 可以自己隨意輸入 (flask_app.py 啟動後,直接訪問 http://127.0.0.1:5000/ 即可看到 Swagger api_docs 頁面) + ``` 5. Create a `logs` directory: ``` @@ -82,18 +86,26 @@ To set up the development environment, follow these steps: ``` 8. Data preprocessing: + ``` + (TODO: 等 data 那邊處理好) + ``` 9. Data insert to weaviate: + ``` + python3 Preprocess/insert_data.py + ``` -10. Run the Flask app: +10. Run the Flask app (/ 是 API Docs, /api/chat/ 是我們的 Retrieval API): ``` - python3 src/flask_app.py + python3 Model/flask_app.py ``` -11. 將主辦方提供的 questions.json 測試資料塞入 data/: +11. 將主辦方提供的題目 JSON 檔案改名為 questions.json 並塞入 data/ 12. 運行 main.py 進行測試得出 data/pred_retrieve.json 提交最終結果給主辦方: - + ``` + python3 main.py + ``` ## Folder-specific Details For more detailed information about each folder and its purpose, refer to the individual `README.md` files located in their respective directories. 
diff --git a/config_example.ini b/config_example.ini index 5c8955a..2034db1 100644 --- a/config_example.ini +++ b/config_example.ini @@ -1,11 +1,11 @@ [Weaviate] -weaviate_url = +weaviate_url = http://127.0.0.1:8882 [OpenAI] -api_key = +api_key = [Api_docs] -password = +password = [VoyageAI] -api_key = +api_key = diff --git a/main.py b/main.py index f083100..1015c61 100644 --- a/main.py +++ b/main.py @@ -4,7 +4,7 @@ import requests # 讀取主辦提供的 Question JSON 檔案 -with open('data/questions_example.json', encoding='utf-8') as file: +with open('data/questions.json', encoding='utf-8') as file: questions = json.load(file)['questions'] # 初始化輸出資料格式 diff --git a/testing/get_best_alpha.py b/testing/get_best_alpha.py index b2dec99..b552275 100644 --- a/testing/get_best_alpha.py +++ b/testing/get_best_alpha.py @@ -4,7 +4,7 @@ import requests # Load questions from the JSON file -with open('data/questions_example.json', encoding='utf-8') as file: +with open('data/questions.json', encoding='utf-8') as file: questions = json.load(file)['questions'] # Load ground truth data