From 56c8d30c61f3dcea2e6c20584d9889a68ae2d7ce Mon Sep 17 00:00:00 2001 From: NICHOLAI MITCHKO Date: Sun, 16 Jul 2023 15:25:29 +0000 Subject: [PATCH] Introduces Local Dataset Loading Ability This addition allows a user to load a dataset directory that they created locally with the save_to_disk functionality of Hugging Face Datasets. If you pass in a dataset name that is a directory path (ending in / or \), it will be treated as a local Hugging Face dataset. See https://huggingface.co/docs/datasets/process#save --- qlora.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/qlora.py b/qlora.py index 59e2a701..552b9aaa 100644 --- a/qlora.py +++ b/qlora.py @@ -28,7 +28,7 @@ LlamaTokenizer ) -from datasets import load_dataset, Dataset +from datasets import load_dataset, Dataset, load_from_disk import evaluate from peft import ( @@ -481,6 +481,8 @@ def local_dataset(dataset_name): full_dataset = Dataset.from_pandas(pd.read_csv(dataset_name)) elif dataset_name.endswith('.tsv'): full_dataset = Dataset.from_pandas(pd.read_csv(dataset_name, delimiter='\t')) + elif dataset_name.endswith('/') or dataset_name.endswith('\\'): + full_dataset = load_from_disk(dataset_name) else: raise ValueError(f"Unsupported dataset format: {dataset_name}")