Skip to content

Commit

Permalink
fix pylint
Browse files: browse the repository at this point in the history
Signed-off-by: Sukriti-Sharma4 <[email protected]>
  • Loading branch information
Ssukriti committed May 24, 2024
1 parent 2f70e34 commit 45827ce
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 9 deletions.
8 changes: 6 additions & 2 deletions tests/utils/test_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ def test_formatting_function():
json_dataset = datasets.load_dataset("json", data_files=TWITTER_COMPLAINTS_DATA)
template = "### Input: {{Tweet text}} \n\n ### Response: {{text_label}}"
# First response from the data file that is read.
expected_response = "### Input: @HMRCcustomers No this is my first job \n\n ### Response: no complaint"
expected_response = (
"### Input: @HMRCcustomers No this is my first job \n\n ### Response: no complaint"
)
formatted_dataset, dataset_text_field = data_utils.formatting_function(
json_dataset, template
)
Expand All @@ -43,7 +45,9 @@ def test_formatting_function_adds_eos_token():
json_dataset = datasets.load_dataset("json", data_files=TWITTER_COMPLAINTS_DATA)
template = "### Input: {{Tweet text}} \n\n ### Response: {{text_label}}"
# First response from the data file that is read.
expected_response = "### Input: @HMRCcustomers No this is my first job \n\n ### Response: no complaintEOS"
expected_response = (
"### Input: @HMRCcustomers No this is my first job \n\n ### Response: no complaintEOS"
)
formatted_dataset, dataset_text_field = data_utils.formatting_function(
json_dataset, template, "EOS"
)
Expand Down
11 changes: 7 additions & 4 deletions tuning/config/configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ class DataArguments:
default=None,
metadata={
"help": "Training dataset text field containing single sequence. \
Either the dataset_text_field or data_formatter_template need to be supplied."
Either the dataset_text_field \
or data_formatter_template need to be supplied."
},
)
validation_data_path: str = field(
Expand All @@ -64,9 +65,11 @@ class DataArguments:
data_formatter_template: str = field(
default=None,
metadata={
"help": "formatter template to format a single sequence from each instance in JSONL files. \
Keys of JSON can be referred to as {{key}} in template. Either the dataset_text_field \
or data_formatter_template needs to be supplied."
"help": "formatter template to format a single sequence \
from each instance in JSONL files. \
Keys of JSON can be referred to as {{key}} in template. \
Either the dataset_text_field \
or data_formatter_template needs to be supplied."
},
)

Expand Down
3 changes: 0 additions & 3 deletions tuning/utils/data_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
# Standard
import re

# Third Party
from datasets import Dataset


def formatting_function(dataset, template, eos_token=""):
"""Function to format datasets with Alpaca style / other templates.
Expand Down

0 comments on commit 45827ce

Please sign in to comment.