Skip to content

Commit

Permalink
Exception handling when reading the jsonl file.
Browse files Browse the repository at this point in the history
  • Loading branch information
babu-namburi committed Jan 2, 2025
1 parent a2fe0ca commit 95c714d
Showing 1 changed file with 34 additions and 4 deletions.
38 changes: 34 additions & 4 deletions assets/training/distillation/src/validate_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pandas as pd
import json
from argparse import Namespace

from pathlib import Path
from azureml.acft.contrib.hf import VERSION, PROJECT_NAME
from azureml.acft.contrib.hf.nlp.constants.constants import (
LOGS_TO_BE_FILTERED_IN_APPINSIGHTS,
Expand Down Expand Up @@ -93,9 +93,39 @@ def __init__(self, args: Namespace) -> None:
self._validate_data_generation_inputs()

def _get_dataframe(self, file_path: str):
    """Open a JSON Lines file as a chunked pandas reader.

    Args:
        file_path: Location of the ``.jsonl`` file to read.

    Returns:
        Whatever ``pd.read_json`` returns for ``lines=True`` with
        ``chunksize`` set to ``self._args.request_batch_size``.

    Raises:
        ACFTValidationException: If ``file_path`` is not an existing file,
            or if pandas raises while opening/reading it. The underlying
            exception is attached as ``__cause__``.
    """
    # Validate the path up front. Per the original author's note, pandas may
    # otherwise interpret a missing path as a literal JSON string and fail
    # with a confusing ValueError instead of a "file not found" error.
    if not Path(file_path).is_file():
        raise ACFTValidationException._with_error(
            AzureMLError.create(
                ACFTUserError,
                pii_safe_message=(
                    f"File not found at {file_path}. Please provide a valid file path."
                ),
            )
        )
    try:
        # NOTE(review): with ``chunksize`` set pandas hands back a lazy
        # reader, so malformed lines may only raise once the caller iterates
        # over it -- confirm callers handle that case as well.
        return pd.read_json(
            file_path, lines=True, chunksize=self._args.request_batch_size
        )
    except ValueError as e:
        # pandas signals invalid JSON / JSON Lines content with ValueError.
        raise ACFTValidationException._with_error(
            AzureMLError.create(
                ACFTUserError,
                pii_safe_message=(
                    f"Error while reading JSON file. Make sure the file is a valid jsonl file. Error: {e}"
                ),
            )
        ) from e
    except Exception as e:
        # Boundary handler: surface any other failure as a validation error
        # while keeping the original exception chained for debugging.
        raise ACFTValidationException._with_error(
            AzureMLError.create(
                ACFTUserError,
                pii_safe_message=(
                    f"An unexpected error occurred while reading the file: {e}"
                ),
            )
        ) from e

def _get_inference_request_headers(self) -> dict:
key = self._args.teacher_model_endpoint_key
Expand Down

0 comments on commit 95c714d

Please sign in to comment.