Fix linter
jverre committed Nov 22, 2024
1 parent 9273544 commit 69c313a
Showing 3 changed files with 37 additions and 18 deletions.
17 changes: 12 additions & 5 deletions sdks/python/src/opik/evaluation/evaluator.py
@@ -21,7 +21,9 @@ def evaluate(
nb_samples: Optional[int] = None,
task_threads: int = 16,
prompt: Optional[Prompt] = None,
scoring_key_mapping: Optional[Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]] = None,
scoring_key_mapping: Optional[
Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]
] = None,
) -> evaluation_result.EvaluationResult:
"""
Performs task evaluation on a given dataset.
@@ -60,7 +62,7 @@ def evaluate(
scoring_key_mapping: A dictionary that allows you to rename keys present in either the dataset item or the task output
so that they match the keys expected by the scoring metrics. For example if you have a dataset item with the following content:
{"user_question": "What is Opik ?"} and a scoring metric that expects a key "input", you can use scoring_key_mapping
{"user_question": "What is Opik ?"} and a scoring metric that expects a key "input", you can use scoring_key_mapping
`{"input": "user_question"}` to map the "user_question" key to "input".
"""
client = opik_client.get_client_cached()
@@ -124,7 +126,9 @@ def evaluate_experiment(
scoring_metrics: List[base_metric.BaseMetric],
scoring_threads: int = 16,
verbose: int = 1,
scoring_key_mapping: Optional[Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]] = None,
scoring_key_mapping: Optional[
Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]
] = None,
) -> evaluation_result.EvaluationResult:
"""Update existing experiment with new evaluation metrics.
@@ -143,7 +147,7 @@ def evaluate_experiment(
scoring_key_mapping: A dictionary that allows you to rename keys present in either the dataset item or the task output
so that they match the keys expected by the scoring metrics. For example if you have a dataset item with the following content:
{"user_question": "What is Opik ?"} and a scoring metric that expects a key "input", you can use scoring_key_mapping
{"user_question": "What is Opik ?"} and a scoring metric that expects a key "input", you can use scoring_key_mapping
`{"input": "user_question"}` to map the "user_question" key to "input".
"""
start_time = time.time()
@@ -155,7 +159,10 @@ def evaluate_experiment(
)

test_cases = utils.get_experiment_test_cases(
client=client, experiment_id=experiment.id, dataset_id=experiment.dataset_id, scoring_key_mapping=scoring_key_mapping
client=client,
experiment_id=experiment.id,
dataset_id=experiment.dataset_id,
scoring_key_mapping=scoring_key_mapping,
)

test_results = tasks_scorer.score(
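
The `scoring_key_mapping` argument whose annotation is re-wrapped above is easiest to read next to a usage example. The sketch below is illustrative only and is not part of this commit: the dataset name, its contents, the toy metric, and the task function are all assumptions; only the `{"input": "user_question"}` mapping itself comes from the docstring.

```python
from typing import Any, Dict

from opik import Opik
from opik.evaluation import evaluate
from opik.evaluation.metrics import base_metric, score_result


class InputIsQuestion(base_metric.BaseMetric):
    """Toy metric that expects an "input" key, mirroring the docstring example."""

    def score(self, input: str, **ignored: Any) -> score_result.ScoreResult:
        # Score 1.0 when the mapped "input" looks like a question.
        return score_result.ScoreResult(
            name=self.name, value=float(input.strip().endswith("?"))
        )


def answer_task(item: Dict[str, Any]) -> Dict[str, Any]:
    # Hypothetical task: dataset items carry "user_question", not "input".
    return {"output": f"(answer to) {item['user_question']}"}


client = Opik()
dataset = client.get_or_create_dataset(name="demo-questions")  # assumed dataset name
dataset.insert([{"user_question": "What is Opik ?"}])

evaluation = evaluate(
    dataset=dataset,
    task=answer_task,
    scoring_metrics=[InputIsQuestion(name="input_is_question")],
    # Rename: the "input" key the metric expects is read from the item's "user_question".
    scoring_key_mapping={"input": "user_question"},
)
```
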
31 changes: 19 additions & 12 deletions sdks/python/src/opik/evaluation/tasks_scorer.py
@@ -55,30 +55,33 @@ def _score_test_case(

return test_result_


def _create_scoring_inputs(
item: Dict[str, Any],
task_output: Dict[str, Any],
scoring_key_mapping: Optional[Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]],
scoring_key_mapping: Optional[Dict[str, Union[str, Callable[[Any], Any]]]],
) -> Dict[str, Any]:

mapped_inputs = {**item, **task_output}

if scoring_key_mapping is not None:
for k, v in scoring_key_mapping.items():
if isinstance(v, Callable):
mapped_inputs[k] = v(item)
else:
mapped_inputs[k] = mapped_inputs[v]
for k, v in scoring_key_mapping.items():
if callable(v):
mapped_inputs[k] = v(item)
else:
mapped_inputs[k] = mapped_inputs[v]

return mapped_inputs


def _process_item(
client: opik_client.Opik,
item: dataset_item.DatasetItem,
task: LLMTask,
scoring_metrics: List[base_metric.BaseMetric],
project_name: Optional[str],
scoring_key_mapping: Optional[Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]],
scoring_key_mapping: Optional[
Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]
],
) -> test_result.TestResult:
try:
trace_data = trace.TraceData(
@@ -91,7 +94,9 @@ def _process_item(
task_output_ = task(item.get_content())
opik_context.update_current_trace(output=task_output_)

scoring_inputs = _create_scoring_inputs(item.get_content(), task_output_, scoring_key_mapping)
scoring_inputs = _create_scoring_inputs(
item.get_content(), task_output_, scoring_key_mapping
)

test_case_ = test_case.TestCase(
trace_id=trace_data.id,
@@ -122,7 +127,9 @@ def run(
nb_samples: Optional[int],
verbose: int,
project_name: Optional[str],
scoring_key_mapping: Optional[Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]],
scoring_key_mapping: Optional[
Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]
],
) -> List[test_result.TestResult]:
dataset_items = dataset_.__internal_api__get_items_as_dataclasses__(
nb_samples=nb_samples
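
The substantive change in `_create_scoring_inputs` above is the switch from `isinstance(v, Callable)` to the builtin `callable(v)`; the mapping semantics are unchanged: a string value copies an existing key from the merged item/output dict under the name the metric expects, while a callable receives the raw dataset-item dict and computes the value. The standalone sketch below mirrors that logic with made-up data and does not import Opik's private helpers.

```python
from typing import Any, Callable, Dict, Optional, Union

ScoringKeyMapping = Optional[Dict[str, Union[str, Callable[[Dict[str, Any]], Any]]]]


def create_scoring_inputs(
    item: Dict[str, Any],
    task_output: Dict[str, Any],
    scoring_key_mapping: ScoringKeyMapping,
) -> Dict[str, Any]:
    # The dataset item and the task output are merged; the task output wins on key clashes.
    mapped_inputs = {**item, **task_output}
    if scoring_key_mapping is not None:
        for key, value in scoring_key_mapping.items():
            if callable(value):
                # A callable receives the raw dataset item and computes the value itself.
                mapped_inputs[key] = value(item)
            else:
                # A string copies an existing key from the merged dict under the new name.
                mapped_inputs[key] = mapped_inputs[value]
    return mapped_inputs


item = {"user_question": "What is Opik ?", "expected": "An LLM evaluation platform"}
task_output = {"output": "Opik is an open-source LLM evaluation platform."}

print(create_scoring_inputs(item, task_output, {"input": "user_question"}))
print(create_scoring_inputs(item, task_output, {"reference": lambda i: i["expected"].lower()}))
```
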
7 changes: 6 additions & 1 deletion sdks/python/src/opik/evaluation/utils.py
@@ -32,7 +32,12 @@ def get_trace_project_name(client: opik_client.Opik, trace_id: str) -> str:


def get_experiment_test_cases(
client: opik_client.Opik, experiment_id: str, dataset_id: str, scoring_key_mapping: Optional[Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]]
client: opik_client.Opik,
experiment_id: str,
dataset_id: str,
scoring_key_mapping: Optional[
Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]
],
) -> List[test_case.TestCase]:
test_cases = []
page = 1
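
`get_experiment_test_cases` is the helper that `evaluate_experiment` now calls with `scoring_key_mapping` (see the evaluator.py hunk above). From the caller's side, the callable form of the mapping might look like the sketch below. This is a hedged illustration: the experiment name, the dataset key `expected_answer`, and the toy metric are assumptions; the leading `experiment_name` parameter and the `evaluate_experiment` import path are inferred, since this diff shows neither.

```python
from typing import Any

from opik.evaluation import evaluate_experiment
from opik.evaluation.metrics import base_metric, score_result


class HasReference(base_metric.BaseMetric):
    """Toy metric that only needs the "reference" key supplied by the mapping."""

    def score(self, reference: str, **ignored: Any) -> score_result.ScoreResult:
        return score_result.ScoreResult(name=self.name, value=float(bool(reference)))


evaluate_experiment(
    experiment_name="my-experiment",  # assumed: the existing experiment to re-score
    scoring_metrics=[HasReference(name="has_reference")],
    # Callable form: compute "reference" from the dataset item instead of renaming a key.
    # Assumed: mapping callables receive the item's content dict, as in tasks_scorer.py.
    scoring_key_mapping={"reference": lambda item: item.get("expected_answer", "")},
)
```
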
