Fix linter
jverre committed Nov 22, 2024
1 parent 9273544 commit 69c313a
Showing 3 changed files with 37 additions and 18 deletions.
17 changes: 12 additions & 5 deletions sdks/python/src/opik/evaluation/evaluator.py
@@ -21,7 +21,9 @@ def evaluate(
nb_samples: Optional[int] = None,
task_threads: int = 16,
prompt: Optional[Prompt] = None,
scoring_key_mapping: Optional[Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]] = None,
scoring_key_mapping: Optional[
Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]
] = None,
) -> evaluation_result.EvaluationResult:
"""
Performs task evaluation on a given dataset.
@@ -60,7 +62,7 @@ def evaluate(
scoring_key_mapping: A dictionary that allows you to rename keys present in either the dataset item or the task output
so that they match the keys expected by the scoring metrics. For example if you have a dataset item with the following content:
{"user_question": "What is Opik ?"} and a scoring metric that expects a key "input", you can use scoring_key_mapping
{"user_question": "What is Opik ?"} and a scoring metric that expects a key "input", you can use scoring_key_mapping
`{"input": "user_question"}` to map the "user_question" key to "input".
"""
client = opik_client.get_client_cached()
@@ -124,7 +126,9 @@ def evaluate_experiment(
scoring_metrics: List[base_metric.BaseMetric],
scoring_threads: int = 16,
verbose: int = 1,
scoring_key_mapping: Optional[Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]] = None,
scoring_key_mapping: Optional[
Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]
] = None,
) -> evaluation_result.EvaluationResult:
"""Update existing experiment with new evaluation metrics.
@@ -143,7 +147,7 @@ def evaluate_experiment(
scoring_key_mapping: A dictionary that allows you to rename keys present in either the dataset item or the task output
so that they match the keys expected by the scoring metrics. For example if you have a dataset item with the following content:
{"user_question": "What is Opik ?"} and a scoring metric that expects a key "input", you can use scoring_key_mapping
{"user_question": "What is Opik ?"} and a scoring metric that expects a key "input", you can use scoring_key_mapping
`{"input": "user_question"}` to map the "user_question" key to "input".
"""
start_time = time.time()
@@ -155,7 +159,10 @@ def evaluate_experiment(
)

test_cases = utils.get_experiment_test_cases(
client=client, experiment_id=experiment.id, dataset_id=experiment.dataset_id, scoring_key_mapping=scoring_key_mapping
client=client,
experiment_id=experiment.id,
dataset_id=experiment.dataset_id,
scoring_key_mapping=scoring_key_mapping,
)

test_results = tasks_scorer.score(
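
The `scoring_key_mapping` argument whose annotation is re-wrapped above is easiest to read next to a usage example. The sketch below is illustrative only and is not part of this commit: the dataset name, its contents, the toy metric, and the task function are all assumptions; only the `{"input": "user_question"}` mapping itself comes from the docstring.

```python
from typing import Any, Dict

from opik import Opik
from opik.evaluation import evaluate
from opik.evaluation.metrics import base_metric, score_result


class InputIsQuestion(base_metric.BaseMetric):
    """Toy metric that expects an "input" key, mirroring the docstring example."""

    def score(self, input: str, **ignored: Any) -> score_result.ScoreResult:
        # Score 1.0 when the mapped "input" looks like a question.
        return score_result.ScoreResult(
            name=self.name, value=float(input.strip().endswith("?"))
        )


def answer_task(item: Dict[str, Any]) -> Dict[str, Any]:
    # Hypothetical task: dataset items carry "user_question", not "input".
    return {"output": f"(answer to) {item['user_question']}"}


client = Opik()
dataset = client.get_or_create_dataset(name="demo-questions")  # assumed dataset name
dataset.insert([{"user_question": "What is Opik ?"}])

evaluation = evaluate(
    dataset=dataset,
    task=answer_task,
    scoring_metrics=[InputIsQuestion(name="input_is_question")],
    # Rename: the "input" key the metric expects is read from the item's "user_question".
    scoring_key_mapping={"input": "user_question"},
)
```
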
31 changes: 19 additions & 12 deletions sdks/python/src/opik/evaluation/tasks_scorer.py
@@ -55,30 +55,33 @@ def _score_test_case(

return test_result_


def _create_scoring_inputs(
item: Dict[str, Any],
task_output: Dict[str, Any],
scoring_key_mapping: Optional[Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]],
scoring_key_mapping: Optional[Dict[str, Union[str, Callable[[Any], Any]]]],
) -> Dict[str, Any]:

mapped_inputs = {**item, **task_output}

if scoring_key_mapping is not None:
for k, v in scoring_key_mapping.items():
if isinstance(v, Callable):
mapped_inputs[k] = v(item)
else:
mapped_inputs[k] = mapped_inputs[v]
for k, v in scoring_key_mapping.items():
if callable(v):
mapped_inputs[k] = v(item)
else:
mapped_inputs[k] = mapped_inputs[v]

return mapped_inputs


def _process_item(
client: opik_client.Opik,
item: dataset_item.DatasetItem,
task: LLMTask,
scoring_metrics: List[base_metric.BaseMetric],
project_name: Optional[str],
scoring_key_mapping: Optional[Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]],
scoring_key_mapping: Optional[
Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]
],
) -> test_result.TestResult:
try:
trace_data = trace.TraceData(
@@ -91,7 +94,9 @@ def _process_item(
task_output_ = task(item.get_content())
opik_context.update_current_trace(output=task_output_)

scoring_inputs = _create_scoring_inputs(item.get_content(), task_output_, scoring_key_mapping)
scoring_inputs = _create_scoring_inputs(
item.get_content(), task_output_, scoring_key_mapping
)

test_case_ = test_case.TestCase(
trace_id=trace_data.id,
@@ -122,7 +127,9 @@ def run(
nb_samples: Optional[int],
verbose: int,
project_name: Optional[str],
scoring_key_mapping: Optional[Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]],
scoring_key_mapping: Optional[
Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]
],
) -> List[test_result.TestResult]:
dataset_items = dataset_.__internal_api__get_items_as_dataclasses__(
nb_samples=nb_samples
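
The substantive change in `_create_scoring_inputs` above is the switch from `isinstance(v, Callable)` to the builtin `callable(v)`; the mapping semantics are unchanged: a string value copies an existing key from the merged item/output dict under the name the metric expects, while a callable receives the raw dataset-item dict and computes the value. The standalone sketch below mirrors that logic with made-up data and does not import Opik's private helpers.

```python
from typing import Any, Callable, Dict, Optional, Union

ScoringKeyMapping = Optional[Dict[str, Union[str, Callable[[Dict[str, Any]], Any]]]]


def create_scoring_inputs(
    item: Dict[str, Any],
    task_output: Dict[str, Any],
    scoring_key_mapping: ScoringKeyMapping,
) -> Dict[str, Any]:
    # The dataset item and the task output are merged; the task output wins on key clashes.
    mapped_inputs = {**item, **task_output}
    if scoring_key_mapping is not None:
        for key, value in scoring_key_mapping.items():
            if callable(value):
                # A callable receives the raw dataset item and computes the value itself.
                mapped_inputs[key] = value(item)
            else:
                # A string copies an existing key from the merged dict under the new name.
                mapped_inputs[key] = mapped_inputs[value]
    return mapped_inputs


item = {"user_question": "What is Opik ?", "expected": "An LLM evaluation platform"}
task_output = {"output": "Opik is an open-source LLM evaluation platform."}

print(create_scoring_inputs(item, task_output, {"input": "user_question"}))
print(create_scoring_inputs(item, task_output, {"reference": lambda i: i["expected"].lower()}))
```
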
7 changes: 6 additions & 1 deletion sdks/python/src/opik/evaluation/utils.py
@@ -32,7 +32,12 @@ def get_trace_project_name(client: opik_client.Opik, trace_id: str) -> str:


def get_experiment_test_cases(
client: opik_client.Opik, experiment_id: str, dataset_id: str, scoring_key_mapping: Optional[Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]]
client: opik_client.Opik,
experiment_id: str,
dataset_id: str,
scoring_key_mapping: Optional[
Dict[str, Union[str, Callable[[dataset_item.DatasetItem], Any]]]
],
) -> List[test_case.TestCase]:
test_cases = []
page = 1
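
`get_experiment_test_cases` is the helper that `evaluate_experiment` now calls with `scoring_key_mapping` (see the evaluator.py hunk above). From the caller's side, the callable form of the mapping might look like the sketch below. This is a hedged illustration: the experiment name, the dataset key `expected_answer`, and the toy metric are assumptions; the leading `experiment_name` parameter and the `evaluate_experiment` import path are inferred, since this diff shows neither.

```python
from typing import Any

from opik.evaluation import evaluate_experiment
from opik.evaluation.metrics import base_metric, score_result


class HasReference(base_metric.BaseMetric):
    """Toy metric that only needs the "reference" key supplied by the mapping."""

    def score(self, reference: str, **ignored: Any) -> score_result.ScoreResult:
        return score_result.ScoreResult(name=self.name, value=float(bool(reference)))


evaluate_experiment(
    experiment_name="my-experiment",  # assumed: the existing experiment to re-score
    scoring_metrics=[HasReference(name="has_reference")],
    # Callable form: compute "reference" from the dataset item instead of renaming a key.
    # Assumed: mapping callables receive the item's content dict, as in tasks_scorer.py.
    scoring_key_mapping={"reference": lambda item: item.get("expected_answer", "")},
)
```
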
