Skip to content

Commit

Permalink
Dataset bugfixes, pytest integration bugfixes, evaluation improvements (#145)
Browse files Browse the repository at this point in the history

* Fix dataset bugs, unite tasks running and scoring, update default base url, fix bug with llm_unit not working when pytest nodeid string contains more than one space

* Add missing files

* Add missing init file

* Always convert reason to string in hallucination metric

* Fix lint errors

* Stop installing sdk in editable mode during CI tests
  • Loading branch information
alexkuzmik authored Sep 2, 2024
1 parent 644bdc7 commit 375aaf2
Show file tree
Hide file tree
Showing 15 changed files with 151 additions and 188 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/lib-langchain-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
python-version: ${{matrix.python_version}}

- name: Install opik
run: pip install -e .
run: pip install .

- name: Install test tools
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lib-openai-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
python-version: ${{matrix.python_version}}

- name: Install opik
run: pip install -e .
run: pip install .

- name: Install test tools
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python_sdk_unit_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ jobs:
python-version: ${{ matrix.python_version }}

- name: Install opik
run: pip install -e .
run: pip install .

- name: Install test requirements
run: |
Expand Down
5 changes: 0 additions & 5 deletions sdks/python/src/opik/api_objects/dataset/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,3 @@ def from_json(
result.append(dataset_item.DatasetItem(**item_kwargs))

return result


def from_dicts(dict_items: List[Dict[str, Any]]) -> List[dataset_item.DatasetItem]:
items = [dataset_item.DatasetItem(**dict_item) for dict_item in dict_items]
return items
12 changes: 6 additions & 6 deletions sdks/python/src/opik/api_objects/dataset/dataset.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import json
from typing import Optional, Any, List, Dict, Union, cast
from typing import Optional, Any, List, Dict, Union, Sequence

from opik.rest_api import client as rest_api_client
from opik.rest_api.types import dataset_item as rest_dataset_item
Expand Down Expand Up @@ -39,7 +39,7 @@ def description(self) -> Optional[str]:
return self._description

def insert(
self, items: Union[List[dataset_item.DatasetItem], List[Dict[str, Any]]]
self, items: Sequence[Union[dataset_item.DatasetItem, Dict[str, Any]]]
) -> None:
"""
Insert new items into the dataset.
Expand All @@ -48,10 +48,10 @@ def insert(
items: List of DatasetItem objects or dicts (which will be converted to DatasetItem objects)
to add to the dataset.
"""
if len(items) > 0 and isinstance(items[0], dict):
items = converters.from_dicts(items) # type: ignore

items = cast(List[dataset_item.DatasetItem], items)
items: List[dataset_item.DatasetItem] = [ # type: ignore
(dataset_item.DatasetItem(**item) if isinstance(item, dict) else item)
for item in items
]

rest_items = [
rest_dataset_item.DatasetItem(
Expand Down
7 changes: 4 additions & 3 deletions sdks/python/src/opik/api_objects/dataset/dataset_item.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from typing import Optional, Dict, Any
import dataclasses
import pydantic
from .. import constants


@dataclasses.dataclass
class DatasetItem:
class DatasetItem(pydantic.BaseModel):
"""A DatasetItem object representing an item in a dataset."""

model_config = pydantic.ConfigDict(strict=True)

input: Dict[str, Any]
"""The input data for the dataset item."""

Expand Down
2 changes: 1 addition & 1 deletion sdks/python/src/opik/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def settings_customise_sources(

# Below are Opik configurations

url_override: str = "https://comet.com/opik/api"
url_override: str = "https://www.comet.com/opik/api"
"""Opik backend base URL"""

project_name: str = "Default Project"
Expand Down
12 changes: 3 additions & 9 deletions sdks/python/src/opik/evaluation/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from ..api_objects.experiment import experiment_item
from ..api_objects import opik_client

from . import task_runner, test_result, scoring_runner, scores_logger, report
from . import tasks_scorer, test_result, scores_logger, report


def evaluate(
Expand Down Expand Up @@ -47,18 +47,12 @@ def evaluate(
client = opik_client.get_client_cached()
start_time = time.time()

test_cases = task_runner.run(
test_results = tasks_scorer.run(
client=client,
dataset_=dataset,
task=task,
workers=task_threads,
verbose=verbose,
)

test_results = scoring_runner.run(
test_cases=test_cases,
scoring_metrics=scoring_metrics,
workers=scoring_threads,
workers=task_threads,
verbose=verbose,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,9 @@ def _parse_model_output(self, content: str) -> score_result.ScoreResult:
verdict: str = dict_content[template.VERDICT_KEY]
score = 1.0 if verdict.lower() == template.HALLUCINATION_VERDICT else 0.0
return score_result.ScoreResult(
name=self.name, value=score, reason=dict_content[template.REASON_KEY]
name=self.name,
value=score,
reason=str(dict_content[template.REASON_KEY]),
)
except Exception:
raise exceptions.MetricComputationError(
Expand Down
76 changes: 0 additions & 76 deletions sdks/python/src/opik/evaluation/scoring_runner.py

This file was deleted.

74 changes: 0 additions & 74 deletions sdks/python/src/opik/evaluation/task_runner.py

This file was deleted.

Loading

0 comments on commit 375aaf2

Please sign in to comment.