
Commit

[OPIK-427]: remove pandas from the app dependencies and add it to the unit test requirements (#675)

* [OPIK-427]: remove pandas from the app dependencies and add it to the unit test requirements;

* [OPIK-427]: run linter;

* [OPIK-427]: rename the check to _raise_if_pandas_is_unavailable; handle numpy as an optional import;

---------

Co-authored-by: Sasha <[email protected]>
aadereiko and Sasha authored Nov 20, 2024
1 parent 2a65490 commit c00ac0e
Showing 6 changed files with 44 additions and 15 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python_sdk_unit_tests.yml
@@ -42,7 +42,7 @@ jobs:
- name: Install test requirements
run: |
cd ./tests
pip install --no-cache-dir --disable-pip-version-check -r test_requirements.txt
pip install --no-cache-dir --disable-pip-version-check -r test_requirements.txt -r ./unit/test_requirements.txt
- name: Running SDK Unit Tests
run: python -m pytest --cov=src/opik -vv tests/unit/
1 change: 0 additions & 1 deletion sdks/python/setup.py
@@ -41,7 +41,6 @@
"levenshtein~=0.25.1",
"litellm",
"openai<2.0.0",
"pandas>=2.0.0,<3.0.0",
"pydantic-settings>=2.0.0,<3.0.0",
"pydantic>=2.0.0,<3.0.0",
"pytest",
28 changes: 24 additions & 4 deletions sdks/python/src/opik/api_objects/dataset/converters.py
@@ -1,16 +1,34 @@
import pandas as pd
import json

from typing import List, Callable, Any, Dict
from typing import List, Callable, Any, Dict, TYPE_CHECKING
import importlib.util
import logging

if TYPE_CHECKING:
    import pandas as pd

from . import dataset_item

ItemConstructor = Callable[[Any], dataset_item.DatasetItem]


LOGGER = logging.getLogger(__name__)
IMPORT_PANDAS_ERROR = "The Python library Pandas is required for this method. You can install it with `pip install pandas`."


def _raise_if_pandas_is_unavailable() -> None:
    module_spec = importlib.util.find_spec("pandas")
    if module_spec is None:
        raise ImportError(IMPORT_PANDAS_ERROR)


def to_pandas(
items: List[dataset_item.DatasetItem], keys_mapping: Dict[str, str]
) -> pd.DataFrame:
) -> "pd.DataFrame":
_raise_if_pandas_is_unavailable()

import pandas as pd

new_item_dicts = []

for item in items:
@@ -38,10 +56,12 @@ def from_jsonl_file(


def from_pandas(
dataframe: pd.DataFrame,
dataframe: "pd.DataFrame",
keys_mapping: Dict[str, str],
ignore_keys: List[str],
) -> List[dataset_item.DatasetItem]:
_raise_if_pandas_is_unavailable()

result = []
ignore_keys = [] if ignore_keys is None else ignore_keys
for _, row in dataframe.iterrows():
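The converters now check for pandas up front and defer the real import until the method actually runs, while the TYPE_CHECKING block keeps the `pd.DataFrame` annotations (written as strings) resolvable for static type checkers without importing pandas at runtime. A minimal, self-contained sketch of the same pattern; the `rows` parameter and the simplified `to_pandas` body are illustrative, not the SDK's exact signature:

import importlib.util
from typing import TYPE_CHECKING, Any, Dict, List

if TYPE_CHECKING:
    # Seen only by static type checkers; never imported at runtime.
    import pandas as pd

IMPORT_PANDAS_ERROR = (
    "The Python library Pandas is required for this method. "
    "You can install it with `pip install pandas`."
)


def _raise_if_pandas_is_unavailable() -> None:
    # find_spec probes the import machinery without importing pandas itself.
    if importlib.util.find_spec("pandas") is None:
        raise ImportError(IMPORT_PANDAS_ERROR)


def to_pandas(rows: List[Dict[str, Any]]) -> "pd.DataFrame":
    _raise_if_pandas_is_unavailable()
    import pandas as pd  # deferred import: only paid when the method is called

    return pd.DataFrame(rows)

If pandas is missing, callers get an ImportError with an install hint at call time instead of the whole SDK failing to import.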
14 changes: 10 additions & 4 deletions sdks/python/src/opik/api_objects/dataset/dataset.py
@@ -1,6 +1,6 @@
import logging
import json
from typing import Optional, Any, List, Dict, Sequence, Set
from typing import Optional, Any, List, Dict, Sequence, Set, TYPE_CHECKING

from opik.rest_api import client as rest_api_client
from opik.rest_api.types import dataset_item_write as rest_dataset_item
@@ -9,7 +9,9 @@

from .. import constants
from . import dataset_item, converters
import pandas

if TYPE_CHECKING:
    import pandas as pd

LOGGER = logging.getLogger(__name__)

@@ -162,8 +164,10 @@ def clear(self) -> None:

self.delete(item_ids)

def to_pandas(self) -> pandas.DataFrame:
def to_pandas(self) -> "pd.DataFrame":
"""
Requires: `pandas` library to be installed.
Convert the dataset to a pandas DataFrame.
Returns:
@@ -298,11 +302,13 @@ def read_jsonl_from_file(

def insert_from_pandas(
self,
dataframe: pandas.DataFrame,
dataframe: "pd.DataFrame",
keys_mapping: Optional[Dict[str, str]] = None,
ignore_keys: Optional[List[str]] = None,
) -> None:
"""
Requires: `pandas` library to be installed.
Args:
dataframe: pandas dataframe
keys_mapping: Dictionary that maps dataframe column names to dataset item field names.
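With pandas gone from the install requirements, `Dataset.to_pandas` and `Dataset.insert_from_pandas` work only when the caller's environment provides pandas; otherwise they raise the ImportError defined in converters.py. A hypothetical usage sketch, assuming a configured Opik client with a `get_or_create_dataset` helper; the dataset name, column names, and mapped field names are illustrative:

import pandas as pd  # callers now install pandas themselves

import opik

client = opik.Opik()
dataset = client.get_or_create_dataset(name="demo-dataset")

frame = pd.DataFrame(
    {
        "question": ["What is Opik?"],
        "answer": ["An open-source LLM evaluation platform."],
        "internal_id": [123],
    }
)

# keys_mapping renames DataFrame columns to dataset item field names;
# ignore_keys drops columns that should not be uploaded.
dataset.insert_from_pandas(
    dataframe=frame,
    keys_mapping={"question": "input", "answer": "expected_output"},
    ignore_keys=["internal_id"],
)

# Round-trips the stored items back into a DataFrame; raises ImportError
# with an install hint if pandas is not installed.
items_frame = dataset.to_pandas()
print(items_frame.head())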
13 changes: 8 additions & 5 deletions sdks/python/src/opik/jsonable_encoder.py
@@ -8,10 +8,13 @@
from pathlib import PurePath
from types import GeneratorType

import numpy as np

import opik.rest_api.core.datetime_utils as datetime_utils

try:
    import numpy as np
except ImportError:
    np = None

LOGGER = logging.getLogger(__name__)

_ENCODER_EXTENSIONS: Set[Tuple[Type, Callable[[Any], Any]]] = set()
@@ -55,13 +58,13 @@ def jsonable_encoder(obj: Any) -> Any:
encoded_list.append(jsonable_encoder(item))
return encoded_list

if isinstance(obj, np.ndarray):
return jsonable_encoder(obj.tolist())

for type_, encoder in _ENCODER_EXTENSIONS:
if isinstance(obj, type_):
return jsonable_encoder(encoder(obj))

if np is not None and isinstance(obj, np.ndarray):
return jsonable_encoder(obj.tolist())

except Exception:
LOGGER.debug("Failed to serialize object.", exc_info=True)

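numpy gets a softer treatment than pandas: the module-level import falls back to None instead of raising, and the ndarray branch is both guarded by that check and moved after the loop over registered encoder extensions, so custom encoders are tried first. A minimal sketch of the same optional-dependency guard; the function name and the string fallback at the end are illustrative rather than the SDK's exact behavior:

import logging
from typing import Any

try:
    import numpy as np
except ImportError:  # numpy is optional; the encoder still works without it
    np = None

LOGGER = logging.getLogger(__name__)


def encode(obj: Any) -> Any:
    try:
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        if isinstance(obj, (list, tuple, set)):
            return [encode(item) for item in obj]
        # Only reached when numpy is importable; skipped entirely otherwise.
        if np is not None and isinstance(obj, np.ndarray):
            return encode(obj.tolist())
    except Exception:
        LOGGER.debug("Failed to serialize object.", exc_info=True)
    return str(obj)  # best-effort fallback for anything unhandled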
1 change: 1 addition & 0 deletions sdks/python/tests/unit/test_requirements.txt
@@ -0,0 +1 @@
pandas
