diff --git a/.github/workflows/sdk-e2e-tests.yaml b/.github/workflows/sdk-e2e-tests.yaml index 4058930603..16805df4b8 100644 --- a/.github/workflows/sdk-e2e-tests.yaml +++ b/.github/workflows/sdk-e2e-tests.yaml @@ -38,7 +38,7 @@ jobs: cd deployment/docker-compose docker compose up -d --build - - name: Check Opik server avialability + - name: Check Opik server availability shell: bash run: | chmod +x ${{ github.workspace }}/tests_end_to_end/installer/*.sh diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/DatasetsResourceTest.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/DatasetsResourceTest.java index 21ebd76470..99e2f60b09 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/DatasetsResourceTest.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/DatasetsResourceTest.java @@ -76,6 +76,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -2834,39 +2835,29 @@ private void getItemAndAssert(DatasetItem expectedDatasetItem, String workspaceN private static DatasetItem mergeInputMap(DatasetItem expectedDatasetItem, Map> inputData) { - if (expectedDatasetItem.expectedOutput() != null) { - Map oldColumns = Map.of( - "input", new DatasetItemInputValue.JsonValue(expectedDatasetItem.input()), - "expected_output", new DatasetItemInputValue.JsonValue(expectedDatasetItem.expectedOutput())); + Map> newMap = new HashMap<>(); - Map> mergedMap = Stream - .concat(inputData.entrySet().stream(), oldColumns.entrySet().stream()) - .collect(toMap( - Map.Entry::getKey, - Map.Entry::getValue, - (v1, v2) -> v2 // In case of conflict, use the value from map2 - )); + if (expectedDatasetItem.expectedOutput() != null) { + newMap.put("expected_output", new DatasetItemInputValue.JsonValue(expectedDatasetItem.expectedOutput())); + } - 
expectedDatasetItem = expectedDatasetItem.toBuilder() - .inputData(mergedMap) - .build(); - } else { - Map oldColumns = Map.of("input", - new DatasetItemInputValue.JsonValue(expectedDatasetItem.input())); + if (expectedDatasetItem.input() != null) { + newMap.put("input", new DatasetItemInputValue.JsonValue(expectedDatasetItem.input())); + } - Map> mergedMap = Stream - .concat(inputData.entrySet().stream(), oldColumns.entrySet().stream()) - .collect(toMap( - Map.Entry::getKey, - Map.Entry::getValue, - (v1, v2) -> v2 // In case of conflict, use the value from map2 - )); + Map> mergedMap = Stream + .concat(inputData.entrySet().stream(), newMap.entrySet().stream()) + .collect(toMap( + Map.Entry::getKey, + Map.Entry::getValue, + (v1, v2) -> v2 // In case of conflict, use the value from map2 + )); + + expectedDatasetItem = expectedDatasetItem.toBuilder() + .inputData(mergedMap) + .build(); - expectedDatasetItem = expectedDatasetItem.toBuilder() - .inputData(mergedMap) - .build(); - } return expectedDatasetItem; } diff --git a/apps/opik-frontend/src/api/datasets/useCompareExperimentsList.ts b/apps/opik-frontend/src/api/datasets/useCompareExperimentsList.ts index c3b43e8e3b..4f26e07b90 100644 --- a/apps/opik-frontend/src/api/datasets/useCompareExperimentsList.ts +++ b/apps/opik-frontend/src/api/datasets/useCompareExperimentsList.ts @@ -1,12 +1,15 @@ import { QueryFunctionContext, useQuery } from "@tanstack/react-query"; import api, { DATASETS_REST_ENDPOINT, QueryConfig } from "@/api/api"; import { ExperimentsCompare } from "@/types/datasets"; +import { Filters } from "@/types/filters"; +import { processFilters } from "@/lib/filters"; type UseCompareExperimentsListParams = { workspaceName: string; datasetId: string; experimentsIds: string[]; search?: string; + filters?: Filters; page: number; size: number; }; @@ -23,6 +26,7 @@ const getCompareExperimentsList = async ( datasetId, experimentsIds, search, + filters, size, page, }: UseCompareExperimentsListParams, @@ -34,6 
+38,7 @@ const getCompareExperimentsList = async ( params: { workspace_name: workspaceName, experiment_ids: JSON.stringify(experimentsIds), + ...processFilters(filters), ...(search && { name: search }), size, page, diff --git a/apps/opik-frontend/src/components/pages/CompareExperimentsPage/ExperimentItemsTab/ExperimentItemsTab.tsx b/apps/opik-frontend/src/components/pages/CompareExperimentsPage/ExperimentItemsTab/ExperimentItemsTab.tsx index 1924a64594..0de7a0652e 100644 --- a/apps/opik-frontend/src/components/pages/CompareExperimentsPage/ExperimentItemsTab/ExperimentItemsTab.tsx +++ b/apps/opik-frontend/src/components/pages/CompareExperimentsPage/ExperimentItemsTab/ExperimentItemsTab.tsx @@ -2,7 +2,12 @@ import React, { useCallback, useMemo } from "react"; import isObject from "lodash/isObject"; import findIndex from "lodash/findIndex"; import find from "lodash/find"; -import { NumberParam, StringParam, useQueryParam } from "use-query-params"; +import { + JsonParam, + NumberParam, + StringParam, + useQueryParam, +} from "use-query-params"; import { keepPreviousData } from "@tanstack/react-query"; import useLocalStorageState from "use-local-storage-state"; @@ -24,6 +29,7 @@ import CompareExperimentAddHeader from "@/components/pages/CompareExperimentsPag import TraceDetailsPanel from "@/components/shared/TraceDetailsPanel/TraceDetailsPanel"; import CompareExperimentsPanel from "@/components/pages/CompareExperimentsPage/CompareExperimentsPanel/CompareExperimentsPanel"; import ColumnsButton from "@/components/shared/ColumnsButton/ColumnsButton"; +import FiltersButton from "@/components/shared/FiltersButton/FiltersButton"; import Loader from "@/components/shared/Loader/Loader"; import useCompareExperimentsList from "@/api/datasets/useCompareExperimentsList"; import useAppStore from "@/store/AppStore"; @@ -49,13 +55,7 @@ const SELECTED_COLUMNS_KEY = "compare-experiments-selected-columns"; const COLUMNS_WIDTH_KEY = "compare-experiments-columns-width"; const 
COLUMNS_ORDER_KEY = "compare-experiments-columns-order"; -export const DEFAULT_COLUMNS: ColumnData[] = [ - { - id: "id", - label: "Item ID", - type: COLUMN_TYPE.string, - cell: IdCell as never, - }, +export const SHARED_COLUMNS: ColumnData[] = [ { id: "input", label: "Input", @@ -90,6 +90,16 @@ export const DEFAULT_COLUMNS: ColumnData[] = [ : row.metadata || "", cell: CodeCell as never, }, +]; + +export const DEFAULT_COLUMNS: ColumnData[] = [ + { + id: "id", + label: "Item ID", + type: COLUMN_TYPE.string, + cell: IdCell as never, + }, + ...SHARED_COLUMNS, { id: "created_at", label: "Created", @@ -98,6 +108,20 @@ export const DEFAULT_COLUMNS: ColumnData[] = [ }, ]; +export const FILTER_COLUMNS: ColumnData[] = [ + ...SHARED_COLUMNS, + { + id: "feedback_scores", + label: "Feedback scores", + type: COLUMN_TYPE.numberDictionary, + }, + { + id: "output", + label: "Output", + type: COLUMN_TYPE.string, + }, +]; + export const DEFAULT_SELECTED_COLUMNS: string[] = ["id", "input"]; export type ExperimentItemsTabProps = { @@ -140,6 +164,10 @@ const ExperimentItemsTab: React.FunctionComponent = ({ }, ); + const [filters = [], setFilters] = useQueryParam("filters", JsonParam, { + updateType: "replaceIn", + }); + const [columnsWidth, setColumnsWidth] = useLocalStorageState< Record >(COLUMNS_WIDTH_KEY, { @@ -210,6 +238,7 @@ const ExperimentItemsTab: React.FunctionComponent = ({ workspaceName, datasetId, experimentsIds, + filters, page: page as number, size: size as number, }, @@ -264,7 +293,13 @@ const ExperimentItemsTab: React.FunctionComponent = ({ return (
-
+
+ +
Union[openai.OpenAI, openai.AsyncOpenAI]: """Adds Opik tracking to an OpenAI client. @@ -15,6 +16,7 @@ def track_openai( Args: openai_client: An instance of OpenAI or AsyncOpenAI client. + project_name: The name of the project to log data. Returns: The modified OpenAI client with Opik tracking enabled. @@ -24,6 +26,7 @@ def track_openai( type="llm", name="chat_completion_create", generations_aggregator=chunks_aggregator.aggregate, + project_name=project_name, ) openai_client.chat.completions.create = wrapper( openai_client.chat.completions.create diff --git a/sdks/python/tests/library_integration/openai/test_openai.py b/sdks/python/tests/library_integration/openai/test_openai.py index 244cf3cd93..1be10fb69d 100644 --- a/sdks/python/tests/library_integration/openai/test_openai.py +++ b/sdks/python/tests/library_integration/openai/test_openai.py @@ -7,6 +7,7 @@ import opik from opik.message_processing import streamer_constructors from opik.integrations.openai import track_openai +from opik.config import OPIK_PROJECT_DEFAULT_NAME from ...testlib import backend_emulator_message_processor from ...testlib import ( SpanModel, @@ -28,7 +29,16 @@ def ensure_openai_configured(): raise Exception("OpenAI not configured!") -def test_openai_client_chat_completions_create__happyflow(fake_streamer): +@pytest.mark.parametrize( + "project_name, expected_project_name", + [ + (None, OPIK_PROJECT_DEFAULT_NAME), + ("openai-integration-test", "openai-integration-test"), + ], +) +def test_openai_client_chat_completions_create__happyflow( + fake_streamer, project_name, expected_project_name +): fake_message_processor_: ( backend_emulator_message_processor.BackendEmulatorMessageProcessor ) @@ -43,7 +53,10 @@ def test_openai_client_chat_completions_create__happyflow(fake_streamer): mock_construct_online_streamer, ): client = openai.OpenAI() - wrapped_client = track_openai(client) + wrapped_client = track_openai( + openai_client=client, + project_name=project_name, + ) messages = [ {"role": 
"system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Tell a fact"}, @@ -72,6 +85,7 @@ def test_openai_client_chat_completions_create__happyflow(fake_streamer): }, start_time=ANY_BUT_NONE, end_time=ANY_BUT_NONE, + project_name=expected_project_name, spans=[ SpanModel( id=ANY_BUT_NONE, @@ -90,6 +104,7 @@ def test_openai_client_chat_completions_create__happyflow(fake_streamer): usage=ANY_BUT_NONE, start_time=ANY_BUT_NONE, end_time=ANY_BUT_NONE, + project_name=expected_project_name, spans=[], ) ], @@ -141,6 +156,7 @@ def test_openai_client_chat_completions_create__create_raises_an_error__span_and }, start_time=ANY_BUT_NONE, end_time=ANY_BUT_NONE, + project_name=ANY_BUT_NONE, spans=[ SpanModel( id=ANY_BUT_NONE, @@ -157,6 +173,7 @@ def test_openai_client_chat_completions_create__create_raises_an_error__span_and usage=None, start_time=ANY_BUT_NONE, end_time=ANY_BUT_NONE, + project_name=ANY_BUT_NONE, spans=[], ) ], @@ -178,6 +195,8 @@ def test_openai_client_chat_completions_create__openai_call_made_in_another_trac mock_construct_online_streamer = mock.Mock() mock_construct_online_streamer.return_value = streamer + project_name = "openai-integration-test" + with mock.patch.object( streamer_constructors, "construct_online_streamer", @@ -188,10 +207,15 @@ def test_openai_client_chat_completions_create__openai_call_made_in_another_trac {"role": "user", "content": "Tell a fact"}, ] - @opik.track() + @opik.track(project_name=project_name) def f(): client = openai.OpenAI() - wrapped_client = track_openai(client) + wrapped_client = track_openai( + openai_client=client, + # we are trying to log span into another project, but parent's project name will be used + project_name="openai-integration-test-nested-level", + ) + _ = wrapped_client.chat.completions.create( model="gpt-3.5-turbo", messages=messages, @@ -210,6 +234,7 @@ def f(): output=None, start_time=ANY_BUT_NONE, end_time=ANY_BUT_NONE, + project_name=project_name, spans=[ SpanModel( 
id=ANY_BUT_NONE, @@ -218,6 +243,7 @@ def f(): output=None, start_time=ANY_BUT_NONE, end_time=ANY_BUT_NONE, + project_name=project_name, spans=[ SpanModel( id=ANY_BUT_NONE, @@ -236,6 +262,7 @@ def f(): usage=ANY_BUT_NONE, start_time=ANY_BUT_NONE, end_time=ANY_BUT_NONE, + project_name=project_name, spans=[], ) ], @@ -291,6 +318,7 @@ async def async_f(): output=None, start_time=ANY_BUT_NONE, end_time=ANY_BUT_NONE, + project_name=ANY_BUT_NONE, spans=[ SpanModel( id=ANY_BUT_NONE, @@ -299,6 +327,7 @@ async def async_f(): output=None, start_time=ANY_BUT_NONE, end_time=ANY_BUT_NONE, + project_name=ANY_BUT_NONE, spans=[ SpanModel( id=ANY_BUT_NONE, @@ -317,6 +346,7 @@ async def async_f(): usage=ANY_BUT_NONE, start_time=ANY_BUT_NONE, end_time=ANY_BUT_NONE, + project_name=ANY_BUT_NONE, spans=[], ) ], @@ -382,6 +412,7 @@ def test_openai_client_chat_completions_create__stream_mode_is_on__generator_tra }, start_time=ANY_BUT_NONE, end_time=ANY_BUT_NONE, + project_name=ANY_BUT_NONE, spans=[ SpanModel( id=ANY_BUT_NONE, @@ -402,6 +433,7 @@ def test_openai_client_chat_completions_create__stream_mode_is_on__generator_tra usage=ANY_BUT_NONE, start_time=ANY_BUT_NONE, end_time=ANY_BUT_NONE, + project_name=ANY_BUT_NONE, spans=[], ) ], @@ -459,6 +491,7 @@ async def async_f(): output=None, start_time=ANY_BUT_NONE, end_time=ANY_BUT_NONE, + project_name=ANY_BUT_NONE, spans=[ SpanModel( id=ANY_BUT_NONE, @@ -467,6 +500,7 @@ async def async_f(): output=None, start_time=ANY_BUT_NONE, end_time=ANY_BUT_NONE, + project_name=ANY_BUT_NONE, spans=[ SpanModel( id=ANY_BUT_NONE, @@ -487,6 +521,7 @@ async def async_f(): usage=ANY_BUT_NONE, start_time=ANY_BUT_NONE, end_time=ANY_BUT_NONE, + project_name=ANY_BUT_NONE, spans=[], ) ], diff --git a/sdks/python/tests/testlib/backend_emulator_message_processor.py b/sdks/python/tests/testlib/backend_emulator_message_processor.py index d24e4ce883..81466dd102 100644 --- a/sdks/python/tests/testlib/backend_emulator_message_processor.py +++ 
b/sdks/python/tests/testlib/backend_emulator_message_processor.py @@ -87,6 +87,7 @@ def process(self, message: messages.BaseMessage) -> None: metadata=message.metadata, start_time=message.start_time, end_time=message.end_time, + project_name=message.project_name, ) self._trace_trees.append(trace) @@ -104,6 +105,7 @@ def process(self, message: messages.BaseMessage) -> None: start_time=message.start_time, end_time=message.end_time, usage=message.usage, + project_name=message.project_name, ) self._span_to_parent_span[span.id] = message.parent_span_id diff --git a/sdks/python/tests/testlib/models.py b/sdks/python/tests/testlib/models.py index c4c6823e87..2fe9c9231c 100644 --- a/sdks/python/tests/testlib/models.py +++ b/sdks/python/tests/testlib/models.py @@ -3,6 +3,8 @@ import dataclasses import datetime +from opik.config import OPIK_PROJECT_DEFAULT_NAME + @dataclasses.dataclass class SpanModel: @@ -16,6 +18,7 @@ class SpanModel: type: str = "general" usage: Optional[Dict[str, Any]] = None end_time: Optional[datetime.datetime] = None + project_name: str = OPIK_PROJECT_DEFAULT_NAME spans: List["SpanModel"] = dataclasses.field(default_factory=list) feedback_scores: List["FeedbackScoreModel"] = dataclasses.field( default_factory=list ) @@ -27,11 +30,12 @@ class TraceModel: id: str start_time: datetime.datetime name: Optional[str] input: Any = None output: Any = None tags: Optional[List[str]] = None metadata: Optional[Dict[str, Any]] = None end_time: Optional[datetime.datetime] = None + project_name: str = OPIK_PROJECT_DEFAULT_NAME spans: List["SpanModel"] = dataclasses.field(default_factory=list) feedback_scores: List["FeedbackScoreModel"] = dataclasses.field( default_factory=list