From 4fe574cd62f67c686594a531ae001004e4cbbba1 Mon Sep 17 00:00:00 2001 From: Andres Cruz Date: Wed, 11 Sep 2024 13:21:34 +0200 Subject: [PATCH] OPIK-71: Add metadata to Experiment --- .../com/comet/opik/api/DatasetItemUpdate.java | 17 ------- .../java/com/comet/opik/api/Experiment.java | 2 + .../com/comet/opik/domain/DatasetService.java | 2 +- .../com/comet/opik/domain/ExperimentDAO.java | 46 +++++++++++-------- .../000003_add_metadata_to_experiments.sql | 7 +++ .../v1/priv/ExperimentsResourceTest.java | 7 ++- 6 files changed, 41 insertions(+), 40 deletions(-) delete mode 100644 apps/opik-backend/src/main/java/com/comet/opik/api/DatasetItemUpdate.java create mode 100644 apps/opik-backend/src/main/resources/liquibase/db-app-analytics/migrations/000003_add_metadata_to_experiments.sql diff --git a/apps/opik-backend/src/main/java/com/comet/opik/api/DatasetItemUpdate.java b/apps/opik-backend/src/main/java/com/comet/opik/api/DatasetItemUpdate.java deleted file mode 100644 index 341d6a569e..0000000000 --- a/apps/opik-backend/src/main/java/com/comet/opik/api/DatasetItemUpdate.java +++ /dev/null @@ -1,17 +0,0 @@ -package com.comet.opik.api; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.PropertyNamingStrategies; -import com.fasterxml.jackson.databind.annotation.JsonNaming; -import lombok.Builder; - -@Builder(toBuilder = true) -@JsonIgnoreProperties(ignoreUnknown = true) -@JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class) -public record DatasetItemUpdate( - JsonNode input, - JsonNode expectedOutput, - JsonNode metadata) { - -} diff --git a/apps/opik-backend/src/main/java/com/comet/opik/api/Experiment.java b/apps/opik-backend/src/main/java/com/comet/opik/api/Experiment.java index e4492c49bc..22319829b4 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/api/Experiment.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/api/Experiment.java @@ -2,6 +2,7 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonView; +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.PropertyNamingStrategies; import com.fasterxml.jackson.databind.annotation.JsonNaming; import io.swagger.v3.oas.annotations.media.Schema; @@ -21,6 +22,7 @@ public record Experiment( @JsonView({Experiment.View.Public.class, Experiment.View.Write.class}) @NotBlank String datasetName, @JsonView({Experiment.View.Public.class}) @Schema(accessMode = Schema.AccessMode.READ_ONLY) UUID datasetId, @JsonView({Experiment.View.Public.class, Experiment.View.Write.class}) @NotBlank String name, + @JsonView({Experiment.View.Public.class, Experiment.View.Write.class}) JsonNode metadata, @JsonView({ Experiment.View.Public.class}) @Schema(accessMode = Schema.AccessMode.READ_ONLY) List feedbackScores, @JsonView({Experiment.View.Public.class}) @Schema(accessMode = Schema.AccessMode.READ_ONLY) Long traceCount, diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/DatasetService.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/DatasetService.java index d710ed454a..c0b5c610c7 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/domain/DatasetService.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/DatasetService.java @@ -180,7 +180,7 @@ public Dataset findById(@NonNull UUID id, @NonNull String workspaceId) { log.info("Finding dataset with id '{}', workspaceId '{}'", id, workspaceId); return template.inTransaction(READ_ONLY, handle -> { var dao = handle.attach(DatasetDAO.class); - var dataset = dao.findById(id, workspaceId).orElseThrow(this::newNotFoundException); + var dataset = dao.findById(id, workspaceId).orElseThrow(this::newNotFoundException); log.info("Found dataset with id '{}', workspaceId '{}'", id, workspaceId); return dataset; }); diff --git a/apps/opik-backend/src/main/java/com/comet/opik/domain/ExperimentDAO.java b/apps/opik-backend/src/main/java/com/comet/opik/domain/ExperimentDAO.java index ce7c00f8f9..cb5e7558e0 100644 --- a/apps/opik-backend/src/main/java/com/comet/opik/domain/ExperimentDAO.java +++ b/apps/opik-backend/src/main/java/com/comet/opik/domain/ExperimentDAO.java @@ -3,6 +3,8 @@ import com.comet.opik.api.Experiment; import com.comet.opik.api.ExperimentSearchCriteria; import com.comet.opik.api.FeedbackScoreAverage; +import com.comet.opik.utils.JsonUtils; +import com.fasterxml.jackson.databind.JsonNode; import io.r2dbc.spi.Connection; import io.r2dbc.spi.ConnectionFactory; import io.r2dbc.spi.Result; @@ -45,6 +47,7 @@ INSERT INTO experiments ( dataset_id, name, workspace_id, + metadata, created_by, last_updated_by ) @@ -57,6 +60,7 @@ INSERT INTO experiments ( new.dataset_id, new.name, new.workspace_id, + new.metadata, new.created_by, new.last_updated_by FROM ( @@ -65,6 +69,7 @@ INSERT INTO experiments ( :dataset_id AS dataset_id, :name AS name, :workspace_id AS workspace_id, + :metadata AS metadata, :created_by AS created_by, :last_updated_by AS last_updated_by ) AS new @@ -86,6 +91,7 @@ LEFT JOIN ( e.dataset_id as dataset_id, e.id as id, e.name as name, + e.metadata as metadata, e.created_at as created_at, e.last_updated_at as last_updated_at, e.created_by as created_by, @@ -188,6 +194,7 @@ INNER JOIN ( e.dataset_id, e.id, e.name, + e.metadata as metadata, e.created_at, e.last_updated_at, e.created_by, @@ -202,6 +209,7 @@ INNER JOIN ( e.dataset_id as dataset_id, e.id as id, e.name as name, + e.metadata as metadata, e.created_at as created_at, e.last_updated_at as last_updated_at, e.created_by as created_by, @@ -305,6 +313,7 @@ INNER JOIN ( e.dataset_id, e.id, e.name, + e.metadata as metadata, e.created_at, e.last_updated_at, e.created_by, @@ -351,22 +360,22 @@ private Publisher insert(Experiment experiment, Connection con var statement = connection.createStatement(INSERT) .bind("id", experiment.id()) .bind("dataset_id", experiment.datasetId()) - .bind("name", experiment.name()); - + .bind("name", experiment.name()) + .bind("metadata", getOrDefault(experiment.metadata())); return makeFluxContextAware((userName, workspaceName, workspaceId) -> { - log.info("Inserting experiment with id '{}', datasetId '{}', datasetName '{}', workspaceId '{}'", experiment.id(), experiment.datasetId(), experiment.datasetName(), workspaceId); - - statement - .bind("created_by", userName) + statement.bind("created_by", userName) .bind("last_updated_by", userName) .bind("workspace_id", workspaceId); - return Flux.from(statement.execute()); }); } + private String getOrDefault(JsonNode jsonNode) { + return Optional.ofNullable(jsonNode).map(JsonNode::toString).orElse(""); + } + Mono getById(@NonNull UUID id) { return Mono.from(connectionFactory.create()) .flatMapMany(connection -> getById(id, connection)) @@ -379,7 +388,6 @@ private Publisher getById(UUID id, Connection connection) { var statement = connection.createStatement(SELECT_BY_ID) .bind("id", id) .bind("entity_type", FeedbackScoreDAO.EntityType.TRACE.getType()); - return makeFluxContextAware(bindWorkspaceIdToFlux(statement)); } @@ -388,6 +396,7 @@ private Publisher mapToDto(Result result) { .id(row.get("id", UUID.class)) .datasetId(row.get("dataset_id", UUID.class)) .name(row.get("name", String.class)) + .metadata(getOrDefault(row.get("metadata", String.class))) .createdAt(row.get("created_at", Instant.class)) .lastUpdatedAt(row.get("last_updated_at", Instant.class)) .createdBy(row.get("created_by", String.class)) @@ -397,6 +406,13 @@ private Publisher mapToDto(Result result) { .build()); } + private JsonNode getOrDefault(String field) { + return Optional.ofNullable(field) + .filter(s -> !s.isBlank()) + .map(JsonUtils::getJsonNodeFromString) + .orElse(null); + } + private static List getFeedbackScores(Row row) { List feedbackScoresAvg = Arrays .stream(Optional.ofNullable(row.get("feedback_scores", List[].class)) @@ -406,12 +422,11 @@ private static List getFeedbackScores(Row row) { .map(scores -> new FeedbackScoreAverage(scores.getFirst().toString(), new BigDecimal(scores.get(1).toString()))) .toList(); - return feedbackScoresAvg.isEmpty() ? null : feedbackScoresAvg; } - Mono find(int page, int size, - @NonNull ExperimentSearchCriteria experimentSearchCriteria) { + Mono find( + int page, int size, @NonNull ExperimentSearchCriteria experimentSearchCriteria) { return countTotal(experimentSearchCriteria).flatMap(total -> find(page, size, experimentSearchCriteria, total)); } @@ -432,7 +447,6 @@ private Publisher find( .bind("limit", size) .bind("offset", (page - 1) * size); bindSearchCriteria(statement, experimentSearchCriteria, false); - return makeFluxContextAware(bindWorkspaceIdToFlux(statement)); } @@ -443,13 +457,12 @@ private Mono countTotal(ExperimentSearchCriteria experimentSearchCriteria) .reduce(0L, Long::sum); } - private Publisher countTotal(ExperimentSearchCriteria experimentSearchCriteria, - Connection connection) { + private Publisher countTotal( + ExperimentSearchCriteria experimentSearchCriteria, Connection connection) { log.info("Counting experiments by '{}'", experimentSearchCriteria); var template = newFindTemplate(FIND_COUNT, experimentSearchCriteria); var statement = connection.createStatement(template.render()); bindSearchCriteria(statement, experimentSearchCriteria, true); - return makeFluxContextAware(bindWorkspaceIdToFlux(statement)); } @@ -473,11 +486,9 @@ private void bindSearchCriteria(Statement statement, ExperimentSearchCriteria cr } public Flux getExperimentWorkspaces(@NonNull Set experimentIds) { - if (experimentIds.isEmpty()) { return Flux.empty(); } - return Mono.from(connectionFactory.create()) .flatMapMany(connection -> { var statement = connection.createStatement(FIND_EXPERIMENT_AND_WORKSPACE_BY_DATASET_IDS); @@ -488,5 +499,4 @@ public Flux getExperimentWorkspaces(@NonNull Set e row.get("workspace_id", String.class), row.get("id", UUID.class)))); } - } diff --git a/apps/opik-backend/src/main/resources/liquibase/db-app-analytics/migrations/000003_add_metadata_to_experiments.sql b/apps/opik-backend/src/main/resources/liquibase/db-app-analytics/migrations/000003_add_metadata_to_experiments.sql new file mode 100644 index 0000000000..bc86e3a9a0 --- /dev/null +++ b/apps/opik-backend/src/main/resources/liquibase/db-app-analytics/migrations/000003_add_metadata_to_experiments.sql @@ -0,0 +1,7 @@ +--liquibase formatted sql +--changeset andrescrz:add_metadata_to_experiments + +ALTER TABLE ${ANALYTICS_DB_DATABASE_NAME}.experiments + ADD COLUMN metadata String DEFAULT ''; + +--rollback ALTER TABLE ${ANALYTICS_DB_DATABASE_NAME}.experiments DROP COLUMN metadata; diff --git a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ExperimentsResourceTest.java b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ExperimentsResourceTest.java index e0da4f17db..3880b5eb96 100644 --- a/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ExperimentsResourceTest.java +++ b/apps/opik-backend/src/test/java/com/comet/opik/api/resources/v1/priv/ExperimentsResourceTest.java @@ -91,7 +91,6 @@ class ExperimentsResourceTest { private static final String[] EXPERIMENT_IGNORED_FIELDS = new String[]{ "id", "datasetId", "feedbackScores", "traceCount", "createdAt", "lastUpdatedAt", "createdBy", "lastUpdatedBy"}; - public static final String[] IGNORED_FIELDS = {"input", "output", "feedbackScores", "createdAt", "lastUpdatedAt", "createdBy", "lastUpdatedBy"}; @@ -102,9 +101,7 @@ class ExperimentsResourceTest { private static final TimeBasedEpochGenerator GENERATOR = Generators.timeBasedEpochGenerator(); private static final RedisContainer REDIS = RedisContainerUtils.newRedisContainer(); - private static final MySQLContainer MY_SQL_CONTAINER = MySQLContainerUtils.newMySQLContainer(); - private static final ClickHouseContainer CLICK_HOUSE_CONTAINER = ClickHouseContainerUtils.newClickHouseContainer(); @RegisterExtension @@ -686,6 +683,7 @@ void findByDatasetIdAndName() { .map(experiment -> experiment.toBuilder() .datasetName(datasetName) .name(name) + .metadata(null) .build()) .toList(); experiments.forEach(expectedExperiment -> ExperimentsResourceTest.this.createAndAssert(expectedExperiment, @@ -1248,10 +1246,11 @@ void createAndGetFeedbackAvg() { } @Test - void createWithoutIdAndGet() { + void createWithoutOptionalFieldsAndGet() { var expectedExperiment = podamFactory.manufacturePojo(Experiment.class) .toBuilder() .id(null) + .metadata(null) .build(); var expectedId = createAndAssert(expectedExperiment, API_KEY, TEST_WORKSPACE);