From d7d55972c95d17124c85fedad30db37de3e4b4ea Mon Sep 17 00:00:00 2001 From: VladYoSlav Date: Mon, 28 Oct 2024 00:34:32 +0300 Subject: [PATCH] feat(primitives): dd --- internal/domain/task/__init__.py | 1 + internal/domain/task/entities/__init__.py | 3 ++ internal/domain/task/entities/dd/__init__.py | 1 + internal/domain/task/entities/dd/dd_task.py | 48 +++++++++++++++++++ .../domain/task/value_objects/__init__.py | 3 ++ .../domain/task/value_objects/dd/__init__.py | 23 +++++++++ .../task/value_objects/dd/algo_config.py | 28 +++++++++++ .../value_objects/dd/algo_descriptions.py | 5 ++ .../domain/task/value_objects/dd/algo_name.py | 5 ++ .../task/value_objects/dd/algo_result.py | 14 ++++++ .../task/value_objects/primitive_name.py | 1 + 11 files changed, 132 insertions(+) create mode 100644 internal/domain/task/entities/dd/__init__.py create mode 100644 internal/domain/task/entities/dd/dd_task.py create mode 100644 internal/domain/task/value_objects/dd/__init__.py create mode 100644 internal/domain/task/value_objects/dd/algo_config.py create mode 100644 internal/domain/task/value_objects/dd/algo_descriptions.py create mode 100644 internal/domain/task/value_objects/dd/algo_name.py create mode 100644 internal/domain/task/value_objects/dd/algo_result.py diff --git a/internal/domain/task/__init__.py b/internal/domain/task/__init__.py index d9268af..6f4159d 100644 --- a/internal/domain/task/__init__.py +++ b/internal/domain/task/__init__.py @@ -5,3 +5,4 @@ from internal.domain.task.entities import AindTask # noqa: F401 from internal.domain.task.entities import ArTask # noqa: F401 from internal.domain.task.entities import CfdTask # noqa: F401 +from internal.domain.task.entities import DdTask # noqa: F401 diff --git a/internal/domain/task/entities/__init__.py b/internal/domain/task/entities/__init__.py index e960b5f..dd83a8b 100644 --- a/internal/domain/task/entities/__init__.py +++ b/internal/domain/task/entities/__init__.py @@ -7,6 +7,7 @@ from 
internal.domain.task.entities.aind import AindTask from internal.domain.task.entities.ar import ArTask from internal.domain.task.entities.cfd import CfdTask +from internal.domain.task.entities.dd import DdTask from internal.domain.task.value_objects import PrimitiveName @@ -38,4 +39,6 @@ def match_task_by_primitive_name(primitive_name: PrimitiveName): return ArTask() case PrimitiveName.cfd: return CfdTask() + case PrimitiveName.dd: + return DdTask() assert_never(primitive_name) diff --git a/internal/domain/task/entities/dd/__init__.py b/internal/domain/task/entities/dd/__init__.py new file mode 100644 index 0000000..345ce6f --- /dev/null +++ b/internal/domain/task/entities/dd/__init__.py @@ -0,0 +1 @@ +from internal.domain.task.entities.dd.dd_task import DdTask # noqa: F401 diff --git a/internal/domain/task/entities/dd/dd_task.py b/internal/domain/task/entities/dd/dd_task.py new file mode 100644 index 0000000..06aaeab --- /dev/null +++ b/internal/domain/task/entities/dd/dd_task.py @@ -0,0 +1,48 @@ +from desbordante.dd.algorithms import Split +from internal.domain.task.entities.task import Task +from internal.domain.task.value_objects import PrimitiveName, IncorrectAlgorithmName +from internal.domain.task.value_objects.dd import DdTaskConfig, DdTaskResult +from internal.domain.task.value_objects.dd import DdAlgoName, DdModel, DdAlgoResult + + +class DdTask(Task[Split, DdTaskConfig, DdTaskResult]): + """ + Task class for discovering Differential Dependencies (DD). + + This class selects the DD algorithm and converts its output into a + DdTaskResult (running the algorithm is presumably done by the Task base). + + Methods: + - _match_algo_by_name(algo_name: str) -> Split: + Return a Split instance for the given algorithm name. + - _collect_result(algo: Split) -> DdTaskResult: + Wrap the DDs reported by the algorithm into a DdTaskResult. + """ + + def _collect_result(self, algo: Split) -> DdTaskResult: + """ + Convert the DDs reported by the algorithm into a task result. + + Args: + algo (Split): DD algorithm instance whose get_dds() output is read. 
+ Returns: + DdTaskResult: Result tagged PrimitiveName.dd with one DdModel per DD. + """ + dds = algo.get_dds() + algo_result = DdAlgoResult(dds=[DdModel.from_dd(dd) for dd in dds]) + return DdTaskResult(primitive_name=PrimitiveName.dd, result=algo_result) + + def _match_algo_by_name(self, algo_name: str) -> Split: + """ + Return the DD algorithm matching the given name. + + Args: + algo_name (str): Algorithm name, compared against DdAlgoName members. + Returns: + Split: A new Split instance; other names raise IncorrectAlgorithmName. + """ + match algo_name: + case DdAlgoName.Split: + return Split() + case _: + raise IncorrectAlgorithmName(algo_name, "DD") diff --git a/internal/domain/task/value_objects/__init__.py b/internal/domain/task/value_objects/__init__.py index 5d03793..3d90e87 100644 --- a/internal/domain/task/value_objects/__init__.py +++ b/internal/domain/task/value_objects/__init__.py @@ -8,6 +8,7 @@ from internal.domain.task.value_objects.aind import AindTaskConfig, AindTaskResult from internal.domain.task.value_objects.ar import ArTaskConfig, ArTaskResult from internal.domain.task.value_objects.cfd import CfdTaskConfig, CfdTaskResult +from internal.domain.task.value_objects.dd import DdTaskConfig, DdTaskResult from internal.domain.task.value_objects.config import TaskConfig # noqa: F401 from internal.domain.task.value_objects.result import TaskResult # noqa: F401 @@ -34,6 +35,7 @@ AindTaskConfig, ArTaskConfig, CfdTaskConfig, + DdTaskConfig, ], Field(discriminator="primitive_name"), ] @@ -47,6 +49,7 @@ AindTaskResult, ArTaskResult, CfdTaskResult, + DdTaskResult, ], Field(discriminator="primitive_name"), ] diff --git a/internal/domain/task/value_objects/dd/__init__.py b/internal/domain/task/value_objects/dd/__init__.py new file mode 100644 index 0000000..67cb820 --- /dev/null +++ b/internal/domain/task/value_objects/dd/__init__.py @@ -0,0 +1,23 @@ +from typing import Literal + +from pydantic import BaseModel + +from internal.domain.task.value_objects.primitive_name import PrimitiveName +from 
internal.domain.task.value_objects.dd.algo_config import OneOfDdAlgoConfig +from internal.domain.task.value_objects.dd.algo_result import ( # noqa: F401 + DdAlgoResult, + DdModel, +) +from internal.domain.task.value_objects.dd.algo_name import DdAlgoName # noqa: F401 + + +class BaseDdTaskModel(BaseModel): + primitive_name: Literal[PrimitiveName.dd] + + +class DdTaskConfig(BaseDdTaskModel): + config: OneOfDdAlgoConfig + + +class DdTaskResult(BaseDdTaskModel): + result: DdAlgoResult diff --git a/internal/domain/task/value_objects/dd/algo_config.py b/internal/domain/task/value_objects/dd/algo_config.py new file mode 100644 index 0000000..36b72c3 --- /dev/null +++ b/internal/domain/task/value_objects/dd/algo_config.py @@ -0,0 +1,28 @@ +from typing import Literal, Annotated +from pydantic import Field +from internal.domain.common import OptionalModel +from internal.domain.task.value_objects.dd.algo_name import DdAlgoName +from internal.domain.task.value_objects.dd.algo_descriptions import descriptions + + +class BaseDdConfig(OptionalModel): + __non_optional_fields__ = { + "algo_name", + } + + +class SplitConfig(BaseDdConfig): + algo_name: Literal[DdAlgoName.Split] + + num_rows: Annotated[int, Field(ge=1, description=descriptions["num_rows"])] + num_columns: Annotated[int, Field(ge=1, description=descriptions["num_columns"])] + # TODO: difference_table is not really a string; replace `str` with a proper table type + difference_table: Annotated[ + str, Field(description=descriptions["difference_table"]) + ] + + +OneOfDdAlgoConfig = Annotated[ + SplitConfig, + Field(discriminator="algo_name"), +] diff --git a/internal/domain/task/value_objects/dd/algo_descriptions.py b/internal/domain/task/value_objects/dd/algo_descriptions.py new file mode 100644 index 0000000..10f5deb --- /dev/null +++ b/internal/domain/task/value_objects/dd/algo_descriptions.py @@ -0,0 +1,5 @@ +descriptions = { + "num_rows": "Number of rows from the dataset to process", + "num_columns": "Number of columns from the dataset to process", + "difference_table": 
"CSV table with difference limits for each column", +} diff --git a/internal/domain/task/value_objects/dd/algo_name.py b/internal/domain/task/value_objects/dd/algo_name.py new file mode 100644 index 0000000..3f7bb23 --- /dev/null +++ b/internal/domain/task/value_objects/dd/algo_name.py @@ -0,0 +1,5 @@ +from enum import StrEnum, auto + + +class DdAlgoName(StrEnum): + Split = auto() diff --git a/internal/domain/task/value_objects/dd/algo_result.py b/internal/domain/task/value_objects/dd/algo_result.py new file mode 100644 index 0000000..6c562cf --- /dev/null +++ b/internal/domain/task/value_objects/dd/algo_result.py @@ -0,0 +1,14 @@ +from pydantic import BaseModel +from desbordante.dd import DD + + +class DdModel(BaseModel): + description: str + + @classmethod + def from_dd(cls, dd: DD): + return cls(description=str(dd)) + + +class DdAlgoResult(BaseModel): + dds: list[DdModel] diff --git a/internal/domain/task/value_objects/primitive_name.py b/internal/domain/task/value_objects/primitive_name.py index 8e2709a..f0a7378 100644 --- a/internal/domain/task/value_objects/primitive_name.py +++ b/internal/domain/task/value_objects/primitive_name.py @@ -9,6 +9,7 @@ class PrimitiveName(StrEnum): ind = auto() aind = auto() cfd = auto() + dd = auto() # fd_verification = auto() # mfd_verification = auto() # statistics = auto()