diff --git a/.env.example b/.env.example index 372fc79..b65c990 100644 --- a/.env.example +++ b/.env.example @@ -1,13 +1,13 @@ POSTGRES_DIALECT_DRIVER=postgresql+psycopg POSTGRES_USER=admin POSTGRES_PASSWORD=admin -POSTGRES_HOST=postgres +POSTGRES_HOST=localhost POSTGRES_DB=desbordante POSTGRES_PORT=5432 RABBITMQ_DEFAULT_USER=guest RABBITMQ_DEFAULT_PASSWORD=guest -RABBITMQ_HOST=rabbitmq +RABBITMQ_HOST=localhost RABBITMQ_PORT=5672 RABBITMQ_HTTP_PORT=15672 @@ -16,4 +16,4 @@ UPLOADED_FILES_DIR_PATH=/volumes/uploads FLOWER_USER=admin FLOWER_PASSWORD=admin -FLOWER_PORT=5555 +FLOWER_PORT=5555 \ No newline at end of file diff --git a/dev-docker-compose.yaml b/dev-docker-compose.yaml index 817bd57..ccf21aa 100644 --- a/dev-docker-compose.yaml +++ b/dev-docker-compose.yaml @@ -45,6 +45,9 @@ services: - ./volumes/uploads:${UPLOADED_FILES_DIR_PATH} env_file: - .env + environment: + - RABBITMQ_HOST=rabbitmq + - POSTGRES_HOST=postgres depends_on: - rabbitmq restart: always @@ -58,6 +61,9 @@ services: - ./volumes/uploads:${UPLOADED_FILES_DIR_PATH} env_file: - .env + environment: + - RABBITMQ_HOST=rabbitmq + - POSTGRES_HOST=postgres depends_on: - rabbitmq - celery diff --git a/internal/domain/task/__init__.py b/internal/domain/task/__init__.py index 8a37c34..281d711 100644 --- a/internal/domain/task/__init__.py +++ b/internal/domain/task/__init__.py @@ -1,2 +1,3 @@ from internal.domain.task.entities import FdTask # noqa: F401 from internal.domain.task.entities import AfdTask # noqa: F401 +from internal.domain.task.entities import AcTask # noqa: F401 diff --git a/internal/domain/task/entities/__init__.py b/internal/domain/task/entities/__init__.py index 975311f..7639e55 100644 --- a/internal/domain/task/entities/__init__.py +++ b/internal/domain/task/entities/__init__.py @@ -2,6 +2,7 @@ from internal.domain.task.entities.fd import FdTask from internal.domain.task.entities.afd import AfdTask +from internal.domain.task.entities.ac import AcTask from internal.domain.task.value_objects import PrimitiveName @@ -23,4 +24,6 @@ def match_task_by_primitive_name(primitive_name: PrimitiveName): return FdTask() case PrimitiveName.afd: return AfdTask() + case PrimitiveName.ac: + return AcTask() assert_never(primitive_name) diff --git a/internal/domain/task/entities/ac/__init__.py b/internal/domain/task/entities/ac/__init__.py new file mode 100644 index 0000000..c887a8a --- /dev/null +++ b/internal/domain/task/entities/ac/__init__.py @@ -0,0 +1 @@ +from internal.domain.task.entities.ac.ac_task import AcTask # noqa: F401 diff --git a/internal/domain/task/entities/ac/ac_task.py b/internal/domain/task/entities/ac/ac_task.py new file mode 100644 index 0000000..ac754be --- /dev/null +++ b/internal/domain/task/entities/ac/ac_task.py @@ -0,0 +1,60 @@ +from desbordante.ac.algorithms import AcAlgorithm +from desbordante.ac.algorithms import Default + +from internal.domain.task.entities.task import Task +from internal.domain.task.value_objects import PrimitiveName, IncorrectAlgorithmName +from internal.domain.task.value_objects.ac import AcTaskConfig, AcTaskResult +from internal.domain.task.value_objects.ac import ( + AcAlgoName, + AcModel, + AcAlgoResult, + AcExceptionModel, +) + + +class AcTask(Task[AcAlgorithm, AcTaskConfig, AcTaskResult]): + """ + Task class for Approximate Consistency (AC) profiling. + + This class handles the execution of different AC algorithms and processes + the results into the appropriate format. It implements the abstract methods + defined in the Task base class. + + Methods: + - _match_algo_by_name(algo_name: AcAlgoName) -> AcAlgorithm: + Match AC algorithm by its name. + - _collect_result(algo: AcAlgorithm) -> AcTaskResult: + Process the output of the AC algorithm and return the result. + """ + + def _collect_result(self, algo: AcAlgorithm) -> AcTaskResult: + """ + Collect and process the AC result. + + Args: + algo (AcAlgorithm): AC algorithm to process. + Returns: + AcTaskResult: The processed result containing AC ranges and exceptions. + """ + ac_ranges = algo.get_ac_ranges() + ac_exceptions = algo.get_ac_exceptions() + algo_result = AcAlgoResult( + ranges=list(map(AcModel.from_ac_range, ac_ranges)), + exceptions=list(map(AcExceptionModel.from_ac_exception, ac_exceptions)), + ) + return AcTaskResult(primitive_name=PrimitiveName.ac, result=algo_result) + + def _match_algo_by_name(self, algo_name: str) -> AcAlgorithm: + """ + Match the approximate consistency algorithm by name. + + Args: + algo_name (AcAlgoName): The name of the AC algorithm. + Returns: + AcAlgorithm: The corresponding algorithm instance. + """ + match algo_name: + case AcAlgoName.Default: + return Default() + case _: + raise IncorrectAlgorithmName(algo_name, "AC") diff --git a/internal/domain/task/value_objects/__init__.py b/internal/domain/task/value_objects/__init__.py index fb8c415..36447df 100644 --- a/internal/domain/task/value_objects/__init__.py +++ b/internal/domain/task/value_objects/__init__.py @@ -3,6 +3,7 @@ from internal.domain.task.value_objects.afd import AfdTaskConfig, AfdTaskResult from internal.domain.task.value_objects.fd import FdTaskConfig, FdTaskResult +from internal.domain.task.value_objects.ac import AcTaskConfig, AcTaskResult from internal.domain.task.value_objects.config import TaskConfig # noqa: F401 from internal.domain.task.value_objects.result import TaskResult # noqa: F401 @@ -21,17 +22,11 @@ ) OneOfTaskConfig = Annotated[ - Union[ - FdTaskConfig, - AfdTaskConfig, - ], + Union[FdTaskConfig, AfdTaskConfig, AcTaskConfig], Field(discriminator="primitive_name"), ] OneOfTaskResult = Annotated[ - Union[ - FdTaskResult, - AfdTaskResult, - ], + Union[FdTaskResult, AfdTaskResult, AcTaskResult], Field(discriminator="primitive_name"), ] diff --git a/internal/domain/task/value_objects/ac/__init__.py b/internal/domain/task/value_objects/ac/__init__.py new file mode 100644 index 0000000..1e3b8d1 --- /dev/null +++ b/internal/domain/task/value_objects/ac/__init__.py @@ -0,0 +1,24 @@ +from typing import Literal + +from pydantic import BaseModel + +from internal.domain.task.value_objects.primitive_name import PrimitiveName +from internal.domain.task.value_objects.ac.algo_config import OneOfAcAlgoConfig +from internal.domain.task.value_objects.ac.algo_result import ( # noqa: F401 + AcAlgoResult, + AcModel, + AcExceptionModel, +) +from internal.domain.task.value_objects.ac.algo_name import AcAlgoName # noqa: F401 + + +class BaseAcTaskModel(BaseModel): + primitive_name: Literal[PrimitiveName.ac] + + +class AcTaskConfig(BaseAcTaskModel): + config: OneOfAcAlgoConfig + + +class AcTaskResult(BaseAcTaskModel): + result: AcAlgoResult diff --git a/internal/domain/task/value_objects/ac/algo_config.py b/internal/domain/task/value_objects/ac/algo_config.py new file mode 100644 index 0000000..0854598 --- /dev/null +++ b/internal/domain/task/value_objects/ac/algo_config.py @@ -0,0 +1,33 @@ +from typing import Literal, Annotated +from pydantic import Field +from internal.domain.common import OptionalModel +from internal.domain.task.value_objects.ac.algo_name import AcAlgoName +from internal.domain.task.value_objects.ac.algo_descriptions import descriptions + + +class BaseAcConfig(OptionalModel): + __non_optional_fields__ = { + "algo_name", + } + + +class DefaultAcConfig(BaseAcConfig): + algo_name: Literal[AcAlgoName.Default] + + bin_operation: Annotated[str, Field(description=descriptions["bin_operation"])] + fuzziness: Annotated[ + float, Field(ge=0.1, le=1.0, description=descriptions["fuzziness"]) + ] + p_fuzz: Annotated[float, Field(ge=0.1, le=1.0, description=descriptions["p_fuzz"])] + weight: Annotated[float, Field(ge=0.1, le=1.0, description=descriptions["weight"])] + bumps_limit: Annotated[int, Field(ge=0, description=descriptions["bumps_limit"])] + iterations_limit: Annotated[ + int, Field(ge=1, description=descriptions["iterations_limit"]) + ] + ac_seed: Annotated[int, Field(description=descriptions["ac_seed"])] + + +OneOfAcAlgoConfig = Annotated[ + DefaultAcConfig, + Field(discriminator="algo_name"), +] diff --git a/internal/domain/task/value_objects/ac/algo_descriptions.py b/internal/domain/task/value_objects/ac/algo_descriptions.py new file mode 100644 index 0000000..65b77d2 --- /dev/null +++ b/internal/domain/task/value_objects/ac/algo_descriptions.py @@ -0,0 +1,9 @@ +descriptions = { + "bin_operation": "Binary operation: one of available operations: /, *, +, -", + "fuzziness": "Fraction of exceptional records, lies in (0, 1]", + "p_fuzz": "Probability, the fraction of exceptional records outside the bump intervals", + "weight": "Value lies in (0, 1]. Closer to 0 - many short intervals, closer to 1 - small number of long intervals", + "bumps_limit": "Maximum number of intervals considered. Pass 0 to remove limit", + "iterations_limit": "Limit for iterations of sampling", + "ac_seed": "Seed for random number generation in data sampling", +} diff --git a/internal/domain/task/value_objects/ac/algo_name.py b/internal/domain/task/value_objects/ac/algo_name.py new file mode 100644 index 0000000..6c72be7 --- /dev/null +++ b/internal/domain/task/value_objects/ac/algo_name.py @@ -0,0 +1,5 @@ +from enum import StrEnum, auto + + +class AcAlgoName(StrEnum): + Default = auto() diff --git a/internal/domain/task/value_objects/ac/algo_result.py b/internal/domain/task/value_objects/ac/algo_result.py new file mode 100644 index 0000000..dca4ac3 --- /dev/null +++ b/internal/domain/task/value_objects/ac/algo_result.py @@ -0,0 +1,27 @@ +from pydantic import BaseModel +from desbordante.ac import ACRanges, ACException + + +class AcModel(BaseModel): + @classmethod + def from_ac_range(cls, ac_range: ACRanges): + return cls(column_indices=ac_range.column_indices, ranges=ac_range.ranges) + + column_indices: tuple[int, int] + ranges: list[tuple[float, float]] + + +class AcExceptionModel(BaseModel): + @classmethod + def from_ac_exception(cls, ac_exception: ACException): + return cls( + row_index=ac_exception.row_index, column_pairs=ac_exception.column_pairs + ) + + row_index: int + column_pairs: list[tuple[int, int]] + + +class AcAlgoResult(BaseModel): + ranges: list[AcModel] + exceptions: list[AcExceptionModel] diff --git a/internal/domain/task/value_objects/primitive_name.py b/internal/domain/task/value_objects/primitive_name.py index 1959867..f09e8b4 100644 --- a/internal/domain/task/value_objects/primitive_name.py +++ b/internal/domain/task/value_objects/primitive_name.py @@ -5,7 +5,7 @@ class PrimitiveName(StrEnum): fd = auto() afd = auto() # ar = auto() - # ac = auto() + ac = auto() # fd_verification = auto() # mfd_verification = auto() # statistics = auto()