Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OPIK-556: Add embedded python service POC #955

Merged
merged 2 commits into from
Dec 24, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions apps/opik-backend/opik-embedded-python-backend/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Opik Embedded Python Backend

## Requirements

- Install Python, using at least the minimum version supported by the Opik Python SDK.
- Create and enable a Python virtual environment.
- Install all dependencies from `requirements.txt`.
- For running tests, also install dependencies from `tests/test_requirements.txt`.

## Running the Flask service

> [!TIP]
> Run in debug mode during development; it reloads the code automatically.

- Change to the `apps/opik-backend/opik-embedded-python-backend` directory.
- Run the `opik_embedded_backend` module with Flask, as shown below.
- Pass `--debug` to enable debug mode.

```bash
flask --app src/opik_embedded_backend --debug run
```

The service is reachable at: `http://localhost:5000`
61 changes: 61 additions & 0 deletions apps/opik-backend/opik-embedded-python-backend/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
aiohappyeyeballs==2.4.4
aiohttp==3.11.11
aiosignal==1.3.2
annotated-types==0.7.0
anyio==4.7.0
attrs==24.3.0
blinker==1.9.0
certifi==2024.12.14
charset-normalizer==3.4.0
click==8.1.7
distro==1.9.0
filelock==3.16.1
Flask==3.1.0
frozenlist==1.5.0
fsspec==2024.12.0
h11==0.14.0
httpcore==1.0.7
httpx==0.27.2
huggingface-hub==0.27.0
idna==3.10
importlib_metadata==8.5.0
iniconfig==2.0.0
itsdangerous==2.2.0
Jinja2==3.1.4
jiter==0.8.2
jsonschema==4.23.0
jsonschema-specifications==2024.10.1
Levenshtein==0.26.1
litellm==1.55.7
markdown-it-py==3.0.0
MarkupSafe==3.0.2
mdurl==0.1.2
multidict==6.1.0
openai==1.58.1
opik==1.3.0
packaging==24.2
pluggy==1.5.0
propcache==0.2.1
pydantic==2.10.4
pydantic-settings==2.7.0
pydantic_core==2.27.2
Pygments==2.18.0
python-dotenv==1.0.1
PyYAML==6.0.2
RapidFuzz==3.11.0
referencing==0.35.1
regex==2024.11.6
requests==2.32.3
rich==13.9.4
rpds-py==0.22.3
sniffio==1.3.1
tenacity==9.0.0
tiktoken==0.8.0
tokenizers==0.21.0
tqdm==4.67.1
typing_extensions==4.12.2
urllib3==2.2.3
uuid7==0.1.0
Werkzeug==3.1.3
yarl==1.18.3
zipp==3.21.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import logging

from flask import Flask

# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)


def create_app(test_config=None):
    """Build the Flask application and register the evaluator blueprint.

    Args:
        test_config: Optional mapping of configuration values. When given, it
            is loaded into the app config; otherwise ``config.py`` is read
            from the instance folder, and a missing file is ignored.

    Returns:
        The configured ``Flask`` application.
    """
    flask_app = Flask(__name__, instance_relative_config=True)

    if test_config is not None:
        # An explicit test configuration was passed in: use it as-is.
        flask_app.config.from_mapping(test_config)
    else:
        # Not testing: pick up the instance config when the file exists.
        flask_app.config.from_pyfile('config.py', silent=True)

    # NOTE(review): imported inside the factory rather than at module top;
    # presumably to avoid an import cycle with the package - confirm.
    from opik_embedded_backend.evaluator import evaluator as evaluator_blueprint
    flask_app.register_blueprint(evaluator_blueprint)

    return flask_app
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import inspect
from types import ModuleType
from typing import Type, Union, List, Any, Dict

from flask import request, abort, jsonify, Blueprint, current_app
from opik.evaluation.metrics import BaseMetric
from opik.evaluation.metrics.score_result import ScoreResult
from werkzeug.exceptions import HTTPException

from .helpers.id_helpers import uuid4_str

# Blueprint holding the evaluator routes; everything registered on it is
# served under the /v1/private/evaluators URL prefix.
evaluator = Blueprint('evaluator', __name__, url_prefix='/v1/private/evaluators')


def get_module(code: str, module_name: Union[str, None] = None) -> ModuleType:
    """Execute ``code`` inside a fresh module object and return that module.

    Args:
        code: Python source to execute in the new module's namespace.
        module_name: Name given to the module. When omitted (``None``), a new
            random UUID string is generated on every call.

    Returns:
        The module whose ``__dict__`` was populated by running ``code``.

    Raises:
        Exception: Whatever ``exec`` raises while compiling or running
            ``code`` (for example ``SyntaxError``).
    """
    # The default is resolved here rather than in the signature: a default of
    # ``uuid4_str()`` is evaluated only once, when the function is defined,
    # so every call would silently share the same "random" module name.
    if module_name is None:
        module_name = uuid4_str()

    module: ModuleType = ModuleType(module_name)
    # SECURITY: exec() runs arbitrary code with this process's privileges.
    # The route in this file passes the request's 'code' field straight in,
    # so this must only be reachable by trusted callers or run in a sandbox.
    exec(code, module.__dict__)
    return module


def get_metric_class(module: ModuleType) -> Union[Type[BaseMetric], None]:
    """Find a concrete ``BaseMetric`` subclass among the classes of ``module``.

    Args:
        module: Module whose class members are inspected.

    Returns:
        The first class (in the name-sorted order produced by
        ``inspect.getmembers``) that subclasses ``BaseMetric`` and is not
        ``BaseMetric`` itself, or ``None`` when there is no such class.
    """
    for _, cls in inspect.getmembers(module, inspect.isclass):
        # ``issubclass(BaseMetric, BaseMetric)`` is True, and code that does
        # ``from ... import BaseMetric`` puts the base class into the module
        # namespace. Without this exclusion the base class can be returned
        # ahead of the user's subclass because members are sorted by name.
        if issubclass(cls, BaseMetric) and cls is not BaseMetric:
            return cls
    return None


def evaluate_metric(metric_class: Type[BaseMetric], data: Dict[Any, Any]) -> Union[ScoreResult, List[ScoreResult]]:
    """Instantiate ``metric_class`` with no arguments and score ``data``.

    Args:
        metric_class: Metric class to instantiate.
        data: Mapping expanded into keyword arguments of the metric's
            ``score`` method.

    Returns:
        Whatever the metric's ``score`` call returns.
    """
    return metric_class().score(**data)


def to_scores(score_result: Union[ScoreResult, List[ScoreResult]]) -> List[ScoreResult]:
    """Normalise a metric result into a list of ``ScoreResult`` objects.

    Args:
        score_result: A single ``ScoreResult``, a list, or any other value.

    Returns:
        A one-element list for a single ``ScoreResult``; for a list, only its
        ``ScoreResult`` items, in their original order; otherwise an empty
        list.
    """
    if isinstance(score_result, ScoreResult):
        return [score_result]
    if isinstance(score_result, list):
        # Items that are not ScoreResult instances are dropped.
        return [entry for entry in score_result if isinstance(entry, ScoreResult)]
    return []


@evaluator.errorhandler(400)
def bad_request(exception: HTTPException):
    """Render a 400 error on this blueprint as a JSON body.

    Args:
        exception: The HTTP exception being handled.

    Returns:
        A ``(response, status)`` pair: a JSON object with an ``error`` field
        holding the exception's string form, and the status code 400.
    """
    message = str(exception)
    body = jsonify(error=message)
    return body, 400


@evaluator.route("", methods=["POST"])
def execute_evaluator():
    """Run a metric supplied in the request and return its scores as JSON.

    The JSON body must be an object with two fields:

    * ``code``: Python source that defines a ``BaseMetric`` subclass.
    * ``data``: mapping passed as keyword arguments to the metric's ``score``.

    Returns:
        A JSON response of the form ``{"scores": [...]}``.

    Raises:
        HTTPException: 400 (via ``abort``) when the body is not a JSON object,
            a field is missing, the code cannot be executed, no metric
            subclass is found, scoring fails, or no ``ScoreResult`` is
            produced.
    """
    # The route only accepts POST, so no method check is needed here.
    payload: Any = request.get_json(force=True)
    # Valid JSON is not necessarily an object (e.g. a list or null); reject it
    # here instead of failing on ``payload.get`` with an unhandled error.
    if not isinstance(payload, dict):
        abort(400, "Request body must be a JSON object")

    code: str = payload.get("code")
    if code is None:
        abort(400, "Field 'code' is missing in the request")

    data: Dict[Any, Any] = payload.get("data")
    if data is None:
        abort(400, "Field 'data' is missing in the request")

    try:
        module: ModuleType = get_module(code)
        metric_class: Type[BaseMetric] = get_metric_class(module)
    except Exception as exception:
        # Broad catch on purpose: the submitted code may raise anything.
        current_app.logger.info("Exception getting metric class, message '%s', code '%s'", exception, code)
        abort(400, "Field 'code' contains invalid Python code")

    if metric_class is None:
        current_app.logger.info("Missing BaseMetric in code '%s'", code)
        abort(400,
              "Field 'code' in the request doesn't contain a subclass implementation of 'opik.evaluation.metrics.BaseMetric'")

    try:
        score_result: Union[ScoreResult, List[ScoreResult]] = evaluate_metric(metric_class, data)
    except Exception as exception:
        # Broad catch on purpose: instantiating or scoring runs submitted code.
        current_app.logger.info("Exception evaluating metric, message '%s', data '%s', code '%s'",
                                exception, data, code)
        abort(400, "The provided 'code' and 'data' fields can't be evaluated")

    scores: List[ScoreResult] = to_scores(score_result)
    if not scores:
        current_app.logger.info("Missing ScoreResult in code '%s'", code)
        abort(400, "The provided 'code' field didn't return any 'opik.evaluation.metrics.ScoreResult'")

    return jsonify({"scores": scores})
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import uuid


def uuid4_str() -> str:
    """Return a newly generated random (version 4) UUID as a string."""
    generated = uuid.uuid4()
    return str(generated)
27 changes: 27 additions & 0 deletions apps/opik-backend/opik-embedded-python-backend/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import pytest

from opik_embedded_backend import create_app


@pytest.fixture()
def app():
    """Yield an application built by ``create_app`` with ``TESTING`` enabled."""
    flask_app = create_app()
    flask_app.config.update(TESTING=True)

    # other setup can go here

    yield flask_app

    # clean up / reset resources here


@pytest.fixture()
def client(app):
    """Return a test client created from the ``app`` fixture."""
    test_client = app.test_client()
    return test_client


@pytest.fixture()
def runner(app):
    """Return a CLI test runner created from the ``app`` fixture."""
    cli_runner = app.test_cli_runner()
    return cli_runner
Loading
Loading