From e5b1af0078cd895f11f63c1f691b4566f89350d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacques=20Verr=C3=A9?= Date: Thu, 12 Sep 2024 23:25:23 +0100 Subject: [PATCH] Update to docs (#232) --- CONTRIBUTING.md | 3 +- .../cookbook/evaluate_moderation_metric.ipynb | 84 ++----------------- .../cookbook/evaluate_moderation_metric.md | 30 ------- .../documentation/docs/cookbook/ragas.ipynb | 56 +++++++++---- .../documentation/docs/cookbook/ragas.md | 56 +++++++++---- .../documentation/docs/quickstart.md | 2 +- .../docs/tracing/integrations/ragas.md | 46 +++++++--- .../docs/tracing/log_distributed_traces.md | 6 +- .../documentation/docs/tracing/log_traces.md | 31 +++---- .../documentation/sidebars.ts | 11 ++- .../python-sdk-docs/source/Objects/Span.rst | 1 + .../source/Objects/SpanData.rst | 5 ++ .../python-sdk-docs/source/Objects/Trace.rst | 1 + .../source/Objects/TraceData.rst | 5 ++ .../python-sdk-docs/source/Opik.rst | 1 + .../python-sdk-docs/source/conf.py | 3 +- .../source/evaluation/Dataset.rst | 3 +- .../source/evaluation/DatasetItem.rst | 3 +- .../source/evaluation/metrics/BaseMetric.rst | 6 ++ .../source/evaluation/metrics/index.rst | 2 + .../python-sdk-docs/source/index.rst | 3 + .../source/opik_context/get_current_span.rst | 4 - .../opik_context/get_current_span_data.rst | 4 + .../source/opik_context/get_current_trace.rst | 4 - .../opik_context/get_current_trace_data.rst | 4 + .../get_distributed_trace_headers.rst | 4 + .../source/opik_context/index.rst | 40 +++++++-- .../opik_context/update_current_span.rst | 4 + .../opik_context/update_current_trace.rst | 4 + .../src/opik/api_objects/opik_client.py | 2 +- sdks/python/src/opik/api_objects/span.py | 5 +- sdks/python/src/opik/api_objects/trace.py | 6 +- .../src/opik/evaluation/metrics/__init__.py | 2 + .../opik/evaluation/metrics/base_metric.py | 23 +++++ sdks/python/src/opik/opik_context.py | 28 ++++++- 35 files changed, 290 insertions(+), 202 deletions(-) create mode 100644 
apps/opik-documentation/python-sdk-docs/source/Objects/SpanData.rst create mode 100644 apps/opik-documentation/python-sdk-docs/source/Objects/TraceData.rst create mode 100644 apps/opik-documentation/python-sdk-docs/source/evaluation/metrics/BaseMetric.rst delete mode 100644 apps/opik-documentation/python-sdk-docs/source/opik_context/get_current_span.rst create mode 100644 apps/opik-documentation/python-sdk-docs/source/opik_context/get_current_span_data.rst delete mode 100644 apps/opik-documentation/python-sdk-docs/source/opik_context/get_current_trace.rst create mode 100644 apps/opik-documentation/python-sdk-docs/source/opik_context/get_current_trace_data.rst create mode 100644 apps/opik-documentation/python-sdk-docs/source/opik_context/get_distributed_trace_headers.rst create mode 100644 apps/opik-documentation/python-sdk-docs/source/opik_context/update_current_span.rst create mode 100644 apps/opik-documentation/python-sdk-docs/source/opik_context/update_current_trace.rst diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1a6669a027..6515a7e98b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -136,8 +136,7 @@ minikube stop ``` Next time you will start the minikube, it will run everything with the same configuration and data you had before. 
- -### Contributing to the documentation +### Contributing to the documentation The documentation is made up of three main parts: diff --git a/apps/opik-documentation/documentation/docs/cookbook/evaluate_moderation_metric.ipynb b/apps/opik-documentation/documentation/docs/cookbook/evaluate_moderation_metric.ipynb index fcda92c93b..2976958537 100644 --- a/apps/opik-documentation/documentation/docs/cookbook/evaluate_moderation_metric.ipynb +++ b/apps/opik-documentation/documentation/docs/cookbook/evaluate_moderation_metric.ipynb @@ -6,8 +6,6 @@ "source": [ "# Evaluating Opik's Moderation Metric\n", "\n", - "*This cookbook was created from a Jypyter notebook which can be found [here](TBD).*\n", - "\n", "For this guide we will be evaluating the Moderation metric included in the LLM Evaluation SDK which will showcase both how to use the `evaluation` functionality in the platform as well as the quality of the Moderation metric included in the SDK." ] }, @@ -24,7 +22,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -46,7 +44,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -65,24 +63,16 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "%pip install opik --upgrade --quiet" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -102,17 +92,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "status_code: 409, body: {'errors': ['Dataset already exists']}\n" - ] - } - ], + "outputs": [], 
"source": [ "# Create dataset\n", "from opik import Opik, DatasetItem\n", @@ -173,57 +155,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Evaluation: 100%|██████████| 50/50 [00:06<00:00, 8.09it/s]\n" - ] - }, - { - "data": { - "text/html": [ - "
╭─ OpenAIModerationDataset (50 samples) ─╮\n",
-       "│                                        │\n",
-       "│ Total time:        00:00:06            │\n",
-       "│ Number of samples: 50                  │\n",
-       "│                                        │\n",
-       "│ Correct moderation score: 0.8400 (avg) │\n",
-       "│                                        │\n",
-       "╰────────────────────────────────────────╯\n",
-       "
\n" - ], - "text/plain": [ - "╭─ OpenAIModerationDataset (50 samples) ─╮\n", - "│ │\n", - "│ \u001b[1mTotal time: \u001b[0m 00:00:06 │\n", - "│ \u001b[1mNumber of samples:\u001b[0m 50 │\n", - "│ │\n", - "│ \u001b[1;32mCorrect moderation score: 0.8400 (avg)\u001b[0m │\n", - "│ │\n", - "╰────────────────────────────────────────╯\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Uploading results to Opik ... \n",
-       "
\n" - ], - "text/plain": [ - "Uploading results to Opik \u001b[33m...\u001b[0m \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "from opik.evaluation.metrics import Moderation, Equals\n", "from opik.evaluation import evaluate\n", diff --git a/apps/opik-documentation/documentation/docs/cookbook/evaluate_moderation_metric.md b/apps/opik-documentation/documentation/docs/cookbook/evaluate_moderation_metric.md index 66b262badd..35efae80a3 100644 --- a/apps/opik-documentation/documentation/docs/cookbook/evaluate_moderation_metric.md +++ b/apps/opik-documentation/documentation/docs/cookbook/evaluate_moderation_metric.md @@ -1,7 +1,5 @@ # Evaluating Opik's Moderation Metric -*This cookbook was created from a Jypyter notebook which can be found [here](TBD).* - For this guide we will be evaluating the Moderation metric included in the LLM Evaluation SDK which will showcase both how to use the `evaluation` functionality in the platform as well as the quality of the Moderation metric included in the SDK. ## Creating an account on Comet.com @@ -38,9 +36,6 @@ First, we will install the necessary libraries and configure the OpenAI API key %pip install opik --upgrade --quiet ``` - Note: you may need to restart the kernel to use updated packages. - - ```python import os @@ -95,9 +90,6 @@ except Exception as e: print(e) ``` - status_code: 409, body: {'errors': ['Dataset already exists']} - - ## Evaluating the moderation metric In order to evaluate the performance of the Opik moderation metric, we will define: @@ -153,28 +145,6 @@ res = evaluate( ) ``` - Evaluation: 100%|██████████| 50/50 [00:06<00:00, 8.09it/s] - - - -
╭─ OpenAIModerationDataset (50 samples) ─╮
-│                                        │
-│ Total time:        00:00:06            │
-│ Number of samples: 50                  │
-│                                        │
-│ Correct moderation score: 0.8400 (avg) │
-│                                        │
-╰────────────────────────────────────────╯
-
- - - - -
Uploading results to Opik ... 
-
- - - We are able to detect ~85% of moderation violations, this can be improved further by providing some additional examples to the model. We can view a breakdown of the results in the Opik UI: ![Moderation Evaluation](https://raw.githubusercontent.com/comet-ml/opik/main/apps/opik-documentation/documentation/static/img/cookbook/moderation_metric_cookbook.png) diff --git a/apps/opik-documentation/documentation/docs/cookbook/ragas.ipynb b/apps/opik-documentation/documentation/docs/cookbook/ragas.ipynb index 9c232140fc..a2e20e60ce 100644 --- a/apps/opik-documentation/documentation/docs/cookbook/ragas.ipynb +++ b/apps/opik-documentation/documentation/docs/cookbook/ragas.ipynb @@ -149,28 +149,34 @@ "source": [ "import asyncio\n", "from ragas.integrations.opik import OpikTracer\n", + "from ragas.dataset_schema import SingleTurnSample\n", + "\n", "\n", "# Define the scoring function\n", - "def compute_metric(opik_tracer, metric, row):\n", + "def compute_metric(metric, row):\n", + " row = SingleTurnSample(**row)\n", + "\n", + " opik_tracer = OpikTracer()\n", + "\n", " async def get_score(opik_tracer, metric, row):\n", - " score = await metric.ascore(row, callbacks=[opik_tracer])\n", + " score = await metric.single_turn_ascore(row, callbacks=[OpikTracer()])\n", " return score\n", "\n", " # Run the async function using the current event loop\n", " loop = asyncio.get_event_loop()\n", - " \n", + "\n", " result = loop.run_until_complete(get_score(opik_tracer, metric, row))\n", " return result\n", "\n", + "\n", "# Score a simple example\n", "row = {\n", - " \"question\": \"What is the capital of France?\",\n", - " \"answer\": \"Paris\",\n", - " \"contexts\": [\"Paris is the capital of France.\", \"Paris is in France.\"]\n", + " \"user_input\": \"What is the capital of France?\",\n", + " \"response\": \"Paris\",\n", + " \"retrieved_contexts\": [\"Paris is the capital of France.\", \"Paris is in France.\"],\n", "}\n", "\n", - "opik_tracer = OpikTracer()\n", - "score = 
compute_metric(opik_tracer, answer_relevancy_metric, row)\n", + "score = compute_metric(answer_relevancy_metric, row)\n", "print(\"Answer Relevancy score:\", score)" ] }, @@ -182,7 +188,7 @@ "\n", "#### Score traces\n", "\n", - "You can score traces by using the `get_current_trace` function to get the current trace and then calling the `log_feedback_score` function.\n", + "You can score traces by using the `update_current_trace` function.\n", "\n", "The advantage of this approach is that the scoring span is added to the trace allowing for a more fine-grained analysis of the RAG pipeline. It will however run the Ragas metric calculation synchronously and so might not be suitable for production use-cases." ] @@ -193,38 +199,43 @@ "metadata": {}, "outputs": [], "source": [ - "from opik import track\n", - "from opik.opik_context import get_current_trace\n", + "from opik import track, opik_context\n", + "\n", "\n", "@track\n", "def retrieve_contexts(question):\n", " # Define the retrieval function, in this case we will hard code the contexts\n", " return [\"Paris is the capital of France.\", \"Paris is in France.\"]\n", "\n", + "\n", "@track\n", "def answer_question(question, contexts):\n", " # Define the answer function, in this case we will hard code the answer\n", " return \"Paris\"\n", "\n", + "\n", "@track(name=\"Compute Ragas metric score\", capture_input=False)\n", "def compute_rag_score(answer_relevancy_metric, question, answer, contexts):\n", " # Define the score function\n", - " row = {\"question\": question, \"answer\": answer, \"contexts\": contexts}\n", + " row = {\"user_input\": question, \"response\": answer, \"retrieved_contexts\": contexts}\n", " score = compute_metric(answer_relevancy_metric, row)\n", " return score\n", "\n", + "\n", "@track\n", "def rag_pipeline(question):\n", " # Define the pipeline\n", " contexts = retrieve_contexts(question)\n", " answer = answer_question(question, contexts)\n", "\n", - " trace = get_current_trace()\n", " score = 
compute_rag_score(answer_relevancy_metric, question, answer, contexts)\n", - " trace.log_feedback_score(\"answer_relevancy\", round(score, 4), category_name=\"ragas\")\n", - " \n", + " opik_context.update_current_trace(\n", + " feedback_scores=[{\"name\": \"answer_relevancy\", \"value\": round(score, 4)}]\n", + " )\n", + "\n", " return answer\n", "\n", + "\n", "rag_pipeline(\"What is the capital of France?\")" ] }, @@ -252,12 +263,23 @@ "\n", "fiqa_eval = load_dataset(\"explodinggradients/fiqa\", \"ragas_eval\")\n", "\n", + "# Reformat the dataset to match the schema expected by the Ragas evaluate function\n", + "dataset = fiqa_eval[\"baseline\"].select(range(3))\n", + "\n", + "dataset = dataset.map(\n", + " lambda x: {\n", + " \"user_input\": x[\"question\"],\n", + " \"reference\": x[\"ground_truths\"][0],\n", + " \"retrieved_contexts\": x[\"contexts\"],\n", + " }\n", + ")\n", + "\n", "opik_tracer_eval = OpikTracer(tags=[\"ragas_eval\"], metadata={\"evaluation_run\": True})\n", "\n", "result = evaluate(\n", - " fiqa_eval[\"baseline\"].select(range(3)),\n", + " dataset,\n", " metrics=[context_precision, faithfulness, answer_relevancy],\n", - " callbacks=[opik_tracer_eval]\n", + " callbacks=[opik_tracer_eval],\n", ")\n", "\n", "print(result)" diff --git a/apps/opik-documentation/documentation/docs/cookbook/ragas.md b/apps/opik-documentation/documentation/docs/cookbook/ragas.md index 6c32796a41..3288c74a24 100644 --- a/apps/opik-documentation/documentation/docs/cookbook/ragas.md +++ b/apps/opik-documentation/documentation/docs/cookbook/ragas.md @@ -93,28 +93,34 @@ nest_asyncio.apply() ```python import asyncio from ragas.integrations.opik import OpikTracer +from ragas.dataset_schema import SingleTurnSample + # Define the scoring function -def compute_metric(opik_tracer, metric, row): +def compute_metric(metric, row): + row = SingleTurnSample(**row) + + opik_tracer = OpikTracer() + async def get_score(opik_tracer, metric, row): - score = await metric.ascore(row, 
callbacks=[opik_tracer]) + score = await metric.single_turn_ascore(row, callbacks=[OpikTracer()]) return score # Run the async function using the current event loop loop = asyncio.get_event_loop() - + result = loop.run_until_complete(get_score(opik_tracer, metric, row)) return result + # Score a simple example row = { - "question": "What is the capital of France?", - "answer": "Paris", - "contexts": ["Paris is the capital of France.", "Paris is in France."] + "user_input": "What is the capital of France?", + "response": "Paris", + "retrieved_contexts": ["Paris is the capital of France.", "Paris is in France."], } -opik_tracer = OpikTracer() -score = compute_metric(opik_tracer, answer_relevancy_metric, row) +score = compute_metric(answer_relevancy_metric, row) print("Answer Relevancy score:", score) ``` @@ -122,44 +128,49 @@ If you now navigate to Opik, you will be able to see that a new trace has been c #### Score traces -You can score traces by using the `get_current_trace` function to get the current trace and then calling the `log_feedback_score` function. +You can score traces by using the `update_current_trace` function. The advantage of this approach is that the scoring span is added to the trace allowing for a more fine-grained analysis of the RAG pipeline. It will however run the Ragas metric calculation synchronously and so might not be suitable for production use-cases. 
```python -from opik import track -from opik.opik_context import get_current_trace +from opik import track, opik_context + @track def retrieve_contexts(question): # Define the retrieval function, in this case we will hard code the contexts return ["Paris is the capital of France.", "Paris is in France."] + @track def answer_question(question, contexts): # Define the answer function, in this case we will hard code the answer return "Paris" + @track(name="Compute Ragas metric score", capture_input=False) def compute_rag_score(answer_relevancy_metric, question, answer, contexts): # Define the score function - row = {"question": question, "answer": answer, "contexts": contexts} + row = {"user_input": question, "response": answer, "retrieved_contexts": contexts} score = compute_metric(answer_relevancy_metric, row) return score + @track def rag_pipeline(question): # Define the pipeline contexts = retrieve_contexts(question) answer = answer_question(question, contexts) - trace = get_current_trace() score = compute_rag_score(answer_relevancy_metric, question, answer, contexts) - trace.log_feedback_score("answer_relevancy", round(score, 4), category_name="ragas") - + opik_context.update_current_trace( + feedback_scores=[{"name": "answer_relevancy", "value": round(score, 4)}] + ) + return answer + rag_pipeline("What is the capital of France?") ``` @@ -178,12 +189,23 @@ from ragas.integrations.opik import OpikTracer fiqa_eval = load_dataset("explodinggradients/fiqa", "ragas_eval") +# Reformat the dataset to match the schema expected by the Ragas evaluate function +dataset = fiqa_eval["baseline"].select(range(3)) + +dataset = dataset.map( + lambda x: { + "user_input": x["question"], + "reference": x["ground_truths"][0], + "retrieved_contexts": x["contexts"], + } +) + opik_tracer_eval = OpikTracer(tags=["ragas_eval"], metadata={"evaluation_run": True}) result = evaluate( - fiqa_eval["baseline"].select(range(3)), + dataset, metrics=[context_precision, faithfulness, 
answer_relevancy], - callbacks=[opik_tracer_eval] + callbacks=[opik_tracer_eval], ) print(result) diff --git a/apps/opik-documentation/documentation/docs/quickstart.md b/apps/opik-documentation/documentation/docs/quickstart.md index 5ad8444b7e..270239aaff 100644 --- a/apps/opik-documentation/documentation/docs/quickstart.md +++ b/apps/opik-documentation/documentation/docs/quickstart.md @@ -49,6 +49,6 @@ def your_llm_application(input): return output ``` -To learn more about the `track` decorator, see the [track documentation](/tracing/log_traces.md#log-using-function-annotators). Once the traces are logged, you can view them in the OPIK UI: +To learn more about the `track` decorator, see the [track documentation](/tracing/log_traces.md#logging-traces-and-spans). Once the traces are logged, you can view them in the OPIK UI: ![Opik Traces](/img/home/traces_page_for_quickstart.png) diff --git a/apps/opik-documentation/documentation/docs/tracing/integrations/ragas.md b/apps/opik-documentation/documentation/docs/tracing/integrations/ragas.md index 8d4fac4a83..3d432cd8d5 100644 --- a/apps/opik-documentation/documentation/docs/tracing/integrations/ragas.md +++ b/apps/opik-documentation/documentation/docs/tracing/integrations/ragas.md @@ -36,17 +36,22 @@ from ragas.integrations.opik import OpikTracer # Initialize the Ragas metric llm = LangchainLLMWrapper(ChatOpenAI()) emb = LangchainEmbeddingsWrapper(OpenAIEmbeddings()) +answer_relevancy_metric = AnswerRelevancy(llm=llm, embeddings=emb) # Define the scoring function -def compute_metric(opik_tracer, metric, row): - async def get_score(): - score = await metric.ascore(row, callbacks=[opik_tracer]) +def compute_metric(metric, row): + row = SingleTurnSample(**row) + + opik_tracer = OpikTracer() + + async def get_score(opik_tracer, metric, row): + score = await metric.single_turn_ascore(row, callbacks=[OpikTracer()]) return score # Run the async function using the current event loop loop = asyncio.get_event_loop() - - result = 
loop.run_until_complete(get_score()) + + result = loop.run_until_complete(get_score(opik_tracer, metric, row)) return result ``` @@ -54,37 +59,43 @@ Once the `compute_metric` function is defined, you can use it to score a trace o ```python from opik import track -from opik.opik_context import get_current_trace +from opik.opik_context import update_current_trace + @track def retrieve_contexts(question): # Define the retrieval function, in this case we will hard code the contexts return ["Paris is the capital of France.", "Paris is in France."] + @track def answer_question(question, contexts): # Define the answer function, in this case we will hard code the answer return "Paris" + @track(name="Compute Ragas metric score", capture_input=False) def compute_rag_score(answer_relevancy_metric, question, answer, contexts): # Define the score function - row = {"question": question, "answer": answer, "contexts": contexts} + row = {"user_input": question, "response": answer, "retrieved_contexts": contexts} score = compute_metric(answer_relevancy_metric, row) return score + @track def rag_pipeline(question): # Define the pipeline contexts = retrieve_contexts(question) answer = answer_question(question, contexts) - trace = get_current_trace() score = compute_rag_score(answer_relevancy_metric, question, answer, contexts) - trace.log_feedback_score("answer_relevancy", round(score, 4), category_name="ragas") - + update_current_trace( + feedback_scores=[{"name": "answer_relevancy", "value": round(score, 4)}] + ) + return answer + rag_pipeline("What is the capital of France?") ``` @@ -117,12 +128,23 @@ from ragas.integrations.opik import OpikTracer fiqa_eval = load_dataset("explodinggradients/fiqa", "ragas_eval") +# Reformat the dataset to match the schema expected by the Ragas evaluate function +dataset = fiqa_eval["baseline"].select(range(3)) + +dataset = dataset.map( + lambda x: { + "user_input": x["question"], + "reference": x["ground_truths"][0], + "retrieved_contexts": 
x["contexts"], + } +) + opik_tracer_eval = OpikTracer(tags=["ragas_eval"], metadata={"evaluation_run": True}) result = evaluate( - fiqa_eval["baseline"].select(range(3)), + dataset, metrics=[context_precision, faithfulness, answer_relevancy], - callbacks=[opik_tracer_eval] + callbacks=[opik_tracer_eval], ) print(result) diff --git a/apps/opik-documentation/documentation/docs/tracing/log_distributed_traces.md b/apps/opik-documentation/documentation/docs/tracing/log_distributed_traces.md index 7cbe44291c..b05b365f02 100644 --- a/apps/opik-documentation/documentation/docs/tracing/log_distributed_traces.md +++ b/apps/opik-documentation/documentation/docs/tracing/log_distributed_traces.md @@ -14,16 +14,14 @@ For the purposes of this guide, we will assume that you have a simple LLM applic The Python SDK includes some helper functions to make it easier to fetch headers in the client and ingest them in the server: ```python title="client.py" -from opik import track -from opik.opik_context import get_current_span +from opik import track, opik_context @track() def my_client_function(prompt: str) -> str: headers = {} # Update the headers to include Opik Trace ID and Span ID - current_span = get_current_span() - headers.update(current_span.get_distributed_trace_headers()) + headers.update(opik_context.get_distributed_trace_headers()) # Make call to backend service response = requests.post("http://.../generate_response", headers=headers, json={"prompt": prompt}) diff --git a/apps/opik-documentation/documentation/docs/tracing/log_traces.md b/apps/opik-documentation/documentation/docs/tracing/log_traces.md index e5e13c6ee4..cec2c5a786 100644 --- a/apps/opik-documentation/documentation/docs/tracing/log_traces.md +++ b/apps/opik-documentation/documentation/docs/tracing/log_traces.md @@ -23,7 +23,7 @@ Opik has a number of integrations for popular LLM frameworks like LangChain or O ## Log using function decorators -### Logging traces and spans +### Logging traces and spans If you are 
manually defining your LLM chains and not using LangChain for example, you can use the `track` function decorators to track LLM calls: @@ -63,7 +63,7 @@ print(result) ``` :::tip - If the `track` function decorators are used in conjunction with the `track_openai` or `CometTracer` callbacks, the LLM calls will be automatically logged to the corresponding trace. +If the `track` function decorators are used in conjunction with the `track_openai` or `CometTracer` callbacks, the LLM calls will be automatically logged to the corresponding trace. ::: ### Capturing inputs and ouputs @@ -122,11 +122,10 @@ trace.end() ## Update trace and span attributes -You can access the Trace and Span objects to update their attributes. This is useful if you want to update the metadata attributes or log scores to a trace or span during the execution of the trace. This is achieved by using the `get_current_trace` and `get_current_span` functions: +You can access the Trace and Span objects to update their attributes. This is useful if you want to update the metadata attributes or log scores to a trace or span during the execution of the trace. This is achieved by using the `update_current_trace` and `update_current_span` functions: ```python -from opik.opik_context import get_current_trace, get_current_span -from opik import track +from opik import track, opik_context @track def llm_chain(input_text): @@ -134,19 +133,17 @@ def llm_chain(input_text): # ... # Update the trace - trace = get_current_trace() - - trace.update(tags=["llm_chatbot"]) - trace.log_feedback_score( - name="user_feedback", - value=1.0, - reason="The response was helpful and accurate." 
+ opik_context.update_current_trace( + tags=["llm_chatbot"], + feedback_scores=[ + {"name": "user_feedback", "value": 1.0, "reason": "The response was helpful and accurate."} + ] ) # Update the span - span = get_current_span() - - span.update(name="llm_chain") + opik_context.update_current_span( + name="llm_chain" + ) ``` You can learn more about the `Trace` object in the [Trace reference docs](/python-sdk-reference/Objects/Trace.html) and the `Span` object in the [Span reference docs](/python-sdk-reference/Objects/Span.html). @@ -178,6 +175,10 @@ client.log_spans_feedback_scores( ) ``` +:::tip +If you want to log scores to traces or spans from within a decorated function, you can use the `update_current_trace` and `update_current_span` methods instead. +::: + ## Advanced usage Comet's logging functionality is designed with production environments in mind. To optimize performance, all logging operations are executed in a background thread. diff --git a/apps/opik-documentation/documentation/sidebars.ts b/apps/opik-documentation/documentation/sidebars.ts index 431c5301db..39b7d75d9b 100644 --- a/apps/opik-documentation/documentation/sidebars.ts +++ b/apps/opik-documentation/documentation/sidebars.ts @@ -27,7 +27,8 @@ const sidebars: SidebarsConfig = { items: ['tracing/log_traces', 'tracing/log_distributed_traces', 'tracing/annotate_traces', { type: 'category', label: 'Integrations', - items: ['tracing/integrations/overview', 'tracing/integrations/langchain', 'tracing/integrations/openai', 'tracing/integrations/llama_index'] + items: ['tracing/integrations/overview', 'tracing/integrations/langchain', 'tracing/integrations/openai', + 'tracing/integrations/llama_index', 'tracing/integrations/ragas'] }], }, { @@ -37,7 +38,9 @@ const sidebars: SidebarsConfig = { items: ['evaluation/manage_datasets', 'evaluation/evaluate_your_llm', { type: 'category', label: 'Metrics', - items: ['evaluation/metrics/overview', 'evaluation/metrics/heuristic_metrics', 
'evaluation/metrics/hallucination', 'evaluation/metrics/moderation', 'evaluation/metrics/answer_relevance', 'evaluation/metrics/context_precision', 'evaluation/metrics/context_recall', 'evaluation/metrics/custom_metric'] + items: ['evaluation/metrics/overview', 'evaluation/metrics/heuristic_metrics', 'evaluation/metrics/hallucination', + 'evaluation/metrics/moderation', 'evaluation/metrics/answer_relevance', 'evaluation/metrics/context_precision', + 'evaluation/metrics/context_recall', 'evaluation/metrics/custom_metric'] }], }, { @@ -50,7 +53,9 @@ const sidebars: SidebarsConfig = { type: 'category', label: 'Cookbooks', collapsed: false, - items: ['cookbook/openai', 'cookbook/langchain', 'cookbook/llama-index', 'cookbook/evaluate_hallucination_metric', 'cookbook/evaluate_moderation_metric'], + items: ['cookbook/openai', 'cookbook/langchain', 'cookbook/llama-index', + 'cookbook/evaluate_hallucination_metric', 'cookbook/evaluate_moderation_metric', + 'cookbook/ragas'] }, ], }; diff --git a/apps/opik-documentation/python-sdk-docs/source/Objects/Span.rst b/apps/opik-documentation/python-sdk-docs/source/Objects/Span.rst index a1ebb0935c..d60459fb84 100644 --- a/apps/opik-documentation/python-sdk-docs/source/Objects/Span.rst +++ b/apps/opik-documentation/python-sdk-docs/source/Objects/Span.rst @@ -4,3 +4,4 @@ Span .. autoclass:: opik.api_objects.span.Span :members: :inherited-members: + :special-members: __init__ diff --git a/apps/opik-documentation/python-sdk-docs/source/Objects/SpanData.rst b/apps/opik-documentation/python-sdk-docs/source/Objects/SpanData.rst new file mode 100644 index 0000000000..afce71e846 --- /dev/null +++ b/apps/opik-documentation/python-sdk-docs/source/Objects/SpanData.rst @@ -0,0 +1,5 @@ +SpanData +========= + +.. 
autoclass:: opik.api_objects.span.SpanData + :members: diff --git a/apps/opik-documentation/python-sdk-docs/source/Objects/Trace.rst b/apps/opik-documentation/python-sdk-docs/source/Objects/Trace.rst index 6f853d6be5..833e80d1e9 100644 --- a/apps/opik-documentation/python-sdk-docs/source/Objects/Trace.rst +++ b/apps/opik-documentation/python-sdk-docs/source/Objects/Trace.rst @@ -4,3 +4,4 @@ Trace .. autoclass:: opik.api_objects.trace.Trace :members: :inherited-members: + :special-members: __init__ diff --git a/apps/opik-documentation/python-sdk-docs/source/Objects/TraceData.rst b/apps/opik-documentation/python-sdk-docs/source/Objects/TraceData.rst new file mode 100644 index 0000000000..b06fca6f5c --- /dev/null +++ b/apps/opik-documentation/python-sdk-docs/source/Objects/TraceData.rst @@ -0,0 +1,5 @@ +TraceData +========= + +.. autoclass:: opik.api_objects.trace.TraceData + :members: diff --git a/apps/opik-documentation/python-sdk-docs/source/Opik.rst b/apps/opik-documentation/python-sdk-docs/source/Opik.rst index 4a2361c5f7..dc4b037f9d 100644 --- a/apps/opik-documentation/python-sdk-docs/source/Opik.rst +++ b/apps/opik-documentation/python-sdk-docs/source/Opik.rst @@ -4,4 +4,5 @@ Opik .. 
autoclass:: opik.Opik :members: :inherited-members: + :special-members: __init__ \ No newline at end of file diff --git a/apps/opik-documentation/python-sdk-docs/source/conf.py b/apps/opik-documentation/python-sdk-docs/source/conf.py index 9666bdead0..f25f1564a4 100644 --- a/apps/opik-documentation/python-sdk-docs/source/conf.py +++ b/apps/opik-documentation/python-sdk-docs/source/conf.py @@ -38,7 +38,6 @@ # Document all functions, including __init__ and include members autodoc_default_options = { 'undoc-members': True, - 'special-members': '__init__', 'private-members': False, 'show-inheritance': True, } @@ -61,4 +60,4 @@ html_static_path = ["_static"] html_favicon = "_static/favicon.ico" -html_css_files = ["pied-piper-admonition.css"] \ No newline at end of file +html_css_files = ["pied-piper-admonition.css"] diff --git a/apps/opik-documentation/python-sdk-docs/source/evaluation/Dataset.rst b/apps/opik-documentation/python-sdk-docs/source/evaluation/Dataset.rst index b2fdaa2f5d..e1235c3826 100644 --- a/apps/opik-documentation/python-sdk-docs/source/evaluation/Dataset.rst +++ b/apps/opik-documentation/python-sdk-docs/source/evaluation/Dataset.rst @@ -2,4 +2,5 @@ Dataset ======= .. autoclass:: opik.Dataset - :members: \ No newline at end of file + :members: + :special-members: __init__ diff --git a/apps/opik-documentation/python-sdk-docs/source/evaluation/DatasetItem.rst b/apps/opik-documentation/python-sdk-docs/source/evaluation/DatasetItem.rst index 0445fd2deb..5ec434f272 100644 --- a/apps/opik-documentation/python-sdk-docs/source/evaluation/DatasetItem.rst +++ b/apps/opik-documentation/python-sdk-docs/source/evaluation/DatasetItem.rst @@ -2,4 +2,5 @@ DatasetItem =========== .. 
autoclass:: opik.DatasetItem - :members: \ No newline at end of file + :members: + :special-members: __init__ diff --git a/apps/opik-documentation/python-sdk-docs/source/evaluation/metrics/BaseMetric.rst b/apps/opik-documentation/python-sdk-docs/source/evaluation/metrics/BaseMetric.rst new file mode 100644 index 0000000000..6a251c9100 --- /dev/null +++ b/apps/opik-documentation/python-sdk-docs/source/evaluation/metrics/BaseMetric.rst @@ -0,0 +1,6 @@ +BaseMetric +========== + +.. autoclass:: opik.evaluation.metrics.BaseMetric + :members: + :inherited-members: diff --git a/apps/opik-documentation/python-sdk-docs/source/evaluation/metrics/index.rst b/apps/opik-documentation/python-sdk-docs/source/evaluation/metrics/index.rst index 865cb4a0f4..ab0b41ffb7 100644 --- a/apps/opik-documentation/python-sdk-docs/source/evaluation/metrics/index.rst +++ b/apps/opik-documentation/python-sdk-docs/source/evaluation/metrics/index.rst @@ -35,3 +35,5 @@ You can learn more about each metric in the following sections: AnswerRelevance ContextPrecision ContextRecall + + BaseMetric diff --git a/apps/opik-documentation/python-sdk-docs/source/index.rst b/apps/opik-documentation/python-sdk-docs/source/index.rst index 6fe37a24fd..496c73f9eb 100644 --- a/apps/opik-documentation/python-sdk-docs/source/index.rst +++ b/apps/opik-documentation/python-sdk-docs/source/index.rst @@ -162,9 +162,12 @@ You can learn more about the `opik` python SDK in the following sections: :maxdepth: 1 Objects/Trace.rst + Objects/TraceData.rst Objects/Span.rst + Objects/SpanData.rst Objects/FeedbackScoreDict.rst Objects/UsageDict.rst + .. 
toctree:: :caption: Documentation Guides diff --git a/apps/opik-documentation/python-sdk-docs/source/opik_context/get_current_span.rst b/apps/opik-documentation/python-sdk-docs/source/opik_context/get_current_span.rst deleted file mode 100644 index ec1a47df3c..0000000000 --- a/apps/opik-documentation/python-sdk-docs/source/opik_context/get_current_span.rst +++ /dev/null @@ -1,4 +0,0 @@ -get_current_span -================ - -.. autofunction:: opik.opik_context.get_current_span diff --git a/apps/opik-documentation/python-sdk-docs/source/opik_context/get_current_span_data.rst b/apps/opik-documentation/python-sdk-docs/source/opik_context/get_current_span_data.rst new file mode 100644 index 0000000000..9fcbe43ded --- /dev/null +++ b/apps/opik-documentation/python-sdk-docs/source/opik_context/get_current_span_data.rst @@ -0,0 +1,4 @@ +get_current_span_data +===================== + +.. autofunction:: opik.opik_context.get_current_span_data diff --git a/apps/opik-documentation/python-sdk-docs/source/opik_context/get_current_trace.rst b/apps/opik-documentation/python-sdk-docs/source/opik_context/get_current_trace.rst deleted file mode 100644 index aaf13b07b9..0000000000 --- a/apps/opik-documentation/python-sdk-docs/source/opik_context/get_current_trace.rst +++ /dev/null @@ -1,4 +0,0 @@ -get_current_trace -================= - -.. autofunction:: opik.opik_context.get_current_trace diff --git a/apps/opik-documentation/python-sdk-docs/source/opik_context/get_current_trace_data.rst b/apps/opik-documentation/python-sdk-docs/source/opik_context/get_current_trace_data.rst new file mode 100644 index 0000000000..9afc5c1c20 --- /dev/null +++ b/apps/opik-documentation/python-sdk-docs/source/opik_context/get_current_trace_data.rst @@ -0,0 +1,4 @@ +get_current_trace_data +====================== + +.. 
autofunction:: opik.opik_context.get_current_trace_data diff --git a/apps/opik-documentation/python-sdk-docs/source/opik_context/get_distributed_trace_headers.rst b/apps/opik-documentation/python-sdk-docs/source/opik_context/get_distributed_trace_headers.rst new file mode 100644 index 0000000000..8816300ca7 --- /dev/null +++ b/apps/opik-documentation/python-sdk-docs/source/opik_context/get_distributed_trace_headers.rst @@ -0,0 +1,4 @@ +get_distributed_trace_headers +============================= + +.. autofunction:: opik.opik_context.get_distributed_trace_headers diff --git a/apps/opik-documentation/python-sdk-docs/source/opik_context/index.rst b/apps/opik-documentation/python-sdk-docs/source/opik_context/index.rst index f0475eb708..5989fb348c 100644 --- a/apps/opik-documentation/python-sdk-docs/source/opik_context/index.rst +++ b/apps/opik-documentation/python-sdk-docs/source/opik_context/index.rst @@ -1,14 +1,36 @@ opik_context ============ -The opik context module provides a way to access the current span and trace from within a tracked function:: +The opik context module provides a way to access the current span and trace data from within a tracked function:: from opik import opik_context, track @track - def my_function(): - span = opik_context.get_current_span() - trace = opik_context.get_current_trace() + def my_function(): + + # Get the current span data + span_data = opik_context.get_current_span_data() + print(span_data) + + # Get the current trace data + trace_data = opik_context.get_current_trace_data() + print(trace_data) + + # Update the current span metadata + opik_context.update_current_span(metadata={"my_key": "my_value"}) + + # Update the current trace tags + opik_context.update_current_trace(tags=["my_tag"]) + + +You can also use the `get_distributed_trace_headers` function to get the distributed trace headers from the current trace:: + + from opik import opik_context, track + + @track + def my_function(): + distributed_trace_headers = 
opik_context.get_distributed_trace_headers() + print(distributed_trace_headers) You can learn more about each function in the following sections: @@ -16,5 +38,11 @@ You can learn more about each function in the following sections: :maxdepth: 4 :titlesonly: - get_current_span - get_current_trace + get_current_span_data + get_current_trace_data + + update_current_span + update_current_trace + + get_distributed_trace_headers + \ No newline at end of file diff --git a/apps/opik-documentation/python-sdk-docs/source/opik_context/update_current_span.rst b/apps/opik-documentation/python-sdk-docs/source/opik_context/update_current_span.rst new file mode 100644 index 0000000000..46e2682cb4 --- /dev/null +++ b/apps/opik-documentation/python-sdk-docs/source/opik_context/update_current_span.rst @@ -0,0 +1,4 @@ +update_current_span +=================== + +.. autofunction:: opik.opik_context.update_current_span diff --git a/apps/opik-documentation/python-sdk-docs/source/opik_context/update_current_trace.rst b/apps/opik-documentation/python-sdk-docs/source/opik_context/update_current_trace.rst new file mode 100644 index 0000000000..05f3e549e4 --- /dev/null +++ b/apps/opik-documentation/python-sdk-docs/source/opik_context/update_current_trace.rst @@ -0,0 +1,4 @@ +update_current_trace +==================== + +.. autofunction:: opik.opik_context.update_current_trace diff --git a/sdks/python/src/opik/api_objects/opik_client.py b/sdks/python/src/opik/api_objects/opik_client.py index 64ccea6f87..479071c080 100644 --- a/sdks/python/src/opik/api_objects/opik_client.py +++ b/sdks/python/src/opik/api_objects/opik_client.py @@ -37,7 +37,7 @@ def __init__( Args: project_name: The name of the project. If not provided, traces and spans will be logged to the `Default Project`. workspace: The name of the workspace. If not provided, `default` will be used. - host: The host URL for the Opik server. If not provided, it will default to `http://localhost:5173/api`. 
+ host: The host URL for the Opik server. If not provided, it will default to `https://www.comet.com/opik/api`. Returns: None """ diff --git a/sdks/python/src/opik/api_objects/span.py b/sdks/python/src/opik/api_objects/span.py index 21396e5fe1..c7eed09a13 100644 --- a/sdks/python/src/opik/api_objects/span.py +++ b/sdks/python/src/opik/api_objects/span.py @@ -22,7 +22,7 @@ def __init__( parent_span_id: Optional[str] = None, ): """ - A Span object. This object should not be created directly, instead use the `span` method of a Trace (:func:`opik.Trace`) or another Span (:func:`opik.Span.span`). + A Span object. This object should not be created directly, instead use the `span` method of a Trace (:func:`opik.Opik.span`) or another Span (:meth:`opik.Span.span`). """ self.id = id self.trace_id = trace_id @@ -215,6 +215,9 @@ def get_distributed_trace_headers(self) -> DistributedTraceHeadersDict: @dataclasses.dataclass class SpanData: + """ + The SpanData object is returned when calling :func:`opik.opik_context.get_current_span_data` from a tracked function. + """ trace_id: str id: str = dataclasses.field(default_factory=helpers.generate_id) parent_span_id: Optional[str] = None diff --git a/sdks/python/src/opik/api_objects/trace.py b/sdks/python/src/opik/api_objects/trace.py index 68e2bcd80b..38cb4258fe 100644 --- a/sdks/python/src/opik/api_objects/trace.py +++ b/sdks/python/src/opik/api_objects/trace.py @@ -20,8 +20,7 @@ def __init__( project_name: str, ): """ - A Trace object. This object should not be created directly, instead using :meth:`opik.Opik.trace` or - :func:`opik.opik_context.get_current_trace` if you are using function decorators. + A Trace object. This object should not be created directly, instead use :meth:`opik.Opik.trace` to create a new trace. 
""" self.id = id self._streamer = message_streamer @@ -197,6 +196,9 @@ def log_feedback_score( @dataclasses.dataclass class TraceData: + """ + The TraceData object is returned when calling :func:`opik.opik_context.get_current_trace_data` from a tracked function. + """ id: str = dataclasses.field(default_factory=helpers.generate_id) name: Optional[str] = None start_time: Optional[datetime.datetime] = dataclasses.field( diff --git a/sdks/python/src/opik/evaluation/metrics/__init__.py b/sdks/python/src/opik/evaluation/metrics/__init__.py index 105bb04571..248e36b9de 100644 --- a/sdks/python/src/opik/evaluation/metrics/__init__.py +++ b/sdks/python/src/opik/evaluation/metrics/__init__.py @@ -8,6 +8,7 @@ from .llm_judges.context_recall.metric import ContextRecall from .llm_judges.hallucination.metric import Hallucination from .llm_judges.moderation.metric import Moderation +from .base_metric import BaseMetric from .exceptions import MetricComputationError # from .llm_judges.factuality.metric import Factuality @@ -25,4 +26,5 @@ "Moderation", "RegexMatch", "MetricComputationError", + "BaseMetric" ] diff --git a/sdks/python/src/opik/evaluation/metrics/base_metric.py b/sdks/python/src/opik/evaluation/metrics/base_metric.py index b7afc84883..632a2639f6 100644 --- a/sdks/python/src/opik/evaluation/metrics/base_metric.py +++ b/sdks/python/src/opik/evaluation/metrics/base_metric.py @@ -5,6 +5,29 @@ class BaseMetric(abc.ABC): + """ + Abstract base class for all metrics. When creating a new metric, you should inherit + from this class and implement the abstract methods. + + Args: + name: The name of the metric. 
+ + Example: + >>> from opik.evaluation.metrics import base_metric, score_result + >>> + >>> class MyCustomMetric(base_metric.BaseMetric): + >>> def __init__(self, name: str): + >>> self.name = name + >>> + >>> def score(self, input: str, output: str, **ignored_kwargs: Any): + >>> # Add your logic here + >>> + >>> return score_result.ScoreResult( + >>> value=0, + >>> name=self.name, + >>> reason="Optional reason for the score" + >>> ) + """ def __init__(self, name: str) -> None: self.name = name diff --git a/sdks/python/src/opik/opik_context.py b/sdks/python/src/opik/opik_context.py index 09d12929d5..48e122f235 100644 --- a/sdks/python/src/opik/opik_context.py +++ b/sdks/python/src/opik/opik_context.py @@ -8,7 +8,6 @@ def get_current_span_data() -> Optional[span.SpanData]: """ Returns current span created by track() decorator or None if no span was found. - Context-wise. """ span_data = context_storage.top_span_data() if span_data is None: @@ -20,7 +19,6 @@ def get_current_span_data() -> Optional[span.SpanData]: def get_current_trace_data() -> Optional[trace.TraceData]: """ Returns current trace created by track() decorator or None if no trace was found. - Context-wise. """ trace_data = context_storage.get_trace_data() if trace_data is None: @@ -31,8 +29,7 @@ def get_current_trace_data() -> Optional[trace.TraceData]: def get_distributed_trace_headers() -> DistributedTraceHeadersDict: """ - Returns headers dictionary to be passed into tracked - function on remote node. + Returns headers dictionary to be passed into tracked function on remote node. Requires an existing span in the context, otherwise raises an error. """ current_span_data = context_storage.top_span_data() @@ -55,6 +52,18 @@ def update_current_span( usage: Optional[UsageDict] = None, feedback_scores: Optional[List[FeedbackScoreDict]] = None, ) -> None: + """ + Update the current span with the provided parameters. This method is usually called within a tracked function.
+ + Args: + name: The name of the span. + input: The input data of the span. + output: The output data of the span. + metadata: The metadata of the span. + tags: The tags of the span. + usage: The usage data of the span. + feedback_scores: The feedback scores of the span. + """ new_params = { "name": name, "input": input, @@ -79,6 +88,17 @@ def update_current_trace( tags: Optional[List[str]] = None, feedback_scores: Optional[List[FeedbackScoreDict]] = None, ) -> None: + """ + Update the current trace with the provided parameters. This method is usually called within a tracked function. + + Args: + name: The name of the trace. + input: The input data of the trace. + output: The output data of the trace. + metadata: The metadata of the trace. + tags: The tags of the trace. + feedback_scores: The feedback scores of the trace. + """ new_params = { "name": name, "input": input,