diff --git a/apps/opik-documentation/documentation/docs/cookbook/evaluate_moderation_metric.ipynb b/apps/opik-documentation/documentation/docs/cookbook/evaluate_moderation_metric.ipynb
index 62c4fe1bac..1e2c8fd885 100644
--- a/apps/opik-documentation/documentation/docs/cookbook/evaluate_moderation_metric.ipynb
+++ b/apps/opik-documentation/documentation/docs/cookbook/evaluate_moderation_metric.ipynb
@@ -22,18 +22,37 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
+      "Note: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
    "source": [
     "%pip install --upgrade --quiet opik pandas"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "OPIK: Opik is already configured. You can check the settings by viewing the config file at /Users/jacquesverre/.opik.config\n"
+     ]
+    }
+   ],
    "source": [
     "import opik\n",
     "\n",
@@ -51,7 +70,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -73,7 +92,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -105,7 +124,7 @@
     "\n",
     "    dataset_records.append(\n",
     "        {\n",
-     "            \"input\": x[\"prompt\"],\n",
+     "            \"output\": x[\"prompt\"],\n",
     "            \"expected_output\": expected_output,\n",
     "            \"moderated_fields\": moderated_fields,\n",
     "        }\n",
@@ -132,9 +151,71 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Evaluation: 0%| | 0/50 [00:00╭─ OpenAIModerationDataset (50 samples) ─╮\n",
+      "│ │\n",
+      "│ Total time: 00:00:08 │\n",
+      "│ Number of samples: 50 │\n",
+      "│ │\n",
+      "│ Correct moderation score: 0.8800 (avg) │\n",
+      "│ │\n",
+      "╰────────────────────────────────────────╯\n",
+      "\n"
+     ],
+     "text/plain": [
+      "╭─ OpenAIModerationDataset (50 samples) ─╮\n",
+      "│ │\n",
+      "│ \u001b[1mTotal time: \u001b[0m 00:00:08 │\n",
+      "│ \u001b[1mNumber of samples:\u001b[0m 50 │\n",
+      "│ │\n",
+      "│ \u001b[1;32mCorrect moderation score: 0.8800 (avg)\u001b[0m │\n",
+      "│ │\n",
+      "╰────────────────────────────────────────╯\n"
+     ]
+    },
+    "metadata": {},
+    "output_type": "display_data"
+   },
+   {
+    "data": {
+     "text/html": [
+      "Uploading results to Opik ... \n",
+      "\n"
+     ],
+     "text/plain": [
+      "Uploading results to Opik \u001b[33m...\u001b[0m \n"
+     ]
+    },
+    "metadata": {},
+    "output_type": "display_data"
+   },
+   {
+    "data": {
+     "text/html": [
+      "View the results in your Opik dashboard.\n",
+      "\n"
+     ],
+     "text/plain": [
+      "View the results \u001b]8;id=595231;https://www.comet.com/opik/jacques-comet/experiments/01939721-3a29-7f9a-ac8b-737923e30e31/compare?experiments=%5B%2206751b32-5e8a-7276-8000-2da2be3b1cd9%22%5D\u001b\\in your Opik dashboard\u001b]8;;\u001b\\.\n"
+     ]
+    },
+    "metadata": {},
+    "output_type": "display_data"
+   }
+   ],
    "source": [
     "from opik.evaluation.metrics import Moderation, Equals\n",
     "from opik.evaluation import evaluate\n",
@@ -147,7 +228,7 @@
     "def evaluation_task(x: Dict):\n",
     "    metric = Moderation()\n",
     "    try:\n",
-     "        metric_score = metric.score(input=x[\"input\"])\n",
+     "        metric_score = metric.score(output=x[\"output\"])\n",
     "        moderation_score = metric_score.value\n",
     "        moderation_reason = metric_score.reason\n",
     "    except Exception as e:\n",
@@ -172,7 +253,7 @@
     "\n",
     "# Add the prompt template as an experiment configuration\n",
     "experiment_config = {\n",
-     "    \"prompt_template\": generate_query(input=\"{input}\", few_shot_examples=[])\n",
+     "    \"prompt_template\": generate_query(output=\"{output}\", few_shot_examples=[])\n",
     "}\n",
     "\n",
     "res = evaluate(\n",
diff --git a/apps/opik-documentation/documentation/docs/evaluation/metrics/moderation.md b/apps/opik-documentation/documentation/docs/evaluation/metrics/moderation.md
index 8ea6ec9eb9..0ea20a31de 100644
--- a/apps/opik-documentation/documentation/docs/evaluation/metrics/moderation.md
+++ b/apps/opik-documentation/documentation/docs/evaluation/metrics/moderation.md
@@ -4,7 +4,7 @@ sidebar_label: Moderation
 
 # Moderation
 
-The Moderation metric allows you to evaluate the appropriateness of the LLM's response to the given input question or prompt. It does this by asking the LLM to rate the appropriateness of the response on a scale of 1 to 10, where 1 is the least appropriate and 10 is the most appropriate.
+The Moderation metric allows you to evaluate the appropriateness of the LLM's output. It does this by asking the LLM to rate the appropriateness of the output on a scale of 1 to 10, where 1 is the least appropriate and 10 is the most appropriate.
 
 ## How to use the Moderation metric
 
@@ -16,9 +16,7 @@ from opik.evaluation.metrics import Moderation
 metric = Moderation()
 
 metric.score(
-    input="What is the capital of France?",
-    output="The capital of France is Paris. It is famous for its iconic Eiffel Tower and rich cultural heritage.",
-    context=["France is a country in Western Europe. Its capital is Paris, which is known for landmarks like the Eiffel Tower."],
+    output="The capital of France is Paris. It is famous for its iconic Eiffel Tower and rich cultural heritage."
 )
 ```
 
@@ -87,7 +85,7 @@ Example response for borderline content:
 
 Analyze the following text and provide your verdict, score, and reason in the specified JSON format:
 
-{input}
+{output}
 ```
 
 with `VERDICT_KEY` being `moderation_score` and `REASON_KEY` being `reason`.
diff --git a/sdks/python/src/opik/evaluation/metrics/llm_judges/moderation/metric.py b/sdks/python/src/opik/evaluation/metrics/llm_judges/moderation/metric.py
index 543118b95f..64678d04e2 100644
--- a/sdks/python/src/opik/evaluation/metrics/llm_judges/moderation/metric.py
+++ b/sdks/python/src/opik/evaluation/metrics/llm_judges/moderation/metric.py
@@ -58,12 +58,11 @@ def _init_model(
         else:
             self._model = models_factory.get(model_name=model)
 
-    def score(self, input: str, **ignored_kwargs: Any) -> score_result.ScoreResult:
+    def score(self, output: str, **ignored_kwargs: Any) -> score_result.ScoreResult:
         """
         Calculate the moderation score for the given input-output pair.
 
         Args:
-            input: The input text to be evaluated.
             output: The output text to be evaluated.
             **ignored_kwargs (Any): Additional keyword arguments that are ignored.
 
@@ -72,7 +71,7 @@ def score(self, input: str, **ignored_kwargs: Any) -> score_result.ScoreResult:
             (between 0.0 and 1.0) and a reason for the score.
         """
         llm_query = template.generate_query(
-            input=input, few_shot_examples=self.few_shot_examples
+            output=output, few_shot_examples=self.few_shot_examples
         )
         model_output = self._model.generate_string(
             input=llm_query, response_format=ModerationResponseFormat
@@ -81,7 +80,7 @@ def score(self, input: str, **ignored_kwargs: Any) -> score_result.ScoreResult:
         return self._parse_model_output(model_output)
 
     async def ascore(
-        self, input: str, **ignored_kwargs: Any
+        self, output: str, **ignored_kwargs: Any
     ) -> score_result.ScoreResult:
         """
         Asynchronously calculate the moderation score for the given input-output pair.
@@ -90,7 +89,6 @@ async def ascore(
         please refer to the :meth:`score` method.
 
         Args:
-            input: The input text to be evaluated.
             output: The output text to be evaluated.
             **ignored_kwargs: Additional keyword arguments that are ignored.
 
@@ -99,7 +97,7 @@
 
         """
         llm_query = template.generate_query(
-            input=input, few_shot_examples=self.few_shot_examples
+            output=output, few_shot_examples=self.few_shot_examples
         )
         model_output = await self._model.agenerate_string(
             input=llm_query, response_format=ModerationResponseFormat
diff --git a/sdks/python/src/opik/evaluation/metrics/llm_judges/moderation/template.py b/sdks/python/src/opik/evaluation/metrics/llm_judges/moderation/template.py
index e7c9c242e7..158bae98ee 100644
--- a/sdks/python/src/opik/evaluation/metrics/llm_judges/moderation/template.py
+++ b/sdks/python/src/opik/evaluation/metrics/llm_judges/moderation/template.py
@@ -2,13 +2,13 @@
 
 
 class FewShotExampleModeration(TypedDict):
-    input: str
+    output: str
     score: float
     reason: str
 
 
 def generate_query(
-    input: str,
+    output: str,
     few_shot_examples: List[FewShotExampleModeration],
 ) -> str:
     examples_str = ""
@@ -16,7 +16,7 @@ def generate_query(
         examples_str = "\n\n".join(
             [
                 f"Example {i+1}:\n"
-                f"Input: {example['input']}\n"
+                f"Output: {example['output']}\n"
                 f"{{\"score\": {example['score']}, \"reason\": \"{example['reason']}\"}}"
                 for i, example in enumerate(few_shot_examples)
             ]
@@ -75,5 +75,5 @@
 
 Analyze the following text and provide your verdict score, and reason in the specified JSON format:
 
-{input}
+{output}
 """
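Taken together, this patch renames the field that the Moderation metric scores from `input` to `output` across the cookbook, the documentation page, and the Python SDK. Below is a minimal usage sketch of the renamed API, assuming the post-merge SDK shown in the diff; the example strings are illustrative only, and the `template` import path is the internal module touched above rather than a documented public entry point.

```python
# Minimal sketch of the renamed API (post-merge Opik SDK assumed).
# The example texts below are illustrative, not part of this patch.
from opik.evaluation.metrics import Moderation
from opik.evaluation.metrics.llm_judges.moderation import template

# Moderation.score() now takes `output` instead of `input`.
metric = Moderation()
result = metric.score(
    output="The capital of France is Paris. It is famous for its iconic Eiffel Tower."
)
print(result.value, result.reason)  # moderation score in [0.0, 1.0] and the judge's reason

# The judge prompt can also be rendered directly; few-shot examples now use
# the `output` key defined by FewShotExampleModeration.
prompt = template.generate_query(
    output="Some text to moderate",
    few_shot_examples=[
        {"output": "Have a nice day!", "score": 0.0, "reason": "Benign, non-harmful content."}
    ],
)
print(prompt)
```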