Fixing linting errors and bumpversion
zyteka committed Oct 21, 2024
1 parent 24f7d7e commit dd41e34
Showing 6 changed files with 94 additions and 83 deletions.
4 changes: 4 additions & 0 deletions .flake8
@@ -0,0 +1,4 @@
[flake8]
max-line-length = 99
exclude = docs, .git, __pycache__, .ipynb_checkpoints
extend-ignore = E203
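E203 (whitespace before ':') is commonly added to flake8's ignore list when a project is formatted with black, because black puts spaces around the colon in slices whose bounds are expressions; a minimal Python illustration, assuming that is the motivation for the extend-ignore here:

# Hypothetical snippet: after black formatting, the slice below keeps a space
# before ':', which flake8 reports as E203 unless the code is ignored.
data = list(range(10))
offset = 2
chunk = data[offset + 1 :]  # E203 would fire here without extend-ignore = E203
print(chunk)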
3 changes: 1 addition & 2 deletions explingo/__init__.py
@@ -1,9 +1,8 @@
from explingo import testing
from explingo.grader import Grader
from explingo.narrator import Narrator

__author__ = "MIT Data To AI Lab"
__email__ = "[email protected]"
__version__ = "0.1.0.1"
__version__ = "0.1.1"

__all__ = ["Narrator", "Grader"]
93 changes: 51 additions & 42 deletions explingo/grader.py
@@ -1,5 +1,3 @@
import random

import dspy
import pandas as pd

@@ -14,7 +12,8 @@ class RubricAssess(dspy.Signature):
rubric = dspy.InputField()

assessment = dspy.OutputField(
desc="A single number from the options in the rubric. Provide only a single number with no other text."
desc="A single number from the options in the rubric. "
"Provide only a single number with no other text."
)


@@ -40,13 +39,17 @@ def __init__(
Grades narratives
Args:
llm (LLM): LLM to use to grade accuracy, completeness, and fluency. One of llm or openai_api_key must be provided
openai_api_key (string): OpenAI API key to use to grade accuracy, completeness, and fluency
metrics (list of strings or "all"): One or more of accuracy", "completeness", "fluency", "conciseness"
llm (LLM): LLM to use to grade accuracy, completeness, and fluency.
One of llm or openai_api_key must be provided
openai_api_key (string): OpenAI API key to use to grade accuracy, completeness,
and fluency
metrics (list of strings or "all"): One or more of
"accuracy", "completeness", "fluency", "conciseness"
sample_narratives (list of strings, or (string, string) tuples):
Sample narratives to use to grade fluency. Can pass in either just the narratives
or (explanation, narrative) tuples
max_optimal_length (int): Hyperparameter for conciseness metric, defaults to number of words in longest sample narrative or 100 if not given
max_optimal_length (int): Hyperparameter for conciseness metric, defaults to number of
words in longest sample narrative or 100 if not given
"""
self.metrics = metrics

@@ -70,9 +73,7 @@ def __init__(
isinstance(self.sample_narratives[0], list)
or isinstance(self.sample_narratives[0], tuple)
):
self.sample_narratives = [
narrative[1] for narrative in self.sample_narratives
]
self.sample_narratives = [narrative[1] for narrative in self.sample_narratives]

self.max_optimal_length = max_optimal_length
if max_optimal_length is None and self.sample_narratives is not None:
@@ -95,9 +96,7 @@ def __init__(
def run_metrics(self, input_, output_, trace):
results = {}
if "accuracy" in self.metrics:
results["accuracy"] = accuracy(
input_, output_, grader=self.grader_llm, trace=trace
)
results["accuracy"] = accuracy(input_, output_, grader=self.grader_llm, trace=trace)
if "completeness" in self.metrics:
results["completeness"] = completeness(
input_, output_, grader=self.grader_llm, trace=trace
@@ -126,9 +125,7 @@ def run_metrics(self, input_, output_, trace):
)

def __call__(self, explanation, explanation_format, narrative, trace=None):
input_ = dspy.Example(
explanation=explanation, explanation_format=explanation_format
)
input_ = dspy.Example(explanation=explanation, explanation_format=explanation_format)
output_ = dspy.Prediction(narrative=narrative)
return self.run_metrics(input_, output_, trace)

@@ -185,14 +182,20 @@ def compute_score_from_rubric(
def accuracy(input_, output_, grader, trace=None):
question = (
f"How accurate is the information in the narrative, based on the explanation given? "
f"A narrative can score 4 even if it is missing information as long as everything in the narrative is correct. "
f"Make sure the contribution direction is correct - positive contributions increase the output, negative contributions decrease the output."
f"A narrative can score 4 even if it is missing information as long as everything "
f"in the narrative is correct. Make sure the contribution direction is correct - "
f"positive contributions increase the output, negative contributions decrease the output."
f"\n\nExplanation format: {input_.explanation_format}.\nExplanation: {input_.explanation}"
)
rubric = f"0 - Contains one or more errors in value or contribution direction. 4 - Contains no errors, but may be missing information."
rubric = (
"0 - Contains one or more errors in value or contribution direction. "
"4 - Contains no errors, but may be missing information."
)

rational_type = dspy.OutputField(
prefix="Start by listing out all the features in the narrative, and then for each one compare it to the explanation to ensure its value and contribution are approximately correct.",
prefix="Start by listing out all the features in the narrative, and then for each one "
"compare it to the explanation to ensure its value and contribution "
"are approximately correct.",
)

return compute_score_from_rubric(
@@ -207,27 +210,39 @@ def accuracy(input_, output_, grader, trace=None):

def fluency(input_, output_, grader, trace=None, good_narratives=None):
if good_narratives is None:
question = f"How natural and human is the narrative?"
question = "How natural and human is the narrative?"
else:
question = f"How well does the style of the narrative match the style of the example narratives? Consider only the linguistic style, not the topic. Example narratives:"
question = (
"How well does the style of the narrative match the style of the example "
"narratives? Consider only the linguistic style, not the topic. "
"Example narratives:"
)
for narrative in good_narratives:
question += f"\n{narrative}"
if good_narratives is not None:
rubric = f"0: Very dissimilar. 1: Dissimilar. 2: Neutral. 3: Similar. 4: Very similar"
rubric = "0: Very dissimilar. 1: Dissimilar. 2: Neutral. 3: Similar. 4: Very similar"
else:
rubric = (
f"0: Very unnatural. 1: Unnatural. 2: Neutral. 3: Natural. 4: Very natural"
)
return compute_score_from_rubric(
"fluency", question, rubric, output_.narrative, grader
)
rubric = "0: Very unnatural. 1: Unnatural. 2: Neutral. 3: Natural. 4: Very natural"
return compute_score_from_rubric("fluency", question, rubric, output_.narrative, grader)


def completeness(input_, output_, grader, trace=None):
question = f"How completely does the narrative below describe the explanation given in <<>>?\nExplanation format: {input_.explanation_format}.\nExplanation: <<{input_.explanation}>>"
rubric = "0 - One or more feature names from the explanation are not mentioned at all in the narrative. 2 - All features are mentioned, but not all feature values and/or contribution directions. 4 - All features are mentioned, and for each feature, includes at least an approximation of the feature's value and contribution direction."
question = (
f"How completely does the narrative below describe the explanation given?"
f"\nExplanation format: {input_.explanation_format}."
f"\nExplanation: {input_.explanation}"
)
rubric = (
"0 - One or more feature names from the explanation are not mentioned at all in the "
"narrative. 2 - All features are mentioned, but not all feature values and/or "
"contribution directions. 4 - All features are mentioned, and for each feature, "
"includes at least an approximation of the feature's value and contribution "
"direction."
)
rational_type = dspy.OutputField(
prefix="Start by listing out all the features in the explanations, and then determine every feature is present in the narrative, along with its value and contribution direction.",
prefix="Start by listing out all the features in the explanations, and then determine "
"every feature is present in the narrative, along with its value and "
"contribution direction.",
)

return compute_score_from_rubric(
Expand All @@ -240,9 +255,7 @@ def completeness(input_, output_, grader, trace=None):
)


def conciseness(
input_, output_, grader=None, trace=None, max_optimal_length_per_feature=20
):
def conciseness(input_, output_, grader=None, trace=None, max_optimal_length_per_feature=20):
num_features = input_.explanation.count("(")
if num_features == 0:
num_features = 1
@@ -259,13 +272,9 @@ def conciseness(


def context_awareness(input_, output_, grader, trace=None):
question = (
f"How well does the rationalization help explain the logic in the narrative?"
)
rubric = f"0: Not at all. 2: Somewhat. 4: Very well."
narrative_input = (
f"Narrative: {output_.narrative}. Rationalization: {output_.rationalization}"
)
question = "How well does the rationalization help explain the logic in the narrative?"
rubric = "0: Not at all. 2: Somewhat. 4: Very well."
narrative_input = f"Narrative: {output_.narrative}. Rationalization: {output_.rationalization}"
return compute_score_from_rubric(
"context_awareness", question, rubric, narrative_input, grader
)
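For reference, a minimal usage sketch of the Grader API documented above (not part of this commit). The explanation string, format description, and API key are hypothetical, constructor keyword names are assumed to match the docstring, and the return value is assumed to be the metric-name-to-score dict assembled in run_metrics:

from explingo import Grader

grader = Grader(
    openai_api_key="sk-...",  # hypothetical key; a DSPy LLM can be passed via llm= instead
    metrics="all",            # or a subset such as ["accuracy", "fluency"]
    sample_narratives=[       # used for fluency grading and the default max_optimal_length
        "The home's large size (2,500 sq ft) raised the predicted price.",
    ],
)

scores = grader(
    explanation="(size, 2500, +10000)",
    explanation_format="(feature, value, contribution) tuples",
    narrative="The home's large size raised the predicted price by about $10,000.",
)
# scores is expected to map each requested metric to its 0-4 rubric (or length-based) score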
58 changes: 26 additions & 32 deletions explingo/narrator.py
@@ -1,7 +1,7 @@
import random

import dspy
from dspy.teleprompt import BootstrapFewShot, LabeledFewShot
from dspy.teleprompt import BootstrapFewShot


def _manually_parse_output(output):
@@ -18,16 +14,14 @@ def _manually_parse_output(output):


class NarratorSig(dspy.Signature):
"""You are helping users understand an ML model's prediction. Given an explanation and information about the model,
convert the explanation into a human-readable narrative."""
"""You are helping users understand an ML model's prediction. Given an explanation
and information about the model, convert the explanation into a human-readable narrative."""

context = dspy.InputField(desc="what the ML model predicts")
explanation = dspy.InputField(desc="explanation of an ML model's prediction")
explanation_format = dspy.InputField(desc="format the explanation is given in")

narrative = dspy.OutputField(
desc="human-readable narrative version of the explanation"
)
narrative = dspy.OutputField(desc="human-readable narrative version of the explanation")
# rationalization = dspy.OutputField(
# desc="explains why given features may be relevant"
# )
@@ -46,19 +44,20 @@ def __init__(
"""
Args:
explanation_format (string): Format explanations will take
context (string): Brief description of what the model predicts (ie. "the model predicts house prices")
context (string): Brief description of what the model predicts
(ie. "the model predicts house prices")
llm (LLM object): DSPy LLM object to use.
See https://dspy-docs.vercel.app/docs/building-blocks/language_models for examples
One of llm or openai_api_key must be provided
openai_api_key (string): OpenAI API key to use
gpt_model_name (string): if openai_api_key is provided, specifies the GPT version to use
sample_narratives (list of tuples of strings): List of (explanation, narrative) examples
gpt_model_name (string): if openai_api_key is provided,
specifies the GPT version to use
sample_narratives (list of tuples of strings):
List of (explanation, narrative) examples
"""
self.llm = llm
if self.llm is None and openai_api_key is not None:
self.llm = dspy.OpenAI(
model=gpt_model_name, api_key=openai_api_key, max_tokens=1000
)
self.llm = dspy.OpenAI(model=gpt_model_name, api_key=openai_api_key, max_tokens=1000)
self.context = context
self.explanation_format = explanation_format
self.sample_narratives = []
@@ -81,16 +80,14 @@ def __init__(
"convert the explanation into a human-readable narrative."
)

def _assemble_prompt(
self, prompt, explanation, explanation_format, examples=None, n=3
):
def _assemble_prompt(self, prompt, explanation, explanation_format, examples=None, n=3):
header_string = f"{prompt}\n"
format_string = (
f"Follow the following format\n"
f"Context: what the model predicts\n"
f"Explanation: explanation of the model's prediction\n"
f"Explanation Format: format the explanation is given in\n"
f"Narrative: human-readable narrative version of the explanation\n"
"Follow the following format\n"
"Context: what the model predicts\n"
"Explanation: explanation of the model's prediction\n"
"Explanation Format: format the explanation is given in\n"
"Narrative: human-readable narrative version of the explanation\n"
)
input_string = (
f"Context: {self.context}\n"
@@ -115,9 +112,7 @@ def _assemble_prompt(
if len(examples_string) == 0:
return "---\n".join([header_string, format_string, input_string])
else:
return "---\n".join(
[header_string, format_string, examples_string, input_string]
)
return "---\n".join([header_string, format_string, examples_string, input_string])

def narrate(self, explanation, n_examples=3, n_bootstrapped=0, grader=None):
"""
@@ -129,7 +124,8 @@ def narrate(self, explanation, n_examples=3, n_bootstrapped=0, grader=None):
n_bootstrapped (int): Number of bootstrapped examples to pass. Increasing this number
will incur additional calls to the LLM, but may improve the quality of the output
n_bootstrapped should be less than or equal to n_examples
grader (Grader): Grader object to use for bootstrapping. Must be provided if n_bootstrapped > 0
grader (Grader): Grader object to use for bootstrapping. Must be provided if
n_bootstrapped > 0
"""
if n_bootstrapped > 0:
return self.bootstrap_few_shot(
@@ -158,15 +154,11 @@ def basic_prompt(self, explanation, explanation_format, prompt=None, few_shot_n=
"""
if prompt is None:
prompt = self.default_prompt
full_prompt = self._assemble_prompt(
prompt, explanation, explanation_format, examples=None
)
full_prompt = self._assemble_prompt(prompt, explanation, explanation_format, examples=None)
output = self.llm(full_prompt)[0]
return _manually_parse_output(output)

def few_shot(
self, explanation, explanation_format, prompt=None, n_few_shot=3, use_dspy=False
):
def few_shot(self, explanation, explanation_format, prompt=None, n_few_shot=3, use_dspy=False):
"""
Few-shot prompting
@@ -175,7 +167,8 @@ def few_shot(
explanation_format (string): Explanation format
prompt (string): Prompt
n_few_shot (int): Number of examples to use in few-shot learning
use_dspy (bool): Should be set to False, saving legacy version using DSPy in case needed later
use_dspy (bool): Should be set to False, saving legacy version using DSPy
in case needed later
Returns:
DSPy Prediction object
@@ -209,7 +202,8 @@ def bootstrap_few_shot(
explanation_format (string): Explanation format
metric (func): Metric to use for optimization
n_labeled_few_shot (int): Number of examples to use in few-shot learning
n_bootstrapped_few_shot (int): Number of bootstrapped examples to use in few-shot learning
n_bootstrapped_few_shot (int): Number of bootstrapped examples to use in
few-shot learning
Returns:
DSPy Prediction object
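For reference, a minimal usage sketch of the Narrator API documented above (not part of this commit). The explanation format, context string, sample narrative, and API key are hypothetical; bootstrapped few-shot is shown with a Grader, as the narrate docstring requires when n_bootstrapped > 0:

from explingo import Grader, Narrator

narrator = Narrator(
    explanation_format="(feature, value, contribution) tuples",  # hypothetical format
    context="The model predicts house prices",
    openai_api_key="sk-...",  # or pass a DSPy LLM via llm=
    sample_narratives=[
        (
            "(size, 2500, +10000)",
            "The home's large size raised the predicted price by about $10,000.",
        ),
    ],
)

# Plain few-shot narration (n_bootstrapped defaults to 0, so no Grader is needed)
narrative = narrator.narrate("(age, 75, -5000)", n_examples=1)

# Bootstrapped few-shot makes extra LLM calls and needs a Grader for its metric
grader = Grader(openai_api_key="sk-...", metrics="all")
narrative = narrator.narrate("(age, 75, -5000)", n_examples=1, n_bootstrapped=1, grader=grader)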
11 changes: 10 additions & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "explingo"
version = "0.1.0.1"
version = "0.1.1"
description = ""
authors = ["Ola Zytek <[email protected]>"]
readme = "README.md"
@@ -22,3 +22,12 @@ jupyter = "^1.1.1"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.black]
line-length = 99
preview = true

[tool.isort]
profile = "black"
line_length = 99
skip = ["__init__.py"]
8 changes: 2 additions & 6 deletions tests/test_narrator.py
@@ -4,9 +4,7 @@
def test_narrate_basic_prompt():
response = "narrative"
mock_llm = explingo.testing.MockNarratorLLM(response)
narrator = explingo.Narrator(
llm=mock_llm, explanation_format="test", context="test"
)
narrator = explingo.Narrator(llm=mock_llm, explanation_format="test", context="test")
explanation = "explanation"
assert narrator.narrate(explanation) == response

@@ -40,8 +38,6 @@ def test_narrative_bootstrapped_few_shot():
)
explanation = "explanation"
assert (
narrator.narrate(
explanation, n_examples=2, n_bootstrapped=2, grader=mock_grader
)
narrator.narrate(explanation, n_examples=2, n_bootstrapped=2, grader=mock_grader)
== response
)
