Fixing linting errors and bumpversion
zyteka committed Oct 21, 2024
1 parent 24f7d7e commit dd41e34
Showing 6 changed files with 94 additions and 83 deletions.
4 changes: 4 additions & 0 deletions .flake8
@@ -0,0 +1,4 @@
[flake8]
max-line-length = 99
exclude = docs, .git, __pycache__, .ipynb_checkpoints
extend-ignore = E203
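E203 (whitespace before ':') is commonly added to flake8's ignore list when a project is formatted with black, because black puts spaces around the colon in slices whose bounds are expressions; a minimal Python illustration, assuming that is the motivation for the extend-ignore here:

# Hypothetical snippet: after black formatting, the slice below keeps a space
# before ':', which flake8 reports as E203 unless the code is ignored.
data = list(range(10))
offset = 2
chunk = data[offset + 1 :]  # E203 would fire here without extend-ignore = E203
print(chunk)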
3 changes: 1 addition & 2 deletions explingo/__init__.py
@@ -1,9 +1,8 @@
from explingo import testing
from explingo.grader import Grader
from explingo.narrator import Narrator

__author__ = "MIT Data To AI Lab"
__email__ = "[email protected]"
__version__ = "0.1.0.1"
__version__ = "0.1.1"

__all__ = ["Narrator", "Grader"]
93 changes: 51 additions & 42 deletions explingo/grader.py
@@ -1,5 +1,3 @@
import random

import dspy
import pandas as pd

@@ -14,7 +12,8 @@ class RubricAssess(dspy.Signature):
rubric = dspy.InputField()

assessment = dspy.OutputField(
desc="A single number from the options in the rubric. Provide only a single number with no other text."
desc="A single number from the options in the rubric. "
"Provide only a single number with no other text."
)


@@ -40,13 +39,17 @@ def __init__(
Grades narratives
Args:
llm (LLM): LLM to use to grade accuracy, completeness, and fluency. One of llm or openai_api_key must be provided
openai_api_key (string): OpenAI API key to use to grade accuracy, completeness, and fluency
metrics (list of strings or "all"): One or more of accuracy", "completeness", "fluency", "conciseness"
llm (LLM): LLM to use to grade accuracy, completeness, and fluency.
One of llm or openai_api_key must be provided
openai_api_key (string): OpenAI API key to use to grade accuracy, completeness,
and fluency
metrics (list of strings or "all"): One or more of
"accuracy", "completeness", "fluency", "conciseness"
sample_narratives (list of strings, or (string, string) tuples):
Sample narratives to use to grade fluency. Can pass in either just the narratives
or (explanation, narrative) tuples
max_optimal_length (int): Hyperparameter for conciseness metric, defaults to number of words in longest sample narrative or 100 if not given
max_optimal_length (int): Hyperparameter for conciseness metric, defaults to number of
words in longest sample narrative or 100 if not given
"""
self.metrics = metrics

@@ -70,9 +73,7 @@ def __init__(
isinstance(self.sample_narratives[0], list)
or isinstance(self.sample_narratives[0], tuple)
):
self.sample_narratives = [
narrative[1] for narrative in self.sample_narratives
]
self.sample_narratives = [narrative[1] for narrative in self.sample_narratives]

self.max_optimal_length = max_optimal_length
if max_optimal_length is None and self.sample_narratives is not None:
@@ -95,9 +96,7 @@ def __init__(
def run_metrics(self, input_, output_, trace):
results = {}
if "accuracy" in self.metrics:
results["accuracy"] = accuracy(
input_, output_, grader=self.grader_llm, trace=trace
)
results["accuracy"] = accuracy(input_, output_, grader=self.grader_llm, trace=trace)
if "completeness" in self.metrics:
results["completeness"] = completeness(
input_, output_, grader=self.grader_llm, trace=trace
@@ -126,9 +125,7 @@ def run_metrics(self, input_, output_, trace):
)

def __call__(self, explanation, explanation_format, narrative, trace=None):
input_ = dspy.Example(
explanation=explanation, explanation_format=explanation_format
)
input_ = dspy.Example(explanation=explanation, explanation_format=explanation_format)
output_ = dspy.Prediction(narrative=narrative)
return self.run_metrics(input_, output_, trace)

@@ -185,14 +182,20 @@ def compute_score_from_rubric(
def accuracy(input_, output_, grader, trace=None):
question = (
f"How accurate is the information in the narrative, based on the explanation given? "
f"A narrative can score 4 even if it is missing information as long as everything in the narrative is correct. "
f"Make sure the contribution direction is correct - positive contributions increase the output, negative contributions decrease the output."
f"A narrative can score 4 even if it is missing information as long as everything "
f"in the narrative is correct. Make sure the contribution direction is correct - "
f"positive contributions increase the output, negative contributions decrease the output."
f"\n\nExplanation format: {input_.explanation_format}.\nExplanation: {input_.explanation}"
)
rubric = f"0 - Contains one or more errors in value or contribution direction. 4 - Contains no errors, but may be missing information."
rubric = (
"0 - Contains one or more errors in value or contribution direction. "
"4 - Contains no errors, but may be missing information."
)

rational_type = dspy.OutputField(
prefix="Start by listing out all the features in the narrative, and then for each one compare it to the explanation to ensure its value and contribution are approximately correct.",
prefix="Start by listing out all the features in the narrative, and then for each one "
"compare it to the explanation to ensure its value and contribution "
"are approximately correct.",
)

return compute_score_from_rubric(
@@ -207,27 +210,39 @@ def accuracy(input_, output_, grader, trace=None):

def fluency(input_, output_, grader, trace=None, good_narratives=None):
if good_narratives is None:
question = f"How natural and human is the narrative?"
question = "How natural and human is the narrative?"
else:
question = f"How well does the style of the narrative match the style of the example narratives? Consider only the linguistic style, not the topic. Example narratives:"
question = (
"How well does the style of the narrative match the style of the example "
"narratives? Consider only the linguistic style, not the topic. "
"Example narratives:"
)
for narrative in good_narratives:
question += f"\n{narrative}"
if good_narratives is not None:
rubric = f"0: Very dissimilar. 1: Dissimilar. 2: Neutral. 3: Similar. 4: Very similar"
rubric = "0: Very dissimilar. 1: Dissimilar. 2: Neutral. 3: Similar. 4: Very similar"
else:
rubric = (
f"0: Very unnatural. 1: Unnatural. 2: Neutral. 3: Natural. 4: Very natural"
)
return compute_score_from_rubric(
"fluency", question, rubric, output_.narrative, grader
)
rubric = "0: Very unnatural. 1: Unnatural. 2: Neutral. 3: Natural. 4: Very natural"
return compute_score_from_rubric("fluency", question, rubric, output_.narrative, grader)


def completeness(input_, output_, grader, trace=None):
question = f"How completely does the narrative below describe the explanation given in <<>>?\nExplanation format: {input_.explanation_format}.\nExplanation: <<{input_.explanation}>>"
rubric = "0 - One or more feature names from the explanation are not mentioned at all in the narrative. 2 - All features are mentioned, but not all feature values and/or contribution directions. 4 - All features are mentioned, and for each feature, includes at least an approximation of the feature's value and contribution direction."
question = (
f"How completely does the narrative below describe the explanation given?"
f"\nExplanation format: {input_.explanation_format}."
f"\nExplanation: {input_.explanation}"
)
rubric = (
"0 - One or more feature names from the explanation are not mentioned at all in the "
"narrative. 2 - All features are mentioned, but not all feature values and/or "
"contribution directions. 4 - All features are mentioned, and for each feature, "
"includes at least an approximation of the feature's value and contribution "
"direction."
)
rational_type = dspy.OutputField(
prefix="Start by listing out all the features in the explanations, and then determine every feature is present in the narrative, along with its value and contribution direction.",
prefix="Start by listing out all the features in the explanations, and then determine "
"every feature is present in the narrative, along with its value and "
"contribution direction.",
)

return compute_score_from_rubric(
Expand All @@ -240,9 +255,7 @@ def completeness(input_, output_, grader, trace=None):
)


def conciseness(
input_, output_, grader=None, trace=None, max_optimal_length_per_feature=20
):
def conciseness(input_, output_, grader=None, trace=None, max_optimal_length_per_feature=20):
num_features = input_.explanation.count("(")
if num_features == 0:
num_features = 1
@@ -259,13 +272,9 @@ def conciseness(


def context_awareness(input_, output_, grader, trace=None):
question = (
f"How well does the rationalization help explain the logic in the narrative?"
)
rubric = f"0: Not at all. 2: Somewhat. 4: Very well."
narrative_input = (
f"Narrative: {output_.narrative}. Rationalization: {output_.rationalization}"
)
question = "How well does the rationalization help explain the logic in the narrative?"
rubric = "0: Not at all. 2: Somewhat. 4: Very well."
narrative_input = f"Narrative: {output_.narrative}. Rationalization: {output_.rationalization}"
return compute_score_from_rubric(
"context_awareness", question, rubric, narrative_input, grader
)
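For reference, a minimal usage sketch of the Grader API documented above (not part of this commit). The explanation string, format description, and API key are hypothetical, constructor keyword names are assumed to match the docstring, and the return value is assumed to be the metric-name-to-score dict assembled in run_metrics:

from explingo import Grader

grader = Grader(
    openai_api_key="sk-...",  # hypothetical key; a DSPy LLM can be passed via llm= instead
    metrics="all",            # or a subset such as ["accuracy", "fluency"]
    sample_narratives=[       # used for fluency grading and the default max_optimal_length
        "The home's large size (2,500 sq ft) raised the predicted price.",
    ],
)

scores = grader(
    explanation="(size, 2500, +10000)",
    explanation_format="(feature, value, contribution) tuples",
    narrative="The home's large size raised the predicted price by about $10,000.",
)
# scores is expected to map each requested metric to its 0-4 rubric (or length-based) score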
58 changes: 26 additions & 32 deletions explingo/narrator.py
@@ -1,7 +1,7 @@
import random

import dspy
from dspy.teleprompt import BootstrapFewShot, LabeledFewShot
from dspy.teleprompt import BootstrapFewShot


def _manually_parse_output(output):
@@ -18,16 +14,14 @@ def _manually_parse_output(output):


class NarratorSig(dspy.Signature):
"""You are helping users understand an ML model's prediction. Given an explanation and information about the model,
convert the explanation into a human-readable narrative."""
"""You are helping users understand an ML model's prediction. Given an explanation
and information about the model, convert the explanation into a human-readable narrative."""

context = dspy.InputField(desc="what the ML model predicts")
explanation = dspy.InputField(desc="explanation of an ML model's prediction")
explanation_format = dspy.InputField(desc="format the explanation is given in")

narrative = dspy.OutputField(
desc="human-readable narrative version of the explanation"
)
narrative = dspy.OutputField(desc="human-readable narrative version of the explanation")
# rationalization = dspy.OutputField(
# desc="explains why given features may be relevant"
# )
@@ -46,19 +44,20 @@ def __init__(
"""
Args:
explanation_format (string): Format explanations will take
context (string): Brief description of what the model predicts (ie. "the model predicts house prices")
context (string): Brief description of what the model predicts
(ie. "the model predicts house prices")
llm (LLM object): DSPy LLM object to use.
See https://dspy-docs.vercel.app/docs/building-blocks/language_models for examples
One of llm or openai_api_key must be provided
openai_api_key (string): OpenAI API key to use
gpt_model_name (string): if openai_api_key is provided, specifies the GPT version to use
sample_narratives (list of tuples of strings): List of (explanation, narrative) examples
gpt_model_name (string): if openai_api_key is provided,
specifies the GPT version to use
sample_narratives (list of tuples of strings):
List of (explanation, narrative) examples
"""
self.llm = llm
if self.llm is None and openai_api_key is not None:
self.llm = dspy.OpenAI(
model=gpt_model_name, api_key=openai_api_key, max_tokens=1000
)
self.llm = dspy.OpenAI(model=gpt_model_name, api_key=openai_api_key, max_tokens=1000)
self.context = context
self.explanation_format = explanation_format
self.sample_narratives = []
@@ -81,16 +80,14 @@ def __init__(
"convert the explanation into a human-readable narrative."
)

def _assemble_prompt(
self, prompt, explanation, explanation_format, examples=None, n=3
):
def _assemble_prompt(self, prompt, explanation, explanation_format, examples=None, n=3):
header_string = f"{prompt}\n"
format_string = (
f"Follow the following format\n"
f"Context: what the model predicts\n"
f"Explanation: explanation of the model's prediction\n"
f"Explanation Format: format the explanation is given in\n"
f"Narrative: human-readable narrative version of the explanation\n"
"Follow the following format\n"
"Context: what the model predicts\n"
"Explanation: explanation of the model's prediction\n"
"Explanation Format: format the explanation is given in\n"
"Narrative: human-readable narrative version of the explanation\n"
)
input_string = (
f"Context: {self.context}\n"
@@ -115,9 +112,7 @@ def _assemble_prompt(
if len(examples_string) == 0:
return "---\n".join([header_string, format_string, input_string])
else:
return "---\n".join(
[header_string, format_string, examples_string, input_string]
)
return "---\n".join([header_string, format_string, examples_string, input_string])

def narrate(self, explanation, n_examples=3, n_bootstrapped=0, grader=None):
"""
@@ -129,7 +124,8 @@ def narrate(self, explanation, n_examples=3, n_bootstrapped=0, grader=None):
n_bootstrapped (int): Number of bootstrapped examples to pass. Increasing this number
will incur additional calls to the LLM, but may improve the quality of the output
n_bootstrapped should be less than or equal to n_examples
grader (Grader): Grader object to use for bootstrapping. Must be provided if n_bootstrapped > 0
grader (Grader): Grader object to use for bootstrapping. Must be provided if
n_bootstrapped > 0
"""
if n_bootstrapped > 0:
return self.bootstrap_few_shot(
@@ -158,15 +154,11 @@ def basic_prompt(self, explanation, explanation_format, prompt=None, few_shot_n=
"""
if prompt is None:
prompt = self.default_prompt
full_prompt = self._assemble_prompt(
prompt, explanation, explanation_format, examples=None
)
full_prompt = self._assemble_prompt(prompt, explanation, explanation_format, examples=None)
output = self.llm(full_prompt)[0]
return _manually_parse_output(output)

def few_shot(
self, explanation, explanation_format, prompt=None, n_few_shot=3, use_dspy=False
):
def few_shot(self, explanation, explanation_format, prompt=None, n_few_shot=3, use_dspy=False):
"""
Few-shot prompting
@@ -175,7 +167,8 @@ def few_shot(
explanation_format (string): Explanation format
prompt (string): Prompt
n_few_shot (int): Number of examples to use in few-shot learning
use_dspy (bool): Should be set to False, saving legacy version using DSPy in case needed later
use_dspy (bool): Should be set to False, saving legacy version using DSPy
in case needed later
Returns:
DSPy Prediction object
@@ -209,7 +202,8 @@ def bootstrap_few_shot(
explanation_format (string): Explanation format
metric (func): Metric to use for optimization
n_labeled_few_shot (int): Number of examples to use in few-shot learning
n_bootstrapped_few_shot (int): Number of bootstrapped examples to use in few-shot learning
n_bootstrapped_few_shot (int): Number of bootstrapped examples to use in
few-shot learning
Returns:
DSPy Prediction object
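For reference, a minimal usage sketch of the Narrator API documented above (not part of this commit). The explanation format, context string, sample narrative, and API key are hypothetical; bootstrapped few-shot is shown with a Grader, as the narrate docstring requires when n_bootstrapped > 0:

from explingo import Grader, Narrator

narrator = Narrator(
    explanation_format="(feature, value, contribution) tuples",  # hypothetical format
    context="The model predicts house prices",
    openai_api_key="sk-...",  # or pass a DSPy LLM via llm=
    sample_narratives=[
        (
            "(size, 2500, +10000)",
            "The home's large size raised the predicted price by about $10,000.",
        ),
    ],
)

# Plain few-shot narration (n_bootstrapped defaults to 0, so no Grader is needed)
narrative = narrator.narrate("(age, 75, -5000)", n_examples=1)

# Bootstrapped few-shot makes extra LLM calls and needs a Grader for its metric
grader = Grader(openai_api_key="sk-...", metrics="all")
narrative = narrator.narrate("(age, 75, -5000)", n_examples=1, n_bootstrapped=1, grader=grader)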
11 changes: 10 additions & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "explingo"
version = "0.1.0.1"
version = "0.1.1"
description = ""
authors = ["Ola Zytek <[email protected]>"]
readme = "README.md"
@@ -22,3 +22,12 @@ jupyter = "^1.1.1"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.black]
line-length = 99
preview = true

[tool.isort]
profile = "black"
line_length = 99
skip = ["__init__.py"]
8 changes: 2 additions & 6 deletions tests/test_narrator.py
@@ -4,9 +4,7 @@
def test_narrate_basic_prompt():
response = "narrative"
mock_llm = explingo.testing.MockNarratorLLM(response)
narrator = explingo.Narrator(
llm=mock_llm, explanation_format="test", context="test"
)
narrator = explingo.Narrator(llm=mock_llm, explanation_format="test", context="test")
explanation = "explanation"
assert narrator.narrate(explanation) == response

@@ -40,8 +38,6 @@ def test_narrative_bootstrapped_few_shot():
)
explanation = "explanation"
assert (
narrator.narrate(
explanation, n_examples=2, n_bootstrapped=2, grader=mock_grader
)
narrator.narrate(explanation, n_examples=2, n_bootstrapped=2, grader=mock_grader)
== response
)
