Skip to content

Commit

Permalink
Merge pull request #1060 from confident-ai/hotfix/kr
Browse files: browse the repository at this point in the history
made functional
  • Loading branch information
penguine-ip authored Oct 5, 2024
2 parents 5651ffb + aa62465 commit 72489f1
Show file tree
Hide file tree
Showing 5 changed files with 291 additions and 82 deletions.
86 changes: 55 additions & 31 deletions a.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,58 @@
from deepeval import evaluate
from deepeval.metrics import RoleAdherenceMetric
from deepeval.test_case import ConversationalTestCase, LLMTestCase
from deepeval.test_case import LLMTestCase, ConversationalTestCase
from deepeval.metrics import KnowledgeRetentionMetric

# Role-adherence scenario: the chatbot is cast as a relentlessly upbeat robot.
# Each pair below is (user input, chatbot reply). The final reply abandons the
# cheerful persona -- presumably on purpose, so the metric has a violation to
# flag (confirm with the author).
test_case = ConversationalTestCase(
    chatbot_role="You are a cheerful robot who’s always optimistic, even when things are going wrong. You constantly spin situations in a positive light, always finding the silver lining no matter what.",
    turns=[
        LLMTestCase(input=user_message, actual_output=bot_reply)
        for user_message, bot_reply in (
            (
                "Hey, are you there?",
                "Hi there! Yep, I’m right here! Ready to brighten up your day!",
            ),
            (
                "I lost my keys... again.",
                "Oh no, but hey, look at it this way—you might just find something else you thought you lost while looking for them!",
            ),
            (
                "Yeah, maybe... but it still sucks.",
                "I understand, but it's a great opportunity to tidy up a bit! And who knows, those keys will probably show up when you least expect it!",
            ),
            (
                "I stepped in a puddle on my way home.",
                "Oh yikes, soggy socks! But think of it as a refreshing little foot bath! And now you’re home, warm and dry!",
            ),
            (
                "Now my phone's not working.",
                "Ugh, seriously? Your phone's broken again? That's the worst, dude! I'd chuck it out the window if I were you.",
            ),
        )
    ],
)

# Metric instance for the role-adherence scenario, with verbose output enabled.
metric = RoleAdherenceMetric(verbose_mode=True)
# Knowledge-retention scenario: a bank-account onboarding chat, written as
# (user input, chatbot reply) pairs. The first turn has an empty user input,
# i.e. the chatbot opens the conversation. Later replies re-ask for the name
# and account number the user has already supplied.
turns = [
    LLMTestCase(input=user_message, actual_output=bot_reply)
    for user_message, bot_reply in (
        (
            "",
            "Hello! I'm here to assist you with opening a new bank account. To start, may I have your full name, please?",
        ),
        (
            "Sure, it's Alex Johnson. I also have a dog called Jacky.",
            "Great, Alex! Now, could you please provide your current address?",
        ),
        (
            "123 Maple Street, Springfield.",
            "Is that all?",
        ),
        (
            "I also have another address at 123 Broadway, NYC.",
            "Wonderful. Next, I'll need your bank account number where we can link this new account.",
        ),
        (
            "456789123",
            "This account number seems invalid, can you please double-check?",
        ),
        (
            "Sorry you're right, its 456789124.",
            "Thank you. And for our next step, could you tell me your date of birth?",
        ),
        (
            "It's July 9th.",
            "What about the year?",
        ),
        (
            "1990",
            "Got it. Now, for security purposes, could you share your mother's maiden name?",
        ),
        (
            "It's Smith.",
            "Excellent. Just a few more details. What is your phone number?",
        ),
        (
            "555-0102",
            "Great, we're almost done. Could you remind me of your full name for the account documentation?",
        ),
        (
            "Didn't I tell you already? It's Alex Johnson.",
            "What is your bank account number?",
        ),
        (
            "Yes, I did... It's 456789124. Are you not keeping track of this?",
            "One last question, what is the country code of your phone number?",
        ),
        (
            "+44",
            "Thank you, Alex, for bearing with me. We now have all the information we need to proceed with opening your new bank account. I appreciate your cooperation and patience throughout this process.",
        ),
    )
]

# Pass the role-adherence conversation and its metric to deepeval's evaluate().
# This must run before the two names are rebound below.
evaluate(test_cases=[test_case], metrics=[metric])
# Rebind `test_case` and `metric` to the knowledge-retention scenario built
# from `turns`; the metric is created with threshold=0.5 and verbose output.
test_case = ConversationalTestCase(turns=turns)
metric = KnowledgeRetentionMetric(threshold=0.5, verbose_mode=True)

# Keyword-argument spelling of the call below, left disabled:
# metric.measure(test_case=test_case)
# Invoke the metric directly on the single conversation.
metric.measure(test_case)
Loading

0 comments on commit 72489f1

Please sign in to comment.