Skip to content

Commit

Permalink
Merge pull request #618 from pipecat-ai/aleix/examples-switch-to-llm-…
Browse files Browse the repository at this point in the history
…context

examples: use OpenAILLMContext in all the examples
  • Loading branch information
aconchillo authored Oct 20, 2024
2 parents 4f66e5d + f971dbe commit 3556c9c
Show file tree
Hide file tree
Showing 36 changed files with 358 additions and 416 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Renamed `OpenAILLMServiceRealtimeBeta` to `OpenAIRealtimeBetaLLMService` to
match other services.

### Deprecated

- `LLMUserResponseAggregator` and `LLMAssistantResponseAggregator` are
mostly deprecated, use `OpenAILLMContext` instead.

- The `vad` package is now deprecated and `audio.vad` should be used
  instead. The `vad` package will get removed in a future release.

Expand Down
13 changes: 5 additions & 8 deletions examples/canonical-metrics/bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import (
LLMAssistantResponseAggregator,
LLMUserResponseAggregator,
)
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
from pipecat.services.canonical import CanonicalMetricsService
from pipecat.services.elevenlabs import ElevenLabsTTSService
Expand Down Expand Up @@ -92,8 +89,8 @@ async def main():
},
]

user_response = LLMUserResponseAggregator()
assistant_response = LLMAssistantResponseAggregator()
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)

"""
CanonicalMetrics uses AudioBufferProcessor under the hood to buffer the audio. On
Expand All @@ -113,13 +110,13 @@ async def main():
pipeline = Pipeline(
[
transport.input(), # microphone
user_response,
context_aggregator.user(),
llm,
tts,
transport.output(),
audio_buffer_processor, # captures audio into a buffer
canonical, # uploads audio buffer to Canonical AI for metrics
assistant_response,
context_aggregator.assistant(),
]
)

Expand Down
35 changes: 22 additions & 13 deletions examples/chatbot-audio-recording/bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import sys

import aiohttp
import datetime
import wave
from dotenv import load_dotenv
from loguru import logger
from runner import configure
Expand All @@ -18,10 +20,7 @@
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import (
LLMAssistantResponseAggregator,
LLMUserResponseAggregator,
)
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.openai import OpenAILLMService
Expand All @@ -33,6 +32,20 @@
logger.add(sys.stderr, level="DEBUG")


async def save_audio(audiobuffer):
    """Merge the buffered conversation audio and write it to a timestamped WAV file.

    Args:
        audiobuffer: An AudioBufferProcessor holding the captured conversation
            audio; queried via has_audio() / merge_audio_buffers().

    Side effects:
        Writes ``conversation_recording<YYYYmmdd_HHMMSS>.wav`` to the current
        working directory when audio is available; otherwise prints a notice.
    """
    if audiobuffer.has_audio():
        merged_audio = audiobuffer.merge_audio_buffers()
        filename = f"conversation_recording{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
        with wave.open(filename, "wb") as wf:
            wf.setnchannels(2)  # two channels (merged user/bot buffers)
            wf.setsampwidth(2)  # 16-bit PCM samples
            # NOTE(review): reads the private _sample_rate attribute — confirm
            # whether AudioBufferProcessor exposes a public accessor instead.
            wf.setframerate(audiobuffer._sample_rate)
            wf.writeframes(merged_audio)
        # Fix: report the actual output path (original f-string had no placeholder).
        print(f"Merged audio saved to {filename}")
    else:
        print("No audio data to save")


async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
Expand Down Expand Up @@ -90,19 +103,19 @@ async def main():
},
]

user_response = LLMUserResponseAggregator()
assistant_response = LLMAssistantResponseAggregator()
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)

audiobuffer = AudioBufferProcessor()
pipeline = Pipeline(
[
transport.input(), # microphone
user_response,
context_aggregator.user(),
llm,
tts,
transport.output(),
audiobuffer, # used to buffer the audio in the pipeline
assistant_response,
context_aggregator.assistant(),
]
)

Expand All @@ -117,11 +130,7 @@ async def on_first_participant_joined(transport, participant):
async def on_participant_left(transport, participant, reason):
print(f"Participant left: {participant}")
await task.queue_frame(EndFrame())

@transport.event_handler("on_call_state_updated")
async def on_call_state_updated(transport, state):
if state == "left":
await task.queue_frame(EndFrame())
await save_audio(audiobuffer)

runner = PipelineRunner()

Expand Down
13 changes: 5 additions & 8 deletions examples/deployment/flyio-example/bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,8 @@
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import (
LLMAssistantResponseAggregator,
LLMUserResponseAggregator,
)
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.openai import OpenAILLMService
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
Expand Down Expand Up @@ -60,17 +57,17 @@ async def main(room_url: str, token: str):
},
]

tma_in = LLMUserResponseAggregator(messages)
tma_out = LLMAssistantResponseAggregator(messages)
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)

pipeline = Pipeline(
[
transport.input(),
tma_in,
context_aggregator.user(),
llm,
tts,
transport.output(),
tma_out,
context_aggregator.assistant(),
]
)

Expand Down
13 changes: 5 additions & 8 deletions examples/dialin-chatbot/bot_daily.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,8 @@
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import (
LLMAssistantResponseAggregator,
LLMUserResponseAggregator,
)
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport, DailyDialinSettings
Expand Down Expand Up @@ -66,17 +63,17 @@ async def main(room_url: str, token: str, callId: str, callDomain: str):
},
]

tma_in = LLMUserResponseAggregator(messages)
tma_out = LLMAssistantResponseAggregator(messages)
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)

pipeline = Pipeline(
[
transport.input(),
tma_in,
context_aggregator.user(),
llm,
tts,
transport.output(),
tma_out,
context_aggregator.assistant(),
]
)

Expand Down
6 changes: 2 additions & 4 deletions examples/dialin-chatbot/bot_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,9 @@ async def _create_daily_room(room_url, callId, callDomain=None, vendor="daily"):
# Spawn a new agent, and join the user session
# Note: this is mostly for demonstration purposes (refer to 'deployment' in docs)
if vendor == "daily":
bot_proc = f"python3 - m bot_daily - u {room.url} - t {token} - i {
callId} - d {callDomain}"
bot_proc = f"python3 -m bot_daily -u {room.url} -t {token} -i {callId} -d {callDomain}"
else:
bot_proc = f"python3 - m bot_twilio - u {room.url} - t {
token} - i {callId} - s {room.config.sip_endpoint}"
bot_proc = f"python3 -m bot_twilio -u {room.url} -t {token} -i {callId} -s {room.config.sip_endpoint}"

try:
subprocess.Popen(
Expand Down
13 changes: 5 additions & 8 deletions examples/dialin-chatbot/bot_twilio.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,8 @@
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import (
LLMAssistantResponseAggregator,
LLMUserResponseAggregator,
)
from pipecat.frames.frames import LLMMessagesFrame, EndFrame
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
Expand Down Expand Up @@ -69,17 +66,17 @@ async def main(room_url: str, token: str, callId: str, sipUri: str):
},
]

tma_in = LLMUserResponseAggregator(messages)
tma_out = LLMAssistantResponseAggregator(messages)
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)

pipeline = Pipeline(
[
transport.input(),
tma_in,
context_aggregator.user(),
llm,
tts,
transport.output(),
tma_out,
context_aggregator.assistant(),
]
)

Expand Down
14 changes: 6 additions & 8 deletions examples/foundational/06-listen-and-respond.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,7 @@
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.aggregators.llm_response import (
LLMAssistantResponseAggregator,
LLMUserResponseAggregator,
)
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.openai import OpenAILLMService
Expand Down Expand Up @@ -92,18 +89,19 @@ async def main():
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
},
]
tma_in = LLMUserResponseAggregator(messages)
tma_out = LLMAssistantResponseAggregator(messages)

context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)

pipeline = Pipeline(
[
transport.input(),
tma_in,
context_aggregator.user(),
llm,
tts,
ml,
transport.output(),
tma_out,
context_aggregator.assistant(),
]
)

Expand Down
13 changes: 5 additions & 8 deletions examples/foundational/06a-image-sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,7 @@
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.aggregators.llm_response import (
LLMAssistantResponseAggregator,
LLMUserResponseAggregator,
)
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.services.cartesia import CartesiaHttpTTSService
from pipecat.services.openai import OpenAILLMService
Expand Down Expand Up @@ -105,8 +102,8 @@ async def main():
},
]

tma_in = LLMUserResponseAggregator(messages)
tma_out = LLMAssistantResponseAggregator(messages)
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)

image_sync_aggregator = ImageSyncAggregator(
os.path.join(os.path.dirname(__file__), "assets", "speaking.png"),
Expand All @@ -117,11 +114,11 @@ async def main():
[
transport.input(),
image_sync_aggregator,
tma_in,
context_aggregator.user(),
llm,
tts,
transport.output(),
tma_out,
context_aggregator.assistant(),
]
)

Expand Down
21 changes: 9 additions & 12 deletions examples/foundational/07-interruptible-vad.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,8 @@
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import (
LLMAssistantResponseAggregator,
LLMUserResponseAggregator,
)
from pipecat.processors.audio.vad.silero import SileroVAD
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
Expand Down Expand Up @@ -65,18 +62,18 @@ async def main():
},
]

tma_in = LLMUserResponseAggregator(messages)
tma_out = LLMAssistantResponseAggregator(messages)
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)

pipeline = Pipeline(
[
transport.input(), # Transport user input
transport.input(),
vad,
tma_in, # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
tma_out, # Assistant spoken responses
context_aggregator.user(),
llm,
tts,
transport.output(),
context_aggregator.assistant(),
]
)

Expand Down
13 changes: 5 additions & 8 deletions examples/foundational/07-interruptible.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,7 @@
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import (
LLMAssistantResponseAggregator,
LLMUserResponseAggregator,
)
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
Expand Down Expand Up @@ -64,17 +61,17 @@ async def main():
},
]

tma_in = LLMUserResponseAggregator(messages)
tma_out = LLMAssistantResponseAggregator(messages)
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)

pipeline = Pipeline(
[
transport.input(), # Transport user input
tma_in, # User responses
context_aggregator.user(), # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
tma_out, # Assistant spoken responses
context_aggregator.assistant(), # Assistant spoken responses
]
)

Expand Down
Loading

0 comments on commit 3556c9c

Please sign in to comment.