Revert "no longer necessary to call super().process_frame(frame, direction)" #853

Merged: 1 commit, Dec 13, 2024
6 changes: 0 additions & 6 deletions CHANGELOG.md
@@ -13,12 +13,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   Tamil) and PlayHT (Afrikaans, Albanian, Amharic, Arabic, Bengali, Croatian,
   Galician, Hebrew, Mandarin, Serbian, Tagalog, Urdu, Xhosa).
 
-### Changed
-
-- It's no longer necessary to call `super().process_frame(frame, direction)` if
-  you subclass and implement `FrameProcessor.process_frame()`. This is all now
-  done internally and will avoid possible issues if you forget to add it.
-
 ### Deprecated
 
 - `AWSTTSService` is now deprecated, use `PollyTTSService` instead.
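With this revert, subclasses that override `FrameProcessor.process_frame()` must once again call the base implementation before doing their own work, exactly as the restored examples below do. A minimal sketch of the required pattern (the `TextLogger` class is hypothetical; only the `super()` call is the point):

```python
from pipecat.frames.frames import Frame, TextFrame
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor


class TextLogger(FrameProcessor):
    """Hypothetical processor illustrating the restored calling convention."""

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        # Required again after this revert: the base class handles system
        # frames and internal bookkeeping before any custom logic runs.
        await super().process_frame(frame, direction)

        if isinstance(frame, TextFrame):
            print(f"Text: {frame.text}")

        # Keep the pipeline flowing for downstream processors.
        await self.push_frame(frame, direction)
```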
2 changes: 2 additions & 0 deletions examples/foundational/05-sync-speech-and-image.py
@@ -56,6 +56,8 @@ def __init__(self):
         self.prepend_to_next_text_frame = False
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         if isinstance(frame, MonthFrame):
             self.most_recent_month = frame.month
         elif self.prepend_to_next_text_frame and isinstance(frame, TextFrame):
6 changes: 6 additions & 0 deletions examples/foundational/05a-local-sync-speech-and-image.py
@@ -62,6 +62,8 @@ def __init__(self):
         self.text = ""
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         if isinstance(frame, TextFrame):
             self.text = frame.text
             await self.push_frame(frame, direction)
@@ -73,6 +75,8 @@ def __init__(self):
         self.frame = None
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         if isinstance(frame, TTSAudioRawFrame):
             self.audio.extend(frame.audio)
             self.frame = OutputAudioRawFrame(
@@ -86,6 +90,8 @@ def __init__(self):
         self.frame = None
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         if isinstance(frame, URLImageRawFrame):
             self.frame = frame
             await self.push_frame(frame, direction)
2 changes: 2 additions & 0 deletions examples/foundational/06a-image-sync.py
@@ -47,6 +47,8 @@ def __init__(self, speaking_path: str, waiting_path: str):
         self._waiting_image_bytes = self._waiting_image.tobytes()
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         if not isinstance(frame, SystemFrame) and direction == FrameDirection.DOWNSTREAM:
             await self.push_frame(
                 OutputImageRawFrame(
5 changes: 5 additions & 0 deletions examples/foundational/07s-interruptible-google-audio-in.py
@@ -82,6 +82,8 @@ def __init__(self, context, user_context_aggregator):
         self._user_speaking = False
 
     async def process_frame(self, frame, direction):
+        await super().process_frame(frame, direction)
+
         if isinstance(frame, TranscriptionFrame):
             # We could gracefully handle both audio input and text/transcription input ...
             # but let's leave that as an exercise to the reader. :-)
@@ -124,6 +126,7 @@ def reset(self):
         self._accumulating_transcript = False
 
     async def process_frame(self, frame, direction):
+        await super().process_frame(frame, direction)
         if isinstance(frame, LLMFullResponseStartFrame):
             self._processing_llm_response = True
             self._accumulating_transcript = True
@@ -177,6 +180,8 @@ def add_transcript_back_to_inference_output(self):
         self._context.messages[-1].parts[-1].text += f"\n\n{marker}\n{self._transcript}\n"
 
     async def process_frame(self, frame, direction):
+        await super().process_frame(frame, direction)
+
         if isinstance(frame, MagicDemoTranscriptionFrame):
             self._transcript = frame.text
         elif isinstance(frame, LLMFullResponseEndFrame) or isinstance(
2 changes: 2 additions & 0 deletions examples/foundational/09-mirror.py
@@ -35,6 +35,8 @@
 
 class MirrorProcessor(FrameProcessor):
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         if isinstance(frame, InputAudioRawFrame):
             await self.push_frame(
                 OutputAudioRawFrame(
2 changes: 2 additions & 0 deletions examples/foundational/09a-local-mirror.py
@@ -39,6 +39,8 @@
 
 class MirrorProcessor(FrameProcessor):
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         if isinstance(frame, InputAudioRawFrame):
             await self.push_frame(
                 OutputAudioRawFrame(
4 changes: 4 additions & 0 deletions examples/foundational/11-sound-effects.py
@@ -60,6 +60,8 @@
 
 class OutboundSoundEffectWrapper(FrameProcessor):
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         if isinstance(frame, LLMFullResponseEndFrame):
             await self.push_frame(sounds["ding1.wav"])
             # In case anything else downstream needs it
@@ -70,6 +72,8 @@ async def process_frame(self, frame: Frame, direction: FrameDirection):
 
 class InboundSoundEffectWrapper(FrameProcessor):
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         if isinstance(frame, OpenAILLMContextFrame):
             await self.push_frame(sounds["ding2.wav"])
             # In case anything else downstream needs it
2 changes: 2 additions & 0 deletions examples/foundational/12-describe-video.py
@@ -42,6 +42,8 @@ def set_participant_id(self, participant_id: str):
         self._participant_id = participant_id
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         if self._participant_id and isinstance(frame, TextFrame):
             await self.push_frame(
                 UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
2 changes: 2 additions & 0 deletions examples/foundational/12a-describe-video-gemini-flash.py
@@ -42,6 +42,8 @@ def set_participant_id(self, participant_id: str):
         self._participant_id = participant_id
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         if self._participant_id and isinstance(frame, TextFrame):
             await self.push_frame(
                 UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
2 changes: 2 additions & 0 deletions examples/foundational/12b-describe-video-gpt-4o.py
@@ -42,6 +42,8 @@ def set_participant_id(self, participant_id: str):
         self._participant_id = participant_id
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         if self._participant_id and isinstance(frame, TextFrame):
             await self.push_frame(
                 UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
2 changes: 2 additions & 0 deletions examples/foundational/12c-describe-video-anthropic.py
@@ -42,6 +42,8 @@ def set_participant_id(self, participant_id: str):
         self._participant_id = participant_id
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         if self._participant_id and isinstance(frame, TextFrame):
             await self.push_frame(
                 UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
2 changes: 2 additions & 0 deletions examples/foundational/13-whisper-transcription.py
@@ -30,6 +30,8 @@
 
 class TranscriptionLogger(FrameProcessor):
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         if isinstance(frame, TranscriptionFrame):
             print(f"Transcription: {frame.text}")
 
2 changes: 2 additions & 0 deletions examples/foundational/13a-whisper-local.py
@@ -28,6 +28,8 @@
 
 class TranscriptionLogger(FrameProcessor):
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         if isinstance(frame, TranscriptionFrame):
             print(f"Transcription: {frame.text}")
 
2 changes: 2 additions & 0 deletions examples/foundational/13b-deepgram-transcription.py
@@ -31,6 +31,8 @@
 
 class TranscriptionLogger(FrameProcessor):
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         if isinstance(frame, TranscriptionFrame):
             print(f"Transcription: {frame.text}")
 
2 changes: 2 additions & 0 deletions examples/foundational/13c-gladia-transcription.py
@@ -29,6 +29,8 @@
 
 class TranscriptionLogger(FrameProcessor):
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         if isinstance(frame, TranscriptionFrame):
             print(f"Transcription: {frame.text}")
 
2 changes: 2 additions & 0 deletions examples/foundational/13d-assemblyai-transcription.py
@@ -29,6 +29,8 @@
 
 class TranscriptionLogger(FrameProcessor):
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         if isinstance(frame, TranscriptionFrame):
             print(f"Transcription: {frame.text}")
 
4 changes: 4 additions & 0 deletions examples/foundational/22b-natural-conversation-proposal.py
@@ -64,6 +64,7 @@ def __init__(self, notifier: BaseNotifier, **kwargs):
         self._notifier = notifier
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
         # We must not block system frames.
         if isinstance(frame, SystemFrame):
             await self.push_frame(frame, direction)
@@ -117,6 +118,7 @@ def __init__(self, notifier: BaseNotifier):
         self._notifier = notifier
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
        if isinstance(frame, TextFrame) and frame.text == "YES":
             logger.debug("Completeness check YES")
             await self.push_frame(UserStoppedSpeakingFrame())
@@ -139,6 +141,8 @@ def open_gate(self):
         self._gate_open = True
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         # We must not block system frames.
         if isinstance(frame, SystemFrame):
             if isinstance(frame, StartFrame):
41 changes: 23 additions & 18 deletions examples/foundational/22c-natural-conversation-mixed-llms.py
@@ -101,12 +101,12 @@
 Examples:
 # Complete Wh-question
-[{"role": "assistant", "content": "I can help you learn."},
+[{"role": "assistant", "content": "I can help you learn."},
 {"role": "user", "content": "What's the fastest way to learn Spanish"}]
 Output: YES
 # Complete Yes/No question despite STT error
-[{"role": "assistant", "content": "I know about planets."},
+[{"role": "assistant", "content": "I know about planets."},
 {"role": "user", "content": "Is is Jupiter the biggest planet"}]
 Output: YES
@@ -118,12 +118,12 @@
 Examples:
 # Direct instruction
-[{"role": "assistant", "content": "I can explain many topics."},
+[{"role": "assistant", "content": "I can explain many topics."},
 {"role": "user", "content": "Tell me about black holes"}]
 Output: YES
 # Action demand
-[{"role": "assistant", "content": "I can help with math."},
+[{"role": "assistant", "content": "I can help with math."},
 {"role": "user", "content": "Solve this equation x plus 5 equals 12"}]
 Output: YES
@@ -134,12 +134,12 @@
 Examples:
 # Specific answer
-[{"role": "assistant", "content": "What's your favorite color?"},
+[{"role": "assistant", "content": "What's your favorite color?"},
 {"role": "user", "content": "I really like blue"}]
 Output: YES
 # Option selection
-[{"role": "assistant", "content": "Would you prefer morning or evening?"},
+[{"role": "assistant", "content": "Would you prefer morning or evening?"},
 {"role": "user", "content": "Morning"}]
 Output: YES
@@ -153,17 +153,17 @@
 Examples:
 # Self-correction reaching completion
-[{"role": "assistant", "content": "What would you like to know?"},
+[{"role": "assistant", "content": "What would you like to know?"},
 {"role": "user", "content": "Tell me about... no wait, explain how rainbows form"}]
 Output: YES
 # Topic change with complete thought
-[{"role": "assistant", "content": "The weather is nice today."},
+[{"role": "assistant", "content": "The weather is nice today."},
 {"role": "user", "content": "Actually can you tell me who invented the telephone"}]
 Output: YES
 # Mid-sentence completion
-[{"role": "assistant", "content": "Hello I'm ready."},
+[{"role": "assistant", "content": "Hello I'm ready."},
 {"role": "user", "content": "What's the capital of? France"}]
 Output: YES
@@ -175,12 +175,12 @@
 Examples:
 # Acknowledgment
-[{"role": "assistant", "content": "Should we talk about history?"},
+[{"role": "assistant", "content": "Should we talk about history?"},
 {"role": "user", "content": "Sure"}]
 Output: YES
 # Disagreement with completion
-[{"role": "assistant", "content": "Is that what you meant?"},
+[{"role": "assistant", "content": "Is that what you meant?"},
 {"role": "user", "content": "No not really"}]
 Output: YES
@@ -194,12 +194,12 @@
 Examples:
 # Word repetition but complete
-[{"role": "assistant", "content": "I can help with that."},
+[{"role": "assistant", "content": "I can help with that."},
 {"role": "user", "content": "What what is the time right now"}]
 Output: YES
 # Missing punctuation but complete
-[{"role": "assistant", "content": "I can explain that."},
+[{"role": "assistant", "content": "I can explain that."},
 {"role": "user", "content": "Please tell me how computers work"}]
 Output: YES
@@ -211,12 +211,12 @@
 Examples:
 # Filler words but complete
-[{"role": "assistant", "content": "What would you like to know?"},
+[{"role": "assistant", "content": "What would you like to know?"},
 {"role": "user", "content": "Um uh how do airplanes fly"}]
 Output: YES
 # Thinking pause but incomplete
-[{"role": "assistant", "content": "I can explain anything."},
+[{"role": "assistant", "content": "I can explain anything."},
 {"role": "user", "content": "Well um I want to know about the"}]
 Output: NO
@@ -241,17 +241,17 @@
 Examples:
 # Incomplete despite corrections
-[{"role": "assistant", "content": "What would you like to know about?"},
+[{"role": "assistant", "content": "What would you like to know about?"},
 {"role": "user", "content": "Can you tell me about"}]
 Output: NO
 # Complete despite multiple artifacts
-[{"role": "assistant", "content": "I can help you learn."},
+[{"role": "assistant", "content": "I can help you learn."},
 {"role": "user", "content": "How do you I mean what's the best way to learn programming"}]
 Output: YES
 # Trailing off incomplete
-[{"role": "assistant", "content": "I can explain anything."},
+[{"role": "assistant", "content": "I can explain anything."},
 {"role": "user", "content": "I was wondering if you could tell me why"}]
 Output: NO
 """
@@ -268,6 +268,7 @@ def __init__(self, notifier: BaseNotifier, **kwargs):
         self._notifier = notifier
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
         # We must not block system frames.
         if isinstance(frame, SystemFrame):
             await self.push_frame(frame, direction)
@@ -319,6 +320,8 @@ def __init__(self, notifier: BaseNotifier):
         self._notifier = notifier
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         if isinstance(frame, TextFrame) and frame.text == "YES":
             logger.debug("!!! Completeness check YES")
             await self.push_frame(UserStoppedSpeakingFrame())
@@ -341,6 +344,8 @@ def open_gate(self):
         self._gate_open = True
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await super().process_frame(frame, direction)
+
         # We must not block system frames.
         if isinstance(frame, SystemFrame):
             if isinstance(frame, StartFrame):
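For reviewers, here is a minimal sketch (not the example's exact code) of the gated-processor shape these last hunks restore: `super().process_frame()` runs first, and system frames are pushed through even while the gate is closed. The buffering details below are illustrative assumptions.

```python
from pipecat.frames.frames import Frame, SystemFrame
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor


class OutputGate(FrameProcessor):
    """Illustrative gate: buffers frames until opened; never blocks system frames."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._gate_open = False
        self._buffer: list[tuple[Frame, FrameDirection]] = []

    def open_gate(self):
        # In the real examples a separate task flushes the buffer;
        # here we only flip the flag to keep the sketch short.
        self._gate_open = True

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        # After the revert, the base class call comes first again.
        await super().process_frame(frame, direction)

        # We must not block system frames.
        if isinstance(frame, SystemFrame):
            await self.push_frame(frame, direction)
            return

        if self._gate_open:
            await self.push_frame(frame, direction)
        else:
            self._buffer.append((frame, direction))
```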