diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5610525b5..bd6aef5df 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,22 @@ All notable changes to **Pipecat** will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased] - TBD
+
+### Added
+
+- Added a new foundational example `07e-interruptible-playht-http.py` for easy
+  testing of `PlayHTHttpTTSService`.
+
+### Changed
+
+- Changed the default model for `PlayHTHttpTTSService` to `Play3.0-mini-http`.
+
+### Fixed
+
+- Fixed an import issue for `PlayHTHttpTTSService`. Also removed `language` and
+  `seed`, which are not supported `TTSOptions` fields in the PlayHT Python client.
+
 ## [0.0.52] - 2024-12-24
 
 ### Added
diff --git a/examples/foundational/07e-interruptible-playht-http.py b/examples/foundational/07e-interruptible-playht-http.py
new file mode 100644
index 000000000..af2844ff5
--- /dev/null
+++ b/examples/foundational/07e-interruptible-playht-http.py
@@ -0,0 +1,101 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import os
+import sys
+
+import aiohttp
+from dotenv import load_dotenv
+from loguru import logger
+from runner import configure
+
+from pipecat.audio.vad.silero import SileroVADAnalyzer
+from pipecat.frames.frames import LLMMessagesFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineParams, PipelineTask
+from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.services.openai import OpenAILLMService
+from pipecat.services.playht import PlayHTHttpTTSService
+from pipecat.transcriptions.language import Language
+from pipecat.transports.services.daily import DailyParams, DailyTransport
+
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+async def main():
+    async with aiohttp.ClientSession() as session:
+        (room_url, token) = await configure(session)
+
+        transport = DailyTransport(
+            room_url,
+            token,
+            "Respond bot",
+            DailyParams(
+                audio_out_enabled=True,
+                transcription_enabled=True,
+                vad_enabled=True,
+                vad_analyzer=SileroVADAnalyzer(),
+            ),
+        )
+
+        tts = PlayHTHttpTTSService(
+            user_id=os.getenv("PLAYHT_USER_ID"),
+            api_key=os.getenv("PLAYHT_API_KEY"),
+            voice_url="s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
+        )
+
+        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
+
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+            },
+        ]
+
+        context = OpenAILLMContext(messages)
+        context_aggregator = llm.create_context_aggregator(context)
+
+        pipeline = Pipeline(
+            [
+                transport.input(),  # Transport user input
+                context_aggregator.user(),  # User responses
+                llm,  # LLM
+                tts,  # TTS
+                transport.output(),  # Transport bot output
+                context_aggregator.assistant(),  # Assistant spoken responses
+            ]
+        )
+
+        task = PipelineTask(
+            pipeline,
+            PipelineParams(
+                allow_interruptions=True,
+                enable_metrics=True,
+                enable_usage_metrics=True,
+                report_only_initial_ttfb=True,
+            ),
+        )
+
+        @transport.event_handler("on_first_participant_joined")
+        async def on_first_participant_joined(transport, participant):
+            await transport.capture_participant_transcription(participant["id"])
+            # Kick off the conversation.
+            messages.append({"role": "system", "content": "Please introduce yourself to the user."})
+            await task.queue_frames([LLMMessagesFrame(messages)])
+
+        runner = PipelineRunner()
+
+        await runner.run(task)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/src/pipecat/services/playht.py b/src/pipecat/services/playht.py
index 115198c02..fd0a16594 100644
--- a/src/pipecat/services/playht.py
+++ b/src/pipecat/services/playht.py
@@ -37,8 +37,7 @@
 
 try:
     from pyht.async_client import AsyncClient
-    from pyht.client import TTSOptions
-    from pyht.protos.api_pb2 import Format
+    from pyht.client import Format, TTSOptions
 except ModuleNotFoundError as e:
     logger.error(f"Exception: {e}")
     logger.error(
@@ -353,9 +352,7 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
 
 class PlayHTHttpTTSService(TTSService):
     class InputParams(BaseModel):
-        language: Optional[Language] = Language.EN
         speed: Optional[float] = 1.0
-        seed: Optional[int] = None
 
     def __init__(
        self,
@@ -363,7 +360,7 @@ def __init__(
         api_key: str,
         user_id: str,
         voice_url: str,
-        voice_engine: str = "Play3.0-mini",
+        voice_engine: str = "Play3.0-mini-http",  # Options: Play3.0-mini-ws, Play3.0-mini-http, Play3.0-mini-grpc
         sample_rate: int = 24000,
         params: InputParams = InputParams(),
         **kwargs,
@@ -379,31 +376,22 @@ def __init__(
         )
         self._settings = {
             "sample_rate": sample_rate,
-            "language": self.language_to_service_language(params.language)
-            if params.language
-            else "english",
             "format": Format.FORMAT_WAV,
             "voice_engine": voice_engine,
             "speed": params.speed,
-            "seed": params.seed,
         }
         self.set_model_name(voice_engine)
         self.set_voice(voice_url)
         self._options = TTSOptions(
             voice=self._voice_id,
-            language=self._settings["language"],
             sample_rate=self._settings["sample_rate"],
             format=self._settings["format"],
             speed=self._settings["speed"],
-            seed=self._settings["seed"],
         )
 
     def can_generate_metrics(self) -> bool:
         return True
 
-    def language_to_service_language(self, language: Language) -> str | None:
-        return language_to_playht_language(language)
-
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
         logger.debug(f"Generating TTS: [{text}]")
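For quick reference (not part of the diff): with `language` and `seed` removed from `InputParams` and the default `voice_engine` now `Play3.0-mini-http`, a minimal construction of the updated service looks roughly like the sketch below. The env var names and voice URL mirror the new `07e` example, and the `speed` value is purely illustrative.

```python
import os

from pipecat.services.playht import PlayHTHttpTTSService

# Minimal sketch of the post-change constructor: only `speed` remains in
# InputParams, and voice_engine defaults to "Play3.0-mini-http"
# (alternatives per the diff comment: "Play3.0-mini-ws", "Play3.0-mini-grpc").
tts = PlayHTHttpTTSService(
    user_id=os.getenv("PLAYHT_USER_ID"),
    api_key=os.getenv("PLAYHT_API_KEY"),
    voice_url="s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
    params=PlayHTHttpTTSService.InputParams(speed=1.0),  # illustrative value
)
```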