From c23c9e046cd000f6a28d3dbb1380c44805079e3b Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Wed, 8 Jan 2025 13:46:04 -0500 Subject: [PATCH] Add setters for model and voice to RimeHttpTTSService --- CHANGELOG.md | 6 ++++++ src/pipecat/services/rime.py | 6 ++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eaccd2d26..3545b0744 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,12 +13,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added `29-livekit-audio-chat.py`, as a new foundational examples for `LiveKitTransportLayer`. + - Added `enable_prejoin_ui`, `max_participants` and `start_video_off` params to `DailyRoomProperties`. + - Added `session_timeout` to `FastAPIWebsocketTransport` and `WebsocketServerTransport` for configuring session timeouts (in seconds). Triggers `on_session_timeout` for custom timeout handling. See [examples/websocket-server/bot.py](https://github.com/pipecat-ai/pipecat/blob/main/examples/websocket-server/bot.py). + - Added the new modalities option and helper function to set Gemini output modalities. + - Added `examples/foundational/26d-gemini-multimodal-live-text.py` which is using Gemini as TEXT modality and using another TTS provider for TTS process. ### Changed @@ -32,6 +36,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fixed an issue where `OpenAIRealtimeBetaLLMService` audio chunks were hitting an error when truncating audio content. +- Fixed an issue where setting the voice and model for `RimeHttpTTSService` wasn't working. + ## [0.0.52] - 2024-12-24 ### Added diff --git a/src/pipecat/services/rime.py b/src/pipecat/services/rime.py index 4bfa56b20..ef64be162 100644 --- a/src/pipecat/services/rime.py +++ b/src/pipecat/services/rime.py @@ -43,14 +43,14 @@ def __init__( self._api_key = api_key self._base_url = "https://users.rime.ai/v1/rime-tts" self._settings = { - "speaker": voice_id, - "modelId": model, "samplingRate": sample_rate, "speedAlpha": params.speed_alpha, "reduceLatency": params.reduce_latency, "pauseBetweenBrackets": params.pause_between_brackets, "phonemizeBetweenBrackets": params.phonemize_between_brackets, } + self.set_voice(voice_id) + self.set_model_name(model) if params.inline_speed_alpha: self._settings["inlineSpeedAlpha"] = params.inline_speed_alpha @@ -69,6 +69,8 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]: payload = self._settings.copy() payload["text"] = text + payload["speaker"] = self._voice_id + payload["modelId"] = self._model_name try: await self.start_ttfb_metrics()