Skip to content

Commit

Permalink
PlayHTHttpTTSService fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
markbackman committed Jan 4, 2025
1 parent 386ba61 commit d6d50dc
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 14 deletions.
16 changes: 16 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,22 @@ All notable changes to **Pipecat** will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased] - TBD

### Added

- Added a new foundational example `07e-interruptible-playht-http.py` for easy
testing of `PlayHTHttpTTSService`.

### Changed

- Changed the default model for `PlayHTHttpTTSService` to `Play3.0-mini-http`.

### Fixed

- Fixed an import issue for `PlayHTHttpTTSService`. Also removed `language` and
`seed`, which are not `TTSOptions` supported by the PlayHT Python client.

## [0.0.52] - 2024-12-24

### Added
Expand Down
101 changes: 101 additions & 0 deletions examples/foundational/07e-interruptible-playht-http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import asyncio
import os
import sys

import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure

from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.openai import OpenAILLMService
from pipecat.services.playht import PlayHTHttpTTSService
from pipecat.transcriptions.language import Language
from pipecat.transports.services.daily import DailyParams, DailyTransport

# Load settings from a .env file before anything reads os.getenv(); the
# override=True flag is forwarded to python-dotenv (presumably so .env values
# win over variables already set in the environment -- confirm against its docs).
load_dotenv(override=True)

# Reconfigure loguru: remove the handler with id 0, then log to stderr at
# DEBUG level.
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")


async def main():
    """Run an interruptible voice bot that speaks through PlayHT's HTTP TTS.

    Opens an aiohttp session, asks ``configure`` for a room URL and token,
    then wires a Daily transport, an OpenAI LLM and ``PlayHTHttpTTSService``
    into a single pipeline and runs it until the runner returns.

    Credentials are read from the ``PLAYHT_USER_ID``, ``PLAYHT_API_KEY`` and
    ``OPENAI_API_KEY`` environment variables.
    """
    async with aiohttp.ClientSession() as session:
        room_url, token = await configure(session)

        # Daily transport: audio out, transcription and Silero-based VAD.
        daily_params = DailyParams(
            audio_out_enabled=True,
            transcription_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
        )
        transport = DailyTransport(room_url, token, "Respond bot", daily_params)

        # Text-to-speech via the PlayHT HTTP service, using a fixed voice.
        tts = PlayHTHttpTTSService(
            user_id=os.getenv("PLAYHT_USER_ID"),
            api_key=os.getenv("PLAYHT_API_KEY"),
            voice_url="s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
        )

        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

        # Conversation history, seeded with the system prompt. The join
        # handler below appends to this same list.
        system_prompt = {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
        }
        messages = [system_prompt]

        context = OpenAILLMContext(messages)
        context_aggregator = llm.create_context_aggregator(context)

        # Processing stages, in the order frames flow through them.
        stages = [
            transport.input(),  # audio/transcription coming from the user
            context_aggregator.user(),  # collect the user's turn
            llm,  # generate the reply text
            tts,  # turn the reply into audio
            transport.output(),  # send the bot's audio to the room
            context_aggregator.assistant(),  # record what the bot said
        ]
        pipeline = Pipeline(stages)

        task_params = PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
            report_only_initial_ttfb=True,
        )
        task = PipelineTask(pipeline, task_params)

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            # Start transcribing the new participant, then have the bot
            # open the conversation by introducing itself.
            await transport.capture_participant_transcription(participant["id"])
            intro_request = {"role": "system", "content": "Please introduce yourself to the user."}
            messages.append(intro_request)
            await task.queue_frames([LLMMessagesFrame(messages)])

        await PipelineRunner().run(task)


# Script entry point: run the async main() coroutine when executed directly.
if __name__ == "__main__":
    asyncio.run(main())
16 changes: 2 additions & 14 deletions src/pipecat/services/playht.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@

try:
from pyht.async_client import AsyncClient
from pyht.client import TTSOptions
from pyht.protos.api_pb2 import Format
from pyht.client import Format, TTSOptions
except ModuleNotFoundError as e:
logger.error(f"Exception: {e}")
logger.error(
Expand Down Expand Up @@ -353,17 +352,15 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:

class PlayHTHttpTTSService(TTSService):
class InputParams(BaseModel):
language: Optional[Language] = Language.EN
speed: Optional[float] = 1.0
seed: Optional[int] = None

def __init__(
self,
*,
api_key: str,
user_id: str,
voice_url: str,
voice_engine: str = "Play3.0-mini",
voice_engine: str = "Play3.0-mini-http", # Options: Play3.0-mini-ws, Play3.0-mini-http, Play3.0-mini-grpc
sample_rate: int = 24000,
params: InputParams = InputParams(),
**kwargs,
Expand All @@ -379,31 +376,22 @@ def __init__(
)
self._settings = {
"sample_rate": sample_rate,
"language": self.language_to_service_language(params.language)
if params.language
else "english",
"format": Format.FORMAT_WAV,
"voice_engine": voice_engine,
"speed": params.speed,
"seed": params.seed,
}
self.set_model_name(voice_engine)
self.set_voice(voice_url)
self._options = TTSOptions(
voice=self._voice_id,
language=self._settings["language"],
sample_rate=self._settings["sample_rate"],
format=self._settings["format"],
speed=self._settings["speed"],
seed=self._settings["seed"],
)

def can_generate_metrics(self) -> bool:
return True

def language_to_service_language(self, language: Language) -> str | None:
return language_to_playht_language(language)

async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
logger.debug(f"Generating TTS: [{text}]")

Expand Down

0 comments on commit d6d50dc

Please sign in to comment.