Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add fastpitch service #776

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions examples/foundational/01c-fastpitch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import asyncio
import aiohttp
import os
import sys

from pipecat.frames.frames import EndFrame, TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.task import PipelineTask
from pipecat.pipeline.runner import PipelineRunner
from pipecat.services.fastpitch import FastpitchHttpTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport

from runner import configure

from loguru import logger

from dotenv import load_dotenv

# Load environment variables from a local .env file; override=True lets the
# .env values win over variables already set in the process environment.
load_dotenv(override=True)

# Replace loguru's default handler (id 0) with a DEBUG-level stderr sink.
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")


async def main():
    """Join a Daily room and greet the first participant via Fastpitch TTS.

    Connects to the room returned by ``configure``, and when the first
    participant joins, speaks a one-line greeting and then shuts the
    pipeline down.
    """
    async with aiohttp.ClientSession() as session:
        room_url, _ = await configure(session)

        tts = FastpitchHttpTTSService(
            api_key=os.getenv("NVIDIA_API_KEY"),
        )

        transport = DailyTransport(
            room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True)
        )

        pipeline = Pipeline([tts, transport.output()])
        task = PipelineTask(pipeline)
        runner = PipelineRunner()

        # Speak the greeting only once someone is actually in the room,
        # then push EndFrame so the pipeline terminates cleanly.
        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            name = participant.get("info", {}).get("userName", "")
            frames = [TTSSpeakFrame(f"Hello there, {name}!"), EndFrame()]
            await task.queue_frames(frames)

        await runner.run(task)


if __name__ == "__main__":
    asyncio.run(main())
147 changes: 147 additions & 0 deletions src/pipecat/audio/audio_io.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure this file should be checked in here, but vendoring it was the fastest way to get things working. I'm open to other approaches for obtaining it.

# SPDX-License-Identifier: MIT

import queue
from typing import Dict, Union, Optional

import pyaudio


class MicrophoneStream:
    """Opens a recording stream as an iterator yielding audio chunks.

    Intended for use as a context manager: ``__enter__`` opens a PyAudio
    input stream whose callback feeds a thread-safe queue, and iteration
    drains that queue, yielding the largest contiguous block of bytes
    available on each step. ``close()`` pushes a ``None`` sentinel so a
    consumer blocked on the queue wakes up and stops.
    """

    def __init__(self, rate: int, chunk: int, device: Optional[int] = None) -> None:
        """Set up buffering; no audio resources are acquired until __enter__.

        Args:
            rate: Sample rate in Hz.
            chunk: Frames per buffer for the PyAudio stream.
            device: PyAudio input device index, or None for the default.
        """
        self._rate = rate
        self._chunk = chunk
        self._device = device

        # Create a thread-safe buffer of audio data
        self._buff = queue.Queue()
        self.closed = True

    def __enter__(self):
        self._audio_interface = pyaudio.PyAudio()
        self._audio_stream = self._audio_interface.open(
            format=pyaudio.paInt16,
            input_device_index=self._device,
            channels=1,
            rate=self._rate,
            input=True,
            frames_per_buffer=self._chunk,
            stream_callback=self._fill_buffer,
        )

        self.closed = False

        return self

    def close(self) -> None:
        """Stop and release the audio stream and unblock any consumer."""
        self._audio_stream.stop_stream()
        self._audio_stream.close()
        self.closed = True
        # Signal the responses to terminate so that the client's
        # streaming_recognize method will not block the process termination.
        self._buff.put(None)
        self._audio_interface.terminate()

    def __exit__(self, type, value, traceback):
        self.close()

    def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
        """Continuously collect data from the audio stream into the buffer."""
        self._buff.put(in_data)
        return None, pyaudio.paContinue

    def __next__(self) -> bytes:
        """Return all currently buffered audio as one bytes object.

        Blocks until at least one chunk is available; raises StopIteration
        once the stream is closed (signalled by the ``None`` sentinel).
        """
        if self.closed:
            raise StopIteration
        chunk = self._buff.get()
        if chunk is None:
            raise StopIteration
        data = [chunk]

        # Opportunistically drain whatever else is already buffered so each
        # iteration yields the largest contiguous block available.
        while True:
            try:
                chunk = self._buff.get(block=False)
                if chunk is None:
                    # close() queued the termination sentinel. Stop draining
                    # instead of appending None (which would make the join
                    # below raise TypeError); close() sets self.closed first,
                    # so the next __next__ call raises StopIteration.
                    break
                data.append(chunk)
            except queue.Empty:
                break

        return b"".join(data)

    def __iter__(self):
        return self


def get_audio_device_info(device_id: int) -> Dict[str, Union[int, float, str]]:
    """Return PyAudio's device-info mapping for the device at *device_id*.

    Propagates whatever ``get_device_info_by_index`` raises for an invalid
    index, but always releases the PortAudio handle (the original leaked it
    on failure because terminate() was only reached on success).
    """
    p = pyaudio.PyAudio()
    try:
        return p.get_device_info_by_index(device_id)
    finally:
        p.terminate()


def get_default_input_device_info() -> Optional[Dict[str, Union[int, float, str]]]:
    """Return the default input device's info mapping, or None if absent.

    PyAudio signals "no default input device" with OSError, which is mapped
    to None. The PortAudio handle is released even on unexpected errors
    (the original only called terminate() on the straight-line path).
    """
    p = pyaudio.PyAudio()
    try:
        try:
            return p.get_default_input_device_info()
        except OSError:
            # No default input device configured on this host.
            return None
    finally:
        p.terminate()


def list_output_devices() -> None:
    """Print the index and name of every audio device with output channels."""
    p = pyaudio.PyAudio()
    try:
        print("Output audio devices:")
        for i in range(p.get_device_count()):
            info = p.get_device_info_by_index(i)
            # Skip capture-only devices.
            if info["maxOutputChannels"] < 1:
                continue
            print(f"{info['index']}: {info['name']}")
    finally:
        # Release PortAudio even if a device query raises mid-loop.
        p.terminate()


def list_input_devices() -> None:
    """Print the index and name of every audio device with input channels."""
    p = pyaudio.PyAudio()
    try:
        print("Input audio devices:")
        for i in range(p.get_device_count()):
            info = p.get_device_info_by_index(i)
            # Skip playback-only devices.
            if info["maxInputChannels"] < 1:
                continue
            print(f"{info['index']}: {info['name']}")
    finally:
        # Release PortAudio even if a device query raises mid-loop.
        p.terminate()


class SoundCallBack:
    """Context-managed PyAudio playback sink.

    Opens a blocking output stream on construction; calling the instance
    with raw audio bytes writes them to the stream. Use as a context
    manager (or call ``close()``) to release the stream and PortAudio.
    """

    def __init__(
        self,
        output_device_index: Optional[int],
        sampwidth: int,
        nchannels: int,
        framerate: int,
    ) -> None:
        """Open an output stream.

        Args:
            output_device_index: PyAudio output device index, or None for
                the default device.
            sampwidth: Sample width in bytes (as in the ``wave`` module).
            nchannels: Number of audio channels.
            framerate: Sample rate in Hz.
        """
        self.pa = pyaudio.PyAudio()
        self.stream = self.pa.open(
            output_device_index=output_device_index,
            format=self.pa.get_format_from_width(sampwidth),
            channels=nchannels,
            rate=framerate,
            output=True,
        )
        # True until close() is called; callers may check before writing.
        self.opened = True

    def __call__(self, audio_data: bytes, audio_length: Optional[float] = None) -> None:
        """Play *audio_data* synchronously.

        *audio_length* is accepted for interface compatibility and ignored.
        (Annotation fixed from the implicit-Optional ``float = None``.)
        """
        self.stream.write(audio_data)

    def __enter__(self):
        return self

    def __exit__(self, type_, value, traceback) -> None:
        self.close()

    def close(self) -> None:
        """Close the stream and release PortAudio resources."""
        self.stream.close()
        self.pa.terminate()
        self.opened = False
Loading
Loading