Merge pull request #795 from pipecat-ai/vp-nvidia-riva
[WIP] add nvidia riva
vipyne authored Dec 10, 2024
2 parents 6ba2dea + a49b4e4 commit 1db6245
Showing 6 changed files with 421 additions and 6 deletions.
2 changes: 1 addition & 1 deletion dev-requirements.txt
@@ -1,5 +1,5 @@
 build~=1.2.1
-grpcio-tools~=1.62.2
+grpcio-tools~=1.65.4
 pip-tools~=7.4.1
 pyright~=1.1.376
 pytest~=8.3.2
56 changes: 56 additions & 0 deletions examples/foundational/01c-fastpitch.py
@@ -0,0 +1,56 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import asyncio
import aiohttp
import os
import sys

from pipecat.frames.frames import EndFrame, TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.task import PipelineTask
from pipecat.pipeline.runner import PipelineRunner
from pipecat.services.riva import FastpitchTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport

from runner import configure

from loguru import logger

from dotenv import load_dotenv

load_dotenv(override=True)

logger.remove(0)
logger.add(sys.stderr, level="DEBUG")


async def main():
    async with aiohttp.ClientSession() as session:
        (room_url, _) = await configure(session)

        transport = DailyTransport(
            room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True)
        )

        tts = FastpitchTTSService(api_key=os.getenv("NVIDIA_API_KEY"))

        runner = PipelineRunner()

        task = PipelineTask(Pipeline([tts, transport.output()]))

        # Register an event handler so we can play the audio when the
        # participant joins.
        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            participant_name = participant.get("info", {}).get("userName", "")
            await task.queue_frames([TTSSpeakFrame(f"Aloha, {participant_name}!"), EndFrame()])

        await runner.run(task)


if __name__ == "__main__":
    asyncio.run(main())
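
A note on the example above: queue_frames accepts any list of frames, so a quick way to exercise the new Fastpitch service is to queue several utterances before the EndFrame. A minimal sketch (the second sentence is illustrative; everything else comes from the example):

    await task.queue_frames([
        TTSSpeakFrame(f"Aloha, {participant_name}!"),
        TTSSpeakFrame("This audio was synthesized by NVIDIA Fastpitch."),
        EndFrame(),
    ])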
92 changes: 92 additions & 0 deletions examples/foundational/07r-interruptible-riva-nim.py
@@ -0,0 +1,92 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

import asyncio
import os
import sys

import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure

from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.nim import NimLLMService
from pipecat.services.riva import FastpitchTTSService, ParakeetSTTService
from pipecat.transports.services.daily import DailyParams, DailyTransport

load_dotenv(override=True)

logger.remove(0)
logger.add(sys.stderr, level="DEBUG")


async def main():
    async with aiohttp.ClientSession() as session:
        (room_url, _) = await configure(session)

        transport = DailyTransport(
            room_url,
            None,
            "Respond bot",
            DailyParams(
                audio_out_enabled=True,
                vad_enabled=True,
                vad_analyzer=SileroVADAnalyzer(),
                vad_audio_passthrough=True,
            ),
        )

        stt = ParakeetSTTService(api_key=os.getenv("NVIDIA_API_KEY"))

        llm = NimLLMService(
            api_key=os.getenv("NVIDIA_API_KEY"), model="meta/llama-3.1-405b-instruct"
        )

        tts = FastpitchTTSService(api_key=os.getenv("NVIDIA_API_KEY"))

        messages = [
            {
                "role": "system",
                "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
            },
        ]

        context = OpenAILLMContext(messages)
        context_aggregator = llm.create_context_aggregator(context)

        pipeline = Pipeline(
            [
                transport.input(),  # Transport user input
                stt,  # STT
                context_aggregator.user(),  # User responses
                llm,  # LLM
                tts,  # TTS
                transport.output(),  # Transport bot output
                context_aggregator.assistant(),  # Assistant spoken responses
            ]
        )

        task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            # Kick off the conversation.
            messages.append({"role": "system", "content": "Please introduce yourself to the user."})
            await task.queue_frames([LLMMessagesFrame(messages)])

        runner = PipelineRunner()

        await runner.run(task)


if __name__ == "__main__":
    asyncio.run(main())
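
Since the nim extra is built on the openai client (see pyproject.toml below), NimLLMService should accept any model identifier served through NVIDIA's API; a hypothetical drop-in with a smaller model (the model id here is an assumption, not part of this diff):

    llm = NimLLMService(
        api_key=os.getenv("NVIDIA_API_KEY"),
        model="meta/llama-3.1-8b-instruct",  # illustrative model id, not from this PR
    )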
7 changes: 4 additions & 3 deletions pyproject.toml
@@ -25,7 +25,7 @@ dependencies = [
     "Markdown~=3.7",
     "numpy~=1.26.4",
     "Pillow~=10.4.0",
-    "protobuf~=4.25.4",
+    "protobuf~=5.26.1",
     "pydantic~=2.8.2",
     "pyloudnorm~=0.1.1",
     "resampy~=0.4.3",
@@ -48,7 +48,7 @@ elevenlabs = [ "websockets~=13.1" ]
 examples = [ "python-dotenv~=1.0.1", "flask~=3.0.3", "flask_cors~=4.0.1" ]
 fal = [ "fal-client~=0.4.1" ]
 gladia = [ "websockets~=13.1" ]
-google = [ "google-generativeai~=0.8.3", "google-cloud-texttospeech~=2.17.2" ]
+google = [ "google-generativeai~=0.8.3", "google-cloud-texttospeech~=2.21.1" ]
 grok = [ "openai~=1.50.2" ]
 groq = [ "openai~=1.50.2" ]
 gstreamer = [ "pygobject~=3.48.2" ]
@@ -63,7 +63,8 @@ nim = [ "openai~=1.50.2" ]
 noisereduce = [ "noisereduce~=3.0.3" ]
 openai = [ "openai~=1.50.2", "websockets~=13.1", "python-deepcompare~=1.0.1" ]
 openpipe = [ "openpipe~=4.38.0" ]
-playht = [ "pyht~=0.1.4", "websockets~=13.1" ]
+playht = [ "pyht~=0.1.8", "websockets~=13.1" ]
+riva = [ "nvidia-riva-client~=2.17.0" ]
 silero = [ "onnxruntime~=1.19.2" ]
 soundfile = [ "soundfile~=0.12.1" ]
 together = [ "openai~=1.50.2" ]
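
With the riva extra declared above, the new services should install alongside the package, e.g. pip install "pipecat-ai[riva]" (assuming the published package name pipecat-ai; the extra pulls in nvidia-riva-client as pinned above).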