diff --git a/CHANGELOG.md b/CHANGELOG.md
index 412921b02..e2e3fac34 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to **pipecat** will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+
+### Fixed
+
+- STT services now use the ISO 8601 time format for transcription frames.
+
 ## [0.0.37] - 2024-07-22
 
 ### Added
diff --git a/src/pipecat/services/deepgram.py b/src/pipecat/services/deepgram.py
index e6ac09991..e3a9ee478 100644
--- a/src/pipecat/services/deepgram.py
+++ b/src/pipecat/services/deepgram.py
@@ -5,7 +5,6 @@
 #
 
 import aiohttp
-import time
 
 from typing import AsyncGenerator
 
@@ -21,9 +20,11 @@
     TranscriptionFrame)
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.ai_services import AsyncAIService, TTSService
+from pipecat.utils.time import time_now_iso8601
 
 from loguru import logger
 
+
 # See .env.example for Deepgram configuration needed
 try:
     from deepgram import (
@@ -148,6 +149,6 @@ async def _on_message(self, *args, **kwargs):
         transcript = result.channel.alternatives[0].transcript
         if len(transcript) > 0:
             if is_final:
-                await self.queue_frame(TranscriptionFrame(transcript, "", int(time.time_ns() / 1000000)))
+                await self.queue_frame(TranscriptionFrame(transcript, "", time_now_iso8601()))
             else:
-                await self.queue_frame(InterimTranscriptionFrame(transcript, "", int(time.time_ns() / 1000000)))
+                await self.queue_frame(InterimTranscriptionFrame(transcript, "", time_now_iso8601()))
diff --git a/src/pipecat/services/gladia.py b/src/pipecat/services/gladia.py
index 6f6a1ba27..4043e1283 100644
--- a/src/pipecat/services/gladia.py
+++ b/src/pipecat/services/gladia.py
@@ -6,7 +6,6 @@
 
 import base64
 import json
-import time
 
 from typing import Optional
 from pydantic.main import BaseModel
@@ -22,6 +21,7 @@
     TranscriptionFrame)
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.ai_services import AsyncAIService
+from pipecat.utils.time import time_now_iso8601
 
 from loguru import logger
 
@@ -110,6 +110,6 @@ async def _receive_task_handler(self):
                 transcript = utterance["transcription"]
                 if confidence >= self._confidence:
                     if type == "final":
-                        await self.queue_frame(TranscriptionFrame(transcript, "", int(time.time_ns() / 1000000)))
+                        await self.queue_frame(TranscriptionFrame(transcript, "", time_now_iso8601()))
                     else:
-                        await self.queue_frame(InterimTranscriptionFrame(transcript, "", int(time.time_ns() / 1000000)))
+                        await self.queue_frame(InterimTranscriptionFrame(transcript, "", time_now_iso8601()))
diff --git a/src/pipecat/services/whisper.py b/src/pipecat/services/whisper.py
index 5ef06f135..8313a297b 100644
--- a/src/pipecat/services/whisper.py
+++ b/src/pipecat/services/whisper.py
@@ -7,7 +7,6 @@
 """This module implements Whisper transcription with a locally-downloaded model."""
 
 import asyncio
-import time
 
 from enum import Enum
 from typing_extensions import AsyncGenerator
@@ -16,6 +15,7 @@
 
 from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame
 from pipecat.services.ai_services import STTService
+from pipecat.utils.time import time_now_iso8601
 
 from loguru import logger
 
@@ -91,4 +91,4 @@ async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
         if text:
             await self.stop_ttfb_metrics()
             logger.debug(f"Transcription: [{text}]")
-            yield TranscriptionFrame(text, "", int(time.time_ns() / 1000000))
+            yield TranscriptionFrame(text, "", time_now_iso8601())
diff --git a/src/pipecat/utils/time.py b/src/pipecat/utils/time.py
new file mode 100644
index 000000000..af493e77b
--- /dev/null
+++ b/src/pipecat/utils/time.py
@@ -0,0 +1,11 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import datetime
+
+
+def time_now_iso8601() -> str:
+    return datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="milliseconds")
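
Not part of the diff: a minimal usage sketch of the new time_now_iso8601() helper, assuming pipecat is importable with the package layout shown above. It checks that the returned string is stdlib-parseable ISO 8601 with an explicit UTC offset, i.e. the value the STT services now place in TranscriptionFrame and InterimTranscriptionFrame.

    # Sketch only: exercises time_now_iso8601() as introduced in src/pipecat/utils/time.py above.
    from datetime import datetime
    from pipecat.utils.time import time_now_iso8601

    ts = time_now_iso8601()              # e.g. "2024-07-22T18:04:05.123+00:00" (millisecond precision)
    parsed = datetime.fromisoformat(ts)  # valid ISO 8601, parseable by the standard library
    assert parsed.utcoffset() is not None  # the timestamp carries an explicit UTC offset
    print(ts)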