Merge pull request #314 from pipecat-ai/aleix/transcription-timestamps

services: transcription timestamp should use ISO8601 format
pipecat-ai · Jul 23, 2024 · 1676693 · 1676693
2 parents eb998aa + 0852b50
commit 1676693
Show file tree

Hide file tree

Showing 5 changed files with 26 additions and 8 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to **pipecat** will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+
+### Fixed
+
+- STT services now use ISO 8601 timestamps for transcription frames.
+
 ## [0.0.37] - 2024-07-22
 
 ### Added

diff --git a/src/pipecat/services/deepgram.py b/src/pipecat/services/deepgram.py
@@ -5,7 +5,6 @@
 #
 
 import aiohttp
-import time
 
 from typing import AsyncGenerator
 
@@ -21,9 +20,11 @@
     TranscriptionFrame)
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.ai_services import AsyncAIService, TTSService
+from pipecat.utils.time import time_now_iso8601
 
 from loguru import logger
 
+
 # See .env.example for Deepgram configuration needed
 try:
     from deepgram import (
@@ -148,6 +149,6 @@ async def _on_message(self, *args, **kwargs):
         transcript = result.channel.alternatives[0].transcript
         if len(transcript) > 0:
             if is_final:
-                await self.queue_frame(TranscriptionFrame(transcript, "", int(time.time_ns() / 1000000)))
+                await self.queue_frame(TranscriptionFrame(transcript, "", time_now_iso8601()))
             else:
-                await self.queue_frame(InterimTranscriptionFrame(transcript, "", int(time.time_ns() / 1000000)))
+                await self.queue_frame(InterimTranscriptionFrame(transcript, "", time_now_iso8601()))
diff --git a/src/pipecat/services/gladia.py b/src/pipecat/services/gladia.py
@@ -6,7 +6,6 @@
 
 import base64
 import json
-import time
 
 from typing import Optional
 from pydantic.main import BaseModel
@@ -22,6 +21,7 @@
     TranscriptionFrame)
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.ai_services import AsyncAIService
+from pipecat.utils.time import time_now_iso8601
 
 from loguru import logger
 
@@ -110,6 +110,6 @@ async def _receive_task_handler(self):
                 transcript = utterance["transcription"]
                 if confidence >= self._confidence:
                     if type == "final":
-                        await self.queue_frame(TranscriptionFrame(transcript, "", int(time.time_ns() / 1000000)))
+                        await self.queue_frame(TranscriptionFrame(transcript, "", time_now_iso8601()))
                     else:
-                        await self.queue_frame(InterimTranscriptionFrame(transcript, "", int(time.time_ns() / 1000000)))
+                        await self.queue_frame(InterimTranscriptionFrame(transcript, "", time_now_iso8601()))
diff --git a/src/pipecat/services/whisper.py b/src/pipecat/services/whisper.py
@@ -7,7 +7,6 @@
 """This module implements Whisper transcription with a locally-downloaded model."""
 
 import asyncio
-import time
 
 from enum import Enum
 from typing_extensions import AsyncGenerator
@@ -16,6 +15,7 @@
 
 from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame
 from pipecat.services.ai_services import STTService
+from pipecat.utils.time import time_now_iso8601
 
 from loguru import logger
 
@@ -91,4 +91,4 @@ async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
         if text:
             await self.stop_ttfb_metrics()
             logger.debug(f"Transcription: [{text}]")
-            yield TranscriptionFrame(text, "", int(time.time_ns() / 1000000))
+            yield TranscriptionFrame(text, "", time_now_iso8601())
diff --git a/src/pipecat/utils/time.py b/src/pipecat/utils/time.py
@@ -0,0 +1,11 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import datetime
+
+
+def time_now_iso8601() -> str:
+    return datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="milliseconds")