Skip to content

Commit

Permalink
Add speech activity segments in the output
Browse files: browse the repository at this point in the history
  • Loading branch information
Jeronymous committed Mar 1, 2024
1 parent 8352601 commit f7e6fff
Showing 1 changed file with 10 additions and 4 deletions.
14 changes: 10 additions & 4 deletions whisper_timestamped/transcribe.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,7 +3,7 @@
__author__ = "Jérôme Louradour"
__credits__ = ["Jérôme Louradour"]
__license__ = "GPLv3"
__version__ = "1.15.0"
__version__ = "1.15.1"

# Set some environment variables
import os
Expand Down Expand Up @@ -279,8 +279,10 @@ def transcribe_timestamped(

if vad:
audio = get_audio_tensor(audio)
audio, convert_timestamps = remove_non_speech(audio, method=vad, sample_rate=SAMPLE_RATE, plot=plot_word_alignment)

audio, vad_segments, convert_timestamps = remove_non_speech(audio, method=vad, sample_rate=SAMPLE_RATE, plot=plot_word_alignment)
else:
vad_segments = None

global num_alignment_for_plot
num_alignment_for_plot = 0

Expand Down Expand Up @@ -335,6 +337,9 @@ def transcribe_timestamped(
else:
segment["start"], segment["end"] = convert_timestamps(segment["start"], segment["end"])

if vad_segments is not None:
transcription["speech_activity"] = [{"start":s, "end":e} for (s,e) in vad_segments]

return transcription

def _transcribe_timestamped_efficient(
Expand Down Expand Up @@ -2116,7 +2121,7 @@ def remove_non_speech(audio,
if not use_sample:
segments = [(float(s)/sample_rate, float(e)/sample_rate) for s,e in segments]

return audio_speech, lambda t, t2 = None: do_convert_timestamps(segments, t, t2)
return audio_speech, segments, lambda t, t2 = None: do_convert_timestamps(segments, t, t2)

def do_convert_timestamps(segments, t, t2 = None):
"""
Expand Down Expand Up @@ -3132,6 +3137,7 @@ def filtered_keys(result, keys = [
"end",
"confidence",
"language_probs",
"speech_activity",
]):
if isinstance(result, dict):
return {k: (filtered_keys(v, keys) if k not in ["language_probs"] else v) for k, v in result.items() if k in keys}
Expand Down

0 comments on commit f7e6fff

Please sign in to comment.