forked from ufal/whisper_streaming
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
executable file
·74 lines (57 loc) · 1.92 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python3
import logging
import os
import time
from numpy import ndarray
from scipy.io.wavfile import write
from whisper_online import (SAMPLING_RATE, FasterWhisperASR,
OnlineASRProcessor, TimestampedSegment,
add_shared_args, load_audio, load_audio_chunk)
# Initialize the ASR model
# Start a separate thread for capturing the audio
# In the main thread listen for the 'q' key press
# When 'q' is pressed, stop the audio capture and pass the audio to the ASR model for processing
# Once the ASR model has processed the audio, add the transcript to the clipboard
# --- Logging ---------------------------------------------------------------
# NOTE: the name `logging` is rebound here from the stdlib module to this
# module's Logger instance; every later `logging.<method>(...)` call in this
# file is therefore a Logger method call, not a module-level function call.
LOG_LEVEL = logging.INFO
logging = logging.getLogger(__name__)
logging.setLevel(LOG_LEVEL)

# --- Audio capture -----------------------------------------------------------
SAMPLE_RATE = 16_000  # samples per second
MAX_AUDIO_CAPTURE_DURATION_SECONDS = 60  # upper bound on a single recording
# Size, in samples, of the longest recording allowed.
MAX_SAMPLES = int(SAMPLE_RATE * MAX_AUDIO_CAPTURE_DURATION_SECONDS)
def clip(recording: ndarray, start: float, end: float):
    """Return the part of ``recording`` between two points in time.

    Args:
        recording: Audio samples; the slice is taken along the first axis.
        start: Beginning of the window, in seconds.
        end: End of the window, in seconds.

    Returns:
        ``recording[first:last]``, where each bound is the matching time
        multiplied by ``SAMPLE_RATE`` and truncated to an ``int``.
    """
    # Convert both time bounds from seconds to sample indices.
    first_sample, last_sample = (int(t * SAMPLE_RATE) for t in (start, end))
    return recording[first_sample:last_sample]
# Script body: build the recognizer, record from the input device until
# Ctrl+C (or the capture limit), transcribe the recording and print the text.

# The recognizer is constructed before the recording is started.
asr = FasterWhisperASR(
    language="en",
    model_size="tiny.en",
)
online_asr = OnlineASRProcessor(asr)

# NOTE(review): `sd` is never imported in the visible part of this file; it
# looks like the `sounddevice` package (`import sounddevice as sd`) -- confirm
# and add the import, otherwise the next statement raises NameError.
recording = sd.rec(
    MAX_SAMPLES,
    samplerate=SAMPLE_RATE,
    channels=1,
    dtype="int16",
)
# Use the monotonic clock: the elapsed time decides how much of the buffer is
# kept, so it must not be affected by wall-clock adjustments.
start = time.monotonic()
logging.debug("Recording started... Press Ctrl+C to stop recording")
try:
    time.sleep(MAX_AUDIO_CAPTURE_DURATION_SECONDS)
except KeyboardInterrupt:
    # Ctrl+C is the expected way to end the recording early.
    pass
finally:
    # Stop the capture however the wait ended.
    sd.stop()
end = time.monotonic()
duration = end - start
logging.debug("Recording stopped. Duration: %s seconds", duration)

# Keep only the part of the buffer that was recorded, then round-trip it
# through a WAV file so it is read back by the project's own loader.
write("output.wav", SAMPLE_RATE, clip(recording, 0, duration))
audio = load_audio("output.wav")
online_asr.insert_audio_chunk(audio)

logging.debug("Processing audio...")
# The value returned by process_iter() was never used by this script; only the
# result of finish() is reported.
# NOTE(review): presumably process_iter() performs the recognition pass whose
# pending text finish() then returns -- confirm against whisper_online.
online_asr.process_iter()
o = online_asr.finish()
logging.debug("Finished processing audio")

# Index 2 of the result is the text that gets reported.
transcript = o[2].strip()
logging.debug(transcript)
print(transcript)
# TODO: copy the transcript to the clipboard. Pass it on stdin, e.g.
# subprocess.run(["xclip", "-selection", "clipboard"], input=transcript,
# text=True), rather than interpolating it into a shell command string.