forked from cp818/voice_bank_bot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvoice.py
393 lines (337 loc) · 15.1 KB
/
voice.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""
Purpose
Shows how to use the AWS SDK for Python (Boto3) with the Amazon Transcribe API to
transcribe an audio file to a text file. Also shows how to define a custom vocabulary
to improve the accuracy of the transcription.
This example uses a public domain audio file downloaded from Wikipedia and converted
from .ogg to .mp3 format. The file contains a reading of the poem Jabberwocky by
Lewis Carroll. The original audio source file can be found here:
https://en.wikisource.org/wiki/File:Jabberwocky.ogg
"""
import logging
import time
import boto3
import requests
from botocore.exceptions import ClientError
# NOTE: demo_tools must already be importable (for example, installed or on
# PYTHONPATH); this file does not modify sys.path before importing it.
from demo_tools.custom_waiter import CustomWaiter, WaitState
logger = logging.getLogger(__name__)
class TranscribeCompleteWaiter(CustomWaiter):
    """
    Waiter for the completion of a transcription job.

    Configured with the GetTranscriptionJob operation and the
    TranscriptionJob.TranscriptionJobStatus argument. A status of 'COMPLETED'
    is treated as success and 'FAILED' as failure.
    """
    def __init__(self, client):
        """
        :param client: The Boto3 Transcribe client.
        """
        # Map job status values to the waiter outcome they represent.
        status_outcomes = {
            'COMPLETED': WaitState.SUCCESS,
            'FAILED': WaitState.FAILURE,
        }
        super().__init__(
            'TranscribeComplete',
            'GetTranscriptionJob',
            'TranscriptionJob.TranscriptionJobStatus',
            status_outcomes,
            client)

    def wait(self, job_name):
        """
        Waits for the named transcription job.

        :param job_name: The name of the transcription job to wait for.
        """
        self._wait(TranscriptionJobName=job_name)
class VocabularyReadyWaiter(CustomWaiter):
    """
    Waiter for a custom vocabulary to become ready for use.

    Configured with the GetVocabulary operation and the VocabularyState
    argument. A state of 'READY' is treated as success.
    """
    def __init__(self, client):
        """
        :param client: The Boto3 Transcribe client.
        """
        # Only the READY state is mapped to an outcome.
        state_outcomes = {'READY': WaitState.SUCCESS}
        super().__init__(
            'VocabularyReady',
            'GetVocabulary',
            'VocabularyState',
            state_outcomes,
            client)

    def wait(self, vocabulary_name):
        """
        Waits for the named custom vocabulary.

        :param vocabulary_name: The name of the vocabulary to wait for.
        """
        self._wait(VocabularyName=vocabulary_name)
def start_job(
        job_name, media_uri, media_format, language_code, transcribe_client,
        vocabulary_name=None):
    """
    Requests a new transcription job and returns without waiting for it to
    finish. Use get_job to check on the job afterwards; it has finished
    successfully once its status is 'COMPLETED'.

    :param job_name: The name of the transcription job. This must be unique for
                     your AWS account.
    :param media_uri: The URI where the audio file is stored. This is typically
                      in an Amazon S3 bucket.
    :param media_format: The format of the audio file. For example, mp3 or wav.
    :param language_code: The language code of the audio file.
                          For example, en-US or ja-JP
    :param transcribe_client: The Boto3 Transcribe client.
    :param vocabulary_name: The name of a custom vocabulary to use when transcribing
                            the audio file. Omitted from the request when None.
    :return: Data about the job.
    :raises ClientError: When the service call fails. The error is logged first.
    """
    request = {
        'TranscriptionJobName': job_name,
        'Media': {'MediaFileUri': media_uri},
        'MediaFormat': media_format,
        'LanguageCode': language_code}
    # The Settings entry is only sent when a custom vocabulary is requested.
    if vocabulary_name is not None:
        request['Settings'] = {'VocabularyName': vocabulary_name}

    try:
        result = transcribe_client.start_transcription_job(**request)
        started_job = result['TranscriptionJob']
        logger.info("Started transcription job %s.", job_name)
    except ClientError:
        logger.exception("Couldn't start transcription job %s.", job_name)
        raise
    return started_job
def list_jobs(job_filter, transcribe_client):
    """
    Retrieves summaries of the transcription jobs for the current AWS account,
    following NextToken until every page has been collected.

    :param job_filter: The list of returned jobs must contain this string in their
                       names.
    :param transcribe_client: The Boto3 Transcribe client.
    :return: The list of retrieved transcription job summaries.
    :raises ClientError: When a service call fails. The error is logged first.
    """
    try:
        page = transcribe_client.list_transcription_jobs(
            JobNameContains=job_filter)
        summaries = page['TranscriptionJobSummaries']
        # Keep requesting pages for as long as the service hands back a token.
        while True:
            token = page.get('NextToken')
            if token is None:
                break
            page = transcribe_client.list_transcription_jobs(
                JobNameContains=job_filter, NextToken=token)
            summaries.extend(page['TranscriptionJobSummaries'])
        logger.info("Got %s jobs with filter %s.", len(summaries), job_filter)
    except ClientError:
        logger.exception("Couldn't get jobs with filter %s.", job_filter)
        raise
    return summaries
def get_job(job_name, transcribe_client):
    """
    Retrieves the details of a single transcription job.

    :param job_name: The name of the job to retrieve.
    :param transcribe_client: The Boto3 Transcribe client.
    :return: The retrieved transcription job.
    :raises ClientError: When the service call fails. The error is logged first.
    """
    try:
        job_details = transcribe_client.get_transcription_job(
            TranscriptionJobName=job_name)['TranscriptionJob']
        logger.info("Got job %s.", job_details['TranscriptionJobName'])
    except ClientError:
        logger.exception("Couldn't get job %s.", job_name)
        raise
    return job_details
def delete_job(job_name, transcribe_client):
    """
    Removes a transcription job. This also deletes the transcript associated
    with the job.

    :param job_name: The name of the job to delete.
    :param transcribe_client: The Boto3 Transcribe client.
    :raises ClientError: When the service call fails. The error is logged first.
    """
    try:
        transcribe_client.delete_transcription_job(TranscriptionJobName=job_name)
    except ClientError:
        logger.exception("Couldn't delete job %s.", job_name)
        raise
    else:
        logger.info("Deleted job %s.", job_name)
def create_vocabulary(
        vocabulary_name, language_code, transcribe_client,
        phrases=None, table_uri=None):
    """
    Requests creation of a custom vocabulary that can be used to improve the
    accuracy of transcription jobs. The function returns once processing has
    been started; call get_vocabulary for the current status. The vocabulary
    can be used once its status is 'READY'.

    :param vocabulary_name: The name of the custom vocabulary.
    :param language_code: The language code of the vocabulary.
                          For example, en-US or nl-NL.
    :param transcribe_client: The Boto3 Transcribe client.
    :param phrases: A list of comma-separated phrases to include in the vocabulary.
                    When given, table_uri is not sent.
    :param table_uri: A table of phrases and pronunciation hints to include in the
                      vocabulary. Only sent when phrases is None.
    :return: Information about the newly created vocabulary.
    :raises ClientError: When the service call fails. The error is logged first.
    """
    request = {'VocabularyName': vocabulary_name, 'LanguageCode': language_code}
    # phrases takes precedence; the table URI is only a fallback source.
    if phrases is not None:
        request['Phrases'] = phrases
    elif table_uri is not None:
        request['VocabularyFileUri'] = table_uri

    try:
        created = transcribe_client.create_vocabulary(**request)
        logger.info("Created custom vocabulary %s.", created['VocabularyName'])
    except ClientError:
        logger.exception("Couldn't create custom vocabulary %s.", vocabulary_name)
        raise
    return created
def list_vocabularies(vocabulary_filter, transcribe_client):
    """
    Retrieves the custom vocabularies created for this AWS account, following
    NextToken until every page has been collected.

    :param vocabulary_filter: The returned vocabularies must contain this string in
                              their names.
    :param transcribe_client: The Boto3 Transcribe client.
    :return: The list of retrieved vocabularies.
    :raises ClientError: When a service call fails. The error is logged first.
    """
    try:
        page = transcribe_client.list_vocabularies(
            NameContains=vocabulary_filter)
        found = page['Vocabularies']
        # Keep requesting pages for as long as the service hands back a token.
        while True:
            token = page.get('NextToken')
            if token is None:
                break
            page = transcribe_client.list_vocabularies(
                NameContains=vocabulary_filter, NextToken=token)
            found.extend(page['Vocabularies'])
        logger.info(
            "Got %s vocabularies with filter %s.", len(found), vocabulary_filter)
    except ClientError:
        logger.exception(
            "Couldn't list vocabularies with filter %s.", vocabulary_filter)
        raise
    return found
def get_vocabulary(vocabulary_name, transcribe_client):
    """
    Retrieves information about a custom vocabulary.

    :param vocabulary_name: The name of the vocabulary to retrieve.
    :param transcribe_client: The Boto3 Transcribe client.
    :return: Information about the vocabulary.
    :raises ClientError: When the service call fails. The error is logged first.
    """
    try:
        vocabulary_info = transcribe_client.get_vocabulary(
            VocabularyName=vocabulary_name)
        logger.info("Got vocabulary %s.", vocabulary_info['VocabularyName'])
    except ClientError:
        logger.exception("Couldn't get vocabulary %s.", vocabulary_name)
        raise
    return vocabulary_info
def update_vocabulary(
        vocabulary_name, language_code, transcribe_client, phrases=None,
        table_uri=None):
    """
    Updates an existing custom vocabulary. The entire vocabulary is replaced with
    the contents of the update.

    :param vocabulary_name: The name of the vocabulary to update.
    :param language_code: The language code of the vocabulary.
    :param transcribe_client: The Boto3 Transcribe client.
    :param phrases: A list of comma-separated phrases to include in the vocabulary.
                    When given, table_uri is not sent.
    :param table_uri: A table of phrases and pronunciation hints to include in the
                      vocabulary. Only sent when phrases is None.
    :return: The service response for the update, mirroring what
             create_vocabulary returns for a creation.
    :raises ClientError: When the service call fails. The error is logged first.
    """
    try:
        vocab_args = {'VocabularyName': vocabulary_name, 'LanguageCode': language_code}
        # phrases takes precedence; the table URI is only a fallback source.
        if phrases is not None:
            vocab_args['Phrases'] = phrases
        elif table_uri is not None:
            vocab_args['VocabularyFileUri'] = table_uri
        response = transcribe_client.update_vocabulary(**vocab_args)
        logger.info(
            "Updated custom vocabulary %s.", response['VocabularyName'])
    except ClientError:
        logger.exception("Couldn't update custom vocabulary %s.", vocabulary_name)
        raise
    else:
        # Hand the response back instead of discarding it, so callers can
        # inspect it the same way they can after create_vocabulary.
        return response
def delete_vocabulary(vocabulary_name, transcribe_client):
    """
    Removes a custom vocabulary.

    :param vocabulary_name: The name of the vocabulary to delete.
    :param transcribe_client: The Boto3 Transcribe client.
    :raises ClientError: When the service call fails. The error is logged first.
    """
    try:
        transcribe_client.delete_vocabulary(VocabularyName=vocabulary_name)
    except ClientError:
        logger.exception("Couldn't delete vocabulary %s.", vocabulary_name)
        raise
    else:
        logger.info("Deleted vocabulary %s.", vocabulary_name)
def _run_transcription_job(
        job_name, media_uri, transcribe_client, transcribe_waiter,
        vocabulary_name=None):
    """
    Starts an en-US mp3 transcription job, waits for it with the given waiter,
    then downloads and prints the resulting transcript.

    :param job_name: The name to give the transcription job.
    :param media_uri: The URI of the audio file to transcribe.
    :param transcribe_client: The Boto3 Transcribe client.
    :param transcribe_waiter: A TranscribeCompleteWaiter used to wait for the job.
    :param vocabulary_name: The name of a custom vocabulary to use, or None.
    """
    print(f"Starting transcription job {job_name}.")
    start_job(
        job_name, media_uri, 'mp3', 'en-US', transcribe_client,
        vocabulary_name=vocabulary_name)
    transcribe_waiter.wait(job_name)
    job = get_job(job_name, transcribe_client)
    # The transcript itself is a JSON document fetched from the job's URI.
    transcript = requests.get(job['Transcript']['TranscriptFileUri']).json()
    print(f"Transcript for job {transcript['jobName']}:")
    print(transcript['results']['transcripts'][0]['transcript'])


def usage_demo():
    """
    Shows how to use the Amazon Transcribe service.

    Uploads a local audio file to a fixed Amazon S3 bucket and transcribes it
    three times: without a vocabulary, with a custom vocabulary built from a
    phrase list, and with the same vocabulary updated from a table file. It then
    lists the demo's jobs and vocabularies and deletes the jobs, the vocabulary,
    and the objects it uploaded.
    """
    logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')

    # This is an S3 *client*, so only client operations (upload_file,
    # delete_object, ...) are available on it.
    s3_client = boto3.client('s3')
    transcribe_client = boto3.client('transcribe', region_name="us-east-1")

    print('-'*88)
    print("Welcome to the Amazon Transcribe demo!")
    print('-'*88)

    bucket_name = 'audiotranscribeforrasabot'
    media_file_name = 'age.mp3'
    media_object_key = 'audio.mp3'
    print(f"Uploading media file {media_file_name}.")
    s3_client.upload_file(media_file_name, bucket_name, media_object_key)
    media_uri = f's3://{bucket_name}/{media_object_key}'

    transcribe_waiter = TranscribeCompleteWaiter(transcribe_client)

    job_name_simple = f'Jabber-{time.time_ns()}'
    _run_transcription_job(
        job_name_simple, media_uri, transcribe_client, transcribe_waiter)

    print('-'*88)
    print("Creating a custom vocabulary that lists the nonsense words to try to "
          "improve the transcription.")
    vocabulary_name = f'Jabber-vocabulary-{time.time_ns()}'
    create_vocabulary(
        vocabulary_name, 'en-US', transcribe_client,
        phrases=[
            'brillig', 'slithy', 'borogoves', 'mome', 'raths', 'Jub-Jub', 'frumious',
            'manxome', 'Tumtum', 'uffish', 'whiffling', 'tulgey', 'thou', 'frabjous',
            'callooh', 'callay', 'chortled'],
    )
    vocabulary_ready_waiter = VocabularyReadyWaiter(transcribe_client)
    vocabulary_ready_waiter.wait(vocabulary_name)

    job_name_vocabulary_list = f'Jabber-vocabulary-list-{time.time_ns()}'
    _run_transcription_job(
        job_name_vocabulary_list, media_uri, transcribe_client, transcribe_waiter,
        vocabulary_name=vocabulary_name)

    print('-'*88)
    print("Updating the custom vocabulary with table data that provides additional "
          "pronunciation hints.")
    table_vocab_file = 'age.txt'
    # upload_file on a client takes (Filename, Bucket, Key); the table file is
    # stored under its own file name as the object key.
    s3_client.upload_file(table_vocab_file, bucket_name, table_vocab_file)
    update_vocabulary(
        vocabulary_name, 'en-US', transcribe_client,
        table_uri=f's3://{bucket_name}/{table_vocab_file}')
    vocabulary_ready_waiter.wait(vocabulary_name)

    job_name_vocab_table = f'Jabber-vocab-table-{time.time_ns()}'
    _run_transcription_job(
        job_name_vocab_table, media_uri, transcribe_client, transcribe_waiter,
        vocabulary_name=vocabulary_name)

    print('-'*88)
    print("Getting data for jobs and vocabularies.")
    jabber_jobs = list_jobs('Jabber', transcribe_client)
    print(f"Found {len(jabber_jobs)} jobs:")
    for job_sum in jabber_jobs:
        job = get_job(job_sum['TranscriptionJobName'], transcribe_client)
        # Tolerate a job that reports no Settings instead of raising KeyError.
        job_settings = job.get('Settings', {})
        print(f"\t{job['TranscriptionJobName']}, {job['Media']['MediaFileUri']}, "
              f"{job_settings.get('VocabularyName')}")

    jabber_vocabs = list_vocabularies('Jabber', transcribe_client)
    print(f"Found {len(jabber_vocabs)} vocabularies:")
    for vocab_sum in jabber_vocabs:
        vocab = get_vocabulary(vocab_sum['VocabularyName'], transcribe_client)
        vocab_content = requests.get(vocab['DownloadUri']).text
        print(f"\t{vocab['VocabularyName']} contents:")
        print(vocab_content)

    print('-'*88)
    print("Deleting demo jobs.")
    for job_name in [job_name_simple, job_name_vocabulary_list, job_name_vocab_table]:
        delete_job(job_name, transcribe_client)
    print("Deleting demo vocabulary.")
    delete_vocabulary(vocabulary_name, transcribe_client)
    # This function never creates the bucket, so only the two objects it
    # uploaded are removed; the bucket itself is left in place.
    print("Deleting demo files from the bucket.")
    for object_key in (media_object_key, table_vocab_file):
        s3_client.delete_object(Bucket=bucket_name, Key=object_key)
    print("Thanks for watching!")
# Run the demo only when this file is executed directly as a script.
if __name__ == '__main__':
    usage_demo()