-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpolly.py
74 lines (67 loc) · 3.17 KB
/
polly.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# -*- coding: utf-8 -*-
"""
__init__
A library to get text to speech from AWS cloud engine.
"""
# stdlib imports:
import hashlib
import logging
import os
from pathlib import Path
from typing_extensions import Final, Literal
import wave
# 3rd-party imports:
import boto3 # pip install boto3
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Static type accepted wherever a Polly voice id is expected.
# Keep this list in sync with ``aws_polly_voices`` below: same names, same order.
AWS_POLLY_VOICES = Literal[
    'Aditi', 'Amy', 'Astrid', 'Bianca', 'Brian', 'Camila',
    'Carla', 'Carmen', 'Celine', 'Chantal', 'Conchita', 'Cristiano',
    'Dora', 'Emma', 'Enrique', 'Ewa', 'Filiz', 'Gabrielle',
    'Geraint', 'Giorgio', 'Gwyneth', 'Hans', 'Ines', 'Ivy',
    'Jacek', 'Jan', 'Joanna', 'Joey', 'Justin', 'Karl',
    'Kendra', 'Kevin', 'Kimberly', 'Lea', 'Liv', 'Lotte',
    'Lucia', 'Lupe', 'Mads', 'Maja', 'Marlene', 'Mathieu',
    'Matthew', 'Maxim', 'Mia', 'Miguel', 'Mizuki', 'Naja',
    'Nicole', 'Olivia', 'Penelope', 'Raveena', 'Ricardo', 'Ruben',
    'Russell', 'Salli', 'Seoyeon', 'Takumi', 'Tatyana', 'Vicki',
    'Vitoria', 'Zeina', 'Zhiyu', 'Aria', 'Ayanda', 'Arlet',
    'Hannah', 'Arthur', 'Daniel', 'Liam', 'Pedro', 'Kajal',
]

# The same voice ids as a runtime tuple, for membership checks and iteration.
aws_polly_voices = (
    'Aditi', 'Amy', 'Astrid', 'Bianca', 'Brian', 'Camila',
    'Carla', 'Carmen', 'Celine', 'Chantal', 'Conchita', 'Cristiano',
    'Dora', 'Emma', 'Enrique', 'Ewa', 'Filiz', 'Gabrielle',
    'Geraint', 'Giorgio', 'Gwyneth', 'Hans', 'Ines', 'Ivy',
    'Jacek', 'Jan', 'Joanna', 'Joey', 'Justin', 'Karl',
    'Kendra', 'Kevin', 'Kimberly', 'Lea', 'Liv', 'Lotte',
    'Lucia', 'Lupe', 'Mads', 'Maja', 'Marlene', 'Mathieu',
    'Matthew', 'Maxim', 'Mia', 'Miguel', 'Mizuki', 'Naja',
    'Nicole', 'Olivia', 'Penelope', 'Raveena', 'Ricardo', 'Ruben',
    'Russell', 'Salli', 'Seoyeon', 'Takumi', 'Tatyana', 'Vicki',
    'Vitoria', 'Zeina', 'Zhiyu', 'Aria', 'Ayanda', 'Arlet',
    'Hannah', 'Arthur', 'Daniel', 'Liam', 'Pedro', 'Kajal',
)
class AWSPolly:
    """
    Implements API for Amazon Polly Cloud TTS service.

    Synthesized speech is written to WAV files whose names are derived from
    a hash of the requested text and voice, so asking again for the same
    text and voice returns the file already on disk without calling the
    service.
    """

    # Sample rate in Hz. Kept as a string because it is handed unchanged to
    # synthesize_speech(); it is converted to int for the WAV header.
    sample_rate: Final = '8000'

    def __init__( self, aws_access_key: str, aws_secret_key: str, aws_region_name: str ) -> None:
        """
        Store the AWS credentials and region used for every Polly request.

        No client is created and no network access happens here.

        Args:
            aws_access_key: AWS access key id.
            aws_secret_key: AWS secret access key.
            aws_region_name: AWS region name passed to the Polly client.
        """
        self.aws_access_key = aws_access_key
        self.aws_secret_key = aws_secret_key
        self.aws_region_name = aws_region_name
        # WAV header parameters used when wrapping the PCM data returned
        # by Polly: one channel, two bytes per sample.
        self.channels = 1
        self.sampwidth = 2

    def genspeech( self,
        text: str,
        voice: AWS_POLLY_VOICES,
        recpath: Path,
    ) -> Path:
        """
        Return the path of a WAV file containing ``text`` spoken by ``voice``.

        The file name is the SHA-224 hex digest of ``polly-{text}-{voice}``
        plus ``.wav``. If that file already exists in ``recpath`` it is
        returned as-is; otherwise the speech is requested from Polly
        (neural engine, PCM output at ``sample_rate``) and saved there.

        Args:
            text: Text to synthesize. It is sent with ``TextType = 'ssml'``,
                so it is expected to be an SSML document.
            voice: Polly voice id.
            recpath: Directory holding the cached WAV files; created
                (including parents) when missing.

        Returns:
            Path of the cached or newly written WAV file.

        Errors raised by boto3 or by file I/O are not handled here and
        propagate to the caller.
        """
        log = logger.getChild( 'AWSPolly.genspeech' )

        # The cache key covers text and voice only.
        namestr = f'polly-{text}-{voice}'
        fhash = hashlib.sha224(
            namestr.encode( 'utf-8' )
        ).hexdigest()
        filename: Path = recpath / f'{fhash}.wav'
        if filename.is_file():
            log.debug( 'using cached filename=%r for text=%r', filename, text )
            return filename

        # The mode must be an octal literal: decimal 755 is 0o1363, which
        # sets the sticky bit and denies the owner read access.
        recpath.mkdir( mode = 0o755, parents = True, exist_ok = True )

        polly_client = boto3.client(
            'polly',
            aws_access_key_id = self.aws_access_key,
            aws_secret_access_key = self.aws_secret_key,
            region_name = self.aws_region_name
        )
        log.debug( 'requesting new tts content for text=%r', text )
        polly_response = polly_client.synthesize_speech(
            VoiceId = voice,
            Engine = 'neural',
            OutputFormat = 'pcm',
            SampleRate = self.sample_rate,
            TextType = 'ssml',
            Text = text
        )

        # Read the whole stream before creating the output file, so a failed
        # download cannot leave an empty file behind as a cache entry.
        # Always close the stream to release the underlying connection.
        audio_stream = polly_response['AudioStream']
        try:
            wavframes = audio_stream.read()
        finally:
            audio_stream.close()

        with wave.open( str( filename ), 'wb' ) as waveout:
            waveout.setnchannels( self.channels )
            waveout.setsampwidth( self.sampwidth )
            waveout.setframerate( int( self.sample_rate ))
            waveout.setcomptype( 'NONE', 'NONE' )
            # writeframes() keeps the frame count in the header correct.
            waveout.writeframes( wavframes )
        return filename