From a49f35b3a86bd7daf11483b2a1a718952ea13b8e Mon Sep 17 00:00:00 2001 From: Jimmy Shimizu Date: Sun, 11 Dec 2016 14:58:43 +0100 Subject: [PATCH] BREAKING CHANGE: Allow specifying voice name for Microsoft TTS Reference voices by name. --- README.md | 37 +++++++++++++++++------- lib/actions/say.js | 10 ++++++- lib/actions/sayall.js | 10 ++++++- lib/tts-providers/microsoft.js | 53 +++++++++++++++++++++++++++++----- 4 files changed, 90 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 1d05d38c..ab203bfc 100644 --- a/README.md +++ b/README.md @@ -305,9 +305,7 @@ Example: "voicerss": "Your api key for TTS with voicerss", "microsoft": { "key": "Your api for Bing speech API", - "gender": "Female", - "name": "Microsoft Server Speech Text to Speech Voice (en-US, ZiraRUS)", - "language": "en-US" + "name": "ZiraRUS" }, "port": 5005, "securePort": 5006, @@ -440,25 +438,41 @@ The following configuration is available (the entered values except key are defa { "microsoft": { "key": "Your api for Bing speech API", - "gender": "Female", - "name": "Microsoft Server Speech Text to Speech Voice (en-US, ZiraRUS)", - "language": "en-US" + "name": "ZiraRUS" } } ``` -If you change language, you need to change the name matching the gender for that language, according to this list: https://www.microsoft.com/cognitive-services/en-us/speech-api/documentation/API-Reference-REST/BingVoiceOutput#SupLocales. This one doesn't support providing language directly in the request for this reason. +You change language by specifying a voice name correlating to the desired language. +Name should be specified according to this list: https://www.microsoft.com/cognitive-services/en-us/speech-api/documentation/API-Reference-REST/BingVoiceOutput#SupLocales +where name is the right most part of the voice font name (without optional Apollo suffix). 
Example: + +`Microsoft Server Speech Text to Speech Voice (ar-EG, Hoda)` name should be specified as `Hoda` + +`Microsoft Server Speech Text to Speech Voice (de-DE, Stefan, Apollo)` name should be specified as `Stefan` + +`Microsoft Server Speech Text to Speech Voice (en-US, BenjaminRUS)` name should be specified as `BenjaminRUS` Action is: - /[Room name]/say/[phrase][/[announce volume]] - /sayall/[phrase][/[announce volume]] + /[Room name]/say/[phrase][/[name]][/[announce volume]] + /sayall/[phrase][/[name]][/[announce volume]] Example: /Office/say/Hello, dinner is ready + /Office/say/Hello, dinner is ready/BenjaminRUS + /Office/say/Guten Morgen/Stefan /sayall/Hello, dinner is ready /Office/say/Hello, dinner is ready/90 + /Office/say/Guten Morgen/Stefan/90 + +Supported voices are: + +Hoda, Hedda, Stefan, Catherine, Linda, Susan, George, Ravi, ZiraRUS, BenjaminRUS, Laura, Pablo, Raul, Caroline, Julie, Paul, Cosimo, Ayumi, Ichiro, Daniel, Irina, Pavel, HuihuiRUS, Yaoyao, Kangkang, Tracy, Danny, Yating, Zhiwei + +See https://www.microsoft.com/cognitive-services/en-us/speech-api/documentation/API-Reference-REST/BingVoiceOutput#SupLocales to identify +which language and gender each voice maps to. #### AWS Polly @@ -496,10 +510,11 @@ Action is: Example: /Office/say/Hello, dinner is ready - /Office/say/Hej, maten är klar/Joanna + /Office/say/Hello, dinner is ready/Nicole + /Office/say/Hej, maten är klar/Astrid /sayall/Hello, dinner is ready /Office/say/Hello, dinner is ready/90 - /Office/say/Hej, maten är klar/Russell/90 + /Office/say/Hej, maten är klar/Astrid/90 This is the current list of voice names and their corresponding language and accent (as of Dec 2016). To get a current list of voices, you would need to use the AWS CLI and invoke the describe-voices command. 
diff --git a/lib/actions/say.js b/lib/actions/say.js index bf3940c1..d5e5b936 100644 --- a/lib/actions/say.js +++ b/lib/actions/say.js @@ -11,7 +11,15 @@ let port; let system; function say(player, values) { - const text = decodeURIComponent(values[0]); + let text; + try { + text = decodeURIComponent(values[0]); + } catch (err) { + if (err instanceof URIError) { + err.message = `The encoded phrase ${values[0]} could not be URI decoded. Make sure your url encoded values (%xx) are within valid ranges. xx should be hexadecimal representations`; + } + return Promise.reject(err); + } let announceVolume; let language; diff --git a/lib/actions/sayall.js b/lib/actions/sayall.js index ecd31dd1..ec1a8ea4 100644 --- a/lib/actions/sayall.js +++ b/lib/actions/sayall.js @@ -7,7 +7,15 @@ let port; let system; function sayAll(player, values) { - const text = decodeURIComponent(values[0]); + let text; + try { + text = decodeURIComponent(values[0]); + } catch (err) { + if (err instanceof URIError) { + err.message = `The encoded phrase ${values[0]} could not be URI decoded. Make sure your url encoded values (%xx) are within valid ranges. 
xx should be hexadecimal representations`; + } + return Promise.reject(err); + } let announceVolume; let language; diff --git a/lib/tts-providers/microsoft.js b/lib/tts-providers/microsoft.js index a48d615b..dd991e94 100644 --- a/lib/tts-providers/microsoft.js +++ b/lib/tts-providers/microsoft.js @@ -10,9 +10,7 @@ const APP_ID = '9aa44d9e6ec14da99231a9166fd50b0f'; const INSTANCE_ID = crypto.randomBytes(16).toString('hex'); const TOKEN_EXPIRATION = 590000; // 9:50 minutes in ms const DEFAULT_SETTINGS = { - language: 'en-US', - gender: 'Female', - name: 'Microsoft Server Speech Text to Speech Voice (en-US, ZiraRUS)' + name: 'ZiraRUS' }; let bearerToken; @@ -39,15 +37,19 @@ function format(lang, gender, name, text) { return `${text}`; } -function microsoft(phrase, language) { +function microsoft(phrase, voiceName) { if (!globalSettings.microsoft || !globalSettings.microsoft.key) { return Promise.resolve(); } const settings = Object.assign({}, DEFAULT_SETTINGS, globalSettings.microsoft); + if (voiceName) { + settings.name = voiceName; + } + const phraseHash = crypto.createHash('sha1').update(phrase).digest('hex'); - const filename = `microsoft-${phraseHash}-${settings.language}-${settings.gender}.wav`; + const filename = `microsoft-${phraseHash}-${settings.name}.wav`; const filepath = path.resolve(globalSettings.webroot, 'tts', filename); const expectedUri = `/tts/${filename}`; @@ -65,7 +67,12 @@ function microsoft(phrase, language) { } return promise.then(() => { - const ssml = format(settings.language, settings.gender, settings.name, phrase); + const voice = VOICE[settings.name]; + if (!voice) { + throw new Error(`Voice name ${settings.name} could not be located in the list of valid voice names`); + } + + const ssml = format(voice.language, voice.gender, voice.font, phrase); return request({ uri: 'https://speech.platform.bing.com/synthesize', method: 'POST', @@ -99,4 +106,36 @@ function microsoft(phrase, language) { }); } -module.exports = microsoft; \ No newline at 
end of file +const VOICE = { + Hoda: { language: 'ar-EG', gender: 'Female', font: 'Microsoft Server Speech Text to Speech Voice (ar-EG, Hoda)' }, + Hedda: { language: 'de-DE', gender: 'Female', font: 'Microsoft Server Speech Text to Speech Voice (de-DE, Hedda)' }, + Stefan: { language: 'de-DE', gender: 'Male', font: 'Microsoft Server Speech Text to Speech Voice (de-DE, Stefan, Apollo)' }, + Catherine: { language: 'en-AU', gender: 'Female', font: 'Microsoft Server Speech Text to Speech Voice (en-AU, Catherine)' }, + Linda: { language: 'en-CA', gender: 'Female', font: 'Microsoft Server Speech Text to Speech Voice (en-CA, Linda)' }, + Susan: { language: 'en-GB', gender: 'Female', font: 'Microsoft Server Speech Text to Speech Voice (en-GB, Susan, Apollo)' }, + George: { language: 'en-GB', gender: 'Male', font: 'Microsoft Server Speech Text to Speech Voice (en-GB, George, Apollo)' }, + Ravi: { language: 'en-IN', gender: 'Male', font: 'Microsoft Server Speech Text to Speech Voice (en-IN, Ravi, Apollo)' }, + ZiraRUS: { language: 'en-US', gender: 'Female', font: 'Microsoft Server Speech Text to Speech Voice (en-US, ZiraRUS)' }, + BenjaminRUS: { language: 'en-US', gender: 'Male', font: 'Microsoft Server Speech Text to Speech Voice (en-US, BenjaminRUS)' }, + Laura: { language: 'es-ES', gender: 'Female', font: 'Microsoft Server Speech Text to Speech Voice (es-ES, Laura, Apollo)' }, + Pablo: { language: 'es-ES', gender: 'Male', font: 'Microsoft Server Speech Text to Speech Voice (es-ES, Pablo, Apollo)' }, + Raul: { language: 'es-MX', gender: 'Male', font: 'Microsoft Server Speech Text to Speech Voice (es-MX, Raul, Apollo)' }, + Caroline: { language: 'fr-CA', gender: 'Female', font: 'Microsoft Server Speech Text to Speech Voice (fr-CA, Caroline)' }, + Julie: { language: 'fr-FR', gender: 'Female', font: 'Microsoft Server Speech Text to Speech Voice (fr-FR, Julie, Apollo)' }, + Paul: { language: 'fr-FR', gender: 'Male', font: 'Microsoft Server Speech Text to Speech Voice (fr-FR, 
Paul, Apollo)' }, + Cosimo: { language: 'it-IT', gender: 'Male', font: 'Microsoft Server Speech Text to Speech Voice (it-IT, Cosimo, Apollo)' }, + Ayumi: { language: 'ja-JP', gender: 'Female', font: 'Microsoft Server Speech Text to Speech Voice (ja-JP, Ayumi, Apollo)' }, + Ichiro: { language: 'ja-JP', gender: 'Male', font: 'Microsoft Server Speech Text to Speech Voice (ja-JP, Ichiro, Apollo)' }, + Daniel: { language: 'pt-BR', gender: 'Male', font: 'Microsoft Server Speech Text to Speech Voice (pt-BR, Daniel, Apollo)' }, + Irina: { language: 'ru-RU', gender: 'Female', font: 'Microsoft Server Speech Text to Speech Voice (ru-RU, Irina, Apollo)' }, + Pavel: { language: 'ru-RU', gender: 'Male', font: 'Microsoft Server Speech Text to Speech Voice (ru-RU, Pavel, Apollo)' }, + HuihuiRUS: { language: 'zh-CN', gender: 'Female', font: 'Microsoft Server Speech Text to Speech Voice (zh-CN, HuihuiRUS)' }, + Yaoyao: { language: 'zh-CN', gender: 'Female', font: 'Microsoft Server Speech Text to Speech Voice (zh-CN, Yaoyao, Apollo)' }, + Kangkang: { language: 'zh-CN', gender: 'Male', font: 'Microsoft Server Speech Text to Speech Voice (zh-CN, Kangkang, Apollo)' }, + Tracy: { language: 'zh-HK', gender: 'Female', font: 'Microsoft Server Speech Text to Speech Voice (zh-HK, Tracy, Apollo)' }, + Danny: { language: 'zh-HK', gender: 'Male', font: 'Microsoft Server Speech Text to Speech Voice (zh-HK, Danny, Apollo)' }, + Yating: { language: 'zh-TW', gender: 'Female', font: 'Microsoft Server Speech Text to Speech Voice (zh-TW, Yating, Apollo)' }, + Zhiwei: { language: 'zh-TW', gender: 'Male', font: 'Microsoft Server Speech Text to Speech Voice (zh-TW, Zhiwei, Apollo)' } +}; + +module.exports = microsoft;