-
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
15 changed files
with
135,236 additions
and
27 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
84 changes: 84 additions & 0 deletions
84
Mirivoice/Mirivoice.Plugins.Builtin/IPAConverters/EnglishUSIPAConverter.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
using Mirivoice.Mirivoice.Plugins.Builtin.Phonemizers.Utils; | ||
using Serilog; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System; | ||
using System.IO; | ||
using System.Text; | ||
using System.Reflection; | ||
using System.Text.RegularExpressions; | ||
using System.Collections.Generic; | ||
Check warning on line 10 in Mirivoice/Mirivoice.Plugins.Builtin/IPAConverters/EnglishUSIPAConverter.cs GitHub Actions / build (win-x64)
Check warning on line 10 in Mirivoice/Mirivoice.Plugins.Builtin/IPAConverters/EnglishUSIPAConverter.cs GitHub Actions / build (linux-x64)
|
||
|
||
namespace Mirivoice.Mirivoice.Plugins.Builtin.IPAConverters | ||
{ | ||
/// <summary>
/// Converts ARPAbet phoneme symbols into their IPA equivalents using a fixed lookup table.
/// </summary>
public class EnglishUSIPAConverter : BaseIPAConverter
{
    // Many things are from https://github.com/wwesantos/arpabet-to-ipa/tree/master

    // The table is shared by all instances and compared with ordinal, case-insensitive rules,
    // so a lookup never depends on the current culture (culture-sensitive upper-casing maps
    // "ih" to "İH" under Turkish cultures, which would miss the "IH" key).
    private static readonly Dictionary<string, string> ArpaToIPA = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
    {
        { "AO", "ɔ" },
        { "AA", "ɑ" },
        { "IY", "i" },
        { "UW", "u" },
        { "EH", "e" }, // modern versions use 'e' instead of 'ɛ'
        { "IH", "ɪ" },
        { "UH", "ʊ" },
        { "AH", "ʌ" },
        { "AH0", "ə" },
        { "AE", "æ" },
        { "AX", "ə" },
        { "EY", "eɪ" },
        { "AY", "aɪ" },
        { "OW", "oʊ" },
        { "AW", "aʊ" },
        { "OY", "ɔɪ" },
        { "P", "p" },
        { "B", "b" },
        { "T", "t" },
        { "D", "d" },
        { "K", "k" },
        { "G", "g" },
        { "CH", "tʃ" },
        { "JH", "dʒ" },
        { "F", "f" },
        { "V", "v" },
        { "TH", "θ" },
        { "DH", "ð" },
        { "S", "s" },
        { "Z", "z" },
        { "SH", "ʃ" },
        { "ZH", "ʒ" },
        { "HH", "h" },
        { "M", "m" },
        { "N", "n" },
        { "NG", "ŋ" },
        { "L", "l" },
        { "R", "r" },
        { "ER", "ɜr" },
        { "AXR", "ər" },
        { "W", "w" },
        { "Y", "j" }
    };

    /// <summary>
    /// Converts a space-separated sequence of ARPAbet symbols to IPA.
    /// </summary>
    /// <param name="phoneme">Space-separated phoneme symbols; empty entries are ignored.</param>
    /// <param name="isFirstPhoneme">Not used by this converter.</param>
    /// <returns>
    /// The converted symbols joined with tab characters. Symbols that are not in the
    /// lookup table are passed through unchanged.
    /// </returns>
    public override string ConvertToIPA(string phoneme, bool isFirstPhoneme)
    {
        List<string> ipa = new List<string>();
        foreach (string phone in phoneme.Split(" ", StringSplitOptions.RemoveEmptyEntries))
        {
            // Single lookup; unknown symbols fall through as-is.
            ipa.Add(ArpaToIPA.TryGetValue(phone, out string mapped) ? mapped : phone);
        }

        return string.Join("\t", ipa);
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
65 changes: 65 additions & 0 deletions
65
Mirivoice/Mirivoice.Plugins.Builtin/Phonemizers/EnglishUSPhonemizer.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
using Mirivoice.Mirivoice.Plugins.Builtin.IPAConverters; | ||
using Mirivoice.Mirivoice.Plugins.Builtin.Phonemizers.Utils; | ||
using Serilog; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Text; | ||
|
||
namespace Mirivoice.Mirivoice.Plugins.Builtin.Phonemizers | ||
{ | ||
/// <summary>
/// Phonemizer for US English: tokenizes a sentence into words and punctuation marks and
/// maps each word to ARPAbet through <see cref="EnglishUSPhonemizerUtil.WordToArpabet"/>.
/// </summary>
public class EnglishUSPhonemizer : BasePhonemizer
{
    public override BaseIPAConverter IPAConverter { get; set; } = new EnglishUSIPAConverter();
    public override bool UseWordDivider { get; set; } = true;

    /// <summary>
    /// Splits a sentence on whitespace, then splits every chunk so that each punctuation
    /// character becomes a token of its own.
    /// </summary>
    /// <param name="sentence">The text to tokenize.</param>
    /// <returns>
    /// The tokens in order. Empty strings are emitted wherever the pending buffer is empty
    /// at a flush point (for example before a leading punctuation mark).
    /// </returns>
    protected override string[] SplitToWords(string sentence)
    {
        var tokens = new List<string>();
        var pending = new StringBuilder();

        foreach (string chunk in sentence.Split())
        {
            bool previousWasPunctuation = false;

            foreach (char ch in chunk)
            {
                bool isPunctuation = IsPunctuation(ch.ToString());

                // Flush before every punctuation mark, and before the first regular
                // character that follows a punctuation mark.
                if (isPunctuation || previousWasPunctuation)
                {
                    tokens.Add(pending.ToString());
                    pending.Clear();
                }

                pending.Append(ch);
                previousWasPunctuation = isPunctuation;
            }

            // Whatever is left of the chunk is a token too.
            tokens.Add(pending.ToString());
            pending.Clear();
        }

        return tokens.ToArray();
    }

    /// <summary>
    /// Converts one token to its ARPAbet representation.
    /// </summary>
    /// <param name="word">The token to convert.</param>
    /// <param name="isEditable">
    /// Set to <c>false</c> for tokens that are empty after trimming, <c>true</c> otherwise.
    /// </param>
    /// <returns>
    /// The token itself when it is blank; otherwise the result of
    /// <see cref="EnglishUSPhonemizerUtil.WordToArpabet"/> (e.g. "k ae t").
    /// </returns>
    protected override string ToPhoneme(string word, out bool isEditable)
    {
        isEditable = word.Trim() != string.Empty;
        if (!isEditable)
        {
            return word;
        }

        return EnglishUSPhonemizerUtil.WordToArpabet(word);
    }
}
} |
57 changes: 57 additions & 0 deletions
57
Mirivoice/Mirivoice.Plugins.Builtin/Phonemizers/Utils/CmuDict.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
using Avalonia.Platform; | ||
using Serilog; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
|
||
namespace Mirivoice.Mirivoice.Plugins.Builtin.Phonemizers.Utils | ||
{ | ||
/// <summary>
/// In-memory copy of the CMU pronouncing dictionary, loaded from the bundled
/// <c>cmudict.txt</c> asset when an instance is constructed.
/// </summary>
public class CmuDict
{
    /// <summary>
    /// Maps a lower-cased word to its lower-cased, space-separated ARPAbet phonemes.
    /// Stress digits are removed, except on "ah0".
    /// </summary>
    public Dictionary<string, string> CMUDict;

    /// <summary>
    /// Reads the dictionary asset line by line and fills <see cref="CMUDict"/>.
    /// Lines without a space separator are ignored.
    /// </summary>
    public CmuDict()
    {
        CMUDict = new Dictionary<string, string>();
        var uri = new Uri("avares://Mirivoice/Assets/Plugin.Datas/cmudict.txt");

        using (var stream = AssetLoader.Open(uri))
        using (var reader = new StreamReader(stream))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                // CMUdict-format files mark comment lines with ";;;"; they are not entries.
                if (line.StartsWith(";;;", StringComparison.Ordinal))
                {
                    continue;
                }

                // "<word> <phonemes...>": split only at the first space.
                var parts = line.Split(" ", 2);
                if (parts.Length != 2)
                {
                    continue;
                }

                // Invariant casing keeps keys and phonemes ASCII regardless of the
                // current culture (e.g. "I" must not become a dotless "ı").
                CMUDict[parts[0].Trim().ToLowerInvariant()] = StripStressMarkers(parts[1]);
            }
        }

        Log.Information("CMU Dict loaded");
    }

    /// <summary>
    /// Lower-cases a pronunciation and removes the trailing stress digit (0, 1 or 2) from
    /// each phoneme, keeping "ah0" intact. Phonemes are re-joined with single spaces.
    /// </summary>
    private static string StripStressMarkers(string pronunciation)
    {
        string[] phones = pronunciation
            .ToLowerInvariant()
            .Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

        for (int i = 0; i < phones.Length; i++)
        {
            string phone = phones[i];
            char last = phone[phone.Length - 1];
            bool hasStressDigit = last == '0' || last == '1' || last == '2';

            // "ah0" keeps its marker on purpose; every other stress digit is dropped.
            if (hasStressDigit && phone != "ah0")
            {
                phones[i] = phone.Substring(0, phone.Length - 1);
            }
        }

        return string.Join(" ", phones);
    }
}
} |
31 changes: 31 additions & 0 deletions
31
Mirivoice/Mirivoice.Plugins.Builtin/Phonemizers/Utils/EnglishUSPhonemizerUtil.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
using Avalonia.Media; | ||
using Avalonia.Platform; | ||
using Serilog; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Drawing; | ||
using System.IO; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Threading.Tasks; | ||
|
||
namespace Mirivoice.Mirivoice.Plugins.Builtin.Phonemizers.Utils | ||
{ | ||
/// <summary>
/// Word-to-ARPAbet lookup backed by <see cref="CmuDict"/>.
/// The dictionary is created by the static field initializer, so no explicit
/// initialization call is required before using <see cref="WordToArpabet"/>.
/// </summary>
public static class EnglishUSPhonemizerUtil
{
    private static readonly CmuDict cmuDict = new CmuDict();

    /// <summary>
    /// Looks up a word in the CMU pronouncing dictionary.
    /// </summary>
    /// <param name="word">The word to convert; matched case-insensitively.</param>
    /// <returns>
    /// The lower-cased ARPAbet pronunciation when the word is in the dictionary;
    /// otherwise <paramref name="word"/> unchanged.
    /// </returns>
    public static string WordToArpabet(string word)
    {
        // Invariant casing so the lookup key does not depend on the current culture.
        return cmuDict.CMUDict.TryGetValue(word.ToLowerInvariant(), out string arpabet)
            ? arpabet.ToLowerInvariant()
            : word;
    }
}
} |
Oops, something went wrong.