-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #338 from andy840119/editor/romaji-generator
Implement Japanese lyric romaji generator.
- Loading branch information
Showing
10 changed files
with
253 additions
and
1 deletion.
There are no files selected for viewing
67 changes: 67 additions & 0 deletions
67
osu.Game.Rulesets.Karaoke.Tests/Edit/Generator/RomajiTags/Ja/JaRomajiTagGeneratorTest.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
// Copyright (c) andy840119 <[email protected]>. Licensed under the GPL Licence. | ||
// See the LICENCE file in the repository root for full licence text. | ||
|
||
using System; | ||
using NUnit.Framework; | ||
using osu.Game.Rulesets.Karaoke.Edit.Generator.RomajiTags.Ja; | ||
using osu.Game.Rulesets.Karaoke.Objects; | ||
using osu.Game.Rulesets.Karaoke.Tests.Helper; | ||
|
||
namespace osu.Game.Rulesets.Karaoke.Tests.Edit.Generator.RomajiTags.Ja | ||
{ | ||
/// <summary>
/// Tests for <see cref="JaRomajiTagGenerator"/>: checks the romaji tags generated from Japanese lyric text,
/// with and without the uppercase option.
/// </summary>
public class JaRomajiTagGeneratorTest
{
    [TestCase("花火大会", new[] { "[0,2]:hanabi", "[2,4]:taikai" })]
    [TestCase("はなび", new[] { "[0,3]:hanabi" })]
    [TestCase("枯れた世界に輝く", new[] { "[0,3]:kareta", "[3,6]:sekaini", "[6,8]:kagayaku" })]
    public void TestCreateRomajiTags(string text, string[] actualRomaji)
    {
        var config = generatorConfig(null);
        RunRomajiCheckTest(text, actualRomaji, config);
    }

    [TestCase("花火大会", new[] { "[0,2]:HANABI", "[2,4]:TAIKAI" })]
    [TestCase("はなび", new[] { "[0,3]:HANABI" })]
    public void TestCreateRomajiTagsWithUppercase(string text, string[] actualRomaji)
    {
        var config = generatorConfig(nameof(JaRomajiTagGeneratorConfig.Uppercase));
        RunRomajiCheckTest(text, actualRomaji, config);
    }

    #region test helper

    /// <summary>
    /// Generates romaji tags for <paramref name="text"/> and compares them with the tags described by
    /// <paramref name="actualRomaji"/>.
    /// </summary>
    /// <param name="text">Lyric text fed to the generator.</param>
    /// <param name="actualRomaji">
    /// Tag descriptions in the test-case string format parsed by <c>TestCaseTagHelper.ParseRomajiTags</c>.
    /// Despite the parameter name, these are the expected values.
    /// </param>
    /// <param name="config">Generator configuration to test with.</param>
    protected void RunRomajiCheckTest(string text, string[] actualRomaji, JaRomajiTagGeneratorConfig config)
    {
        var generator = new JaRomajiTagGenerator(config);
        var lyric = new Lyric { Text = text };

        var expectedRomajiTags = TestCaseTagHelper.ParseRomajiTags(actualRomaji);
        var generatedRomajiTags = generator.CreateRomajiTags(lyric);

        // NUnit's signature is AreEqual(expected, actual); keeping this order makes failure messages read correctly.
        Assert.AreEqual(expectedRomajiTags, generatedRomajiTags);
    }

    /// <summary>
    /// Creates a config in which every named property is set to <c>true</c>.
    /// </summary>
    /// <param name="properties">
    /// Property names to enable. A null array, or null entries inside it, are ignored.
    /// </param>
    /// <exception cref="MissingMethodException">A named property does not exist on the config type.</exception>
    private JaRomajiTagGeneratorConfig generatorConfig(params string[] properties)
    {
        var config = new JaRomajiTagGeneratorConfig();
        if (properties == null)
            return config;

        foreach (var propertyName in properties)
        {
            if (propertyName == null)
                continue;

            var property = config.GetType().GetProperty(propertyName);
            if (property == null)
                throw new MissingMethodException($"Config property '{propertyName}' does not exist.");

            property.SetValue(config, true);
        }

        return config;
    }

    #endregion
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
110 changes: 110 additions & 0 deletions
110
osu.Game.Rulesets.Karaoke/Edit/Generator/RomajiTags/Ja/JaRomajiTagGenerator.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
// Copyright (c) andy840119 <[email protected]>. Licensed under the GPL Licence. | ||
// See the LICENCE file in the repository root for full licence text. | ||
|
||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.Linq; | ||
using Lucene.Net.Analysis; | ||
using Lucene.Net.Analysis.Ja; | ||
using Lucene.Net.Analysis.TokenAttributes; | ||
using osu.Framework.Extensions.IEnumerableExtensions; | ||
using osu.Game.Rulesets.Karaoke.Objects; | ||
using osu.Game.Rulesets.Karaoke.Utils; | ||
|
||
namespace osu.Game.Rulesets.Karaoke.Edit.Generator.RomajiTags.Ja | ||
{ | ||
/// <summary>
/// Generates <see cref="RomajiTag"/>s for Japanese lyric text: the text is tokenized with Lucene's
/// Japanese analyzer, each token's reading is converted to romaji, and tokens that did not come from
/// kanji are merged into the tag before them.
/// </summary>
public class JaRomajiTagGenerator : RomajiTagGenerator<JaRomajiTagGeneratorConfig>
{
    private readonly Analyzer analyzer;

    public JaRomajiTagGenerator(JaRomajiTagGeneratorConfig config)
        : base(config)
    {
        analyzer = Analyzer.NewAnonymous((fieldName, reader) =>
        {
            Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizerMode.SEARCH);
            return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(tokenizer, false));
        });
    }

    /// <summary>
    /// Creates romaji tags covering the text of <paramref name="lyric"/>.
    /// </summary>
    /// <param name="lyric">Lyric whose <c>Text</c> is analysed.</param>
    /// <returns>The generated tags in text order; an empty array when the text is null or empty.</returns>
    public override RomajiTag[] CreateRomajiTags(Lyric lyric)
    {
        var text = lyric.Text;

        // Nothing to tokenize (and StringReader rejects null).
        if (string.IsNullOrEmpty(text))
            return new RomajiTag[0];

        var processingTags = tokenize(text);
        return combineTags(processingTags).ToArray();
    }

    /// <summary>
    /// Tokenizes <paramref name="text"/> and produces one romaji tag per token.
    /// </summary>
    private List<RomajiTagGeneratorPatameter> tokenize(string text)
    {
        var processingTags = new List<RomajiTagGeneratorPatameter>();

        // The using block guarantees the stream is disposed even if conversion throws,
        // so the analyzer's reusable components are not left in an open state.
        using (var tokenStream = analyzer.GetTokenStream("dummy", new StringReader(text)))
        {
            // Term text and character offsets of the current token.
            var result = tokenStream.GetAttribute<ICharTermAttribute>();
            var offsetAtt = tokenStream.GetAttribute<IOffsetAttribute>();

            tokenStream.Reset();

            // IncrementToken() returns false once the stream is exhausted.
            while (tokenStream.IncrementToken())
            {
                // Parsed result is expected to be the katakana reading of the token.
                var katakana = result.ToString();

                // Stop on an empty term, the same condition the earlier implementation used as its end marker.
                if (string.IsNullOrEmpty(katakana))
                    break;

                var startOffset = offsetAtt.StartOffset;
                var endOffset = offsetAtt.EndOffset;

                // If the reading differs from the source text (compared as katakana), the token contained kanji.
                var parentText = text[startOffset..endOffset];
                var fromKanji = JpStringUtils.ToKatakana(katakana) != JpStringUtils.ToKatakana(parentText);

                // Convert to romaji. Invariant casing keeps the output stable regardless of the current culture.
                var romaji = JpStringUtils.ToRomaji(katakana);
                if (Config.Uppercase)
                    romaji = romaji.ToUpperInvariant();

                processingTags.Add(new RomajiTagGeneratorPatameter
                {
                    FromKanji = fromKanji,
                    RomajiTag = new RomajiTag
                    {
                        Text = romaji,
                        StartIndex = startOffset,
                        EndIndex = endOffset
                    }
                });
            }

            tokenStream.End();
        }

        return processingTags;
    }

    /// <summary>
    /// Merges every tag that is not from kanji into the tag preceding it.
    /// </summary>
    private static List<RomajiTag> combineTags(List<RomajiTagGeneratorPatameter> processingTags)
    {
        var romajiTags = new List<RomajiTag>();

        foreach (var processingTag in processingTags)
        {
            if (romajiTags.Count > 0 && !processingTag.FromKanji)
            {
                // Combine with the last emitted tag (which may itself already be a merged tag),
                // so runs of several non-kanji tokens collapse into a single tag.
                var lastIndex = romajiTags.Count - 1;
                romajiTags[lastIndex] = TextTagsUtils.Combine(romajiTags[lastIndex], processingTag.RomajiTag);
            }
            else
            {
                romajiTags.Add(processingTag.RomajiTag);
            }
        }

        return romajiTags;
    }

    /// <summary>
    /// Intermediate result for a single token: its romaji tag and whether the token contained kanji.
    /// </summary>
    internal class RomajiTagGeneratorPatameter
    {
        public bool FromKanji { get; set; }

        public RomajiTag RomajiTag { get; set; }
    }
}
} |
13 changes: 13 additions & 0 deletions
13
osu.Game.Rulesets.Karaoke/Edit/Generator/RomajiTags/Ja/JaRomajiTagGeneratorConfig.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
// Copyright (c) andy840119 <[email protected]>. Licensed under the GPL Licence. | ||
// See the LICENCE file in the repository root for full licence text. | ||
|
||
namespace osu.Game.Rulesets.Karaoke.Edit.Generator.RomajiTags.Ja | ||
{ | ||
/// <summary>
/// Options for the Japanese romaji tag generator.
/// </summary>
public class JaRomajiTagGeneratorConfig : RomajiTagGeneratorConfig
{
    /// <summary>
    /// When <c>true</c>, generated romaji text is upper-cased.
    /// </summary>
    public bool Uppercase { get; set; }
}
} |
19 changes: 19 additions & 0 deletions
19
osu.Game.Rulesets.Karaoke/Edit/Generator/RomajiTags/RomajiTagGenerator.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
// Copyright (c) andy840119 <[email protected]>. Licensed under the GPL Licence. | ||
// See the LICENCE file in the repository root for full licence text. | ||
|
||
using osu.Game.Rulesets.Karaoke.Objects; | ||
|
||
namespace osu.Game.Rulesets.Karaoke.Edit.Generator.RomajiTags | ||
{ | ||
/// <summary>
/// Base class for romaji tag generators that are driven by a config of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Config type used by the concrete generator.</typeparam>
public abstract class RomajiTagGenerator<T> where T : RomajiTagGeneratorConfig
{
    /// <summary>
    /// Config supplied at construction.
    /// </summary>
    protected T Config { get; }

    protected RomajiTagGenerator(T config) => Config = config;

    /// <summary>
    /// Creates the romaji tags for the given lyric.
    /// </summary>
    /// <param name="lyric">Lyric to generate tags for.</param>
    /// <returns>The generated romaji tags.</returns>
    public abstract RomajiTag[] CreateRomajiTags(Lyric lyric);
}
} |
9 changes: 9 additions & 0 deletions
9
osu.Game.Rulesets.Karaoke/Edit/Generator/RomajiTags/RomajiTagGeneratorConfig.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
// Copyright (c) andy840119 <[email protected]>. Licensed under the GPL Licence. | ||
// See the LICENCE file in the repository root for full licence text. | ||
|
||
namespace osu.Game.Rulesets.Karaoke.Edit.Generator.RomajiTags | ||
{ | ||
/// <summary>
/// Base type for romaji tag generator configs. It declares no options of its own.
/// </summary>
public class RomajiTagGeneratorConfig
{
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters