Skip to content

Commit

Permalink
Merge pull request #338 from andy840119/editor/romaji-generator
Browse files Browse the repository at this point in the history
Implement Japanese lyric romaji generator.
  • Loading branch information
andy840119 authored Dec 21, 2020
2 parents a70820c + e982879 commit fa1a568
Show file tree
Hide file tree
Showing 10 changed files with 253 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright (c) andy840119 <[email protected]>. Licensed under the GPL Licence.
// See the LICENCE file in the repository root for full licence text.

using System;
using NUnit.Framework;
using osu.Game.Rulesets.Karaoke.Edit.Generator.RomajiTags.Ja;
using osu.Game.Rulesets.Karaoke.Objects;
using osu.Game.Rulesets.Karaoke.Tests.Helper;

namespace osu.Game.Rulesets.Karaoke.Tests.Edit.Generator.RomajiTags.Ja
{
/// <summary>
/// Tests for <see cref="JaRomajiTagGenerator"/>: each case feeds a lyric text to the generator
/// and compares the generated romaji tags with the tags parsed from the expected strings.
/// </summary>
public class JaRomajiTagGeneratorTest
{
    // Note: the "actualRomaji" parameters below hold the EXPECTED tags (the name is kept for compatibility).
    [TestCase("花火大会", new[] { "[0,2]:hanabi", "[2,4]:taikai" })]
    [TestCase("はなび", new string[] { "[0,3]:hanabi" })]
    [TestCase("枯れた世界に輝く", new[] { "[0,3]:kareta", "[3,6]:sekaini", "[6,8]:kagayaku" })]
    public void TestCreateRomajiTags(string text, string[] actualRomaji)
    {
        // Default config: no optional property is switched on.
        var config = generatorConfig(null);
        RunRomajiCheckTest(text, actualRomaji, config);
    }

    [TestCase("花火大会", new[] { "[0,2]:HANABI", "[2,4]:TAIKAI" })]
    [TestCase("はなび", new string[] { "[0,3]:HANABI" })]
    public void TestCreateRomajiTagsWithUppercase(string text, string[] actualRomaji)
    {
        var config = generatorConfig(nameof(JaRomajiTagGeneratorConfig.Uppercase));
        RunRomajiCheckTest(text, actualRomaji, config);
    }

    #region test helper

    /// <summary>
    /// Generates romaji tags for <paramref name="text"/> with the given <paramref name="config"/>
    /// and asserts that they equal the tags described by <paramref name="actualRomaji"/>.
    /// </summary>
    /// <param name="text">Lyric text passed to the generator.</param>
    /// <param name="actualRomaji">Expected tags, in the string form understood by <c>TestCaseTagHelper.ParseRomajiTags</c>.</param>
    /// <param name="config">Generator configuration to test with.</param>
    protected void RunRomajiCheckTest(string text, string[] actualRomaji, JaRomajiTagGeneratorConfig config)
    {
        var generator = new JaRomajiTagGenerator(config);

        var lyric = new Lyric { Text = text };
        var generatedRomajiTags = generator.CreateRomajiTags(lyric);
        var expectedRomajiTags = TestCaseTagHelper.ParseRomajiTags(actualRomaji);

        // NUnit's signature is AreEqual(expected, actual); keeping that order makes failure messages read correctly.
        Assert.AreEqual(expectedRomajiTags, generatedRomajiTags);
    }

    /// <summary>
    /// Creates a config and sets every named boolean property to <c>true</c>.
    /// </summary>
    /// <param name="properties">Property names to enable; a <c>null</c> array or <c>null</c> entries are ignored.</param>
    /// <exception cref="MissingMethodException">A given name is not a property of the config type.</exception>
    private JaRomajiTagGeneratorConfig generatorConfig(params string[] properties)
    {
        var config = new JaRomajiTagGeneratorConfig();
        if (properties == null)
            return config;

        foreach (var propertyName in properties)
        {
            if (propertyName == null)
                continue;

            var property = config.GetType().GetProperty(propertyName);
            if (property == null)
                throw new MissingMethodException($"Config property '{propertyName}' does not exist.");

            property.SetValue(config, true);
        }

        return config;
    }

    #endregion
}
}
14 changes: 14 additions & 0 deletions osu.Game.Rulesets.Karaoke.Tests/Utils/JpStringUtilsTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,19 @@ public void TestToKatakana(string text, string actual)
var katakana = JpStringUtils.ToKatakana(text);
Assert.AreEqual(katakana, actual);
}

// Note: the "actual" parameter holds the EXPECTED romaji (the name is kept for compatibility).
[TestCase("はなび", "hanabi")]
[TestCase("たいかい", "taikai")]
[TestCase("ハナビ", "hanabi")]
[TestCase("タイカイ", "taikai")]
[TestCase("花火大会", "花火大会")] // kanji cannot be converted to romaji and is returned as-is.
[TestCase("ハナビ wo miru", "hanabi wo miru")]
[TestCase("タイカイー☆", "taikaii☆")] // long-vowel mark handling comes from the conversion package; this only pins its current output.
[TestCase("タイカイ ー☆", "taikai -☆")] // same as above: output is decided by the conversion package.
public void TestToRomaji(string text, string actual)
{
    var romaji = JpStringUtils.ToRomaji(text);

    // NUnit's signature is AreEqual(expected, actual); keeping that order makes failure messages read correctly.
    Assert.AreEqual(actual, romaji);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
// Copyright (c) andy840119 <[email protected]>. Licensed under the GPL Licence.
// See the LICENCE file in the repository root for full licence text.

using System.Collections.Generic;
using System.IO;
using System.Linq;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Ja;
using Lucene.Net.Analysis.TokenAttributes;
using osu.Framework.Extensions.IEnumerableExtensions;
using osu.Game.Rulesets.Karaoke.Objects;
using osu.Game.Rulesets.Karaoke.Utils;

namespace osu.Game.Rulesets.Karaoke.Edit.Generator.RomajiTags.Ja
{
/// <summary>
/// Generates <see cref="RomajiTag"/>s for Japanese lyric text.
/// The text is tokenized with Lucene's Japanese analyzer, each token's reading is converted to romaji,
/// and tokens that are not read from kanji are appended to the tag in front of them.
/// </summary>
public class JaRomajiTagGenerator : RomajiTagGenerator<JaRomajiTagGeneratorConfig>
{
    private readonly Analyzer analyzer;

    /// <summary>
    /// Creates the generator and the analyzer used to tokenize lyrics.
    /// </summary>
    /// <param name="config">Generator options (e.g. <see cref="JaRomajiTagGeneratorConfig.Uppercase"/>).</param>
    public JaRomajiTagGenerator(JaRomajiTagGeneratorConfig config)
        : base(config)
    {
        analyzer = Analyzer.NewAnonymous((fieldName, reader) =>
        {
            // The reading-form filter replaces each token's term with its reading (katakana, not romaji).
            Tokenizer tokenizer = new JapaneseTokenizer(reader, null, true, JapaneseTokenizerMode.SEARCH);
            return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(tokenizer, false));
        });
    }

    /// <summary>
    /// Creates romaji tags covering the text of <paramref name="lyric"/>.
    /// </summary>
    /// <param name="lyric">Lyric whose <c>Text</c> is analyzed.</param>
    /// <returns>The generated tags; an empty array when the lyric has no text.</returns>
    public override RomajiTag[] CreateRomajiTags(Lyric lyric)
    {
        var text = lyric.Text;

        // Nothing to tokenize (also avoids passing null to StringReader).
        if (string.IsNullOrEmpty(text))
            return new RomajiTag[0];

        var processingTags = createProcessingTags(text);
        return combineProcessingTags(processingTags).ToArray();
    }

    /// <summary>
    /// Tokenizes <paramref name="text"/> and creates one romaji tag per token.
    /// </summary>
    private List<RomajiTagGeneratorPatameter> createProcessingTags(string text)
    {
        var processingTags = new List<RomajiTagGeneratorPatameter>();

        // The stream must be disposed even if conversion throws,
        // otherwise the analyzer cannot hand out its reusable stream again.
        using (var tokenStream = analyzer.GetTokenStream("dummy", new StringReader(text)))
        {
            // Get result and offset
            var result = tokenStream.GetAttribute<ICharTermAttribute>();
            var offsetAtt = tokenStream.GetAttribute<IOffsetAttribute>();

            // Reset the stream and convert all result
            tokenStream.Reset();

            // IncrementToken() returns false once the stream is exhausted.
            while (tokenStream.IncrementToken())
            {
                // Get parsed result, result is Katakana.
                var katakana = result.ToString();
                if (string.IsNullOrEmpty(katakana))
                    continue;

                // If the reading differs from the source text (both normalized to katakana),
                // the source was not plain kana, i.e. the reading came from kanji.
                var parentText = text[offsetAtt.StartOffset..offsetAtt.EndOffset];
                var fromKanji = JpStringUtils.ToKatakana(katakana) != JpStringUtils.ToKatakana(parentText);

                // Convert to romaji.
                var romaji = JpStringUtils.ToRomaji(katakana);
                if (Config.Uppercase)
                    romaji = romaji.ToUpperInvariant(); // invariant: result must not depend on the user's culture.

                // Make tag
                processingTags.Add(new RomajiTagGeneratorPatameter
                {
                    FromKanji = fromKanji,
                    RomajiTag = new RomajiTag
                    {
                        Text = romaji,
                        StartIndex = offsetAtt.StartOffset,
                        EndIndex = offsetAtt.EndOffset
                    }
                });
            }

            tokenStream.End();
        }

        return processingTags;
    }

    /// <summary>
    /// Appends every tag that is not from kanji to the tag emitted before it.
    /// </summary>
    private static List<RomajiTag> combineProcessingTags(IEnumerable<RomajiTagGeneratorPatameter> processingTags)
    {
        var romajiTags = new List<RomajiTag>();

        foreach (var processingTag in processingTags)
        {
            if (romajiTags.Count > 0 && !processingTag.FromKanji)
            {
                // Combine with the last EMITTED tag (which may already be a combination),
                // so a run of several kana tokens keeps growing the same tag.
                var lastIndex = romajiTags.Count - 1;
                romajiTags[lastIndex] = TextTagsUtils.Combine(romajiTags[lastIndex], processingTag.RomajiTag);
            }
            else
            {
                romajiTags.Add(processingTag.RomajiTag);
            }
        }

        return romajiTags;
    }

    /// <summary>
    /// Intermediate result for a single token: its romaji tag and whether the reading came from kanji.
    /// </summary>
    internal class RomajiTagGeneratorPatameter
    {
        /// <summary>
        /// <c>true</c> when the token's reading differs from its source text after katakana normalization.
        /// </summary>
        public bool FromKanji { get; set; }

        /// <summary>
        /// Romaji tag covering the token's offsets in the lyric text.
        /// </summary>
        public RomajiTag RomajiTag { get; set; }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright (c) andy840119 <[email protected]>. Licensed under the GPL Licence.
// See the LICENCE file in the repository root for full licence text.

namespace osu.Game.Rulesets.Karaoke.Edit.Generator.RomajiTags.Ja
{
/// <summary>
/// Options for the Japanese romaji tag generator.
/// </summary>
public class JaRomajiTagGeneratorConfig : RomajiTagGeneratorConfig
{
    /// <summary>
    /// When <c>true</c>, the generated romaji text is converted to upper case.
    /// Defaults to <c>false</c>.
    /// </summary>
    public bool Uppercase { get; set; }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Copyright (c) andy840119 <[email protected]>. Licensed under the GPL Licence.
// See the LICENCE file in the repository root for full licence text.

using osu.Game.Rulesets.Karaoke.Objects;

namespace osu.Game.Rulesets.Karaoke.Edit.Generator.RomajiTags
{
/// <summary>
/// Base class for generators that create <see cref="RomajiTag"/>s from a <see cref="Lyric"/>.
/// </summary>
/// <typeparam name="T">Type of the configuration the generator is driven by.</typeparam>
public abstract class RomajiTagGenerator<T> where T : RomajiTagGeneratorConfig
{
    /// <summary>
    /// Stores the configuration for use by the derived generator.
    /// </summary>
    /// <param name="config">Configuration instance exposed through <see cref="Config"/>.</param>
    protected RomajiTagGenerator(T config) => Config = config;

    /// <summary>
    /// Configuration passed to the constructor.
    /// </summary>
    protected T Config { get; }

    /// <summary>
    /// Creates the romaji tags for the given lyric.
    /// </summary>
    /// <param name="lyric">Lyric to generate tags for.</param>
    /// <returns>The generated romaji tags.</returns>
    public abstract RomajiTag[] CreateRomajiTags(Lyric lyric);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Copyright (c) andy840119 <[email protected]>. Licensed under the GPL Licence.
// See the LICENCE file in the repository root for full licence text.

namespace osu.Game.Rulesets.Karaoke.Edit.Generator.RomajiTags
{
/// <summary>
/// Base type for romaji tag generator configuration.
/// It declares no options itself; language-specific configs derive from it to add their own.
/// </summary>
public class RomajiTagGeneratorConfig
{
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public override RubyTag[] CreateRubyTags(Lyric lyric)
if (!Config.EnableDuplicatedRuby)
{
// Not add duplicated ruby if same as parent.
var parentText = text.Substring(offsetAtt.StartOffset, offsetAtt.EndOffset - offsetAtt.StartOffset);
var parentText = text[offsetAtt.StartOffset..offsetAtt.EndOffset];
if (parentText == katakana || parentText == hiragana)
continue;
}
Expand Down
6 changes: 6 additions & 0 deletions osu.Game.Rulesets.Karaoke/Utils/JpStringUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// See the LICENCE file in the repository root for full licence text.

using Zipangu;
using WanaKanaSharp;

namespace osu.Game.Rulesets.Karaoke.Utils
{
Expand All @@ -16,5 +17,10 @@ public static string ToKatakana(string text)
{
return text.HiraganaToKatakana();
}

/// <summary>
/// Converts <paramref name="text"/> to romaji by delegating to WanaKanaSharp's <c>RomajiConverter.ToRomaji</c>.
/// </summary>
/// <param name="text">Text to convert.</param>
/// <returns>The converted text as returned by the converter.</returns>
// NOTE(review): the meaning of the trailing (false, null) arguments is defined by WanaKanaSharp — confirm against its API.
public static string ToRomaji(string text) => RomajiConverter.ToRomaji(text, false, null);
}
}
12 changes: 12 additions & 0 deletions osu.Game.Rulesets.Karaoke/Utils/TextTagsUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ public static T[] FindInvalid<T>(T[] textTags, string lyric, Sorting sorting = S
return Sort(invalidList.Distinct().ToArray());
}

// todo : might think about a better way to handle ruby or romaji tags when merging lyrics.
public static T Shifting<T>(T textTag, int shifting) where T : ITextTag, new()
{
return new T
Expand All @@ -77,6 +78,17 @@ public static T[] FindInvalid<T>(T[] textTags, string lyric, Sorting sorting = S
};
}

/// <summary>
/// Merges two text tags into a single new tag.
/// The pair is ordered with <c>Sort</c> first, so the result does not depend on argument order.
/// </summary>
/// <param name="textTagA">One of the tags to merge.</param>
/// <param name="textTagB">The other tag to merge.</param>
/// <returns>
/// A new tag that starts at the first sorted tag's start index, ends at the larger of the two end indices,
/// and whose text is the first sorted tag's text followed by the second's.
/// </returns>
public static T Combine<T>(T textTagA, T textTagB) where T : ITextTag, new()
{
    var sortedTags = Sort(new[] { textTagA, textTagB });
    var first = sortedTags[0];
    var second = sortedTags[1];

    return new T
    {
        StartIndex = first.StartIndex,
        // Use the furthest end so the range is not truncated when the first tag extends past the second.
        EndIndex = System.Math.Max(first.EndIndex, second.EndIndex),
        Text = first.Text + second.Text
    };
}

public enum Sorting
{
/// <summary>
Expand Down
2 changes: 2 additions & 0 deletions osu.Game.Rulesets.Karaoke/osu.Game.Rulesets.Karaoke.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
<PackageReference Include="ppy.osu.Game" Version="2020.1218.0" />
<PackageReference Include="LyricMaker" Version="1.1.1" />
<PackageReference Include="NicoKaraParser" Version="1.1.0" />
<PackageReference Include="WanaKanaSharp" Version="0.1.1" />
<PackageReference Include="Zipangu" Version="1.1.8" />
</ItemGroup>
<ItemGroup>
Expand Down Expand Up @@ -48,6 +49,7 @@
<InputAssemblies Include="NWaves.dll" />
<InputAssemblies Include="LyricMaker.dll" />
<InputAssemblies Include="NicoKaraParser.dll" />
<InputAssemblies Include="WanaKanaSharp.dll" />
<InputAssemblies Include="Zipangu.dll" />
</ItemGroup>

Expand Down

0 comments on commit fa1a568

Please sign in to comment.