Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement the romaji generator to fill the property inside the time-tag. #2077

Merged
merged 8 commits into from
Jul 23, 2023
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright (c) andy840119 <[email protected]>. Licensed under the GPL Licence.
// See the LICENCE file in the repository root for full licence text.

using System.Linq;
using NUnit.Framework;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.Romajies;
using osu.Game.Rulesets.Karaoke.Objects;
using osu.Game.Rulesets.Karaoke.Tests.Asserts;

namespace osu.Game.Rulesets.Karaoke.Tests.Editor.Generator.Lyrics.Romajis;

/// <summary>
/// Common base for tests targeting a <see cref="RomajiGenerator{TConfig}"/> implementation.
/// Supplies a string-based overload for checking results and the comparison used for <see cref="RomajiGenerateResult"/> arrays.
/// </summary>
/// <typeparam name="TRomajiGenerator">Generator under test.</typeparam>
/// <typeparam name="TConfig">Config type consumed by the generator.</typeparam>
public abstract class BaseRomajiGeneratorTest<TRomajiGenerator, TConfig> : BaseLyricGeneratorTest<TRomajiGenerator, RomajiGenerateResult[], TConfig>
    where TRomajiGenerator : RomajiGenerator<TConfig>
    where TConfig : RomajiGeneratorConfig, new()
{
    /// <summary>
    /// Parses the expected strings against the time-tags of <paramref name="lyric"/>, then forwards to the typed check.
    /// </summary>
    /// <param name="lyric">Lyric handed to the generator.</param>
    /// <param name="expectedRubies">Expected results in test string format, one entry per time-tag of the lyric.</param>
    /// <param name="config">Generator config used for this check.</param>
    protected void CheckGenerateResult(Lyric lyric, string[] expectedRubies, TConfig config)
        => CheckGenerateResult(lyric, RomajiGenerateResultHelper.ParseRomajiGenerateResults(lyric.TimeTags, expectedRubies), config);

    /// <summary>
    /// Compares the time-tags, then the initial-romaji flags, then the romaji texts of both result arrays.
    /// </summary>
    protected override void AssertEqual(RomajiGenerateResult[] expected, RomajiGenerateResult[] actual)
    {
        var expectedTimeTags = expected.Select(result => result.TimeTag).ToArray();
        var actualTimeTags = actual.Select(result => result.TimeTag).ToArray();
        TimeTagAssert.ArePropertyEqual(expectedTimeTags, actualTimeTags);

        var expectedInitialFlags = expected.Select(result => result.InitialRomaji);
        var actualInitialFlags = actual.Select(result => result.InitialRomaji);
        Assert.AreEqual(expectedInitialFlags, actualInitialFlags);

        var expectedTexts = expected.Select(result => result.RomajiText);
        var actualTexts = actual.Select(result => result.RomajiText);
        Assert.AreEqual(expectedTexts, actualTexts);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// Copyright (c) andy840119 <[email protected]>. Licensed under the GPL Licence.
// See the LICENCE file in the repository root for full licence text.

using System.Collections.Generic;
using System.Linq;
using NUnit.Framework;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.Romajies.Ja;
using osu.Game.Rulesets.Karaoke.Objects;
using osu.Game.Rulesets.Karaoke.Tests.Helper;

namespace osu.Game.Rulesets.Karaoke.Tests.Editor.Generator.Lyrics.Romajis.Ja;

/// <summary>
/// Tests for <see cref="JaRomajiGenerator"/>: whether a lyric can be processed, the generated romaji per time-tag,
/// the uppercase option and the conversion from intermediate parameters into generate results.
/// </summary>
public class JaRomajiGeneratorTest : BaseRomajiGeneratorTest<JaRomajiGenerator, JaRomajiGeneratorConfig>
{
    [TestCase("花火大会", new[] { "[0,start]", "[3,end]" }, true)]
    [TestCase("花火大会", new[] { "[0,start]" }, true)]
    [TestCase("花火大会", new[] { "[3,end]" }, false)] // not able to generate the romaji if there is no start time-tag.
    [TestCase("花火大会", new string[] { }, false)] // not able to generate the romaji if there is no time-tag.
    [TestCase("", new string[] { }, false)] // not able to generate the romaji if the lyric is empty.
    [TestCase(" ", new string[] { }, false)]
    [TestCase(null, new string[] { }, false)]
    public void TestCanGenerate(string text, string[] timeTagStrings, bool canGenerate)
    {
        var config = GeneratorEmptyConfig();
        var lyric = createLyric(text, timeTagStrings);

        CheckCanGenerate(lyric, canGenerate, config);
    }

    // the generated result is not perfect, but it's OK for now.
    [TestCase("はなび", new[] { "[0,start]" }, new[] { "^hana bi" })]
    [TestCase("花火大会", new[] { "[0,start]", "[3,end]" }, new[] { "^hanabi taikai", "" })]
    [TestCase("花火大会", new[] { "[0,start]", "[2,start]", "[3,end]" }, new[] { "^hanabi", "taikai", "" })]
    [TestCase("枯れた世界に輝く",
        new[] { "[0,start]", "[1,start]", "[2,start]", "[3,start]", "[4,start]", "[5,start]", "[6,start]", "[6,start]", "[6,start]", "[7,start]", "[7,end]" },
        new[] { "^kare", "", "ta", "sekai", "", "ni", "kagayaku", "", "", "", "" })]
    public void TestGenerate(string text, string[] timeTagStrings, string[] expectedRomajies)
    {
        var config = GeneratorEmptyConfig();
        var lyric = createLyric(text, timeTagStrings);

        CheckGenerateResult(lyric, expectedRomajies, config);
    }

    [TestCase("はなび", new[] { "[0,start]" }, new[] { "^HANA BI" })]
    [TestCase("花火大会", new[] { "[0,start]", "[2,start]", "[3,end]" }, new[] { "^HANABI", "TAIKAI", "" })]
    public void TestGenerateWithUppercase(string text, string[] timeTagStrings, string[] expectedRomajies)
    {
        var config = GeneratorEmptyConfig(x => x.Uppercase.Value = true);
        var lyric = createLyric(text, timeTagStrings);

        CheckGenerateResult(lyric, expectedRomajies, config);
    }

    [TestCase("花", new[] { "[0,start]", "[0,end]" }, new[] { "[0]:hana" }, new[] { "^hana", "" })]
    [TestCase("花火", new[] { "[0,start]", "[1,end]" }, new[] { "[0]:hana", "[1]:bi" }, new[] { "^hana bi", "" })]
    [TestCase("花火", new[] { "[0,start]", "[1,start]", "[1,end]" }, new[] { "[0]:hana", "[1]:bi" }, new[] { "^hana", "bi", "" })]
    [TestCase("花火", new[] { "[0,start]", "[0,start]", "[1,start]", "[1,end]" }, new[] { "[0]:hana", "[1]:bi" }, new[] { "^hana", "", "bi", "" })]
    [TestCase("はなび", new[] { "[0,start]", "[1,start]", "[2,start]", "[2,end]" }, new[] { "[0]:hana", "[2]:bi" }, new[] { "^hana", "", "bi", "" })]
    public void TestConvertToRomajiGenerateResult(string text, string[] timeTagStrings, string[] romajiParams, string[] expectedResults)
    {
        var timeTags = TestCaseTagHelper.ParseTimeTags(timeTagStrings);
        var romajis = parseRomajiGenerateResults(romajiParams);

        var expected = RomajiGenerateResultHelper.ParseRomajiGenerateResults(timeTags, expectedResults);
        var actual = JaRomajiGenerator.Convert(timeTags, romajis).ToArray();

        AssertEqual(expected, actual);
    }

    /// <summary>
    /// Create the <see cref="Lyric"/> under test from the raw test case arguments.
    /// </summary>
    /// <param name="text">Lyric text, may be null or empty in the negative test cases.</param>
    /// <param name="timeTagStrings">Time tags in the test case string format</param>
    /// <returns>Lyric with the text and the parsed time tags assigned.</returns>
    private static Lyric createLyric(string text, string[] timeTagStrings)
        => new Lyric
        {
            Text = text,
            TimeTags = TestCaseTagHelper.ParseTimeTags(timeTagStrings),
        };

    /// <summary>
    /// Process test case romaji string format into <see cref="JaRomajiGenerator.RomajiGeneratorParameter"/>
    /// </summary>
    /// <example>
    /// <inheritdoc cref="TestCaseTagHelper.ParseRomajiTag"/>
    /// </example>
    /// <param name="str">Romaji parameter string format</param>
    /// <returns><see cref="JaRomajiGenerator.RomajiGeneratorParameter"/> Romaji generator parameter object</returns>
    private static JaRomajiGenerator.RomajiGeneratorParameter parseRomajiGenerateResult(string str)
    {
        // because format is same as the text-tag testing format, so just use this helper.
        var romajiTag = TestCaseTagHelper.ParseRomajiTag(str);
        return new JaRomajiGenerator.RomajiGeneratorParameter
        {
            StartIndex = romajiTag.StartIndex,
            EndIndex = romajiTag.EndIndex,
            RomajiText = romajiTag.Text,
        };
    }

    /// <summary>
    /// Process every test case romaji string with <see cref="parseRomajiGenerateResult"/>.
    /// </summary>
    /// <param name="strings">Romaji parameter strings</param>
    /// <returns>Parsed parameters, in the same order as the input.</returns>
    private static JaRomajiGenerator.RomajiGeneratorParameter[] parseRomajiGenerateResults(IEnumerable<string> strings)
        => strings.Select(parseRomajiGenerateResult).ToArray();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright (c) andy840119 <[email protected]>. Licensed under the GPL Licence.
// See the LICENCE file in the repository root for full licence text.

using System;
using System.Collections.Generic;
using System.Linq;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.Romajies;
using osu.Game.Rulesets.Karaoke.Objects;

namespace osu.Game.Rulesets.Karaoke.Tests.Editor.Generator.Lyrics.Romajis;

/// <summary>
/// Test helper that builds <see cref="RomajiGenerateResult"/> objects from a compact string format.
/// </summary>
public class RomajiGenerateResultHelper
{
    /// <summary>
    /// Convert the string format into the <see cref="RomajiGenerateResult"/>.
    /// A leading "^" marks the result as the initial romaji and is not part of the romaji text.
    /// </summary>
    /// <example>
    /// karaoke
    /// ^karaoke
    /// </example>
    /// <param name="timeTag">Origin time-tag</param>
    /// <param name="str">Generate result string format</param>
    /// <returns><see cref="RomajiGenerateResult"/>Romaji generate result.</returns>
    public static RomajiGenerateResult ParseRomajiGenerateResult(TimeTag timeTag, string str)
    {
        const string marker = "^";

        bool initialRomaji = str.StartsWith(marker, StringComparison.Ordinal);

        return new RomajiGenerateResult
        {
            TimeTag = timeTag,
            InitialRomaji = initialRomaji,
            // only the leading marker carries a meaning, so strip just that one
            // instead of removing every "^" that happens to be in the text.
            RomajiText = initialRomaji ? str[marker.Length..] : str,
        };
    }

    /// <summary>
    /// Convert each string into a <see cref="RomajiGenerateResult"/>, pairing it with the time-tag at the same position.
    /// </summary>
    /// <param name="timeTags">Origin time-tags</param>
    /// <param name="strings">Generate result string formats, one per time-tag</param>
    /// <returns>Romaji generate results, in the same order as <paramref name="timeTags"/>.</returns>
    /// <exception cref="InvalidOperationException">The amount of time-tags and strings is different.</exception>
    public static RomajiGenerateResult[] ParseRomajiGenerateResults(IList<TimeTag> timeTags, IList<string> strings)
    {
        if (timeTags.Count != strings.Count)
            throw new InvalidOperationException($"The number of time-tags ({timeTags.Count}) does not match the number of result strings ({strings.Count}).");

        // counts are equal at this point, so pairing by position covers every element.
        return timeTags.Zip(strings, (timeTag, str) => ParseRomajiGenerateResult(timeTag, str)).ToArray();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright (c) andy840119 <[email protected]>. Licensed under the GPL Licence.
// See the LICENCE file in the repository root for full licence text.

using System.Globalization;
using System.Linq;
using NUnit.Framework;
using osu.Framework.Graphics.Sprites;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.Romajies;
using osu.Game.Rulesets.Karaoke.Objects;
using osu.Game.Rulesets.Karaoke.Tests.Asserts;
using osu.Game.Rulesets.Karaoke.Tests.Helper;

namespace osu.Game.Rulesets.Karaoke.Tests.Editor.Generator.Lyrics.Romajis;

/// <summary>
/// Tests for <see cref="RomajiGeneratorSelector"/>, driven by the language assigned to the lyric.
/// </summary>
public class RomajiTagGeneratorSelectorTest : BaseLyricGeneratorSelectorTest<RomajiGeneratorSelector, RomajiGenerateResult[]>
{
    [TestCase(17, "花火大会", true)]
    [TestCase(17, "我是中文", true)] // the decision is based on the language code only, not on the text content.
    [TestCase(17, "", false)] // an empty lyric can not be used to generate the romaji.
    [TestCase(17, " ", false)]
    [TestCase(17, null, false)]
    [TestCase(1028, "はなび", false)] // an unsupported language should not be able to generate.
    public void TestCanGenerate(int lcid, string text, bool canGenerate)
    {
        var selector = CreateSelector();

        var language = new CultureInfo(lcid);

        // a single default time-tag, so the lyric is not rejected for having no time-tag at all.
        var timeTags = new[] { new TimeTag(new TextIndex()) };

        var lyric = new Lyric
        {
            Language = language,
            Text = text,
            TimeTags = timeTags,
        };

        CheckCanGenerate(lyric, canGenerate, selector);
    }

    [TestCase(17, "はなび", new[] { "[0,start]" }, new[] { "^hana bi" })] // Japanese
    [TestCase(1041, "花火大会", new[] { "[0,start]", "[3,end]" }, new[] { "^hanabi taikai", "" })] // Japanese
    public void TestGenerate(int lcid, string text, string[] timeTagStrings, string[] expectedRomajies)
    {
        var selector = CreateSelector();

        var language = new CultureInfo(lcid);
        var timeTags = TestCaseTagHelper.ParseTimeTags(timeTagStrings);

        var lyric = new Lyric
        {
            Language = language,
            Text = text,
            TimeTags = timeTags,
        };

        // the expected results share the same time-tags as the lyric.
        CheckGenerateResult(lyric, RomajiGenerateResultHelper.ParseRomajiGenerateResults(timeTags, expectedRomajies), selector);
    }

    /// <summary>
    /// Compares the time-tags, then the initial-romaji flags, then the romaji texts of both result arrays.
    /// </summary>
    protected override void AssertEqual(RomajiGenerateResult[] expected, RomajiGenerateResult[] actual)
    {
        var expectedTimeTags = expected.Select(result => result.TimeTag).ToArray();
        var actualTimeTags = actual.Select(result => result.TimeTag).ToArray();
        TimeTagAssert.ArePropertyEqual(expectedTimeTags, actualTimeTags);

        var expectedInitialFlags = expected.Select(result => result.InitialRomaji);
        var actualInitialFlags = actual.Select(result => result.InitialRomaji);
        Assert.AreEqual(expectedInitialFlags, actualInitialFlags);

        var expectedTexts = expected.Select(result => result.RomajiText);
        var actualTexts = actual.Select(result => result.RomajiText);
        Assert.AreEqual(expectedTexts, actualTexts);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.Language;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.Notes;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.ReferenceLyric;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.Romajies.Ja;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.RomajiTags.Ja;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.RubyTags.Ja;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.TimeTags.Ja;
Expand Down Expand Up @@ -54,6 +55,9 @@ protected override void InitialiseDefaults()
// Time tag generator
SetDefault<JaTimeTagGeneratorConfig>();
SetDefault<ZhTimeTagGeneratorConfig>();

// Romaji generator
SetDefault<JaRomajiGeneratorConfig>();
}

protected void SetDefault<T>() where T : GeneratorConfig, new()
Expand Down Expand Up @@ -81,6 +85,7 @@ protected static KaraokeRulesetEditGeneratorSetting GetSettingByType<TValue>() =
Type t when t == typeof(JaRubyTagGeneratorConfig) => KaraokeRulesetEditGeneratorSetting.JaRubyTagGeneratorConfig,
Type t when t == typeof(JaTimeTagGeneratorConfig) => KaraokeRulesetEditGeneratorSetting.JaTimeTagGeneratorConfig,
Type t when t == typeof(ZhTimeTagGeneratorConfig) => KaraokeRulesetEditGeneratorSetting.ZhTimeTagGeneratorConfig,
Type t when t == typeof(JaRomajiGeneratorConfig) => KaraokeRulesetEditGeneratorSetting.JaRomajiGeneratorConfig,
_ => throw new NotSupportedException(),
};

Expand Down Expand Up @@ -134,4 +139,7 @@ public enum KaraokeRulesetEditGeneratorSetting
// Time tag generator
JaTimeTagGeneratorConfig,
ZhTimeTagGeneratorConfig,

// Romaji generator.
JaRomajiGeneratorConfig,
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

using System.Collections.Generic;
using System.IO;
using System.Linq;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Ja;
using Lucene.Net.Analysis.TokenAttributes;
Expand All @@ -29,16 +30,17 @@ public JaRomajiTagGenerator(JaRomajiTagGeneratorConfig config)

protected override RomajiTag[] GenerateFromItem(Lyric item)
{
string text = item.Text;
var processingTags = new List<RomajiTagGeneratorParameter>();

// Tokenize the text
string text = item.Text;
var tokenStream = analyzer.GetTokenStream("dummy", new StringReader(text));

// Get result and offset
var result = tokenStream.GetAttribute<ICharTermAttribute>();
var offsetAtt = tokenStream.GetAttribute<IOffsetAttribute>();
var processingRomajies = getProcessingRomajies(text, tokenStream, Config).ToArray();

return Convert(processingRomajies).ToArray();
}

private static IEnumerable<RomajiTagGeneratorParameter> getProcessingRomajies(string text, TokenStream tokenStream, JaRomajiTagGeneratorConfig config)
{
// Reset the stream and convert all result
tokenStream.Reset();

Expand All @@ -48,42 +50,49 @@ protected override RomajiTag[] GenerateFromItem(Lyric item)
tokenStream.ClearAttributes();
tokenStream.IncrementToken();

// Get result and offset
var charTermAttribute = tokenStream.GetAttribute<ICharTermAttribute>();
var offsetAttribute = tokenStream.GetAttribute<IOffsetAttribute>();

// Get parsed result, result is Katakana.
string katakana = result.ToString();
string katakana = charTermAttribute.ToString();
if (string.IsNullOrEmpty(katakana))
break;

string parentText = text[offsetAtt.StartOffset..offsetAtt.EndOffset];
string parentText = text[offsetAttribute.StartOffset..offsetAttribute.EndOffset];
bool fromKanji = JpStringUtils.ToKatakana(katakana) != JpStringUtils.ToKatakana(parentText);

// Convert to romaji.
string romaji = JpStringUtils.ToRomaji(katakana);
if (Config.Uppercase.Value)
if (config.Uppercase.Value)
romaji = romaji.ToUpper();

// Make tag
processingTags.Add(new RomajiTagGeneratorParameter
yield return new RomajiTagGeneratorParameter
{
FromKanji = fromKanji,
RomajiTag = new RomajiTag
{
Text = romaji,
StartIndex = offsetAtt.StartOffset,
EndIndex = offsetAtt.EndOffset - 1,
StartIndex = offsetAttribute.StartOffset,
EndIndex = offsetAttribute.EndOffset - 1,
},
});
};
}

// Dispose
tokenStream.End();
tokenStream.Dispose();
}

internal static IEnumerable<RomajiTag> Convert(RomajiTagGeneratorParameter[] tags)
{
var romajiTags = new List<RomajiTag>();

foreach (var processingTag in processingTags)
foreach (var processingTag in tags)
{
// combine romajies of they are not from kanji.
var previousProcessingTag = processingTags.GetPrevious(processingTag);
var previousProcessingTag = tags.GetPrevious(processingTag);
bool fromKanji = processingTag.FromKanji;

if (previousProcessingTag != null && !fromKanji)
Expand All @@ -98,7 +107,7 @@ protected override RomajiTag[] GenerateFromItem(Lyric item)
}
}

return romajiTags.ToArray();
return romajiTags;
}

internal class RomajiTagGeneratorParameter
Expand Down
Loading