Skip to content

Commit

Permalink
Merge pull request #2077 from andy840119/adjust-the-generator
Browse files Browse the repository at this point in the history
Adjust the generator.
  • Loading branch information
andy840119 authored Jul 23, 2023
2 parents a545464 + 2468709 commit 511bcf7
Show file tree
Hide file tree
Showing 13 changed files with 534 additions and 32 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright (c) andy840119 <[email protected]>. Licensed under the GPL Licence.
// See the LICENCE file in the repository root for full licence text.

using System.Linq;
using NUnit.Framework;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.Romajies;
using osu.Game.Rulesets.Karaoke.Objects;
using osu.Game.Rulesets.Karaoke.Tests.Asserts;

namespace osu.Game.Rulesets.Karaoke.Tests.Editor.Generator.Lyrics.Romajis;

/// <summary>
/// Common base for romaji generator tests.
/// Adds a string-based overload for checking generated results and compares
/// <see cref="RomajiGenerateResult"/> arrays property by property.
/// </summary>
/// <typeparam name="TRomajiGenerator">Romaji generator under test.</typeparam>
/// <typeparam name="TConfig">Config type consumed by the generator.</typeparam>
public abstract class BaseRomajiGeneratorTest<TRomajiGenerator, TConfig>
    : BaseLyricGeneratorTest<TRomajiGenerator, RomajiGenerateResult[], TConfig>
    where TRomajiGenerator : RomajiGenerator<TConfig>
    where TConfig : RomajiGeneratorConfig, new()
{
    /// <summary>
    /// Parses the expected strings against the time-tags of <paramref name="lyric"/>
    /// and forwards the parsed results to the array-based check.
    /// </summary>
    /// <param name="lyric">Lyric handed to the generator.</param>
    /// <param name="expectedRubies">Expected results in string format, one per time-tag of the lyric.</param>
    /// <param name="config">Config used for the check.</param>
    protected void CheckGenerateResult(Lyric lyric, string[] expectedRubies, TConfig config)
        => CheckGenerateResult(lyric, RomajiGenerateResultHelper.ParseRomajiGenerateResults(lyric.TimeTags, expectedRubies), config);

    /// <summary>
    /// Compares the time-tags first, then the initial-romaji flags, then the romaji texts.
    /// </summary>
    /// <param name="expected">Expected results.</param>
    /// <param name="actual">Results to verify.</param>
    protected override void AssertEqual(RomajiGenerateResult[] expected, RomajiGenerateResult[] actual)
    {
        var expectedTimeTags = expected.Select(result => result.TimeTag).ToArray();
        var actualTimeTags = actual.Select(result => result.TimeTag).ToArray();
        TimeTagAssert.ArePropertyEqual(expectedTimeTags, actualTimeTags);

        var expectedInitialFlags = expected.Select(result => result.InitialRomaji);
        var actualInitialFlags = actual.Select(result => result.InitialRomaji);
        Assert.AreEqual(expectedInitialFlags, actualInitialFlags);

        var expectedTexts = expected.Select(result => result.RomajiText);
        var actualTexts = actual.Select(result => result.RomajiText);
        Assert.AreEqual(expectedTexts, actualTexts);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// Copyright (c) andy840119 <[email protected]>. Licensed under the GPL Licence.
// See the LICENCE file in the repository root for full licence text.

using System.Collections.Generic;
using System.Linq;
using NUnit.Framework;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.Romajies.Ja;
using osu.Game.Rulesets.Karaoke.Objects;
using osu.Game.Rulesets.Karaoke.Tests.Helper;

namespace osu.Game.Rulesets.Karaoke.Tests.Editor.Generator.Lyrics.Romajis.Ja;

/// <summary>
/// Tests for <see cref="JaRomajiGenerator"/>: whether a lyric can be processed,
/// the generated romaji, and the conversion of generator parameters into results.
/// </summary>
public class JaRomajiGeneratorTest : BaseRomajiGeneratorTest<JaRomajiGenerator, JaRomajiGeneratorConfig>
{
    [TestCase("花火大会", new[] { "[0,start]", "[3,end]" }, true)]
    [TestCase("花火大会", new[] { "[0,start]" }, true)]
    [TestCase("花火大会", new[] { "[3,end]" }, false)] // not able to generate the romaji if there is no start time-tag.
    [TestCase("花火大会", new string[] { }, false)] // not able to generate the romaji if there is no time-tag.
    [TestCase("", new string[] { }, false)] // not able to generate the romaji if the lyric is empty.
    [TestCase(" ", new string[] { }, false)]
    [TestCase(null, new string[] { }, false)]
    public void TestCanGenerate(string text, string[] timeTagStrings, bool canGenerate)
    {
        var config = GeneratorEmptyConfig();
        var lyric = createLyricWithTimeTags(text, timeTagStrings);

        CheckCanGenerate(lyric, canGenerate, config);
    }

    // the generated result is not perfect, but it's OK for now.
    [TestCase("はなび", new[] { "[0,start]" }, new[] { "^hana bi" })]
    [TestCase("花火大会", new[] { "[0,start]", "[3,end]" }, new[] { "^hanabi taikai", "" })]
    [TestCase("花火大会", new[] { "[0,start]", "[2,start]", "[3,end]" }, new[] { "^hanabi", "taikai", "" })]
    [TestCase("枯れた世界に輝く",
        new[] { "[0,start]", "[1,start]", "[2,start]", "[3,start]", "[4,start]", "[5,start]", "[6,start]", "[6,start]", "[6,start]", "[7,start]", "[7,end]" },
        new[] { "^kare", "", "ta", "sekai", "", "ni", "kagayaku", "", "", "", "" })]
    public void TestGenerate(string text, string[] timeTagStrings, string[] expectedRomajies)
    {
        var config = GeneratorEmptyConfig();
        var lyric = createLyricWithTimeTags(text, timeTagStrings);

        CheckGenerateResult(lyric, expectedRomajies, config);
    }

    [TestCase("はなび", new[] { "[0,start]" }, new[] { "^HANA BI" })]
    [TestCase("花火大会", new[] { "[0,start]", "[2,start]", "[3,end]" }, new[] { "^HANABI", "TAIKAI", "" })]
    public void TestGenerateWithUppercase(string text, string[] timeTagStrings, string[] expectedRomajies)
    {
        var config = GeneratorEmptyConfig(x => x.Uppercase.Value = true);
        var lyric = createLyricWithTimeTags(text, timeTagStrings);

        CheckGenerateResult(lyric, expectedRomajies, config);
    }

    [TestCase("花", new[] { "[0,start]", "[0,end]" }, new[] { "[0]:hana" }, new[] { "^hana", "" })]
    [TestCase("花火", new[] { "[0,start]", "[1,end]" }, new[] { "[0]:hana", "[1]:bi" }, new[] { "^hana bi", "" })]
    [TestCase("花火", new[] { "[0,start]", "[1,start]", "[1,end]" }, new[] { "[0]:hana", "[1]:bi" }, new[] { "^hana", "bi", "" })]
    [TestCase("花火", new[] { "[0,start]", "[0,start]", "[1,start]", "[1,end]" }, new[] { "[0]:hana", "[1]:bi" }, new[] { "^hana", "", "bi", "" })]
    [TestCase("はなび", new[] { "[0,start]", "[1,start]", "[2,start]", "[2,end]" }, new[] { "[0]:hana", "[2]:bi" }, new[] { "^hana", "", "bi", "" })]
    public void TestConvertToRomajiGenerateResult(string text, string[] timeTagStrings, string[] romajiParams, string[] expectedResults)
    {
        // note: the text is listed in the test case for readability only, the conversion works on the time-tags and the romaji parameters.
        var timeTags = TestCaseTagHelper.ParseTimeTags(timeTagStrings);
        var romajis = parseRomajiGenerateResults(romajiParams);

        var expected = RomajiGenerateResultHelper.ParseRomajiGenerateResults(timeTags, expectedResults);
        var actual = JaRomajiGenerator.Convert(timeTags, romajis).ToArray();

        AssertEqual(expected, actual);
    }

    /// <summary>
    /// Build the <see cref="Lyric"/> under test from the test case text and the time-tag strings.
    /// </summary>
    /// <param name="text">Lyric text, passed to the lyric as-is (the test cases include empty and null text).</param>
    /// <param name="timeTagStrings">Time-tags in the test case string format, e.g. <c>[0,start]</c>.</param>
    /// <returns>Lyric with the text and the parsed time-tags assigned.</returns>
    private static Lyric createLyricWithTimeTags(string text, string[] timeTagStrings)
    {
        var timeTags = TestCaseTagHelper.ParseTimeTags(timeTagStrings);

        return new Lyric
        {
            Text = text,
            TimeTags = timeTags,
        };
    }

    /// <summary>
    /// Process the test case romaji string format (e.g. <c>[0]:hana</c>) into <see cref="JaRomajiGenerator.RomajiGeneratorParameter"/>
    /// </summary>
    /// <example>
    /// <inheritdoc cref="TestCaseTagHelper.ParseRomajiTag"/>
    /// </example>
    /// <param name="str">Romaji parameter string format</param>
    /// <returns><see cref="JaRomajiGenerator.RomajiGeneratorParameter"/> with the start index, end index and romaji text copied from the parsed romaji tag.</returns>
    private static JaRomajiGenerator.RomajiGeneratorParameter parseRomajiGenerateResult(string str)
    {
        // because format is same as the text-tag testing format, so just use this helper.
        var romajiTag = TestCaseTagHelper.ParseRomajiTag(str);
        return new JaRomajiGenerator.RomajiGeneratorParameter
        {
            StartIndex = romajiTag.StartIndex,
            EndIndex = romajiTag.EndIndex,
            RomajiText = romajiTag.Text,
        };
    }

    /// <summary>
    /// Process every test case romaji string with <see cref="parseRomajiGenerateResult"/>, keeping the order.
    /// </summary>
    /// <param name="strings">Romaji parameter strings</param>
    /// <returns>Parsed parameters, one per input string.</returns>
    private static JaRomajiGenerator.RomajiGeneratorParameter[] parseRomajiGenerateResults(IEnumerable<string> strings)
        => strings.Select(parseRomajiGenerateResult).ToArray();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright (c) andy840119 <[email protected]>. Licensed under the GPL Licence.
// See the LICENCE file in the repository root for full licence text.

using System;
using System.Collections.Generic;
using System.Linq;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.Romajies;
using osu.Game.Rulesets.Karaoke.Objects;

namespace osu.Game.Rulesets.Karaoke.Tests.Editor.Generator.Lyrics.Romajis;

/// <summary>
/// Test helper that builds <see cref="RomajiGenerateResult"/> objects from a compact string format.
/// </summary>
public class RomajiGenerateResultHelper
{
    /// <summary>
    /// Convert the string format into the <see cref="RomajiGenerateResult"/>.
    /// A leading <c>^</c> marks the result as initial romaji.
    /// Every <c>^</c> character is removed from the romaji text.
    /// </summary>
    /// <example>
    /// karaoke
    /// ^karaoke
    /// </example>
    /// <param name="timeTag">Origin time-tag</param>
    /// <param name="str">Generate result string format</param>
    /// <returns>Romaji generate result that references <paramref name="timeTag"/>.</returns>
    public static RomajiGenerateResult ParseRomajiGenerateResult(TimeTag timeTag, string str)
    {
        bool initialRomaji = str.StartsWith("^", StringComparison.Ordinal);

        return new RomajiGenerateResult
        {
            TimeTag = timeTag,
            InitialRomaji = initialRomaji,
            RomajiText = str.Replace("^", ""),
        };
    }

    /// <summary>
    /// Convert the time-tags and the result strings pair-wise (same index) into <see cref="RomajiGenerateResult"/>.
    /// </summary>
    /// <param name="timeTags">Origin time-tags</param>
    /// <param name="strings">Generate result strings, one per time-tag.</param>
    /// <returns>Romaji generate results in the same order as <paramref name="timeTags"/>.</returns>
    /// <exception cref="InvalidOperationException">The amount of time-tags and strings is not the same.</exception>
    public static RomajiGenerateResult[] ParseRomajiGenerateResults(IList<TimeTag> timeTags, IList<string> strings)
    {
        // report both amounts, it's usually caused by the wrong test case data.
        if (timeTags.Count != strings.Count)
            throw new InvalidOperationException($"The amount of time-tags ({timeTags.Count}) should be the same as the amount of result strings ({strings.Count}).");

        // both lists have the same length here, so zip will not drop any element.
        return timeTags.Zip(strings, (timeTag, str) => ParseRomajiGenerateResult(timeTag, str)).ToArray();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright (c) andy840119 <[email protected]>. Licensed under the GPL Licence.
// See the LICENCE file in the repository root for full licence text.

using System.Globalization;
using System.Linq;
using NUnit.Framework;
using osu.Framework.Graphics.Sprites;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.Romajies;
using osu.Game.Rulesets.Karaoke.Objects;
using osu.Game.Rulesets.Karaoke.Tests.Asserts;
using osu.Game.Rulesets.Karaoke.Tests.Helper;

namespace osu.Game.Rulesets.Karaoke.Tests.Editor.Generator.Lyrics.Romajis;

/// <summary>
/// Tests for <see cref="RomajiGeneratorSelector"/>: whether a lyric can be processed
/// and what is generated for lyrics with the given language.
/// </summary>
public class RomajiTagGeneratorSelectorTest : BaseLyricGeneratorSelectorTest<RomajiGeneratorSelector, RomajiGenerateResult[]>
{
    [TestCase(17, "花火大会", true)]
    [TestCase(17, "我是中文", true)] // the language code alone decides whether the lyric can be generated, not the text.
    [TestCase(17, "", false)] // not able to generate the romaji if the lyric is empty.
    [TestCase(17, " ", false)]
    [TestCase(17, null, false)]
    [TestCase(1028, "はなび", false)] // should not be able to generate if the language is not supported.
    public void TestCanGenerate(int lcid, string text, bool canGenerate)
    {
        var selector = CreateSelector();

        var language = new CultureInfo(lcid);
        var defaultTimeTag = new TimeTag(new TextIndex());

        var lyric = new Lyric
        {
            Language = language,
            Text = text,
            TimeTags = new[] { defaultTimeTag },
        };

        CheckCanGenerate(lyric, canGenerate, selector);
    }

    [TestCase(17, "はなび", new[] { "[0,start]" }, new[] { "^hana bi" })] // Japanese
    [TestCase(1041, "花火大会", new[] { "[0,start]", "[3,end]" }, new[] { "^hanabi taikai", "" })] // Japanese
    public void TestGenerate(int lcid, string text, string[] timeTagStrings, string[] expectedRomajies)
    {
        var selector = CreateSelector();
        var parsedTimeTags = TestCaseTagHelper.ParseTimeTags(timeTagStrings);

        var lyric = new Lyric
        {
            Language = new CultureInfo(lcid),
            Text = text,
            TimeTags = parsedTimeTags,
        };

        // the expected results share the time-tags with the lyric.
        CheckGenerateResult(lyric, RomajiGenerateResultHelper.ParseRomajiGenerateResults(parsedTimeTags, expectedRomajies), selector);
    }

    /// <summary>
    /// Compares the time-tags first, then the initial-romaji flags, then the romaji texts.
    /// </summary>
    /// <param name="expected">Expected results.</param>
    /// <param name="actual">Results to verify.</param>
    protected override void AssertEqual(RomajiGenerateResult[] expected, RomajiGenerateResult[] actual)
    {
        var expectedTimeTags = (from result in expected select result.TimeTag).ToArray();
        var actualTimeTags = (from result in actual select result.TimeTag).ToArray();
        TimeTagAssert.ArePropertyEqual(expectedTimeTags, actualTimeTags);

        Assert.AreEqual(from result in expected select result.InitialRomaji, from result in actual select result.InitialRomaji);
        Assert.AreEqual(from result in expected select result.RomajiText, from result in actual select result.RomajiText);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.Language;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.Notes;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.ReferenceLyric;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.Romajies.Ja;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.RomajiTags.Ja;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.RubyTags.Ja;
using osu.Game.Rulesets.Karaoke.Edit.Generator.Lyrics.TimeTags.Ja;
Expand Down Expand Up @@ -54,6 +55,9 @@ protected override void InitialiseDefaults()
// Time tag generator
SetDefault<JaTimeTagGeneratorConfig>();
SetDefault<ZhTimeTagGeneratorConfig>();

// Romaji generator
SetDefault<JaRomajiGeneratorConfig>();
}

protected void SetDefault<T>() where T : GeneratorConfig, new()
Expand Down Expand Up @@ -81,6 +85,7 @@ protected static KaraokeRulesetEditGeneratorSetting GetSettingByType<TValue>() =
Type t when t == typeof(JaRubyTagGeneratorConfig) => KaraokeRulesetEditGeneratorSetting.JaRubyTagGeneratorConfig,
Type t when t == typeof(JaTimeTagGeneratorConfig) => KaraokeRulesetEditGeneratorSetting.JaTimeTagGeneratorConfig,
Type t when t == typeof(ZhTimeTagGeneratorConfig) => KaraokeRulesetEditGeneratorSetting.ZhTimeTagGeneratorConfig,
Type t when t == typeof(JaRomajiGeneratorConfig) => KaraokeRulesetEditGeneratorSetting.JaRomajiGeneratorConfig,
_ => throw new NotSupportedException(),
};

Expand Down Expand Up @@ -134,4 +139,7 @@ public enum KaraokeRulesetEditGeneratorSetting
// Time tag generator
JaTimeTagGeneratorConfig,
ZhTimeTagGeneratorConfig,

// Romaji generator.
JaRomajiGeneratorConfig,
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

using System.Collections.Generic;
using System.IO;
using System.Linq;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Ja;
using Lucene.Net.Analysis.TokenAttributes;
Expand All @@ -29,16 +30,17 @@ public JaRomajiTagGenerator(JaRomajiTagGeneratorConfig config)

protected override RomajiTag[] GenerateFromItem(Lyric item)
{
string text = item.Text;
var processingTags = new List<RomajiTagGeneratorParameter>();

// Tokenize the text
string text = item.Text;
var tokenStream = analyzer.GetTokenStream("dummy", new StringReader(text));

// Get result and offset
var result = tokenStream.GetAttribute<ICharTermAttribute>();
var offsetAtt = tokenStream.GetAttribute<IOffsetAttribute>();
var processingRomajies = getProcessingRomajies(text, tokenStream, Config).ToArray();

return Convert(processingRomajies).ToArray();
}

private static IEnumerable<RomajiTagGeneratorParameter> getProcessingRomajies(string text, TokenStream tokenStream, JaRomajiTagGeneratorConfig config)
{
// Reset the stream and convert all result
tokenStream.Reset();

Expand All @@ -48,42 +50,49 @@ protected override RomajiTag[] GenerateFromItem(Lyric item)
tokenStream.ClearAttributes();
tokenStream.IncrementToken();

// Get result and offset
var charTermAttribute = tokenStream.GetAttribute<ICharTermAttribute>();
var offsetAttribute = tokenStream.GetAttribute<IOffsetAttribute>();

// Get parsed result, result is Katakana.
string katakana = result.ToString();
string katakana = charTermAttribute.ToString();
if (string.IsNullOrEmpty(katakana))
break;

string parentText = text[offsetAtt.StartOffset..offsetAtt.EndOffset];
string parentText = text[offsetAttribute.StartOffset..offsetAttribute.EndOffset];
bool fromKanji = JpStringUtils.ToKatakana(katakana) != JpStringUtils.ToKatakana(parentText);

// Convert to romaji.
string romaji = JpStringUtils.ToRomaji(katakana);
if (Config.Uppercase.Value)
if (config.Uppercase.Value)
romaji = romaji.ToUpper();

// Make tag
processingTags.Add(new RomajiTagGeneratorParameter
yield return new RomajiTagGeneratorParameter
{
FromKanji = fromKanji,
RomajiTag = new RomajiTag
{
Text = romaji,
StartIndex = offsetAtt.StartOffset,
EndIndex = offsetAtt.EndOffset - 1,
StartIndex = offsetAttribute.StartOffset,
EndIndex = offsetAttribute.EndOffset - 1,
},
});
};
}

// Dispose
tokenStream.End();
tokenStream.Dispose();
}

internal static IEnumerable<RomajiTag> Convert(RomajiTagGeneratorParameter[] tags)
{
var romajiTags = new List<RomajiTag>();

foreach (var processingTag in processingTags)
foreach (var processingTag in tags)
{
// combine romajies if they are not from kanji.
var previousProcessingTag = processingTags.GetPrevious(processingTag);
var previousProcessingTag = tags.GetPrevious(processingTag);
bool fromKanji = processingTag.FromKanji;

if (previousProcessingTag != null && !fromKanji)
Expand All @@ -98,7 +107,7 @@ protected override RomajiTag[] GenerateFromItem(Lyric item)
}
}

return romajiTags.ToArray();
return romajiTags;
}

internal class RomajiTagGeneratorParameter
Expand Down
Loading

0 comments on commit 511bcf7

Please sign in to comment.