Skip to content

Commit

Permalink
Merge branch 'anitomy'
Browse files Browse the repository at this point in the history
  • Loading branch information
insomniachi committed Jun 15, 2024
2 parents 0f6a768 + f56901f commit e666ab7
Show file tree
Hide file tree
Showing 25 changed files with 2,644 additions and 36 deletions.
70 changes: 70 additions & 0 deletions Totoro.Core/Anitomy/Anitomy.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
namespace Anitomy;


/// <summary>
/// Entry point of the parser: splits a filename into a list of categorized <see cref="Element"/>s.
/// </summary>
public class Anitomy
{
    /// <summary>
    /// Parses <paramref name="filename"/> using the supplied <paramref name="options"/>.
    /// </summary>
    /// <param name="filename">the filename to parse; may be null or empty</param>
    /// <param name="options">the parser options</param>
    /// <returns>
    /// The elements found. When a known file extension is present (and extension parsing is enabled)
    /// it is reported first, followed by the filename without that extension and whatever the
    /// tokenizer/parser add. If nothing is left to parse, only the elements collected so far are returned.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is null</exception>
    public static IEnumerable<Element> Parse(string filename, Options options)
    {
        ArgumentNullException.ThrowIfNull(options);

        var elements = new List<Element>(32);

        // Remove/parse the extension.
        var fname = filename;
        if (options.ParseFileExtension && TryRemoveExtension(filename, out var stem, out var extension))
        {
            fname = stem;
            elements.Add(new Element(ElementCategory.FileExtension, extension));
        }

        // Set the filename. The check is made on the name *after* the extension was stripped,
        // so an input such as ".mkv" does not produce an empty FileName element or get tokenized.
        if (string.IsNullOrEmpty(fname))
        {
            return elements;
        }

        elements.Add(new Element(ElementCategory.FileName, fname));

        // Tokenize, and only run the parser when tokenizing succeeded.
        var tokens = new List<Token>();
        if (new Tokenizer(fname, elements, options, tokens).Tokenize())
        {
            new Parser(elements, options, tokens).Parse();
        }

        return elements;
    }

    /// <summary>
    /// Parses <paramref name="filename"/> using a default-constructed <see cref="Options"/>.
    /// </summary>
    /// <param name="filename">the filename to parse; may be null or empty</param>
    /// <returns>the elements found</returns>
    public static IEnumerable<Element> Parse(string filename)
    {
        return Parse(filename, new Options());
    }

    /// <summary>
    /// Splits a trailing, known file extension off <paramref name="filename"/>.
    /// </summary>
    /// <param name="filename">the full filename</param>
    /// <param name="stem">the filename without the extension and its dot; the unchanged input on failure</param>
    /// <param name="extension">the extension as written in the filename (original casing); empty on failure</param>
    /// <returns>true if a known file extension was found and removed</returns>
    private static bool TryRemoveExtension(string filename, out string stem, out string extension)
    {
        stem = filename;
        extension = string.Empty;

        if (string.IsNullOrEmpty(filename))
        {
            return false;
        }

        var position = filename.LastIndexOf('.');
        if (position == -1)
        {
            return false;
        }

        // Candidate extension: at most four alphanumeric characters after the last dot.
        var candidate = filename[(position + 1)..];
        if (candidate.Length > 4 || !candidate.All(char.IsLetterOrDigit))
        {
            return false;
        }

        // Only extensions registered under the FileExtension category count.
        var keyword = KeywordManager.Normalize(candidate);
        if (!KeywordManager.Contains(ElementCategory.FileExtension, keyword))
        {
            return false;
        }

        stem = filename[..position];
        extension = candidate;
        return true;
    }
}
31 changes: 31 additions & 0 deletions Totoro.Core/Anitomy/Element.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
using System.Diagnostics;

namespace Anitomy;

/// <summary>
/// A single parsed piece of a filename: a category together with the text that was matched.
/// </summary>
/// <param name="category">the category of the element</param>
/// <param name="value">the text of the element</param>
[DebuggerDisplay("{Category} = {Value}")]
public class Element(ElementCategory category, string value)
{
    /// <summary>The category of this element; it can be reassigned after construction.</summary>
    public ElementCategory Category { get; set; } = category;

    /// <summary>The text of this element.</summary>
    public string Value { get; } = value;

    /// <summary>
    /// Hash code derived from <see cref="Value"/> only. <see cref="Category"/> is settable, so it is
    /// deliberately left out to keep the hash stable while the element sits in a hashed collection.
    /// Elements that are equal always share the same value and therefore the same hash.
    /// </summary>
    public override int GetHashCode()
    {
        unchecked
        {
            return -1926371015 + (Value?.GetHashCode() ?? 0);
        }
    }

    /// <summary>
    /// Two elements are equal when they have the same runtime type, the same <see cref="Category"/>
    /// and the same <see cref="Value"/> (ordinal comparison).
    /// </summary>
    /// <param name="obj">the object to compare with</param>
    /// <returns>true if <paramref name="obj"/> is an equal element</returns>
    public override bool Equals(object obj)
    {
        if (ReferenceEquals(this, obj))
        {
            return true;
        }

        if (obj is null || GetType() != obj.GetType())
        {
            return false;
        }

        var other = (Element)obj;

        // Comparing the value as well keeps Equals consistent with GetHashCode:
        // previously two elements with the same category but different values
        // compared equal while producing different hash codes.
        return Category == other.Category
            && string.Equals(Value, other.Value, StringComparison.Ordinal);
    }
}
31 changes: 31 additions & 0 deletions Totoro.Core/Anitomy/ElementCategory.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
namespace Anitomy;

/// <summary>
/// The categories an <see cref="Element"/> or a registered keyword can belong to.
/// </summary>
/// <remarks>
/// No explicit values are assigned, so each member's numeric value follows from its position
/// in this list; inserting or reordering members changes those values.
/// </remarks>
public enum ElementCategory
{
AnimeSeason,
// Keywords such as "SEASON" / "SAISON" are registered under this category.
AnimeSeasonPrefix,
AnimeTitle,
// Keywords such as "MOVIE", "OVA", "TV" are registered under this category.
AnimeType,
AnimeYear,
// Audio channel/codec/language keywords such as "AAC", "FLAC", "DUALAUDIO".
AudioTerm,
// Keywords such as "IPOD", "PS3", "ANDROID".
DeviceCompatibility,
EpisodeNumber,
EpisodeNumberAlt,
// Keywords such as "EP", "EPISODE", "FOLGE".
EpisodePrefix,
EpisodeTitle,
FileChecksum,
// Known extensions such as "MKV", "MP4"; these keywords are kept in a container separate from all other categories.
FileExtension,
// The filename being parsed, without a recognized extension.
FileName,
// Keywords such as "ENG", "VOSTFR".
Language,
// Keywords such as "REMASTER", "UNCENSORED", "WS".
Other,
ReleaseGroup,
// Keywords such as "BATCH", "COMPLETE", "REMUX".
ReleaseInformation,
// Keywords "V0" through "V4".
ReleaseVersion,
// Keywords such as "BD", "DVD", "HDTV", "WEBRIP".
Source,
// Keywords such as "SUB", "DUB", "HARDSUB", "RAW".
Subtitles,
// Pre-identified resolutions are "480p", "720p" and "1080p".
VideoResolution,
// Frame rate/codec/format/quality keywords such as "H264", "HEVC", "10BIT".
VideoTerm,
VolumeNumber,
// Keywords "VOL", "VOL.", "VOLUME".
VolumePrefix,
// Used by keyword lookup as "no category decided yet": a lookup with this category accepts a keyword of any category.
Unknown
}
212 changes: 212 additions & 0 deletions Totoro.Core/Anitomy/Keyword.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@

namespace Anitomy;


/// <summary>
/// Manages the list of known anime keywords. This class is analogous to <code>keyword.cpp</code> of Anitomy, and <code>KeywordManager.java</code> of AnitomyJ.
/// </summary>
/// <remarks>
/// Keywords are stored upper-cased; callers are expected to pass keywords through
/// <see cref="Normalize"/> before looking them up. File extensions live in their own
/// container so that e.g. "TS" can be both a file extension and an "Other" keyword.
/// </remarks>
public static class KeywordManager
{
    private static readonly Dictionary<string, Keyword> Keys = [];
    private static readonly Dictionary<string, Keyword> Extensions = [];
    private static readonly List<(ElementCategory Category, List<string> Keywords)> PeekEntries;

    static KeywordManager()
    {
        // KeywordOptions arguments are (identifiable, searchable, valid).
        var optionsDefault = new KeywordOptions();
        var optionsInvalid = new KeywordOptions(true, true, false);
        var optionsUnidentifiable = new KeywordOptions(false, true, true);
        var optionsUnidentifiableInvalid = new KeywordOptions(false, true, false);
        var optionsUnidentifiableUnsearchable = new KeywordOptions(false, false, true);

        Add(ElementCategory.AnimeSeasonPrefix, optionsUnidentifiable, ["SAISON", "SEASON"]);

        Add(ElementCategory.AnimeType, optionsUnidentifiable, ["GEKIJOUBAN", "MOVIE", "OAD", "OAV", "ONA", "OVA", "SPECIAL", "SPECIALS", "TV"]);

        Add(ElementCategory.AnimeType, optionsUnidentifiableUnsearchable, ["SP"]); // e.g. "Yumeiro Patissiere SP Professional"

        Add(ElementCategory.AnimeType, optionsUnidentifiableInvalid, ["ED", "ENDING", "NCED", "NCOP", "OP", "OPENING", "PREVIEW", "PV"]);

        Add(ElementCategory.AudioTerm, optionsDefault,
        [
            // Audio channels
            "2.0CH", "2CH", "5.1", "5.1CH", "DTS", "DTS-ES", "DTS5.1", "TRUEHD5.1",
            // Audio codec
            "AAC", "AACX2", "AACX3", "AACX4", "AC3", "EAC3", "E-AC-3",
            "FLAC", "FLACX2", "FLACX3", "FLACX4", "LOSSLESS", "MP3", "OGG", "VORBIS",
            // Audio language
            "DUALAUDIO", "DUAL AUDIO"
        ]);

        Add(ElementCategory.DeviceCompatibility, optionsDefault, ["IPAD3", "IPHONE5", "IPOD", "PS3", "XBOX", "XBOX360"]);

        Add(ElementCategory.DeviceCompatibility, optionsUnidentifiable, ["ANDROID"]);

        Add(ElementCategory.EpisodePrefix, optionsDefault, ["EP", "EP.", "EPS", "EPS.", "EPISODE", "EPISODE.", "EPISODES", "CAPITULO", "EPISODIO", "FOLGE"]);

        // Single-letter episode keywords are not valid tokens. "\u7B2C" is the single character U+7B2C;
        // the previous "\\x7B2C" was a six-character string starting with a literal backslash.
        Add(ElementCategory.EpisodePrefix, optionsInvalid, ["E", "\u7B2C"]);

        Add(ElementCategory.FileExtension, optionsDefault, ["3GP", "AVI", "DIVX", "FLV", "M2TS", "MKV", "MOV", "MP4", "MPG", "OGM", "RM", "RMVB", "TS", "WEBM", "WMV"]);

        Add(ElementCategory.FileExtension, optionsInvalid, ["AAC", "AIFF", "FLAC", "M4A", "MP3", "MKA", "OGG", "WAV", "WMA", "7Z", "RAR", "ZIP", "ASS", "SRT"]);

        Add(ElementCategory.Language, optionsDefault, ["ENG", "ENGLISH", "ESPANOL", "JAP", "PT-BR", "SPANISH", "VOSTFR"]);

        Add(ElementCategory.Language, optionsUnidentifiable, ["ESP", "ITA"]); // e.g. "Tokyo ESP", "Bokura ga Ita"

        Add(ElementCategory.Other, optionsDefault, ["REMASTER", "REMASTERED", "UNCENSORED", "UNCUT", "TS", "VFR", "WIDESCREEN", "WS"]);

        Add(ElementCategory.ReleaseGroup, optionsDefault, ["THORA"]);

        Add(ElementCategory.ReleaseInformation, optionsDefault, ["BATCH", "COMPLETE", "PATCH", "REMUX"]);

        Add(ElementCategory.ReleaseInformation, optionsUnidentifiable, ["END", "FINAL"]); // e.g. "The End of Evangelion", "Final Approach"

        Add(ElementCategory.ReleaseVersion, optionsDefault, ["V0", "V1", "V2", "V3", "V4"]);

        Add(ElementCategory.Source, optionsDefault, ["BD", "BDRIP", "BLURAY", "BLU-RAY", "DVD", "DVD5", "DVD9", "DVD-R2J", "DVDRIP", "DVD-RIP", "R2DVD", "R2J", "R2JDVD", "R2JDVDRIP", "HDTV", "HDTVRIP", "TVRIP", "TV-RIP", "WEBCAST", "WEBRIP"]);

        Add(ElementCategory.Subtitles, optionsDefault, ["ASS", "BIG5", "DUB", "DUBBED", "HARDSUB", "HARDSUBS", "RAW", "SOFTSUB", "SOFTSUBS", "SUB", "SUBBED", "SUBTITLED"]);

        Add(ElementCategory.VideoTerm, optionsDefault,
        [
            // Frame rate
            "23.976FPS", "24FPS", "29.97FPS", "30FPS", "60FPS", "120FPS",
            // Video codec
            "8BIT", "8-BIT", "10BIT", "10BITS", "10-BIT", "10-BITS",
            "HI10", "HI10P", "HI444", "HI444P", "HI444PP",
            "H264", "H265", "H.264", "H.265", "X264", "X265", "X.264",
            "AVC", "HEVC", "HEVC2", "DIVX", "DIVX5", "DIVX6", "XVID",
            // Video format
            "AVI", "RMVB", "WMV", "WMV3", "WMV9",
            // Video quality
            "HQ", "LQ",
            // Video resolution
            "HD", "SD"
        ]);

        Add(ElementCategory.VolumePrefix, optionsDefault, ["VOL", "VOL.", "VOLUME"]);

        // Keywords searched for verbatim (case-sensitive) by PeekAndAdd.
        PeekEntries =
        [
            (ElementCategory.AudioTerm, new List<string> { "Dual Audio" }),
            (ElementCategory.VideoTerm, new List<string> { "H264", "H.264", "h264", "h.264" }),
            (ElementCategory.VideoResolution, new List<string> { "480p", "720p", "1080p" }),
            (ElementCategory.Source, new List<string> { "Blu-Ray" })
        ];
    }

    /// <summary>
    /// Converts <paramref name="word"/> to the form used as dictionary key (invariant upper case).
    /// </summary>
    /// <param name="word">the word to normalize</param>
    /// <returns>the upper-cased word; null or empty input is returned unchanged</returns>
    public static string Normalize(string word)
    {
        return string.IsNullOrEmpty(word) ? word : word.ToUpperInvariant();
    }

    /// <summary>
    /// Checks whether <paramref name="keyword"/> is registered under <paramref name="category"/>.
    /// </summary>
    /// <param name="category">the category the keyword must belong to</param>
    /// <param name="keyword">the (normalized) keyword to look up</param>
    /// <returns>true if the keyword is registered with exactly that category; false for a null keyword</returns>
    public static bool Contains(ElementCategory category, string keyword)
    {
        // Normalize(null) yields null, and a dictionary lookup with a null key would throw.
        if (keyword is null)
        {
            return false;
        }

        return GetKeywordContainer(category).TryGetValue(keyword, out var foundEntry)
            && foundEntry.Category == category;
    }

    /// <summary>
    /// Finds a particular <code>keyword</code>. If found sets <code>category</code> and <code>options</code> to the found search result.
    /// </summary>
    /// <remarks>
    /// When <paramref name="category"/> is <see cref="ElementCategory.Unknown"/> any registered
    /// (non-extension) keyword matches and its category is written back; otherwise the keyword
    /// must be registered under exactly the requested category.
    /// </remarks>
    /// <param name="keyword">the keyword to search for</param>
    /// <param name="category">the reference that will be set/changed to the found keyword category</param>
    /// <param name="options">the reference that will be set/changed to the found keyword options</param>
    /// <returns>if the keyword was found</returns>
    public static bool FindAndSet(string keyword, ref ElementCategory category, ref KeywordOptions options)
    {
        // A null key would make the dictionary lookup throw.
        if (keyword is null)
        {
            return false;
        }

        if (!GetKeywordContainer(category).TryGetValue(keyword, out var foundEntry))
        {
            return false;
        }

        if (category == ElementCategory.Unknown)
        {
            category = foundEntry.Category;
        }
        else if (foundEntry.Category != category)
        {
            return false;
        }

        options = foundEntry.Options;
        return true;
    }

    /// <summary>
    /// Given a particular <code>filename</code> and <code>range</code> attempt to preidentify the token before we attempt the main parsing logic
    /// </summary>
    /// <remarks>
    /// Only the first occurrence of each peek keyword inside the range is reported. The search is
    /// ordinal (exact, case-sensitive) so that the reported offset and length always describe the
    /// matched characters. A range that lies outside <paramref name="filename"/> yields no results.
    /// </remarks>
    /// <param name="filename">the filename</param>
    /// <param name="range">the search range</param>
    /// <param name="elements">elements array that any pre-identified elements will be added to</param>
    /// <param name="preidentifiedTokens">elements array that any pre-identified token ranges will be added to</param>
    public static void PeekAndAdd(string filename, TokenRange range, List<Element> elements, List<TokenRange> preidentifiedTokens)
    {
        if (string.IsNullOrEmpty(filename) || range.Offset < 0 || range.Offset >= filename.Length)
        {
            return;
        }

        // Clamp the range to the end of the filename.
        var length = Math.Min(range.Size, filename.Length - range.Offset);
        if (length <= 0)
        {
            return;
        }

        foreach (var (category, keywords) in PeekEntries)
        {
            foreach (var keyword in keywords)
            {
                // Searching inside the filename directly returns an absolute index
                // and avoids allocating a substring for the range.
                var foundIdx = filename.IndexOf(keyword, range.Offset, length, StringComparison.Ordinal);
                if (foundIdx == -1)
                {
                    continue;
                }

                elements.Add(new Element(category, keyword));
                preidentifiedTokens.Add(new TokenRange(foundIdx, keyword.Length));
            }
        }
    }

    // Private API

    /// <summary>
    /// Returns the appropriate keyword container: file extensions have their own, all other categories share one.
    /// </summary>
    private static Dictionary<string, Keyword> GetKeywordContainer(ElementCategory category)
    {
        return category == ElementCategory.FileExtension ? Extensions : Keys;
    }

    /// <summary>
    /// Adds a <code>category</code>, <code>options</code>, and <code>keywords</code> to the internal keywords list.
    /// Null/empty keywords are skipped and the first registration of a keyword wins.
    /// </summary>
    private static void Add(ElementCategory category, KeywordOptions options, IEnumerable<string> keywords)
    {
        var keys = GetKeywordContainer(category);
        var entry = new Keyword(category, options);
        foreach (var key in keywords)
        {
            if (!string.IsNullOrEmpty(key))
            {
                // TryAdd leaves an existing registration untouched (single lookup).
                keys.TryAdd(key, entry);
            }
        }
    }
}

/// <summary>
/// The set of flags attached to a keyword: whether it is identifiable, searchable and valid.
/// </summary>
public class KeywordOptions
{
    /// <summary>
    /// Creates options with every flag set to <c>true</c>.
    /// </summary>
    public KeywordOptions()
        : this(true, true, true)
    {
    }

    /// <summary>
    /// Creates options with explicitly chosen flags.
    /// </summary>
    /// <param name="identifiable">if the token is identifiable</param>
    /// <param name="searchable">if the token is searchable</param>
    /// <param name="valid">if the token is valid</param>
    public KeywordOptions(bool identifiable, bool searchable, bool valid)
    {
        Identifiable = identifiable;
        Searchable = searchable;
        Valid = valid;
    }

    /// <summary>Whether the token is identifiable.</summary>
    public bool Identifiable { get; }

    /// <summary>Whether the token is searchable.</summary>
    public bool Searchable { get; }

    /// <summary>Whether the token is valid.</summary>
    public bool Valid { get; }
}

/// <summary>
/// A registered keyword entry: the category the keyword belongs to and the options that apply to it.
/// The keyword text itself is not stored here; <see cref="KeywordManager"/> uses it as the dictionary key.
/// </summary>
/// <param name="Category">the category the keyword is registered under</param>
/// <param name="Options">the options attached to the keyword</param>
public record Keyword(ElementCategory Category, KeywordOptions Options);
11 changes: 11 additions & 0 deletions Totoro.Core/Anitomy/Options.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

namespace Anitomy;

/// <summary>
/// Immutable settings that control how a filename is parsed.
/// </summary>
public class Options
{
    /// <summary>
    /// Creates a new set of parser options; every argument is optional.
    /// </summary>
    /// <param name="delimiters">the characters allowed as delimiters</param>
    /// <param name="episode">value for <see cref="ParseEpisodeNumber"/></param>
    /// <param name="title">value for <see cref="ParseEpisodeTitle"/></param>
    /// <param name="extension">value for <see cref="ParseFileExtension"/></param>
    /// <param name="group">value for <see cref="ParseReleaseGroup"/></param>
    public Options(
        string delimiters = " _.&+,|",
        bool episode = true,
        bool title = true,
        bool extension = true,
        bool group = true)
    {
        AllowedDelimiters = delimiters;
        ParseEpisodeNumber = episode;
        ParseEpisodeTitle = title;
        ParseFileExtension = extension;
        ParseReleaseGroup = group;
    }

    /// <summary>The characters allowed as delimiters.</summary>
    public string AllowedDelimiters { get; }

    /// <summary>Whether the episode number should be parsed.</summary>
    public bool ParseEpisodeNumber { get; }

    /// <summary>Whether the episode title should be parsed.</summary>
    public bool ParseEpisodeTitle { get; }

    /// <summary>Whether the file extension should be parsed.</summary>
    public bool ParseFileExtension { get; }

    /// <summary>Whether the release group should be parsed.</summary>
    public bool ParseReleaseGroup { get; }
}
Loading

0 comments on commit e666ab7

Please sign in to comment.