Skip to content

Commit

Permalink
Convert static readonly fields to const, #662
Browse files Browse the repository at this point in the history
  • Loading branch information
paulirwin committed Dec 31, 2024
1 parent d96dc9e commit 3361bcb
Show file tree
Hide file tree
Showing 83 changed files with 372 additions and 357 deletions.
30 changes: 15 additions & 15 deletions src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKBigramFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public enum CJKScript
/// Forms bigrams of CJK terms that are generated from <see cref="StandardTokenizer"/>
/// or ICUTokenizer.
/// <para>
/// CJK types are set by these tokenizers, but you can also use
/// CJK types are set by these tokenizers, but you can also use
/// <see cref="CJKBigramFilter(TokenStream, CJKScript)"/> to explicitly control which
/// of the CJK scripts are turned into bigrams.
/// </para>
Expand Down Expand Up @@ -83,8 +83,8 @@ public sealed class CJKBigramFilter : TokenFilter
private static readonly string KATAKANA_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.KATAKANA];
private static readonly string HANGUL_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HANGUL];

// sentinel value for ignoring a script
private static readonly string NO = "<NO>";
// sentinel value for ignoring a script
private const string NO = "<NO>";

// these are set to either their type or NO if we want to pass them thru
private readonly string doHan;
Expand Down Expand Up @@ -133,7 +133,7 @@ public CJKBigramFilter(TokenStream @in)
/// </summary>
/// <param name="in">
/// Input <see cref="TokenStream"/> </param>
/// <param name="flags"> OR'ed set from <see cref="CJKScript.HAN"/>, <see cref="CJKScript.HIRAGANA"/>,
/// <param name="flags"> OR'ed set from <see cref="CJKScript.HAN"/>, <see cref="CJKScript.HIRAGANA"/>,
/// <see cref="CJKScript.KATAKANA"/>, <see cref="CJKScript.HANGUL"/> </param>
public CJKBigramFilter(TokenStream @in, CJKScript flags)
: this(@in, flags, false)
Expand All @@ -145,7 +145,7 @@ public CJKBigramFilter(TokenStream @in, CJKScript flags)
/// and whether or not unigrams should also be output. </summary>
/// <param name="in">
/// Input <see cref="TokenStream"/> </param>
/// <param name="flags"> OR'ed set from <see cref="CJKScript.HAN"/>, <see cref="CJKScript.HIRAGANA"/>,
/// <param name="flags"> OR'ed set from <see cref="CJKScript.HAN"/>, <see cref="CJKScript.HIRAGANA"/>,
/// <see cref="CJKScript.KATAKANA"/>, <see cref="CJKScript.HANGUL"/> </param>
/// <param name="outputUnigrams"> true if unigrams for the selected writing systems should also be output.
/// when this is false, this is only done when there are no adjacent characters to form
Expand All @@ -166,8 +166,8 @@ public CJKBigramFilter(TokenStream @in, CJKScript flags, bool outputUnigrams)
}

/*
* much of this complexity revolves around handling the special case of a
* "lone cjk character" where cjktokenizer would output a unigram. this
* much of this complexity revolves around handling the special case of a
* "lone cjk character" where cjktokenizer would output a unigram. this
* is also the only time we ever have to captureState.
*/
public override bool IncrementToken()
Expand All @@ -186,7 +186,7 @@ public override bool IncrementToken()
// when also outputting unigrams, we output the unigram first,
// then rewind back to revisit the bigram.
// so an input of ABC is A + (rewind)AB + B + (rewind)BC + C
// the logic in hasBufferedUnigram ensures we output the C,
// the logic in hasBufferedUnigram ensures we output the C,
// even though it did actually have adjacent CJK characters.

if (ngramState)
Expand Down Expand Up @@ -225,7 +225,7 @@ public override bool IncrementToken()
{

// we have a buffered unigram, and we peeked ahead to see if we could form
// a bigram, but we can't, because the offsets are unaligned. capture the state
// a bigram, but we can't, because the offsets are unaligned. capture the state
// of this peeked data to be revisited next time thru the loop, and dump our unigram.

loneState = CaptureState();
Expand All @@ -246,7 +246,7 @@ public override bool IncrementToken()
{

// we have a buffered unigram, and we peeked ahead to see if we could form
// a bigram, but we can't, because it's not a CJK type. capture the state
// a bigram, but we can't, because it's not a CJK type. capture the state
// of this peeked data to be revisited next time thru the loop, and dump our unigram.

loneState = CaptureState();
Expand All @@ -259,7 +259,7 @@ public override bool IncrementToken()
else
{

// case 3: we have only zero or 1 codepoints buffered,
// case 3: we have only zero or 1 codepoints buffered,
// so not enough to form a bigram. But, we also have no
// more input. So if we have a buffered codepoint, emit
// a unigram, otherwise, it's end of stream.
Expand All @@ -277,7 +277,7 @@ public override bool IncrementToken()
private State loneState; // rarely used: only for "lone cjk characters", where we emit unigrams

/// <summary>
/// looks at next input token, returning false if none is available
/// looks at next input token, returning false if none is available
/// </summary>
private bool DoNext()
{
Expand Down Expand Up @@ -359,7 +359,7 @@ private void Refill()
}

/// <summary>
/// Flushes a bigram token to output from our buffer
/// Flushes a bigram token to output from our buffer
/// This is the normal case, e.g. ABC -> AB BC
/// </summary>
private void FlushBigram()
Expand All @@ -383,7 +383,7 @@ private void FlushBigram()
/// <summary>
/// Flushes a unigram token to output from our buffer.
/// This happens when we encounter isolated CJK characters, either the whole
/// CJK string is a single character, or we encounter a CJK character surrounded
/// CJK string is a single character, or we encounter a CJK character surrounded
/// by space, punctuation, english, etc, but not beside any other CJK.
/// </summary>
private void FlushUnigram()
Expand Down Expand Up @@ -435,4 +435,4 @@ public override void Reset()
ngramState = false;
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// </summary>
public sealed class CapitalizationFilter : TokenFilter
{
public static readonly int DEFAULT_MAX_WORD_COUNT = int.MaxValue;
public static readonly int DEFAULT_MAX_TOKEN_LENGTH = int.MaxValue;
public const int DEFAULT_MAX_WORD_COUNT = int.MaxValue;
public const int DEFAULT_MAX_TOKEN_LENGTH = int.MaxValue;

private readonly bool onlyFirstWord;
private readonly CharArraySet keep;
Expand Down Expand Up @@ -269,4 +269,4 @@ private void ProcessWord(char[] buffer, int offset, int length, int wordCount)
//return word.toString();
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ namespace Lucene.Net.Analysis.Icu.Segmentation
/// to many languages.
/// </summary>
/// <remarks>
/// Generally tokenizes Unicode text according to UAX#29
/// (<see cref="T:BreakIterator.GetWordInstance(ULocale.ROOT)"/>),
/// Generally tokenizes Unicode text according to UAX#29
/// (<see cref="T:BreakIterator.GetWordInstance(ULocale.ROOT)"/>),
/// but with the following tailorings:
/// <list type="bullet">
/// <item><description>Thai, Lao, Myanmar, Khmer, and CJK text is broken into words with a dictionary.</description></item>
Expand All @@ -54,6 +54,7 @@ public class DefaultICUTokenizerConfig : ICUTokenizerConfig
/// <summary>Token type for words that appear to be numbers</summary>
public static readonly string WORD_NUMBER = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.NUM];
/// <summary>Token type for words that appear to be emoji sequences</summary>
// ReSharper disable once ConvertToConstant.Global - matches the fields above to keep it static readonly
public static readonly string WORD_EMOJI = "<EMOJI>"; //StandardTokenizer.TOKEN_TYPES[StandardTokenizer.EMOJI]; // LUCENENET: 4.8.1 StandardTokenizer doesn't contain EMOJI

/// <summary>
Expand Down
23 changes: 13 additions & 10 deletions src/Lucene.Net.Analysis.Kuromoji/Dict/BinaryDictionary.cs
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,14 @@ namespace Lucene.Net.Analysis.Ja.Dict
/// </summary>
public abstract class BinaryDictionary : IDictionary
{
public static readonly string DICT_FILENAME_SUFFIX = "$buffer.dat";
public static readonly string TARGETMAP_FILENAME_SUFFIX = "$targetMap.dat";
public static readonly string POSDICT_FILENAME_SUFFIX = "$posDict.dat";

public static readonly string DICT_HEADER = "kuromoji_dict";
public static readonly string TARGETMAP_HEADER = "kuromoji_dict_map";
public static readonly string POSDICT_HEADER = "kuromoji_dict_pos";
public const string DICT_FILENAME_SUFFIX = "$buffer.dat";
public const string TARGETMAP_FILENAME_SUFFIX = "$targetMap.dat";
public const string POSDICT_FILENAME_SUFFIX = "$posDict.dat";

public const string DICT_HEADER = "kuromoji_dict";
public const string TARGETMAP_HEADER = "kuromoji_dict_map";
public const string POSDICT_HEADER = "kuromoji_dict_pos";
// ReSharper disable once ConvertToConstant.Global - VERSION should be a field
public static readonly int VERSION = 1;

private readonly ByteBuffer buffer;
Expand Down Expand Up @@ -387,10 +388,12 @@ private string ReadString(int offset, int length, bool kana)
}

/// <summary>flag that the entry has baseform data. otherwise it's not inflected (same as surface form)</summary>
public static readonly int HAS_BASEFORM = 1;
public const int HAS_BASEFORM = 1;

/// <summary>flag that the entry has reading data. otherwise reading is surface form converted to katakana</summary>
public static readonly int HAS_READING = 2;
public const int HAS_READING = 2;

/// <summary>flag that the entry has pronunciation data. otherwise pronunciation is the reading</summary>
public static readonly int HAS_PRONUNCIATION = 4;
public const int HAS_PRONUNCIATION = 4;
}
}
29 changes: 15 additions & 14 deletions src/Lucene.Net.Analysis.Kuromoji/Dict/CharacterDefinition.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ namespace Lucene.Net.Analysis.Ja.Dict
/// </summary>
public sealed class CharacterDefinition
{
public static readonly string FILENAME_SUFFIX = ".dat";
public static readonly string HEADER = "kuromoji_cd";
public const string FILENAME_SUFFIX = ".dat";
public const string HEADER = "kuromoji_cd";
// ReSharper disable once ConvertToConstant.Global - VERSION should be a field
public static readonly int VERSION = 1;

public static readonly int CLASS_COUNT = Enum.GetValues(typeof(CharacterClass)).Length;
Expand All @@ -45,18 +46,18 @@ private enum CharacterClass : byte
private readonly bool[] groupMap = new bool[CLASS_COUNT];

// the classes:
public static readonly byte NGRAM = (byte)CharacterClass.NGRAM;
public static readonly byte DEFAULT = (byte)CharacterClass.DEFAULT;
public static readonly byte SPACE = (byte)CharacterClass.SPACE;
public static readonly byte SYMBOL = (byte)CharacterClass.SYMBOL;
public static readonly byte NUMERIC = (byte)CharacterClass.NUMERIC;
public static readonly byte ALPHA = (byte)CharacterClass.ALPHA;
public static readonly byte CYRILLIC = (byte)CharacterClass.CYRILLIC;
public static readonly byte GREEK = (byte)CharacterClass.GREEK;
public static readonly byte HIRAGANA = (byte)CharacterClass.HIRAGANA;
public static readonly byte KATAKANA = (byte)CharacterClass.KATAKANA;
public static readonly byte KANJI = (byte)CharacterClass.KANJI;
public static readonly byte KANJINUMERIC = (byte)CharacterClass.KANJINUMERIC;
public const byte NGRAM = (byte)CharacterClass.NGRAM;
public const byte DEFAULT = (byte)CharacterClass.DEFAULT;
public const byte SPACE = (byte)CharacterClass.SPACE;
public const byte SYMBOL = (byte)CharacterClass.SYMBOL;
public const byte NUMERIC = (byte)CharacterClass.NUMERIC;
public const byte ALPHA = (byte)CharacterClass.ALPHA;
public const byte CYRILLIC = (byte)CharacterClass.CYRILLIC;
public const byte GREEK = (byte)CharacterClass.GREEK;
public const byte HIRAGANA = (byte)CharacterClass.HIRAGANA;
public const byte KATAKANA = (byte)CharacterClass.KATAKANA;
public const byte KANJI = (byte)CharacterClass.KANJI;
public const byte KANJINUMERIC = (byte)CharacterClass.KANJINUMERIC;

private CharacterDefinition()
{
Expand Down
5 changes: 3 additions & 2 deletions src/Lucene.Net.Analysis.Kuromoji/Dict/ConnectionCosts.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ namespace Lucene.Net.Analysis.Ja.Dict
/// </summary>
public sealed class ConnectionCosts
{
public static readonly string FILENAME_SUFFIX = ".dat";
public static readonly string HEADER = "kuromoji_cc";
public const string FILENAME_SUFFIX = ".dat";
public const string HEADER = "kuromoji_cc";
// ReSharper disable once ConvertToConstant.Global - VERSION should be a field
public static readonly int VERSION = 1;

private readonly short[][] costs; // array is backward IDs first since get is called using the same backward ID consecutively. maybe doesn't matter.
Expand Down
4 changes: 2 additions & 2 deletions src/Lucene.Net.Analysis.Kuromoji/Dict/Dictionary.cs
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,9 @@ public interface IDictionary
// 'getAdditionalData' if other dictionaries like unidic have additional data
}

// LUCENENT TODO: Make this whole thing into an abstact class??
// LUCENENET TODO: Make this whole thing into an abstract class??
public static class Dictionary // LUCENENET specific: CA1052 Static holder types should be Static or NotInheritable
{
public static readonly string INTERNAL_SEPARATOR = "\u0000";
public const string INTERNAL_SEPARATOR = "\0";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ namespace Lucene.Net.Analysis.Ja.Dict
/// </summary>
public sealed class TokenInfoDictionary : BinaryDictionary
{
public static readonly string FST_FILENAME_SUFFIX = "$fst.dat";
public const string FST_FILENAME_SUFFIX = "$fst.dat";

private readonly TokenInfoFST fst;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ namespace Lucene.Net.Analysis.Ja
public class JapaneseIterationMarkCharFilter : CharFilter
{
/// <summary>Normalize kanji iteration marks by default</summary>
public static readonly bool NORMALIZE_KANJI_DEFAULT = true;
public const bool NORMALIZE_KANJI_DEFAULT = true;

/// <summary>Normalize kana iteration marks by default</summary>
public static readonly bool NORMALIZE_KANA_DEFAULT = true;
public const bool NORMALIZE_KANA_DEFAULT = true;

private const char KANJI_ITERATION_MARK = '\u3005'; // 々

Expand Down Expand Up @@ -167,17 +167,17 @@ public JapaneseIterationMarkCharFilter(TextReader input, bool normalizeKanji, bo
/// Reads a specified maximum number of characters from the current reader and writes the data to a buffer, beginning at the specified index.
/// </summary>
/// <param name="buffer">
/// When this method returns, contains the specified character array with the values between index and (index + count - 1)
/// When this method returns, contains the specified character array with the values between index and (index + count - 1)
/// replaced by the characters read from the current source.</param>
/// <param name="offset">
/// The position in buffer at which to begin writing.
/// </param>
/// <param name="length">
/// The maximum number of characters to read. If the end of the reader is reached before the specified number of characters is
/// The maximum number of characters to read. If the end of the reader is reached before the specified number of characters is
/// read into the buffer, the method returns.
/// </param>
/// <returns>
/// The number of characters that have been read. The number will be less than or equal to count, depending on whether the data is
/// The number of characters that have been read. The number will be less than or equal to count, depending on whether the data is
/// available within the reader. This method returns 0 (zero) if it is called when no more characters are left to read.
/// </returns>
public override int Read(char[] buffer, int offset, int length)
Expand Down
2 changes: 1 addition & 1 deletion src/Lucene.Net.Analysis.Kuromoji/JapaneseTokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public sealed class JapaneseTokenizer : Tokenizer
/// <summary>
/// Default tokenization mode. Currently this is <see cref="JapaneseTokenizerMode.SEARCH"/>.
/// </summary>
public static readonly JapaneseTokenizerMode DEFAULT_MODE = JapaneseTokenizerMode.SEARCH;
public const JapaneseTokenizerMode DEFAULT_MODE = JapaneseTokenizerMode.SEARCH;

// LUCENENET specific: de-nested Type and renamed JapaneseTokenizerType

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ namespace Lucene.Net.Analysis.Phonetic
public class DoubleMetaphoneFilterFactory : TokenFilterFactory
{
/// <summary>parameter name: true if encoded tokens should be added as synonyms</summary>
public static readonly string INJECT = "inject";
public const string INJECT = "inject";
/// <summary>parameter name: restricts the length of the phonetic code</summary>
public static readonly string MAX_CODE_LENGTH = "maxCodeLength";
public const string MAX_CODE_LENGTH = "maxCodeLength";
/// <summary>default maxCodeLength if not specified</summary>
public static readonly int DEFAULT_MAX_CODE_LENGTH = 4;
public const int DEFAULT_MAX_CODE_LENGTH = 4;

private readonly bool inject;
private readonly int maxCodeLength;
Expand Down
2 changes: 1 addition & 1 deletion src/Lucene.Net.Analysis.Phonetic/Language/Bm/Languages.cs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public class Languages
// exposes org/apache/commons/codec/language/bm/%s_languages.txt for %s in NameType.* as a list of supported
// languages, and a second part that provides instance methods for accessing this set of supported languages.

public static readonly string ANY = "any";
public const string ANY = "any";

private static readonly IDictionary<NameType, Languages> LANGUAGES = LoadLanguages();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public class RefinedSoundex : IStringEncoder
/// <summary>
/// since 1.4
/// </summary>
public static readonly string US_ENGLISH_MAPPING_STRING = "01360240043788015936020505";
public const string US_ENGLISH_MAPPING_STRING = "01360240043788015936020505";

/// <summary>
/// RefinedSoundex is *refined* for a number of reasons one being that the
Expand Down
Loading

0 comments on commit 3361bcb

Please sign in to comment.