Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IAttribute and ICharTermAttribute method changes, #1038 #1049

Merged
merged 8 commits into from
Dec 3, 2024
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using System;

namespace Lucene.Net.Analysis.Br
Expand Down Expand Up @@ -41,7 +42,7 @@ public sealed class BrazilianStemFilter : TokenFilter
private readonly IKeywordAttribute keywordAttr;

/// <summary>
/// Creates a new <see cref="BrazilianStemFilter"/>
/// Creates a new <see cref="BrazilianStemFilter"/>
/// </summary>
/// <param name="in"> the source <see cref="TokenStream"/> </param>
public BrazilianStemFilter(TokenStream @in)
Expand Down Expand Up @@ -74,4 +75,4 @@ public override bool IncrementToken()
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Lucene version compatibility level 4.8.1
using J2N.Text;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Diagnostics;
using Lucene.Net.Util;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using System;

namespace Lucene.Net.Analysis.De
Expand All @@ -22,7 +23,7 @@ namespace Lucene.Net.Analysis.De
*/

/// <summary>
/// A <see cref="TokenFilter"/> that stems German words.
/// A <see cref="TokenFilter"/> that stems German words.
/// <para>
/// It supports a table of words that should
/// not be stemmed at all. The stemmer used can be changed at runtime after the
Expand Down Expand Up @@ -93,4 +94,4 @@ public GermanStemmer Stemmer
}
}
}
}
}
9 changes: 5 additions & 4 deletions src/Lucene.Net.Analysis.Common/Analysis/En/KStemFilter.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using System.IO;

namespace Lucene.Net.Analysis.En
Expand Down Expand Up @@ -30,13 +31,13 @@ namespace Lucene.Net.Analysis.En
/// Conference on Research and Development in Information Retrieval, 191-203, 1993).
/// <para/>
/// All terms must already be lowercased for this filter to work correctly.
///
///
/// <para>
/// Note: This filter is aware of the <see cref="IKeywordAttribute"/>. To prevent
/// certain terms from being passed to the stemmer
/// <see cref="IKeywordAttribute.IsKeyword"/> should be set to <code>true</code>
/// in a previous <see cref="TokenStream"/>.
///
///
/// Note: For including the original term as well as the stemmed version, see
/// <see cref="Miscellaneous.KeywordRepeatFilterFactory"/>
/// </para>
Expand All @@ -47,7 +48,7 @@ public sealed class KStemFilter : TokenFilter
private readonly ICharTermAttribute termAttribute;
private readonly IKeywordAttribute keywordAtt;

public KStemFilter(TokenStream @in)
public KStemFilter(TokenStream @in)
: base(@in)
{
termAttribute = AddAttribute<ICharTermAttribute>();
Expand Down Expand Up @@ -75,4 +76,4 @@ public override bool IncrementToken()
return true;
}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using System;

namespace Lucene.Net.Analysis.Fr
Expand All @@ -22,7 +23,7 @@ namespace Lucene.Net.Analysis.Fr
*/

/// <summary>
/// A <see cref="TokenFilter"/> that stems French words.
/// A <see cref="TokenFilter"/> that stems French words.
/// <para>
/// The used stemmer can be changed at runtime after the
/// filter object is created (as long as it is a <see cref="FrenchStemmer"/>).
Expand All @@ -33,9 +34,9 @@ namespace Lucene.Net.Analysis.Fr
/// the <see cref="IKeywordAttribute"/> before this <see cref="TokenStream"/>.
/// </para> </summary>
/// <seealso cref="Miscellaneous.KeywordMarkerFilter"/>
/// @deprecated (3.1) Use <see cref="Snowball.SnowballFilter"/> with
/// @deprecated (3.1) Use <see cref="Snowball.SnowballFilter"/> with
/// <see cref="Tartarus.Snowball.Ext.FrenchStemmer"/> instead, which has the
/// same functionality. This filter will be removed in Lucene 5.0
/// same functionality. This filter will be removed in Lucene 5.0
[Obsolete("(3.1) Use SnowballFilter with FrenchStemmer instead, which has the same functionality. This filter will be removed in Lucene 5.0")]
public sealed class FrenchStemFilter : TokenFilter
{
Expand Down Expand Up @@ -93,4 +94,4 @@ public FrenchStemmer Stemmer
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.10.4
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using Lucene.Net.Util;
using System.Collections.Generic;
using JCG = J2N.Collections.Generic;
Expand All @@ -24,20 +25,20 @@ namespace Lucene.Net.Analysis.Hunspell
*/

/// <summary>
/// <see cref="TokenFilter"/> that uses hunspell affix rules and words to stem tokens.
/// Since hunspell supports a word having multiple stems, this filter can emit
/// <see cref="TokenFilter"/> that uses hunspell affix rules and words to stem tokens.
/// Since hunspell supports a word having multiple stems, this filter can emit
/// multiple tokens for each consumed token
///
///
/// <para>
/// Note: This filter is aware of the <see cref="IKeywordAttribute"/>. To prevent
/// certain terms from being passed to the stemmer
/// <see cref="IKeywordAttribute.IsKeyword"/> should be set to <c>true</c>
/// in a previous <see cref="TokenStream"/>.
///
///
/// Note: For including the original term as well as the stemmed version, see
/// <see cref="Miscellaneous.KeywordRepeatFilterFactory"/>
/// </para>
///
///
/// @lucene.experimental
/// </summary>
public sealed class HunspellStemFilter : TokenFilter
Expand Down Expand Up @@ -160,4 +161,4 @@ public override void Reset()
}
});
}
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Support;
using Lucene.Net.Util;
Expand Down Expand Up @@ -38,26 +39,26 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <see cref="StopFilter"/> into a single efficient
/// multi-purpose class.
/// <para>
/// If you are unsure what exactly a regular expression should look like, consider
/// If you are unsure what exactly a regular expression should look like, consider
/// prototyping by simply trying various expressions on some test texts via
/// <see cref="Regex.Split(string)"/>. Once you are satisfied, give that regex to
/// <see cref="PatternAnalyzer"/>. Also see <a target="_blank"
/// <see cref="Regex.Split(string)"/>. Once you are satisfied, give that regex to
/// <see cref="PatternAnalyzer"/>. Also see <a target="_blank"
/// href="http://www.regular-expressions.info/">Regular Expression Tutorial</a>.
/// </para>
/// <para>
/// This class can be considerably faster than the "normal" Lucene tokenizers.
/// This class can be considerably faster than the "normal" Lucene tokenizers.
/// It can also serve as a building block in a compound Lucene
/// <see cref="TokenFilter"/> chain. For example as in this
/// <see cref="TokenFilter"/> chain. For example as in this
/// stemming example:
/// <code>
/// PatternAnalyzer pat = ...
/// TokenStream tokenStream = new SnowballFilter(
/// pat.GetTokenStream("content", "James is running round in the woods"),
/// pat.GetTokenStream("content", "James is running round in the woods"),
/// "English"));
/// </code>
/// </para>
/// </summary>
/// @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead.
/// @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead.
[Obsolete("(4.0) use the pattern-based analysis in the analysis/pattern package instead.")]
public sealed class PatternAnalyzer : Analyzer
{
Expand Down Expand Up @@ -196,8 +197,8 @@ public PatternAnalyzer(LuceneVersion matchVersion, Regex pattern, bool toLowerCa
/// <returns> a new token stream </returns>
public TokenStreamComponents CreateComponents(string fieldName, TextReader reader, string text)
{
// Ideally the Analyzer superclass should have a method with the same signature,
// with a default impl that simply delegates to the StringReader flavour.
// Ideally the Analyzer superclass should have a method with the same signature,
// with a default impl that simply delegates to the StringReader flavour.
if (reader is null)
{
reader = new FastStringReader(text);
Expand Down Expand Up @@ -448,9 +449,9 @@ public override void Reset()

// LUCENENET: Since we need to "reset" the Match
// object, we also need an "isReset" flag to indicate
// whether we are at the head of the match and to
// take the appropriate measures to ensure we don't
// overwrite our matcher variable with
// whether we are at the head of the match and to
// take the appropriate measures to ensure we don't
// overwrite our matcher variable with
// matcher = matcher.NextMatch();
// before it is time. A string could potentially
// match on index 0, so we need another variable to
Expand Down Expand Up @@ -528,10 +529,10 @@ public override bool IncrementToken()
{
text = text.ToLower(); // LUCENENET: Since this class is obsolete, we aren't going to bother with passing culture in the constructor.
}
// if (toLowerCase) {
// if (toLowerCase) {
//// use next line once JDK 1.5 String.toLowerCase() performance regression is fixed
//// see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6265809
// text = s.substring(start, i).toLowerCase();
// text = s.substring(start, i).toLowerCase();
//// char[] chars = new char[i-start];
//// for (int j=start; j < i; j++) chars[j-start] = Character.toLowerCase(s.charAt(j));
//// text = new String(chars);
Expand Down Expand Up @@ -607,4 +608,4 @@ internal FastStringReader(string s)
internal string String => s;
}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Lucene version compatibility level 4.8.1
using System;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using Lucene.Net.Util;

namespace Lucene.Net.Analysis.Miscellaneous
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Lucene version compatibility level 8.2.0
// LUCENENET NOTE: Ported because Lucene.Net.Analysis.OpenNLP requires this to be useful.
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using Lucene.Net.Util;
#nullable enable

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using Lucene.Net.Util;
using System;
using System.IO;
Expand Down Expand Up @@ -305,4 +306,4 @@ internal static bool IsDefined(this Lucene43EdgeNGramTokenizer.Side side)
#pragma warning restore CS0612 // Type or member is obsolete

}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using System;
using System.IO;

Expand Down Expand Up @@ -171,4 +172,4 @@ public override void Reset()
pos = 0;
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ public int PositionIncrement

// LUCENENET specific - this member is declared on the interface (it was added to avoid casts),
// so it has to be implemented here. The target is deliberately discarded; nothing is copied.
public void CopyTo(IAttribute target)
{
    _ = target;
}
paulirwin marked this conversation as resolved.
Show resolved Hide resolved

// Intentionally empty: this implementation performs no work when cleared.
// NOTE(review): presumably present only to satisfy the attribute interface - confirm.
public void Clear()
{
}
}

private sealed class PositionLengthAttributeAnonymousClass : IPositionLengthAttribute
Expand All @@ -136,6 +140,10 @@ public int PositionLength

// LUCENENET specific - this member is declared on the interface (it was added to avoid casts),
// so it has to be implemented here. The target is deliberately discarded; nothing is copied.
public void CopyTo(IAttribute target)
{
    _ = target;
}
paulirwin marked this conversation as resolved.
Show resolved Hide resolved

// Intentionally empty: this implementation performs no work when cleared.
// NOTE(review): presumably present only to satisfy the attribute interface - confirm.
public void Clear()
{
}
}

/// <summary>
Expand Down Expand Up @@ -233,4 +241,4 @@ public override void Reset()
curTermBuffer = null;
}
}
}
}
13 changes: 7 additions & 6 deletions src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using Lucene.Net.Analysis.Util;
using System;
using System.Collections.Generic;
Expand All @@ -24,7 +25,7 @@ namespace Lucene.Net.Analysis.Nl
*/

/// <summary>
/// A <see cref="TokenFilter"/> that stems Dutch words.
/// A <see cref="TokenFilter"/> that stems Dutch words.
/// <para>
/// It supports a table of words that should
/// not be stemmed at all. The stemmer used can be changed at runtime after the
Expand All @@ -34,12 +35,12 @@ namespace Lucene.Net.Analysis.Nl
/// To prevent terms from being stemmed use an instance of
/// <see cref="Miscellaneous.KeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
/// the <see cref="IKeywordAttribute"/> before this <see cref="TokenStream"/>.
/// </para>
/// </para>
/// </summary>
/// <seealso cref="Miscellaneous.KeywordMarkerFilter"/>
/// @deprecated (3.1) Use <see cref="Snowball.SnowballFilter"/> with
/// @deprecated (3.1) Use <see cref="Snowball.SnowballFilter"/> with
/// <see cref="Tartarus.Snowball.Ext.DutchStemmer"/> instead, which has the
/// same functionality. This filter will be removed in Lucene 5.0
/// same functionality. This filter will be removed in Lucene 5.0
[Obsolete("(3.1) Use Snowball.SnowballFilter with Tartarus.Snowball.Ext.DutchStemmer instead, which has the same functionality. This filter will be removed in Lucene 5.0")]
public sealed class DutchStemFilter : TokenFilter
{
Expand All @@ -61,7 +62,7 @@ public DutchStemFilter(TokenStream @in)

/// <param name="in"> Input <see cref="TokenStream"/> </param>
/// <param name="stemdictionary"> Dictionary of word stem pairs, that overrule the algorithm </param>
public DutchStemFilter(TokenStream @in, IDictionary<string, string> stemdictionary)
public DutchStemFilter(TokenStream @in, IDictionary<string, string> stemdictionary)
: this(@in)
{
stemmer.StemDictionary = stemdictionary;
Expand Down Expand Up @@ -132,4 +133,4 @@ public CharArrayDictionary<string> StemDictionary
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using System.Text.RegularExpressions;

namespace Lucene.Net.Analysis.Pattern
Expand All @@ -24,7 +25,7 @@ namespace Lucene.Net.Analysis.Pattern
/// <summary>
/// A TokenFilter which applies a <see cref="Regex"/> to each token in the stream,
/// replacing match occurrences with the specified replacement string.
///
///
/// <para>
/// <b>Note:</b> Depending on the input and the pattern used and the input
/// <see cref="TokenStream"/>, this <see cref="TokenFilter"/> may produce <see cref="Token"/>s whose text is the empty
Expand Down Expand Up @@ -73,4 +74,4 @@ public override bool IncrementToken()
return true;
}
}
}
}
Loading
Loading