Skip to content

Commit

Permalink
Make MemoryIndex.TokenStreamAnonymousClass and PrefixTreeStrategy.Cel…
Browse files Browse the repository at this point in the history
…lTokenStream reusable
  • Loading branch information
paulirwin committed Dec 21, 2024
1 parent 6b9b031 commit e2ff6e9
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 9 deletions.
36 changes: 34 additions & 2 deletions src/Lucene.Net.Memory/MemoryIndex.cs
Original file line number Diff line number Diff line change
Expand Up @@ -284,23 +284,45 @@ public virtual TokenStream KeywordTokenStream<T>(ICollection<T> keywords)
return new TokenStreamAnonymousClass<T>(keywords);
}

/// <summary>
/// An anonymous implementation of <see cref="TokenStream"/> for
/// <see cref="KeywordTokenStream{T}(ICollection{T})"/>.
/// </summary>
/// <typeparam name="T">The type of item in the collection.</typeparam>
/// <remarks>
/// LUCENENET specific - This class originally got an enumerator in the constructor and stored it to a field
/// that was never reset, which meant that it could not be reused (since most IEnumerator implementations can
/// only be iterated once and throw on <see cref="System.Collections.IEnumerator.Reset()"/>). This class has
/// been modified to initialize <see cref="iter"/> on <see cref="TokenStream.Reset()"/> instead, which allows
/// it to be reused, per the TokenStream workflow contract of allowing <see cref="TokenStream.Reset()"/> after
/// <see cref="TokenStream.Close()"/>.
/// </remarks>
private sealed class TokenStreamAnonymousClass<T> : TokenStream
{
/// <summary>
/// Creates a token stream over the given <paramref name="keywords"/>.
/// </summary>
/// <param name="keywords">
/// The collection to enumerate. It is stored, and a fresh enumerator is obtained from it
/// each time <see cref="Reset()"/> is called.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="keywords"/> is <c>null</c>.</exception>
public TokenStreamAnonymousClass(ICollection<T> keywords)
{
    // LUCENENET specific - iter is intentionally NOT created here. It is created in Reset()
    // so that the stream can be reused; until then IncrementToken() throws because iter is null.
    this.keywords = keywords ?? throw new ArgumentNullException(nameof(keywords));
    start = 0;
    termAtt = AddAttribute<ICharTermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
}

private ICollection<T> keywords; // LUCENENET specific - see remarks above
private IEnumerator<T> iter;
private int start;
private readonly ICharTermAttribute termAtt;
private readonly IOffsetAttribute offsetAtt;

public override bool IncrementToken()
{
// LUCENENET specific - check for null iter
if (iter is null)
{
throw new InvalidOperationException("TokenStream is not properly initialized, IncrementToken() can only be called after Reset()");
}

if (!iter.MoveNext())
{
return false;
Expand All @@ -320,12 +342,22 @@ public override bool IncrementToken()
return true;
}

/// <summary>
/// Disposes the current enumerator, if there is one, and closes the stream.
/// </summary>
/// <remarks>
/// LUCENENET specific - added Close() method to clean up resources. The enumerator field is
/// cleared, so <see cref="IncrementToken()"/> throws until <see cref="Reset()"/> obtains a
/// new enumerator.
/// </remarks>
public override void Close()
{
    iter?.Dispose();
    iter = null; // a new enumerator is created by the next Reset()
    base.Close();
}

/// <summary>
/// Starts a new pass over the keywords: disposes any enumerator left over from a previous
/// pass, obtains a new one from the stored collection and rewinds the start offset to zero.
/// </summary>
/// <remarks>
/// LUCENENET specific - added Reset() method to allow reuse of the TokenStream.
/// </remarks>
public override void Reset()
{
    if (iter != null)
    {
        iter.Dispose();
    }

    iter = keywords.GetEnumerator();
    start = 0;
    base.Reset();
}
}

/// <summary>
Expand Down
43 changes: 36 additions & 7 deletions src/Lucene.Net.Spatial/Prefix/PrefixTreeStrategy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,8 @@ public virtual Field[] CreateIndexableFields(IShape? shape, double distErr)
//TODO is CellTokenStream supposed to be re-used somehow? see Uwe's comments:
// http://code.google.com/p/lucene-spatial-playground/issues/detail?id=4

Field field = new Field(FieldName, new CellTokenStream(cells.GetEnumerator()), FIELD_TYPE);
// LUCENENET specific - see remarks in CellTokenStream for why we're passing IEnumerable<Cell> instead of IEnumerator<Cell>
Field field = new Field(FieldName, new CellTokenStream(cells), FIELD_TYPE);
return new Field[] { field };
}

Expand All @@ -157,17 +158,28 @@ public virtual Field[] CreateIndexableFields(IShape? shape, double distErr)
IndexOptions = IndexOptions.DOCS_ONLY
}.Freeze();

/// <summary>
/// Outputs the tokenString of a cell, and if it's a leaf, outputs it again with the leaf byte.
/// </summary>
/// <remarks>
/// LUCENENET specific - This class originally took an enumerator, which meant that it could not
/// be reused (since most IEnumerator implementations can only be iterated once and throw on
/// <see cref="System.Collections.IEnumerator.Reset()"/>). This class has been modified to take an
/// <c>IEnumerable&lt;Cell&gt;</c> instead, which allows it to be reused, per the TokenStream
/// workflow contract of allowing <see cref="TokenStream.Reset()"/> after <see cref="TokenStream.Close()"/>.
/// </remarks>
internal sealed class CellTokenStream : TokenStream
{
private readonly ICharTermAttribute termAtt;

// LUCENENET specific - the source sequence is stored (rather than a single enumerator) so that
// a fresh enumerator can be obtained on every Reset(), which is what makes the stream reusable.
private readonly IEnumerable<Cell> enumerable;

// The enumerator for the current pass; null until Reset() is called and again after Close().
private IEnumerator<Cell>? iter;

/// <summary>
/// Creates a token stream over the given cells.
/// </summary>
/// <param name="tokens">The cells to emit tokens for; enumerated anew on each <see cref="Reset()"/>.</param>
/// <exception cref="ArgumentNullException"><paramref name="tokens"/> is <c>null</c>.</exception>
public CellTokenStream(IEnumerable<Cell> tokens)
{
    // LUCENENET specific - added guard clause
    enumerable = tokens ?? throw new ArgumentNullException(nameof(tokens));
    // LUCENENET NOTE: not initializing iter here, should be done in Reset()
    termAtt = AddAttribute<ICharTermAttribute>();
}

Expand All @@ -183,9 +195,16 @@ public override bool IncrementToken()
nextTokenStringNeedingLeaf = null;
return true;
}

// LUCENENET specific: throw if iter has not been initialized
if (iter is null)
{
throw new InvalidOperationException("TokenStream is not initialized. Call Reset() before IncrementToken().");
}

if (iter.MoveNext())
{
Cell cell = iter.Current;
Cell cell = iter.Current ?? throw new InvalidOperationException("CellTokenStream received a null Cell from the enumerator.");
string token = cell.TokenString;
termAtt.Append(token);
if (cell.IsLeaf)
Expand All @@ -197,11 +216,21 @@ public override bool IncrementToken()
return false;
}

/// <summary>
/// Disposes the current enumerator, if there is one, and closes the stream.
/// </summary>
/// <remarks>
/// LUCENENET specific - added Close() method to clean up resources. The enumerator is
/// null before the first <see cref="Reset()"/> and after a previous Close(), so the dispose
/// call must be null-conditional; an unguarded call would throw in those cases.
/// </remarks>
public override void Close()
{
    iter?.Dispose();
    iter = null; // a new enumerator is created by the next Reset()
    base.Close();
}

/// <summary>
/// Starts a new pass over the cells: disposes any enumerator left over from a previous
/// pass and obtains a new one from the stored sequence.
/// </summary>
/// <remarks>
/// LUCENENET specific - added Reset() method to allow for reuse of the TokenStream.
/// </remarks>
public override void Reset()
{
    if (iter != null)
    {
        iter.Dispose();
    }

    iter = enumerable.GetEnumerator();
    base.Reset();
}
}

public override ValueSource MakeDistanceValueSource(IPoint queryPoint, double multiplier)
Expand Down

0 comments on commit e2ff6e9

Please sign in to comment.