Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement ISpanAppendable in CharBlockArray and CharTermAttributeImpl #1028

Merged
merged 4 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 44 additions & 96 deletions src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,11 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
/// @lucene.experimental
/// </summary>
// LUCENENET NOTE: The serialization features here are strictly for testing purposes,
// therefore it doesn't make any difference what type of serialization is used.
// To make things simpler, we are using BinaryReader and BinaryWriter since
// therefore it doesn't make any difference what type of serialization is used.
// To make things simpler, we are using BinaryReader and BinaryWriter since
// BinaryFormatter is not implemented in .NET Standard 1.x.
internal class CharBlockArray : IAppendable, ICharSequence
internal class CharBlockArray : IAppendable, ICharSequence,
ISpanAppendable /* LUCENENET specific */
{
private const long serialVersionUID = 1L;

Expand All @@ -65,8 +66,6 @@ public object Clone()
return clone;
}



// LUCENENET specific
public void Serialize(Stream writer)
{
Expand Down Expand Up @@ -192,28 +191,8 @@ public virtual CharBlockArray Append(char[]? value)
return this; // No-op
}

int remain = value.Length;
int offset = 0;
while (remain > 0)
{
if (this.current.length == this.blockSize)
{
AddBlock();
}
int toCopy = remain;
int remainingInBlock = this.blockSize - this.current.length;
if (remainingInBlock < toCopy)
{
toCopy = remainingInBlock;
}
Arrays.Copy(value, offset, this.current.chars, this.current.length, toCopy);
paulirwin marked this conversation as resolved.
Show resolved Hide resolved
offset += toCopy;
remain -= toCopy;
this.current.length += toCopy;
}

this.length += value.Length;
return this;
// LUCENENET specific - use ReadOnlySpan<char> version
return Append(value.AsSpan());
}

public virtual CharBlockArray Append(char[]? value, int startIndex, int length)
Expand All @@ -235,29 +214,8 @@ public virtual CharBlockArray Append(char[]? value, int startIndex, int length)
if (startIndex > value.Length - length)
throw new ArgumentOutOfRangeException(nameof(startIndex), $"Index and length must refer to a location within the string. For example {nameof(startIndex)} + {nameof(length)} <= {nameof(Length)}.");


int offset = startIndex;
int remain = length;
while (remain > 0)
{
if (this.current.length == this.blockSize)
{
AddBlock();
}
int toCopy = remain;
int remainingInBlock = this.blockSize - this.current.length;
if (remainingInBlock < toCopy)
{
toCopy = remainingInBlock;
}
Arrays.Copy(value, offset, this.current.chars, this.current.length, toCopy);
paulirwin marked this conversation as resolved.
Show resolved Hide resolved
offset += toCopy;
remain -= toCopy;
this.current.length += toCopy;
}

this.length += length;
return this;
// LUCENENET specific - use ReadOnlySpan<char> version
return Append(value.AsSpan(startIndex, length));
}

public virtual CharBlockArray Append(string? value)
Expand All @@ -267,28 +225,8 @@ public virtual CharBlockArray Append(string? value)
return this; // No-op
}

int remain = value.Length;
int offset = 0;
while (remain > 0)
{
if (this.current.length == this.blockSize)
{
AddBlock();
}
int toCopy = remain;
int remainingInBlock = this.blockSize - this.current.length;
if (remainingInBlock < toCopy)
{
toCopy = remainingInBlock;
}
value.CopyTo(offset, this.current.chars, this.current.length, toCopy);
offset += toCopy;
remain -= toCopy;
this.current.length += toCopy;
}

this.length += value.Length;
return this;
// LUCENENET specific - use ReadOnlySpan<char> version
return Append(value.AsSpan());
}

public virtual CharBlockArray Append(string? value, int startIndex, int length)
Expand All @@ -310,29 +248,8 @@ public virtual CharBlockArray Append(string? value, int startIndex, int length)
if (startIndex > value.Length - length)
throw new ArgumentOutOfRangeException(nameof(startIndex), $"Index and length must refer to a location within the string. For example {nameof(startIndex)} + {nameof(length)} <= {nameof(Length)}.");


int offset = startIndex;
int remain = length;
while (remain > 0)
{
if (this.current.length == this.blockSize)
{
AddBlock();
}
int toCopy = remain;
int remainingInBlock = this.blockSize - this.current.length;
if (remainingInBlock < toCopy)
{
toCopy = remainingInBlock;
}
value.CopyTo(offset, this.current.chars, this.current.length, toCopy);
paulirwin marked this conversation as resolved.
Show resolved Hide resolved
offset += toCopy;
remain -= toCopy;
this.current.length += toCopy;
}

this.length += length;
return this;
// LUCENENET specific - use ReadOnlySpan<char> version
return Append(value.AsSpan(startIndex, length));
}

public virtual CharBlockArray Append(StringBuilder? value)
Expand Down Expand Up @@ -409,6 +326,32 @@ public virtual CharBlockArray Append(StringBuilder? value, int startIndex, int l
return this;
}

/// <summary>
/// Appends the characters of <paramref name="value"/> to this instance.
/// The characters are copied into the current block; whenever the current
/// block is full, <c>AddBlock()</c> is called before copying continues.
/// </summary>
/// <param name="value">The characters to append. An empty span is a no-op.</param>
/// <returns>This instance, so calls can be chained.</returns>
public virtual CharBlockArray Append(ReadOnlySpan<char> value)
{
    // Consume the input from the front, one block-sized chunk at a time.
    ReadOnlySpan<char> pending = value;
    while (!pending.IsEmpty)
    {
        // Start a fresh block once the current one has no room left.
        if (this.current.length == this.blockSize)
        {
            AddBlock();
        }

        // Copy as much as fits into the space left in the current block.
        int room = this.blockSize - this.current.length;
        int chunk = room < pending.Length ? room : pending.Length;

        pending.Slice(0, chunk).CopyTo(this.current.chars.AsSpan(this.current.length));
        this.current.length += chunk;
        pending = pending.Slice(chunk);
    }

    // The overall length grows by the full size of the appended span.
    this.length += value.Length;
    return this;
}

#nullable restore

#region IAppendable Members
Expand All @@ -431,6 +374,11 @@ public virtual CharBlockArray Append(StringBuilder? value, int startIndex, int l

IAppendable IAppendable.Append(ICharSequence value, int startIndex, int count) => Append(value, startIndex, count);

#endregion

#region ISpanAppendable Members

// Explicit ISpanAppendable implementation: forwards to the Append(ReadOnlySpan<char>) overload
// of this class and returns its result typed as the interface.
ISpanAppendable ISpanAppendable.Append(ReadOnlySpan<char> value) => Append(value);

#endregion

Expand Down Expand Up @@ -612,4 +560,4 @@ internal bool Equals(int startIndex, int length, ReadOnlySpan<char> other)
return true;
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ public virtual void TestArray()
// CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
// .onUnmappableCharacter(CodingErrorAction.REPLACE)
// .onMalformedInput(CodingErrorAction.REPLACE);
//
// Encoding decoder = Encoding.GetEncoding(Encoding.UTF8.CodePage,
// new EncoderReplacementFallback("?"),
//
// Encoding decoder = Encoding.GetEncoding(Encoding.UTF8.CodePage,
// new EncoderReplacementFallback("?"),
// new DecoderReplacementFallback("?"));

for (int i = 0; i < n; i++)
Expand Down Expand Up @@ -247,6 +247,11 @@ public virtual void TestAppendableInterface()
expected = t.ToString();
t.Append((char[])null); // No-op
Assert.AreEqual(expected, t.ToString());

// LUCENENET specific - test ReadOnlySpan<char> overload
t = new CharBlockArray();
t.Append("12345678".AsSpan());
Assert.AreEqual("12345678", t.ToString());
}

// LUCENENET: Borrowed this test from TestCharTermAttributeImpl
Expand Down Expand Up @@ -285,6 +290,11 @@ public virtual void TestAppendableInterfaceWithLongSequences()
const string longTestString = "012345678901234567890123456789";
t.Append(new CharSequenceAnonymousClass(longTestString));
Assert.AreEqual("4567890123456" + longTestString, t.ToString());

// LUCENENET specific - test ReadOnlySpan<char> overload
t = new CharBlockArray();
t.Append("01234567890123456789012345678901234567890123456789".AsSpan());
Assert.AreEqual("01234567890123456789012345678901234567890123456789", t.ToString());
}

private sealed class CharSequenceAnonymousClass : ICharSequence
Expand Down Expand Up @@ -319,4 +329,4 @@ public override string ToString()
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,10 @@ public virtual void TestAppendableInterface()

t.Append((char[])null); // No-op
Assert.AreEqual("4teste", t.ToString());

// LUCENENET specific - test ReadOnlySpan<char> overload
t.SetEmpty().Append("12345678".AsSpan());
Assert.AreEqual("12345678", t.ToString());
}

[Test]
Expand Down Expand Up @@ -326,6 +330,10 @@ public virtual void TestAppendableInterfaceWithLongSequences()
const string longTestString = "012345678901234567890123456789";
t.Append(new CharSequenceAnonymousClass(longTestString));
Assert.AreEqual("4567890123456" + longTestString, t.ToString());

// LUCENENET specific - test ReadOnlySpan<char> overload
t.SetEmpty().Append("01234567890123456789012345678901234567890123456789".AsSpan());
Assert.AreEqual("01234567890123456789012345678901234567890123456789", t.ToString());
paulirwin marked this conversation as resolved.
Show resolved Hide resolved
}

private sealed class CharSequenceAnonymousClass : ICharSequence
Expand Down
21 changes: 11 additions & 10 deletions src/Lucene.Net/Analysis/TokenAttributes/CharTermAttribute.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ namespace Lucene.Net.Analysis.TokenAttributes
/// <summary>
/// The term text of a <see cref="Token"/>.
/// </summary>
public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable
public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable,
ISpanAppendable /* LUCENENET specific */
paulirwin marked this conversation as resolved.
Show resolved Hide resolved
{
/// <summary>
/// Copies the contents of buffer, starting at offset for
Expand All @@ -43,7 +44,7 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable
/// you can then directly alter. If the array is too
/// small for your token, use <see cref="ResizeBuffer(int)"/>
/// to increase it. After
/// altering the buffer be sure to call <see cref="SetLength(int)"/>
/// altering the buffer be sure to call <see cref="SetLength(int)"/>
/// to record the number of valid
/// characters that were placed into the termBuffer.
/// <para>
Expand Down Expand Up @@ -76,15 +77,15 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable
/// the termBuffer array. Use this to truncate the termBuffer
/// or to synchronize with external manipulation of the termBuffer.
/// Note: to grow the size of the array,
/// use <see cref="ResizeBuffer(int)"/> first.
/// NOTE: This is exactly the same operation as calling the <see cref="Length"/> setter, the primary
/// use <see cref="ResizeBuffer(int)"/> first.
/// NOTE: This is exactly the same operation as calling the <see cref="Length"/> setter, the primary
/// difference is that this method returns a reference to the current object so it can be chained.
/// <code>
/// obj.SetLength(30).Append("hey you");
/// </code>
/// </summary>
/// <param name="length"> the truncated length </param>
ICharTermAttribute SetLength(int length);
ICharTermAttribute SetLength(int length);
paulirwin marked this conversation as resolved.
Show resolved Hide resolved

/// <summary>
/// Sets the length of the termBuffer to zero.
Expand Down Expand Up @@ -197,8 +198,8 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable
/// </summary>
/// <param name="value">The sequence of characters to append.</param>
/// <remarks>
/// LUCENENET specific method, added because the .NET <see cref="string"/> data type
/// doesn't implement <see cref="ICharSequence"/>.
/// LUCENENET specific method, added because the .NET <see cref="string"/> data type
/// doesn't implement <see cref="ICharSequence"/>.
/// </remarks>
new ICharTermAttribute Append(string value);

Expand Down Expand Up @@ -228,8 +229,8 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable
/// <paramref name="startIndex"/> + <paramref name="count"/> is greater than the length of <paramref name="value"/>.
/// </exception>
/// <remarks>
/// LUCENENET specific method, added because the .NET <see cref="string"/> data type
/// doesn't implement <see cref="ICharSequence"/>.
/// LUCENENET specific method, added because the .NET <see cref="string"/> data type
/// doesn't implement <see cref="ICharSequence"/>.
/// </remarks>
new ICharTermAttribute Append(string value, int startIndex, int count); // LUCENENET TODO: API - change to startIndex/length to match .NET

Expand Down Expand Up @@ -270,7 +271,7 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable
/// <paramref name="startIndex"/> + <paramref name="count"/> is greater than the length of <paramref name="value"/>.
/// </exception>
/// <remarks>
/// LUCENENET specific method, added because the .NET <see cref="StringBuilder"/> data type
/// LUCENENET specific method, added because the .NET <see cref="StringBuilder"/> data type
/// doesn't implement <see cref="ICharSequence"/>.
/// </remarks>
new ICharTermAttribute Append(StringBuilder value, int startIndex, int count);
Expand Down
Loading
Loading