Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
  • Loading branch information
bitfaster committed Nov 26, 2024
2 parents 8a3be6e + 1c2f221 commit 3a5a72c
Show file tree
Hide file tree
Showing 51 changed files with 1,936 additions and 513 deletions.
6 changes: 0 additions & 6 deletions .github/workflows/benchpr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ jobs:
run: dotnet build splitasm --configuration Release
- name: Benchmark
run: dotnet run --project "BitFaster.Caching.Benchmarks" -f net6.0 -c Release --filter '*'
- name: Plot results
run: dotnet run --project "Tools\BenchPlot\Benchplot.csproj" --configuration Release "BenchmarkDotNet.Artifacts"
- name: Post process disassembly
run: splitasm\splitasm\bin\Release\net6.0\splitasm.exe %GITHUB_WORKSPACE%\BenchmarkDotNet.Artifacts\results
shell: cmd
Expand Down Expand Up @@ -68,8 +66,6 @@ jobs:
run: dotnet build --configuration Release --no-restore
- name: Benchmark
run: dotnet run --project "BitFaster.Caching.Benchmarks" -f net6.0 -c Release --filter '*'
- name: Plot results
run: dotnet run --project Tools/BenchPlot/BenchPlot.csproj --configuration Release "BenchmarkDotNet.Artifacts"
- name: Publish Results
uses: actions/upload-artifact@v3
with:
Expand All @@ -94,8 +90,6 @@ jobs:
run: dotnet build --configuration Release --no-restore
- name: Benchmark
run: dotnet run --project "BitFaster.Caching.Benchmarks" -f net6.0 -c Release --filter '*'
- name: Plot results
run: dotnet run --project "Tools\BenchPlot\BenchPlot.csproj" --configuration Release "BenchmarkDotNet.Artifacts"
- name: Publish Results
uses: actions/upload-artifact@v3
with:
Expand Down
13 changes: 7 additions & 6 deletions BitFaster.Caching.Benchmarks/BitFaster.Caching.Benchmarks.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

<PropertyGroup>
<OutputType>Exe</OutputType>
<LangVersion>latest</LangVersion>
<TargetFrameworks>net6.0;net8.0</TargetFrameworks>
<AllowUnsafeBlocks>True</AllowUnsafeBlocks>
<!-- https://stackoverflow.com/a/59916801/131345 -->
Expand All @@ -19,12 +20,12 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Benchly" Version="0.6.1" />
<PackageReference Include="BenchmarkDotNet" Version="0.13.12" />
<PackageReference Include="BenchmarkDotNet.Diagnostics.Windows" Version="0.13.12" />
<PackageReference Include="Benchly" Version="0.7.0" />
<PackageReference Include="BenchmarkDotNet" Version="0.14.0" />
<PackageReference Include="BenchmarkDotNet.Diagnostics.Windows" Version="0.14.0" />
<PackageReference Include="MathNet.Numerics" Version="5.0.0" />
<PackageReference Include="Microsoft.Extensions.Caching.Memory" Version="8.0.0" />
<PackageReference Include="System.Runtime.Caching" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Caching.Memory" Version="9.0.0" />
<PackageReference Include="System.Runtime.Caching" Version="8.0.1" />
</ItemGroup>

<ItemGroup>
Expand All @@ -41,4 +42,4 @@
<DefineConstants>MacOS</DefineConstants>
</PropertyGroup>

</Project>
</Project>
318 changes: 318 additions & 0 deletions BitFaster.Caching.Benchmarks/Lfu/CmSketchNoPin.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,318 @@
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;

#if NET6_0_OR_GREATER
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif

namespace BitFaster.Caching.Benchmarks.Lfu
{
/// <summary>
/// A frequency sketch that stores 4-bit counters packed sixteen to a <see cref="long"/>.
/// Each value maps to one block of eight longs and to four counters within that block
/// (one counter in each consecutive pair of longs); the estimated frequency is the
/// minimum of the four. All counters are halved once the number of recorded
/// increments reaches <see cref="ResetSampleSize"/>.
/// </summary>
/// <typeparam name="T">The type of the values being counted.</typeparam>
/// <typeparam name="I">The ISA probe used to select between the AVX2 and scalar code paths.</typeparam>
internal class CmSketchNoPin<T, I>
    where T : notnull
    where I : struct, IsaProbe
{
    // After shifting a long right by one, clears the top bit of every 4-bit counter so
    // that a bit shifted out of one counter does not leak into its neighbour.
    private const long ResetMask = 0x7777777777777777L;

    // Selects the lowest bit of every 4-bit counter (i.e. identifies odd counters).
    private const long OneMask = 0x1111111111111111L;

    private long[] table;
    private int sampleSize;
    private int blockMask;
    private int size;

    private readonly IEqualityComparer<T> comparer;

    /// <summary>
    /// Initializes a new instance of the CmSketchNoPin class with the specified maximum size and equality comparer.
    /// </summary>
    /// <param name="maximumSize">The maximum size.</param>
    /// <param name="comparer">The equality comparer used to hash values.</param>
    public CmSketchNoPin(long maximumSize, IEqualityComparer<T> comparer)
    {
        EnsureCapacity(maximumSize);
        this.comparer = comparer;
    }

    /// <summary>
    /// Gets the reset sample size: the number of counted increments after which all counters are halved.
    /// </summary>
    public int ResetSampleSize => this.sampleSize;

    /// <summary>
    /// Gets the size: the number of increments counted since the last reset or clear.
    /// </summary>
    public int Size => this.size;

    /// <summary>
    /// Estimate the frequency of the specified value, up to the maximum of 15.
    /// </summary>
    /// <param name="value">The value.</param>
    /// <returns>The estimated frequency of the value.</returns>
    public int EstimateFrequency(T value)
    {
#if NET48
        return EstimateFrequencyStd(value);
#else

        I isa = default;

        if (isa.IsAvx2Supported)
        {
            return EstimateFrequencyAvx(value);
        }
        else
        {
            return EstimateFrequencyStd(value);
        }
#endif
    }

    /// <summary>
    /// Increment the count of the specified value.
    /// </summary>
    /// <param name="value">The value.</param>
    public void Increment(T value)
    {
#if NET48
        IncrementStd(value);
#else

        I isa = default;

        if (isa.IsAvx2Supported)
        {
            IncrementAvx(value);
        }
        else
        {
            IncrementStd(value);
        }
#endif
    }

    /// <summary>
    /// Clears the count for all items.
    /// </summary>
    public void Clear()
    {
        table = new long[table.Length];
        size = 0;
    }

    // [MemberNotNull(nameof(table))]
    // Allocates the counter table and derives the block mask and reset sample size.
    private void EnsureCapacity(long maximumSize)
    {
        int maximum = (int)Math.Min(maximumSize, int.MaxValue >> 1);

        // The table always holds at least one block of 8 longs.
        table = new long[Math.Max(BitOps.CeilingPowerOfTwo(maximum), 8)];

        // Number of 8-long blocks, minus one, used to mask a hash to a block number.
        blockMask = (int)((uint)table.Length >> 3) - 1;

        // Multiply in 64 bits and clamp: 10 * maximum wraps in 32-bit arithmetic once
        // maximum exceeds int.MaxValue / 10, which would yield a bogus (possibly
        // negative) threshold that ++size can never equal.
        sampleSize = (maximumSize == 0) ? 10 : (int)Math.Min(10L * maximum, int.MaxValue);

        size = 0;
    }

    // Scalar estimate: reads the four counters selected by the hash and returns the smallest.
    private unsafe int EstimateFrequencyStd(T value)
    {
        var count = stackalloc int[4];
        int blockHash = Spread(comparer.GetHashCode(value));
        int counterHash = Rehash(blockHash);
        int block = (blockHash & blockMask) << 3;

        for (int i = 0; i < 4; i++)
        {
            // Each of the four counters consumes a different byte of counterHash.
            int h = (int)((uint)counterHash >> (i << 3));
            int index = (h >> 1) & 15;  // which of the 16 counters inside the long
            int offset = h & 1;         // which long of the i-th pair
            count[i] = (int)(((ulong)table[block + offset + (i << 1)] >> (index << 2)) & 0xfL);
        }
        return Math.Min(Math.Min(count[0], count[1]), Math.Min(count[2], count[3]));
    }

    // Scalar increment: bumps the four counters selected by the hash and halves all
    // counters once the sample size is reached.
    private unsafe void IncrementStd(T value)
    {
        // index[0..3] hold the counter position within the long, index[4..7] the table slot.
        var index = stackalloc int[8];
        int blockHash = Spread(comparer.GetHashCode(value));
        int counterHash = Rehash(blockHash);
        int block = (blockHash & blockMask) << 3;

        for (int i = 0; i < 4; i++)
        {
            int h = (int)((uint)counterHash >> (i << 3));
            index[i] = (h >> 1) & 15;
            int offset = h & 1;
            index[i + 4] = block + offset + (i << 1);
        }

        // Non short-circuiting OR: all four increments must be attempted.
        bool added =
            IncrementAt(index[4], index[0])
            | IncrementAt(index[5], index[1])
            | IncrementAt(index[6], index[2])
            | IncrementAt(index[7], index[3]);

        if (added && (++size == sampleSize))
        {
            Reset();
        }
    }

    // Applies another round of hashing for additional randomization.
    private static int Rehash(int x)
    {
        x = (int)(x * 0x31848bab);
        x ^= (int)((uint)x >> 14);
        return x;
    }

    // Applies a supplemental hash function to defend against poor quality hashes.
    private static int Spread(int x)
    {
        x ^= (int)((uint)x >> 17);
        x = (int)(x * 0xed5ad4bb);
        x ^= (int)((uint)x >> 11);
        x = (int)(x * 0xac4c1b51);
        x ^= (int)((uint)x >> 15);
        return x;
    }

    // Increments the j-th 4-bit counter of table[i] unless it is already at 15.
    // Returns true when the counter was incremented.
    private bool IncrementAt(int i, int j)
    {
        int offset = j << 2;
        long mask = (0xfL << offset);

        if ((table[i] & mask) != mask)
        {
            table[i] += (1L << offset);
            return true;
        }

        return false;
    }

    // Halves every counter and adjusts size to match.
    private void Reset()
    {
        // unroll, almost 2x faster
        int count0 = 0;
        int count1 = 0;
        int count2 = 0;
        int count3 = 0;

        for (int i = 0; i < table.Length; i += 4)
        {
            // Count the odd counters before they are truncated by the shift.
            count0 += BitOps.BitCount(table[i] & OneMask);
            count1 += BitOps.BitCount(table[i + 1] & OneMask);
            count2 += BitOps.BitCount(table[i + 2] & OneMask);
            count3 += BitOps.BitCount(table[i + 3] & OneMask);

            table[i] = (long)((ulong)table[i] >> 1) & ResetMask;
            table[i + 1] = (long)((ulong)table[i + 1] >> 1) & ResetMask;
            table[i + 2] = (long)((ulong)table[i + 2] >> 1) & ResetMask;
            table[i + 3] = (long)((ulong)table[i + 3] >> 1) & ResetMask;
        }

        count0 = (count0 + count1) + (count2 + count3);

        // Each counted increment touches up to four counters, hence the divide by 4
        // of the odd-counter total before halving.
        size = (size - (count0 >> 2)) >> 1;
    }

#if NET6_0_OR_GREATER
    // AVX2 estimate: computes the same four counter reads as EstimateFrequencyStd using
    // a gather and a horizontal minimum.
    private unsafe int EstimateFrequencyAvx(T value)
    {
        int blockHash = Spread(comparer.GetHashCode(value));
        int counterHash = Rehash(blockHash);
        int block = (blockHash & blockMask) << 3;

        // h[i] = counterHash >> (i * 8)
        Vector128<int> h = Vector128.Create(counterHash);
        h = Avx2.ShiftRightLogicalVariable(h.AsUInt32(), Vector128.Create(0U, 8U, 16U, 24U)).AsInt32();

        var index = Avx2.ShiftRightLogical(h, 1);
        index = Avx2.And(index, Vector128.Create(15)); // j - counter index
        Vector128<int> offset = Avx2.And(h, Vector128.Create(1));
        Vector128<int> blockOffset = Avx2.Add(Vector128.Create(block), offset); // i - table index
        blockOffset = Avx2.Add(blockOffset, Vector128.Create(0, 2, 4, 6)); // + (i << 1)

        fixed (long* tablePtr = table)
        {
            Vector256<long> tableVector = Avx2.GatherVector256(tablePtr, blockOffset, 8);
            index = Avx2.ShiftLeftLogical(index, 2);

            // convert index from int to long via permute: interleave the four ints with
            // the zeroed upper lanes (4..7) so each becomes the low half of a long
            Vector256<long> indexLong = Vector256.Create(index, Vector128<int>.Zero).AsInt64();
            Vector256<int> permuteMask2 = Vector256.Create(0, 4, 1, 5, 2, 6, 3, 7);
            indexLong = Avx2.PermuteVar8x32(indexLong.AsInt32(), permuteMask2).AsInt64();
            tableVector = Avx2.ShiftRightLogicalVariable(tableVector, indexLong.AsUInt64());
            tableVector = Avx2.And(tableVector, Vector256.Create(0xfL));

            // pack the low int of each long into the lower 128 bits
            Vector256<int> permuteMask = Vector256.Create(0, 2, 4, 6, 1, 3, 5, 7);
            Vector128<ushort> count = Avx2.PermuteVar8x32(tableVector.AsInt32(), permuteMask)
                .GetLower()
                .AsUInt16();

            // set the zeroed high parts of the long value to ushort.Max
            // so that they cannot win the horizontal minimum
#if NET6_0
            count = Avx2.Blend(count, Vector128<ushort>.AllBitsSet, 0b10101010);
#else
            count = Avx2.Blend(count, Vector128.Create(ushort.MaxValue), 0b10101010);
#endif

            return Avx2.MinHorizontal(count).GetElement(0);
        }
    }

    // AVX2 increment: computes the same four counter updates as IncrementStd.
    private unsafe void IncrementAvx(T value)
    {
        int blockHash = Spread(comparer.GetHashCode(value));
        int counterHash = Rehash(blockHash);
        int block = (blockHash & blockMask) << 3;

        // h[i] = counterHash >> (i * 8)
        Vector128<int> h = Vector128.Create(counterHash);
        h = Avx2.ShiftRightLogicalVariable(h.AsUInt32(), Vector128.Create(0U, 8U, 16U, 24U)).AsInt32();

        Vector128<int> index = Avx2.ShiftRightLogical(h, 1);
        index = Avx2.And(index, Vector128.Create(15)); // j - counter index
        Vector128<int> offset = Avx2.And(h, Vector128.Create(1));
        Vector128<int> blockOffset = Avx2.Add(Vector128.Create(block), offset); // i - table index
        blockOffset = Avx2.Add(blockOffset, Vector128.Create(0, 2, 4, 6)); // + (i << 1)

        fixed (long* tablePtr = table)
        {
            Vector256<long> tableVector = Avx2.GatherVector256(tablePtr, blockOffset, 8);

            // j == index
            index = Avx2.ShiftLeftLogical(index, 2);
            Vector256<long> offsetLong = Vector256.Create(index, Vector128<int>.Zero).AsInt64();

            // widen int -> long by interleaving with the zeroed upper lanes (4..7)
            Vector256<int> permuteMask = Vector256.Create(0, 4, 1, 5, 2, 6, 3, 7);
            offsetLong = Avx2.PermuteVar8x32(offsetLong.AsInt32(), permuteMask).AsInt64();

            // mask = (0xfL << offset)
            Vector256<long> fifteen = Vector256.Create(0xfL);
            Vector256<long> mask = Avx2.ShiftLeftLogicalVariable(fifteen, offsetLong.AsUInt64());

            // (table[i] & mask) != mask)
            // Note masked is 'equal' - therefore use AndNot below
            // A lane of masked is all-ones when that counter is already saturated at 15.
            Vector256<long> masked = Avx2.CompareEqual(Avx2.And(tableVector, mask), mask);

            // 1L << offset
            Vector256<long> inc = Avx2.ShiftLeftLogicalVariable(Vector256.Create(1L), offsetLong.AsUInt64());

            // Mask to zero out non matches (add zero below) - first operand is NOT then AND result (order matters)
            inc = Avx2.AndNot(masked, inc);

            // Match IncrementStd, which ORs the four IncrementAt results: the sample is
            // counted when ANY counter was incremented, i.e. unless every lane is
            // saturated (all 32 mask bits set).
            bool wasInc = Avx2.MoveMask(masked.AsByte()) != -1;

            tablePtr[blockOffset.GetElement(0)] += inc.GetElement(0);
            tablePtr[blockOffset.GetElement(1)] += inc.GetElement(1);
            tablePtr[blockOffset.GetElement(2)] += inc.GetElement(2);
            tablePtr[blockOffset.GetElement(3)] += inc.GetElement(3);

            if (wasInc && (++size == sampleSize))
            {
                Reset();
            }
        }
    }
#endif
}
}
Loading

0 comments on commit 3a5a72c

Please sign in to comment.