Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use prime for ConcurrentDictionary initial size #421

Merged
merged 8 commits into from
Oct 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions BitFaster.Caching.UnitTests/HashTablePrimesTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
using System.Collections.Generic;
using System;
using BitFaster.Caching;
using FluentAssertions;
using Xunit;
using Xunit.Abstractions;

namespace BitFaster.Caching.UnitTests
{
/// <summary>
/// Tests for <see cref="HashTablePrimes"/>, plus a skipped exploratory method that prints the sequence of
/// hash table sizes produced by the "2x + 1, skip multiples of 3, 5 and 7" resize rule.
/// </summary>
public class HashTablePrimesTests
{
    private readonly ITestOutputHelper testOutputHelper;

    public HashTablePrimesTests(ITestOutputHelper testOutputHelper)
    {
        this.testOutputHelper = testOutputHelper;
    }

    // Covers values below the table, values between entries, values exactly equal to an entry
    // (the result must be strictly greater), and values past the last entry (capped at 137).
    [Theory]
    [InlineData(0, 7)]
    [InlineData(3, 7)]
    [InlineData(7, 11)]
    [InlineData(8, 11)]
    [InlineData(12, 17)]
    [InlineData(131, 137)]
    [InlineData(132, 137)]
    [InlineData(500, 137)]
    public void NextPrimeGreaterThan(int input, int nextPrime)
    {
        HashTablePrimes.NextPrimeGreaterThan(input).Should().Be(nextPrime);
    }

    // This test method replicates the hash table sizes that will be computed by ConcurrentDictionary
    // on earlier versions of .NET before prime numbers are used.
    // 277 is prime
    // 557 is prime
    // 1117 is prime
    // 2237 is prime
    // 4477 has factors 11, 37, 121, 407
    // 8957 has factors 13, 53, 169, 689
    // 17917 has factors 19, 23, 41, 437, 779, 943
    // 35837 is prime
    // 71677 has factors 229, 313
    // 143357 is prime
    // 286717 has factors 163, 1759
    // 573437 is prime
    // 1146877 is prime
    // 2293757 is prime
    // 4587517 has factors 11, 103, 1133, 4049, 44539, 417047
    // 9175037 is prime
    // 18350077 has factors 701, 26177
    // 36700157 has factors 13, 23, 299, 122743, 1595659, 2823089
    // 73400317 has factors 4999, 14683
    // 146800637 is prime
    // 293601277 has factors 6113, 48029
    // 587202557 has factors 1877, 312841
    // 1174405117 has factors 10687, 109891
    [Fact(Skip="Not a functional test")]
    public void ComputeHashTableSizes()
    {
        // 137 gives a good balance of primes for smaller sizes, and few factors for larger sizes.
        // Other good candidates: 131, 151, 163, 211
        int size = 137;

        // 23 steps is the most that fits in an int when starting from 137: the 23rd size is
        // 1174405117, and doubling that again exceeds int.MaxValue.
        for (int i = 0; i < 23; i++)
        {
            int nextSize = NextTableSize(size);
            this.testOutputHelper.WriteLine($"{nextSize} {GetFactorsString(nextSize)}");
            size = nextSize;
        }
    }

    // Replicates .NET framework ConcurrentDictionary resize logic:
    // https://github.com/microsoft/referencesource/blob/51cf7850defa8a17d815b4700b67116e3fa283c2/mscorlib/system/collections/Concurrent/ConcurrentDictionary.cs#L1828C29-L1828C29
    //
    // Returns the first odd integer >= (2 * initial + 1) that is not divisible by 3, 5 or 7.
    // Throws OverflowException instead of silently wrapping if the result does not fit in an int.
    private static int NextTableSize(int initial)
    {
        checked
        {
            // Double the size of the buckets table and add one, so that we have an odd integer.
            int newLength = initial * 2 + 1;

            // Now, we only need to check odd integers, and find the first that is not divisible
            // by 3, 5 or 7.
            while (newLength % 3 == 0 || newLength % 5 == 0 || newLength % 7 == 0)
            {
                newLength += 2;
            }

            return newLength;
        }
    }

    // Formats the non-trivial divisors of nextSize (everything except 1 and nextSize itself) in
    // ascending order, or reports "is prime" when there are none.
    private static string GetFactorsString(int nextSize)
    {
        var factors = Factor(nextSize);

        factors.Remove(1);
        factors.Remove(nextSize);
        factors.Sort();

        if (factors.Count == 0)
        {
            return "is prime";
        }

        return $"has factors {string.Join(", ", factors)}";
    }

    // https://stackoverflow.com/questions/239865/best-way-to-find-all-factors-of-a-given-number
    //
    // Returns every divisor of number (including 1 and number itself) in discovery order, i.e. unsorted:
    // each divisor found at or below the square root is followed by its complementary divisor.
    private static List<int> Factor(int number)
    {
        var factors = new List<int>();
        int max = (int)Math.Sqrt(number); // Round down

        for (int factor = 1; factor <= max; ++factor) // Test from 1 to the square root, or the int below it, inclusive.
        {
            if (number % factor == 0)
            {
                factors.Add(factor);

                int cofactor = number / factor;
                if (factor != cofactor) // Don't add the square root twice! Thanks Jon
                {
                    factors.Add(cofactor);
                }
            }
        }

        return factors;
    }
}
}
46 changes: 46 additions & 0 deletions BitFaster.Caching/HashTablePrimes.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
using System;

namespace BitFaster.Caching
{
// Using the capacity passed into the cache ctor to initialize the ConcurrentDictionary has 2 problems:
//
// 1. By allocating up front, we eliminate resizing. However, if the capacity is very large and the cache is not used,
// we will waste a lot of memory.
// 2. On earlier versions of .NET, ConcurrentDictionary uses the capacity arg to directly initialize the hash table
// size. On resize, the hashtable is grown to 2x + 1 while avoiding factors of 3, 5, or 7 (but not larger). On
// newer versions of .NET, both initial size and resize are based on the next prime number larger than capacity. Collisions
// are reduced when hash table size is prime. Hence the change to use primes in all cases in newer versions of the
// framework.
//
// To mitigate this, we adopt a simple scheme: find the next prime larger than the capacity arg, up to 137. If the
// capacity is greater than 137, just set the initial size to 137, thereby bounding initial memory consumption for
// large caches.
//
// - Older .NET implementations: For smaller caches, we fix size at the next largest prime. For larger tables, we now
// start out with a larger prime (avoiding all factors up to 137, not just 3, 5 and 7). Above 137, some sizes will be
// prime and others have relatively few factors. The complete list is given as a comment in the unit test code.
// - Newer .NET implementations: as above for smaller caches. For larger caches, the resize will use successively larger
// primes. The duplicate prime computation added is only during construction and is effectively a no-op.
/// <summary>
/// Lookup of small primes used to pick a bounded initial size for a hash table.
/// </summary>
internal class HashTablePrimes
{
    /// <summary>
    /// Value returned by <see cref="NextPrimeGreaterThan"/> when no entry in <see cref="Primes"/> is
    /// large enough. This caps the initial size regardless of how large the requested minimum is.
    /// </summary>
    private const int MaxPrime = 137;

    // Ascending table of candidate sizes. It is intentionally sparse (not every prime is listed),
    // and every entry is below MaxPrime.
#if NETSTANDARD2_0
    // readonly: this is shared static state and must never be reassigned.
    internal static readonly int[] Primes = new int[] {
#else
    internal static ReadOnlySpan<int> Primes => new int[] {
#endif
        7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131
    };

    /// <summary>
    /// Gets the first entry in <see cref="Primes"/> that is strictly greater than <paramref name="min"/>.
    /// </summary>
    /// <param name="min">The exclusive lower bound. Values below the first entry (including zero and
    /// negative values) yield the first entry.</param>
    /// <returns>The matching table entry, or 137 when <paramref name="min"/> is greater than or equal to
    /// the last entry.</returns>
    internal static int NextPrimeGreaterThan(int min)
    {
        // The table is ascending, so the first entry that exceeds min is the smallest one that does.
        foreach (int prime in Primes)
        {
            if (prime > min)
            {
                return prime;
            }
        }

        return MaxPrime;
    }
}
}
3 changes: 2 additions & 1 deletion BitFaster.Caching/Lfu/ConcurrentLfu.cs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ public ConcurrentLfu(int capacity)
/// <param name="comparer">The equality comparer.</param>
public ConcurrentLfu(int concurrencyLevel, int capacity, IScheduler scheduler, IEqualityComparer<K> comparer)
{
this.dictionary = new ConcurrentDictionary<K, LfuNode<K, V>>(concurrencyLevel, capacity, comparer);
int dictionaryCapacity = HashTablePrimes.NextPrimeGreaterThan(capacity);
this.dictionary = new ConcurrentDictionary<K, LfuNode<K, V>>(concurrencyLevel, dictionaryCapacity, comparer);

// cap concurrency at proc count * 2
int readStripes = Math.Min(BitOps.CeilingPowerOfTwo(concurrencyLevel), BitOps.CeilingPowerOfTwo(Environment.ProcessorCount * 2));
Expand Down
3 changes: 2 additions & 1 deletion BitFaster.Caching/Lru/ClassicLru.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ public ClassicLru(int concurrencyLevel, int capacity, IEqualityComparer<K> compa
Throw.ArgNull(ExceptionArgument.comparer);

this.capacity = capacity;
this.dictionary = new ConcurrentDictionary<K, LinkedListNode<LruItem>>(concurrencyLevel, this.capacity + 1, comparer);
int dictionaryCapacity = HashTablePrimes.NextPrimeGreaterThan(capacity);
this.dictionary = new ConcurrentDictionary<K, LinkedListNode<LruItem>>(concurrencyLevel, dictionaryCapacity, comparer);
this.policy = new CachePolicy(new Optional<IBoundedPolicy>(this), Optional<ITimePolicy>.None());
}

Expand Down
2 changes: 1 addition & 1 deletion BitFaster.Caching/Lru/ConcurrentLruCore.cs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ public ConcurrentLruCore(
this.warmQueue = new ConcurrentQueue<I>();
this.coldQueue = new ConcurrentQueue<I>();

int dictionaryCapacity = this.Capacity + 1;
int dictionaryCapacity = HashTablePrimes.NextPrimeGreaterThan(this.Capacity);

this.dictionary = new ConcurrentDictionary<K, I>(concurrencyLevel, dictionaryCapacity, comparer);
this.itemPolicy = itemPolicy;
Expand Down
Loading