Skip to content

Commit

Permalink
Remove the simulator's split package for Jigsaw modularity
Browse files Browse the repository at this point in the history
The CountMin64 implementation is provided by AddThis StreamLib, but the
TinyLFU reset operation required modifying its package-private state.
This was done by declaring the same package name to gain field access,
but that split package will break under JDK 9's modularity restrictions.

Now the CountMin64 is a slimmed down fork of the original code. The
BloomFilter option was removed (since 2 others are provided) so that
the dependency could be removed. The JavaDoc retains the attribution.
  • Loading branch information
ben-manes committed Dec 27, 2017
1 parent e785458 commit 3f06fc1
Show file tree
Hide file tree
Showing 9 changed files with 126 additions and 86 deletions.
2 changes: 0 additions & 2 deletions gradle/dependencies.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ ext {
jcache: '1.1.0',
jsr305: '3.0.2',
jsr330: '1',
stream: '2.9.5',
univocityParsers: '2.5.9',
ycsb: '1.13.0-RC1',
xz: '1.6',
Expand Down Expand Up @@ -108,7 +107,6 @@ ext {
jcache: "javax.cache:cache-api:${versions.jcache}",
jsr305: "com.google.code.findbugs:jsr305:${versions.jsr305}",
jsr330: "javax.inject:javax.inject:${versions.jsr330}",
stream: "com.clearspring.analytics:stream:${versions.stream}",
univocityParsers: "com.univocity:univocity-parsers:${versions.univocityParsers}",
ycsb: "com.github.brianfrankcooper.ycsb:core:${versions.ycsb}",
xz: "org.tukaani:xz:${versions.xz}",
Expand Down
1 change: 0 additions & 1 deletion simulator/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ dependencies {
compile libraries.akka
compile libraries.ycsb
compile libraries.guava
compile libraries.stream
compile libraries.fastutil
compile libraries.flipTables
compile benchmarkLibraries.ohc
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
*/
package com.github.benmanes.caffeine.cache.simulator.admission;

import com.clearspring.analytics.stream.frequency.CountMin64TinyLfu;
import com.github.benmanes.caffeine.cache.simulator.BasicSettings;
import com.github.benmanes.caffeine.cache.simulator.admission.countmin4.AdaptiveResetCountMin4;
import com.github.benmanes.caffeine.cache.simulator.admission.countmin4.IncrementalResetCountMin4;
import com.github.benmanes.caffeine.cache.simulator.admission.countmin4.PeriodicResetCountMin4;
import com.github.benmanes.caffeine.cache.simulator.admission.countmin64.CountMin64TinyLfu;
import com.github.benmanes.caffeine.cache.simulator.admission.perfect.PerfectFrequency;
import com.github.benmanes.caffeine.cache.simulator.admission.table.RandomRemovalFrequencyTable;
import com.github.benmanes.caffeine.cache.simulator.admission.tinycache.TinyCacheAdapter;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.benmanes.caffeine.cache.simulator.admission.countmin64;

import static com.google.common.base.Preconditions.checkArgument;

import java.util.Random;

/**
 * Count-Min Sketch data structure with optional conservative addition.
 * <p>
 * This is derived from {@code CountMinSketch} and {@code ConservativeAddSketch} provided by
 * <a href="https://github.com/addthis/stream-lib">StreamLib</a>.
 */
final class CountMin64 {
  static final long PRIME_MODULUS = (1L << 31) - 1;

  final long[][] table;
  final long[] hashA;
  final int depth;
  final int width;

  /**
   * Creates an empty sketch whose dimensions are derived from the requested accuracy.
   *
   * @param eps the error factor used to size each row ({@code width = ceil(2 / eps)}); must be
   *        positive and finite
   * @param confidence the probability used to size the number of rows
   *        ({@code depth = ceil(-log2(1 - confidence))}); must be strictly between 0 and 1
   * @param seed the seed of the random generator that chooses each row's hash multiplier
   * @throws IllegalArgumentException if {@code eps} or {@code confidence} is out of range
   */
  public CountMin64(double eps, double confidence, int seed) {
    // The comparisons are written so that NaN fails them. Without these guards a bad setting
    // yields a zero-sized (or absurdly large) table that only fails later, e.g. with a division
    // by zero when hashing or an out-of-bounds access on the first conservative add.
    if (!(eps > 0.0) || Double.isInfinite(eps)) {
      throw new IllegalArgumentException("eps must be positive and finite: " + eps);
    }
    if (!(confidence > 0.0 && confidence < 1.0)) {
      throw new IllegalArgumentException(
          "confidence must be between 0 and 1 (exclusive): " + confidence);
    }

    // 2/w = eps ; w = 2/eps
    // 1/2^depth <= 1-confidence ; depth >= -log2 (1-confidence)
    this.width = (int) Math.ceil(2 / eps);
    this.depth = (int) Math.ceil(-Math.log(1 - confidence) / Math.log(2));
    this.table = new long[depth][width];
    this.hashA = new long[depth];

    // We're using linear hash functions of the form ((a*x+b) mod p) where a,b are chosen
    // independently for each hash function. However we can set b = 0 as all it does is shift the
    // results without compromising their uniformity or independence with the other hashes.
    Random r = new Random(seed);
    for (int i = 0; i < depth; ++i) {
      hashA[i] = r.nextInt(Integer.MAX_VALUE);
    }
  }

  /**
   * Returns the smallest counter that the item maps to across all rows.
   * <p>
   * The estimate is correct within epsilon * (total item count), with probability confidence.
   *
   * @param item the element to look up
   * @return the estimated number of times the item was added
   */
  public long estimateCount(long item) {
    long count = Long.MAX_VALUE;
    for (int i = 0; i < depth; ++i) {
      count = Math.min(count, table[i][hash(item, i)]);
    }
    return count;
  }

  /**
   * Records {@code count} occurrences of the item.
   *
   * @param conservative if {@code true}, a counter is only raised when it is below the item's
   *        current minimum plus {@code count}; otherwise every row's counter is incremented
   * @param item the element being counted
   * @param count the non-negative number of occurrences to record
   * @throws IllegalArgumentException if {@code count} is negative
   */
  public void add(boolean conservative, long item, long count) {
    // Actually for negative increments we'll need to use the median instead of minimum, and
    // accuracy will suffer somewhat. Probably makes sense to add an "allow negative increments"
    // parameter to constructor.
    if (count < 0) {
      throw new IllegalArgumentException("Negative increments not implemented");
    }

    if (conservative) {
      conservativeAdd(item, count);
    } else {
      add(item, count);
    }
  }

  /** Increments the item's counter in every row by {@code count}. */
  private void add(long item, long count) {
    for (int i = 0; i < depth; ++i) {
      table[i][hash(item, i)] += count;
    }
  }

  /**
   * Raises each of the item's counters to at least the current minimum plus {@code count},
   * leaving counters that are already at or above that value untouched.
   */
  private void conservativeAdd(long item, long count) {
    // Hash once per row and reuse the bucket for both the minimum scan and the update
    int[] buckets = new int[depth];
    for (int i = 0; i < depth; ++i) {
      buckets[i] = hash(item, i);
    }
    long min = table[0][buckets[0]];
    for (int i = 1; i < depth; ++i) {
      min = Math.min(min, table[i][buckets[i]]);
    }
    for (int i = 0; i < depth; ++i) {
      long newVal = Math.max(table[i][buckets[i]], min + count);
      table[i][buckets[i]] = newVal;
    }
  }

  /** Returns the item's bucket index, in {@code [0, width)}, for the given row. */
  private int hash(long item, int i) {
    long hash = hashA[i] * item;
    // A super fast way of computing x mod 2^p-1
    // See http://www.cs.princeton.edu/courses/archive/fall09/cos521/Handouts/universalclasses.pdf
    // page 149, right after Proposition 7.
    hash += hash >> 32;
    // The mask keeps the low 31 bits, so the int conversion below is never negative
    hash &= PRIME_MODULUS;
    // Doing "%" after (int) conversion is ~2x faster than %'ing longs.
    return ((int) hash) % width;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.clearspring.analytics.stream.frequency;
package com.github.benmanes.caffeine.cache.simulator.admission.countmin64;

import com.github.benmanes.caffeine.cache.simulator.BasicSettings;
import com.github.benmanes.caffeine.cache.simulator.admission.Frequency;
Expand All @@ -36,21 +36,17 @@
public final class CountMin64TinyLfu implements Frequency {
private static final int MAX_COUNT = 15;

final CountMinSketch sketch;
final boolean conservative;
final CountMin64 sketch;
final int sampleSize;
int size;

public CountMin64TinyLfu(Config config) {
BasicSettings settings = new BasicSettings(config);
sketch = new CountMin64(settings.tinyLfu().countMin64().eps(),
settings.tinyLfu().countMin64().confidence(), settings.randomSeed());
conservative = settings.tinyLfu().conservative();
sampleSize = 10 * settings.maximumSize();

if (settings.tinyLfu().conservative()) {
sketch = new ConservativeAddSketch(settings.tinyLfu().countMin64().eps(),
settings.tinyLfu().countMin64().confidence(), settings.randomSeed());
} else {
sketch = new CountMinSketch(settings.tinyLfu().countMin64().eps(),
settings.tinyLfu().countMin64().confidence(), settings.randomSeed());
}
}

/** Returns the estimated usage frequency of the item. */
Expand All @@ -62,7 +58,7 @@ public int frequency(long o) {
@Override
public void increment(long o) {
if (sketch.estimateCount(o) < MAX_COUNT) {
sketch.add(o, 1);
sketch.add(conservative, o, 1);
}
size += 1;
resetIfNeeded();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
package com.github.benmanes.caffeine.cache.simulator.membership;

import com.github.benmanes.caffeine.cache.simulator.BasicSettings;
import com.github.benmanes.caffeine.cache.simulator.membership.bloom.AddThisBloomFilter;
import com.github.benmanes.caffeine.cache.simulator.membership.bloom.BloomFilter;
import com.github.benmanes.caffeine.cache.simulator.membership.bloom.GuavaBloomFilter;
import com.typesafe.config.Config;
Expand All @@ -27,14 +26,6 @@
* @author [email protected] (Ben Manes)
*/
public enum FilterType {
ADDTHIS {
@Override public Membership create(long expectedInsertions, double fpp, Config config) {
return new AddThisBloomFilter(expectedInsertions, fpp);
}
@Override public String toString() {
return "AddThis";
}
},
CAFFEINE {
@Override public Membership create(long expectedInsertions, double fpp, Config config) {
int randomSeed = new BasicSettings(config).randomSeed();
Expand Down

This file was deleted.

2 changes: 1 addition & 1 deletion simulator/src/main/resources/reference.conf
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ caffeine.simulator {
TinyLfu,
]

# The membership filter implementation: Caffeine, Guava, AddThis
# The membership filter implementation: Caffeine, Guava
membership-filter = caffeine

sampled {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.util.List;
import java.util.Random;

import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import com.github.benmanes.caffeine.cache.simulator.membership.FilterType;
Expand All @@ -43,28 +44,30 @@ public class MembershipTest {

static final boolean display = false;

@Test
public void bloomFilterTest() {
@Test(dataProvider = "filterTypes")
public void bloomFilterTest(FilterType filterType) {
for (int capacity = 2 << 10; capacity < (2 << 22); capacity = capacity << 2) {
long[] input = new Random().longs(capacity).distinct().toArray();
List<String[]> rows = new ArrayList<>();
int expectedInsertions = capacity / 2;

for (FilterType filterType : FilterType.values()) {
Membership filter = filterType.create(expectedInsertions, FPP, CONFIG);

int falsePostives = falsePostives(filter, input);
double falsePositiveRate = ((double) falsePostives / expectedInsertions);
assertThat(filterType.toString(), falsePositiveRate, is(lessThan(FPP + 0.01)));
rows.add(row(filterType, expectedInsertions, falsePostives, falsePositiveRate));
}
Membership filter = filterType.create(expectedInsertions, FPP, CONFIG);
int falsePostives = falsePostives(filter, input);
double falsePositiveRate = ((double) falsePostives / expectedInsertions);
assertThat(filterType.toString(), falsePositiveRate, is(lessThan(FPP + 0.01)));
rows.add(row(filterType, expectedInsertions, falsePostives, falsePositiveRate));

if (display) {
printTable(rows);
}
}
}

/** Returns all {@link FilterType} constants for the {@code filterTypes} data provider. */
@DataProvider(name = "filterTypes")
public Object[] providesFilterTypes() {
  Object[] filterTypes = FilterType.values();
  return filterTypes;
}

/** Returns the false positives based on an input of unique elements. */
private int falsePostives(Membership filter, long[] input) {
int falsePositives = 0;
Expand Down

0 comments on commit 3f06fc1

Please sign in to comment.