Refactor LFU insertion check and add lower bound for sketch size
vekterli committed Dec 10, 2024
1 parent 4349174 commit dd4dc36
Showing 2 changed files with 16 additions and 9 deletions.
4 changes: 3 additions & 1 deletion vespalib/src/vespa/vespalib/stllike/cache.h
@@ -276,7 +276,9 @@ class cache {
 * Setting the size to >0 will always create a new sketch. The sketch will be
 * initialized with the cache keys that are currently present in the cache segments,
 * giving each existing entry an estimated frequency of 1. All preexisting frequency
-* information about entries _not_ currently in the cache will be lost.
+* information about entries _not_ currently in the cache will be lost. To avoid
+* pathological frequency estimates for existing entries, the sketch has a lower
+* bound size of max(existing cache element count, cache_max_elem_count).
 */
 void set_frequency_sketch_size(size_t cache_max_elem_count);
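
As a standalone illustration of the documented lower bound (the helper name and a concrete numeric case below are illustrative only, not part of the Vespa cache API), the effective sketch size works out as follows:

#include <algorithm>
#include <cstddef>

// Illustrative helper only: mirrors the documented rule that the sketch must be
// able to count at least the elements already resident in the cache, even when
// the caller requests a smaller capacity.
size_t effective_sketch_size(size_t existing_elem_count, size_t cache_max_elem_count) {
    return std::max(existing_elem_count, cache_max_elem_count);
}

// Example: a cache currently holding 20000 entries that is asked to size its
// sketch for 5000 entries still gets a sketch dimensioned for 20000 elements.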

21 changes: 13 additions & 8 deletions vespalib/src/vespa/vespalib/stllike/cache.hpp
@@ -257,7 +257,9 @@ template<typename P>
 void cache<P>::set_frequency_sketch_size(size_t cache_max_elem_count) {
     std::lock_guard guard(_hashLock);
     if (cache_max_elem_count > 0) {
-        _sketch = std::make_unique<SketchType>(cache_max_elem_count, _hasher);
+        // Ensure we can count our existing cached elements, if any.
+        size_t effective_elem_count = std::max(size(), cache_max_elem_count);
+        _sketch = std::make_unique<SketchType>(effective_elem_count, _hasher);
         // (Re)setting the sketch loses all frequency knowledge, but we can at the
         // very least pre-seed it with the information we _do_ have, which is that
         // all elements already in the cache have an estimated frequency of >= 1.
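
A toy restatement of the resize-and-pre-seed flow above, with a std::unordered_map standing in for the count-min sketch and a flat list of keys standing in for the resident cache entries; this is a sketch under those assumptions, not the actual Vespa types:

#include <algorithm>
#include <cstddef>
#include <string>
#include <unordered_map>
#include <vector>

struct ToyCache {
    std::vector<std::string> resident_keys;            // stand-in for the cached entries
    std::unordered_map<std::string, unsigned> sketch;  // stand-in for the count-min sketch

    void set_frequency_sketch_size(size_t cache_max_elem_count) {
        if (cache_max_elem_count == 0) {
            return; // handling of 0 is outside the hunk shown above
        }
        // Lower-bound the sketch size so it can count what is already cached.
        const size_t effective_elem_count = std::max(resident_keys.size(), cache_max_elem_count);
        sketch.clear();
        sketch.reserve(effective_elem_count);
        // Pre-seed: every key already in the cache starts with an estimated frequency of >= 1.
        for (const auto& key : resident_keys) {
            sketch[key] += 1;
        }
    }
};
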
@@ -295,14 +297,17 @@ cache<P>::lfu_accepts_insertion(const K& key, const V& value,
     // TODO > capacity_bytes() instead of >=, this uses >= to be symmetric with removeOldest()
     const bool would_displace = ((segment.size() >= segment.capacity()) ||
                                  (segment.size_bytes() + calcSize(key, value)) >= segment.capacity_bytes());
-    const K* victim;
-    if (would_displace && (victim = segment.last_key_or_nullptr()) != nullptr) {
-        const auto existing_freq = _sketch->count_min(*victim);
-        // Frequency > instead of >= (i.e. must be _more_ popular, not just _as_ popular)
-        // empirically shows significantly better hit rates in our cache trace simulations.
-        return (candidate_freq > existing_freq);
+    if (!would_displace) {
+        return true; // No displacement, no reason to deny insertion
     }
-    return true; // No displacement, no reason to deny insertion.
+    const K* victim = segment.last_key_or_nullptr();
+    if (!victim) {
+        return true; // Cache segment is empty, allow at least one entry
+    }
+    const auto existing_freq = _sketch->count_min(*victim);
+    // Frequency > instead of >= (i.e. must be _more_ popular, not just _as_ popular)
+    // empirically shows significantly better hit rates in our cache trace simulations.
+    return (candidate_freq > existing_freq);
 }

 template<typename P>
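
Taken together, the rewritten check is a TinyLFU-style admission decision: admit when nothing would be displaced or the segment is empty, otherwise require the candidate to be strictly more frequent than the would-be victim. A condensed, cache-agnostic restatement of that decision order (hypothetical free function, not the Vespa code):

#include <cstdint>
#include <optional>

// Condensed restatement of the decision order in lfu_accepts_insertion above.
// candidate_freq and victim_freq are count-min style frequency estimates;
// victim_freq is empty when the segment has no entry to evict.
bool lfu_admits(bool would_displace,
                std::optional<uint32_t> victim_freq,
                uint32_t candidate_freq) {
    if (!would_displace) {
        return true; // enough room left; no reason to deny insertion
    }
    if (!victim_freq) {
        return true; // segment is empty; allow at least one entry
    }
    // Strictly greater, not >=: the candidate must be *more* popular than the victim.
    return candidate_freq > *victim_freq;
}

Requiring strictly greater frequency also means a stream of newcomers that are merely as popular as resident entries cannot continuously churn them out, which is consistent with the better hit rates the in-code comment reports from trace simulations.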