Merge pull request #33022 from vespa-engine/vekterli/cache-lfu-integration

Integrate optional LFU frequency sketch into (S)LRU cache
vekterli authored Dec 11, 2024
2 parents db59d3e + dd4dc36 commit 61c660f
Showing 7 changed files with 353 additions and 25 deletions.
@@ -661,7 +661,7 @@ TEST_F(LogDataStoreTest, Control_static_memory_usage)
// FIXME this is very, very implementation-specific... :I
constexpr size_t mutex_size = sizeof(std::mutex) * 2 * (113 + 1); // sizeof(std::mutex) is platform dependent
constexpr size_t string_size = sizeof(std::string);
-constexpr size_t lru_segment_overhead = 304;
+constexpr size_t lru_segment_overhead = 352;
EXPECT_EQ(74476 + mutex_size + 3 * string_size + lru_segment_overhead, usage.allocatedBytes());
EXPECT_EQ(752u + mutex_size + 3 * string_size + lru_segment_overhead, usage.usedBytes());
}
122 changes: 122 additions & 0 deletions vespalib/src/tests/stllike/cache_test.cpp
@@ -503,4 +503,126 @@ TEST_F(SlruCacheTest, accessing_element_in_protected_segment_moves_to_segment_he
ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {5, 3}, {4, 2, 1}));
}

struct LfuCacheTest : SlruCacheTest {
LfuCacheTest() : SlruCacheTest() {
// Prepopulate backing store
m[1] = "a";
m[2] = "b";
m[3] = "c";
m[4] = "d";
m[5] = "e";
}
};

TEST_F(LfuCacheTest, lfu_gates_probationary_segment_displacing) {
// Disable protected segment; LRU mode only
cache<CacheParam<P, B, zero<uint32_t>, size<std::string>>> cache(m, -1, 0);
cache.maxElements(3, 0);
cache.set_frequency_sketch_size(3);
// Element 1 is the talk of the town. Everybody wants a piece. So popular...!
ASSERT_EQ(cache.read(1), "a");
ASSERT_EQ(cache.read(1), "a");
// Cache still has capacity, so LFU does not gate the insertion
ASSERT_EQ(cache.read(2), "b");
ASSERT_EQ(cache.read(3), "c");
EXPECT_EQ(cache.lfu_dropped(), 0);
ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {3, 2, 1}, {}));
// Attempting to read-through 4 will _not_ insert it into the cache, as doing so
// would displace a more popular element (1).
ASSERT_EQ(cache.read(4), "d");
ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {3, 2, 1}, {}));
EXPECT_EQ(cache.lfu_dropped(), 1);
// Reading 4 once more won't make it _more_ popular than 1, so still rejected.
ASSERT_EQ(cache.read(4), "d");
EXPECT_EQ(cache.lfu_dropped(), 2);
ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {3, 2, 1}, {}));
// But reading it once again will make it more popular, displacing 1.
ASSERT_EQ(cache.read(4), "d");
EXPECT_EQ(cache.lfu_dropped(), 2);
ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {4, 3, 2}, {}));
EXPECT_EQ(cache.lfu_not_promoted(), 0); // Only applies to SLRU
}

TEST_F(LfuCacheTest, lfu_gates_protected_segment_displacing) {
cache<CacheParam<P, B, zero<uint32_t>, size<std::string>>> cache(m, -1, -1);
cache.maxElements(4, 2);
cache.set_frequency_sketch_size(6);
ASSERT_EQ(cache.read(1), "a");
ASSERT_EQ(cache.read(2), "b");
ASSERT_EQ(cache.read(3), "c");
ASSERT_EQ(cache.read(4), "d");
// Move 1+2 into protected. These will now have an estimated frequency of 2.
ASSERT_EQ(cache.read(1), "a");
ASSERT_EQ(cache.read(2), "b");
ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {4, 3}, {2, 1}));
ASSERT_EQ(cache.read(5), "e");
// Both 1+2 are trending higher on social media than 3+4. Touching 3+4 will
// bump them to the head of the LRU, but not into the protected segment (yet).
EXPECT_EQ(cache.lfu_not_promoted(), 0);
ASSERT_EQ(cache.read(3), "c");
ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {3, 5, 4}, {2, 1}));
EXPECT_EQ(cache.lfu_not_promoted(), 1);
ASSERT_EQ(cache.read(4), "d");
ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {4, 3, 5}, {2, 1}));
EXPECT_EQ(cache.lfu_not_promoted(), 2);
// 4 just went viral and can enter the protected segment. This displaces the tail (1)
// of the protected segment back into probationary.
ASSERT_EQ(cache.read(4), "d");
ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {1, 3, 5}, {4, 2}));
EXPECT_EQ(cache.lfu_not_promoted(), 2);
}

TEST_F(LfuCacheTest, lfu_gates_probationary_inserts_on_write_through) {
cache<CacheParam<P, B, zero<uint32_t>, size<std::string>>> cache(m, -1, 0);
cache.maxElements(2, 0);
cache.set_frequency_sketch_size(2);
ASSERT_EQ(cache.read(2), "b"); // ==> freq 1
ASSERT_EQ(cache.read(2), "b"); // ==> freq 2
cache.write(7, "zoid"); // OK; capacity < max elems
ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {7, 2}, {}));
// 8 is not more popular than 2, so this insertion does not displace it
cache.write(8, "berg");
ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {7, 2}, {}));
// LFU is not updated from writes
cache.write(8, "hello");
cache.write(8, "world");
ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {7, 2}, {}));
EXPECT_EQ(cache.lfu_dropped(), 3);
}

TEST_F(LfuCacheTest, lfu_gating_considers_capacity_bytes) {
cache<CacheParam<P, B, SelfAsSize, zero<std::string>>> cache(m, 200, 0);
cache.maxElements(10, 0); // will be capacity bytes-bound
cache.set_frequency_sketch_size(10);
cache.write(100, "foo");
ASSERT_EQ(cache.read(100), "foo"); // Freq => 1
ASSERT_NO_FATAL_FAILURE(assert_segment_size_bytes(cache, 180, 0));
// Inserting new element 50 would displace more popular 100
cache.write(50, "bar");
ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {100}, {}));
ASSERT_NO_FATAL_FAILURE(assert_segment_size_bytes(cache, 180, 0));
ASSERT_EQ(cache.read(50), "bar"); // Freq => 1, still no displacement
ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {100}, {}));
ASSERT_NO_FATAL_FAILURE(assert_segment_size_bytes(cache, 180, 0));
ASSERT_EQ(cache.read(50), "bar"); // Freq => 2, rise and shine
ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {50}, {}));
ASSERT_NO_FATAL_FAILURE(assert_segment_size_bytes(cache, 130, 0));
}

TEST_F(LfuCacheTest, resetting_sketch_initializes_new_sketch_with_cached_elems) {
cache<CacheParam<P, B, zero<uint32_t>, size<std::string>>> cache(m, -1, -1);
cache.maxElements(2, 1);
cache.set_frequency_sketch_size(0);
ASSERT_EQ(cache.read(1), "a");
ASSERT_EQ(cache.read(2), "b");
ASSERT_EQ(cache.read(1), "a"); // => protected
ASSERT_EQ(cache.read(3), "c");
ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {3, 2}, {1}));
cache.set_frequency_sketch_size(10);
EXPECT_EQ(cache.lfu_dropped(), 0);
ASSERT_EQ(cache.read(4), "d"); // Not more popular than 2 => not inserted
ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {3, 2}, {1}));
EXPECT_EQ(cache.lfu_dropped(), 1);
}

GTEST_MAIN_RUN_ALL_TESTS()
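
The admission rule these tests exercise can be condensed into a few lines. The snippet below is a minimal, standalone sketch of the policy only, not the Vespa implementation: an exact std::unordered_map counter stands in for the probabilistic RelativeFrequencySketch, a plain std::list plays the role of the LRU, and the names (GatedLruCache, read, contains) are illustrative. What it mirrors from the tests above is the gate itself: a candidate is admitted only if its estimated access frequency is strictly higher than that of the would-be victim at the LRU tail.

#include <cassert>
#include <list>
#include <string>
#include <unordered_map>

class GatedLruCache {
    size_t _max_elems;
    std::list<int> _lru;                       // front = most recently used, back = eviction victim
    std::unordered_map<int, std::string> _map; // key -> cached value
    std::unordered_map<int, unsigned> _freq;   // key -> access frequency estimate
public:
    explicit GatedLruCache(size_t max_elems) : _max_elems(max_elems) {}

    bool contains(int key) const { return _map.count(key) != 0; }

    // Read-through: on a miss, the candidate is only inserted if it is estimated to be
    // accessed more often than the element it would displace.
    std::string read(int key, const std::unordered_map<int, std::string>& store) {
        unsigned candidate_freq = ++_freq[key];       // every access bumps the estimate
        if (auto it = _map.find(key); it != _map.end()) {
            _lru.remove(key);
            _lru.push_front(key);                     // cache hit: move to LRU head
            return it->second;
        }
        std::string value = store.at(key);            // read through to the backing store
        if (_map.size() < _max_elems) {
            _lru.push_front(key);                     // spare capacity: no gating needed
            _map.emplace(key, value);
        } else if (candidate_freq > _freq[_lru.back()]) {
            _map.erase(_lru.back());                  // displace the less popular tail element
            _lru.pop_back();
            _lru.push_front(key);
            _map.emplace(key, value);
        }                                             // else: serve the value, keep the cache unchanged
        return value;
    }
};

int main() {
    std::unordered_map<int, std::string> store{{1, "a"}, {2, "b"}, {3, "c"}, {4, "d"}};
    GatedLruCache cache(3);
    cache.read(1, store); cache.read(1, store); // key 1 reaches frequency 2
    cache.read(2, store); cache.read(3, store); // cache is now full: {3, 2, 1}
    cache.read(4, store);                       // frequency 1 is not > 2: gated out
    cache.read(4, store);                       // frequency 2 is not > 2: still gated out
    assert(cache.contains(1) && !cache.contains(4));
    cache.read(4, store);                       // frequency 3 > 2: admitted, displaces key 1
    assert(cache.contains(4) && !cache.contains(1));
    return 0;
}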
39 changes: 39 additions & 0 deletions vespalib/src/tests/stllike/lrucache.cpp
@@ -317,4 +317,43 @@ TEST(LruCacheMapTest, implicit_lru_trimming_on_oversized_insert_does_not_remove_
EXPECT_EQ(lru_key_order(cache), "2");
}

TEST(LruCacheMapTest, can_get_iter_to_last_element) {
using Cache = lrucache_map<LruParam<int, std::string>>;
Cache cache(5);
// Returned iterator is end() if the map is empty
EXPECT_TRUE(cache.iter_to_last() == cache.end());
cache.insert(1, "a");
ASSERT_TRUE(cache.iter_to_last() != cache.end());
EXPECT_EQ(cache.iter_to_last().key(), 1);
cache.insert(2, "b");
ASSERT_TRUE(cache.iter_to_last() != cache.end());
EXPECT_EQ(cache.iter_to_last().key(), 1); // LRU tail is still 1
cache.insert(3, "c");
cache.insert(4, "d");
ASSERT_TRUE(cache.iter_to_last() != cache.end());
EXPECT_EQ(cache.iter_to_last().key(), 1); // ... and still 1.
// Move 1 to LRU head. Tail is now 2.
ASSERT_TRUE(cache.find_and_ref(1));
ASSERT_TRUE(cache.iter_to_last() != cache.end());
EXPECT_EQ(cache.iter_to_last().key(), 2);
// Move 3 to LRU head. Tail is still 2.
ASSERT_TRUE(cache.find_and_ref(3));
ASSERT_TRUE(cache.iter_to_last() != cache.end());
EXPECT_EQ(cache.iter_to_last().key(), 2);
// Move 2 to LRU head. Tail is now 4.
ASSERT_TRUE(cache.find_and_ref(2));
ASSERT_TRUE(cache.iter_to_last() != cache.end());
EXPECT_EQ(cache.iter_to_last().key(), 4);

EXPECT_EQ(lru_key_order(cache), "2 3 1 4");

cache.erase(4);
ASSERT_TRUE(cache.iter_to_last() != cache.end());
EXPECT_EQ(cache.iter_to_last().key(), 1);
cache.erase(3);
cache.erase(2);
cache.erase(1);
ASSERT_TRUE(cache.iter_to_last() == cache.end());
}

GTEST_MAIN_RUN_ALL_TESTS()
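
The iter_to_last() accessor exercised above gives callers a way to look at the LRU tail, i.e. the element that would be evicted next. Below is a minimal sketch of one such use; the helper name is hypothetical and assumes only what the test demonstrates (iter_to_last(), end() and key()), but it serves the same purpose as the private last_key_or_nullptr() declared in cache.h further down in this diff.

#include <optional>
#include <type_traits>

// Hypothetical helper, not part of this commit: returns a copy of the key at the
// LRU tail (the next eviction victim), or std::nullopt when the map is empty.
template <typename LruMap>
auto peek_victim_key(LruMap& map) {
    auto it = map.iter_to_last();
    using Key = std::decay_t<decltype(it.key())>;
    if (it == map.end()) {
        return std::optional<Key>();      // empty map: nothing to evict
    }
    return std::optional<Key>(it.key());
}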
66 changes: 57 additions & 9 deletions vespalib/src/vespa/vespalib/stllike/cache.h
@@ -3,6 +3,7 @@

#include "lrucache_map.h"
#include <vespa/vespalib/util/memoryusage.h>
#include <vespa/vespalib/util/relative_frequency_sketch.h>
#include <atomic>
#include <mutex>
#include <vector>
@@ -77,6 +78,15 @@ enum class CacheSegment {
*
* Note that the regular non-SLRU cache is implemented to reside entirely within the probationary
* segment.
*
* Optionally, the cache can be configured to use a probabilistic LFU frequency sketch for
* gating insertions to its segments. An element that is a candidate for insertion will only
* be allowed into a segment if it is estimated to be more frequently accessed than the element
* it would displace. LFU gating works in both LRU and SLRU modes. In both modes, initial
* insertion into the probationary segment is gated (for cache read-through and write-through).
* In SLRU mode, promotion from probationary to protected is also gated. In the case that
* promotion is denied, the candidate element is placed at the LRU head of the probationary
* segment instead, giving it another chance.
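*
* Example of enabling the gate on a cache instance (illustrative only; the element
* counts are arbitrary, not a recommendation):
*
*   cache.maxElements(10000, 2000);          // probationary / protected element caps
*   cache.set_frequency_sketch_size(12000);  // >= total number of elements expected to fit in the cache
*   cache.set_frequency_sketch_size(0);      // later: disable LFU gating and free the sketch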
*/
template <typename P>
class cache {
@@ -109,6 +119,8 @@ class cache {
// Fetches an existing key from the cache _without_ updating the LRU ordering.
[[nodiscard]] const typename P::Value& get_existing(const KeyT& key) const;

const KeyT* last_key_or_nullptr() const noexcept;

// Returns true iff `key` existed in the mapping prior to the call, which also
// implies the mapping has been updated by consuming `value` (i.e. its contents
// have been std::move()'d away and it is now in a logically empty state).
@@ -127,6 +139,10 @@
// size_bytes() <= capacity() && size() <= maxElements()
using Lru::trim;

// Invokes functor `fn` for each segment key in LRU order (new to old)
template <typename F>
void for_each_key(F fn);

[[nodiscard]] std::vector<KeyT> dump_segment_keys_in_lru_order();

using Lru::empty;
@@ -248,6 +264,24 @@ class cache {

[[nodiscard]] virtual MemoryUsage getStaticMemoryUsage() const;

/**
* Sets the size (in number of elements) of a probabilistic LFU frequency sketch
* used by the cache to gate insertions into its segments. The element count should
* be at least as large as the maximum _expected_ number of elements that the cache
* can hold at once.
*
* Setting the size to 0 disables the LFU functionality and frees allocated memory
* associated with any previous frequency sketch.
*
* Setting the size to >0 will always create a new sketch. The sketch will be
* initialized with the cache keys that are currently present in the cache segments,
* giving each existing entry an estimated frequency of 1. All preexisting frequency
* information about entries _not_ currently in the cache will be lost. To avoid
* pathological frequency estimates for existing entries, the sketch has a lower
* bound size of max(existing cache element count, cache_max_elem_count).
*/
void set_frequency_sketch_size(size_t cache_max_elem_count);

/**
* Listeners for insertion and removal events that may be overridden by a subclass.
* Important: implementations should never directly or indirectly modify the cache
@@ -294,14 +328,16 @@ class cache {

[[nodiscard]] virtual CacheStats get_stats() const;

size_t getHit() const noexcept { return _hit.load(std::memory_order_relaxed); }
size_t getMiss() const noexcept { return _miss.load(std::memory_order_relaxed); }
size_t getNonExisting() const noexcept { return _non_existing.load(std::memory_order_relaxed); }
size_t getRace() const noexcept { return _race.load(std::memory_order_relaxed); }
size_t getInsert() const noexcept { return _insert.load(std::memory_order_relaxed); }
size_t getWrite() const noexcept { return _write.load(std::memory_order_relaxed); }
size_t getInvalidate() const noexcept { return _invalidate.load(std::memory_order_relaxed); }
size_t getLookup() const noexcept { return _lookup.load(std::memory_order_relaxed); }
size_t lfu_dropped() const noexcept { return _lfu_dropped.load(std::memory_order_relaxed); }
size_t lfu_not_promoted() const noexcept { return _lfu_not_promoted.load(std::memory_order_relaxed); }

/**
* Returns the number of bytes that are always implicitly added for each element
@@ -322,9 +358,16 @@ private:
private:
// Implicitly updates LRU segment(s) on hit.
// Precondition: _hashLock is held.
-[[nodiscard]] bool try_fill_from_cache(const K& key, V& val_out);
+[[nodiscard]] bool try_fill_from_cache(const K& key, V& val_out, const std::lock_guard<std::mutex>& guard);

[[nodiscard]] bool multi_segment() const noexcept { return _protected_segment.capacity_bytes() != 0; }
void lfu_add(const K& key) noexcept;
[[nodiscard]] uint8_t lfu_add_and_count(const K& key) noexcept;
[[nodiscard]] bool lfu_accepts_insertion(const K& key, const V& value,
const SizeConstrainedLru& segment,
uint8_t candidate_freq) const noexcept;
[[nodiscard]] bool lfu_accepts_insertion(const K& key, const V& value, const SizeConstrainedLru& segment);

void trim_segments();
void verifyHashLock(const UniqueLock& guard) const;
[[nodiscard]] size_t calcSize(const K& k, const V& v) const noexcept {
@@ -344,6 +387,8 @@
v.store(v.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed);
}

using SketchType = RelativeFrequencySketch<K, Hash>;

[[no_unique_address]] Hash _hasher;
[[no_unique_address]] SizeK _sizeK;
[[no_unique_address]] SizeV _sizeV;
@@ -357,7 +402,10 @@
mutable std::atomic<size_t> _update;
mutable std::atomic<size_t> _invalidate;
mutable std::atomic<size_t> _lookup;
mutable std::atomic<size_t> _lfu_dropped;
mutable std::atomic<size_t> _lfu_not_promoted;
BackingStore& _store;
std::unique_ptr<SketchType> _sketch;

ProbationarySegmentLru _probationary_segment;
ProtectedSegmentLru _protected_segment;