From 434917497276953715d06c645efa88674f871684 Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Mon, 9 Dec 2024 12:50:29 +0000 Subject: [PATCH 1/2] Integrate optional LFU frequency sketch into (S)LRU cache Allows a cache to be configured to use a probabilistic LFU frequency sketch for gating insertions to its segments. An element that is a candidate for insertion will only be allowed into a segment if it is estimated to be more frequently accessed than the element it would displace. LFU gating works in both LRU and SLRU modes. In both modes, initial insertion into the probationary segment is gated (for cache read-through and write-through). In SLRU mode, promotion from probationary to protected is also gated. In the case that promotion is denied, the candidate element is placed at the LRU head of the probationary segment instead, giving it another chance. The configured sketch element count should be at least as large as the maximum _expected_ number of elements that the cache can hold at once. The default size is 0, i.e. LFU functionality is disabled. Setting the size to >0 will always create a new sketch. The sketch will be initialized with the cache keys that are currently present in the cache segments, giving each existing entry an estimated frequency of 1. All preexisting frequency information about entries _not_ currently in the cache will be lost. --- .../logdatastore/logdatastore_test.cpp | 2 +- vespalib/src/tests/stllike/cache_test.cpp | 122 ++++++++++++++++ vespalib/src/tests/stllike/lrucache.cpp | 39 ++++++ vespalib/src/vespa/vespalib/stllike/cache.h | 64 +++++++-- vespalib/src/vespa/vespalib/stllike/cache.hpp | 132 ++++++++++++++++-- .../src/vespa/vespalib/stllike/lrucache_map.h | 6 + .../vespa/vespalib/stllike/lrucache_map.hpp | 6 + 7 files changed, 346 insertions(+), 25 deletions(-) diff --git a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp index c944f879dfb..8a986e4d33a 100644 --- a/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp +++ b/searchlib/src/tests/docstore/logdatastore/logdatastore_test.cpp @@ -661,7 +661,7 @@ TEST_F(LogDataStoreTest, Control_static_memory_usage) // FIXME this is very, very implementation-specific... 
:I constexpr size_t mutex_size = sizeof(std::mutex) * 2 * (113 + 1); // sizeof(std::mutex) is platform dependent constexpr size_t string_size = sizeof(std::string); - constexpr size_t lru_segment_overhead = 304; + constexpr size_t lru_segment_overhead = 352; EXPECT_EQ(74476 + mutex_size + 3 * string_size + lru_segment_overhead, usage.allocatedBytes()); EXPECT_EQ(752u + mutex_size + 3 * string_size + lru_segment_overhead, usage.usedBytes()); } diff --git a/vespalib/src/tests/stllike/cache_test.cpp b/vespalib/src/tests/stllike/cache_test.cpp index 017424a2b72..0bdf71eab12 100644 --- a/vespalib/src/tests/stllike/cache_test.cpp +++ b/vespalib/src/tests/stllike/cache_test.cpp @@ -503,4 +503,126 @@ TEST_F(SlruCacheTest, accessing_element_in_protected_segment_moves_to_segment_he ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {5, 3}, {4, 2, 1})); } +struct LfuCacheTest : SlruCacheTest { + LfuCacheTest() : SlruCacheTest() { + // Prepopulate backing store + m[1] = "a"; + m[2] = "b"; + m[3] = "c"; + m[4] = "d"; + m[5] = "e"; + } +}; + +TEST_F(LfuCacheTest, lfu_gates_probationary_segment_displacing) { + // Disable protected segment; LRU mode only + cache, size>> cache(m, -1, 0); + cache.maxElements(3, 0); + cache.set_frequency_sketch_size(3); + // Element 1 is the talk of the town. Everybody wants a piece. So popular...! + ASSERT_EQ(cache.read(1), "a"); + ASSERT_EQ(cache.read(1), "a"); + // Cache still has capacity, so LFU does not gate the insertion + ASSERT_EQ(cache.read(2), "b"); + ASSERT_EQ(cache.read(3), "c"); + EXPECT_EQ(cache.lfu_dropped(), 0); + ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {3, 2, 1}, {})); + // Attempting to read-through 4 will _not_ insert it into the cache, as doing so + // would displace a more popular element (1). + ASSERT_EQ(cache.read(4), "d"); + ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {3, 2, 1}, {})); + EXPECT_EQ(cache.lfu_dropped(), 1); + // Reading 4 once more won't make it _more_ popular than 1, so still rejected. + ASSERT_EQ(cache.read(4), "d"); + EXPECT_EQ(cache.lfu_dropped(), 2); + ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {3, 2, 1}, {})); + // But reading it once again will make it more popular, displacing 1. + ASSERT_EQ(cache.read(4), "d"); + EXPECT_EQ(cache.lfu_dropped(), 2); + ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {4, 3, 2}, {})); + EXPECT_EQ(cache.lfu_not_promoted(), 0); // Only applies to SLRU +} + +TEST_F(LfuCacheTest, lfu_gates_protected_segment_displacing) { + cache, size>> cache(m, -1, -1); + cache.maxElements(4, 2); + cache.set_frequency_sketch_size(6); + ASSERT_EQ(cache.read(1), "a"); + ASSERT_EQ(cache.read(2), "b"); + ASSERT_EQ(cache.read(3), "c"); + ASSERT_EQ(cache.read(4), "d"); + // Move 1+2 into protected. These will now have an estimated frequency of 2. + ASSERT_EQ(cache.read(1), "a"); + ASSERT_EQ(cache.read(2), "b"); + ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {4, 3}, {2, 1})); + ASSERT_EQ(cache.read(5), "e"); + // Both 1+2 are trending higher on social media than 3+4. Touching 3+4 will + // bump them to the head of the LRU, but not into the protected segment (yet). + EXPECT_EQ(cache.lfu_not_promoted(), 0); + ASSERT_EQ(cache.read(3), "c"); + ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {3, 5, 4}, {2, 1})); + EXPECT_EQ(cache.lfu_not_promoted(), 1); + ASSERT_EQ(cache.read(4), "d"); + ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {4, 3, 5}, {2, 1})); + EXPECT_EQ(cache.lfu_not_promoted(), 2); + // 4 just went viral and can enter the protected segment. 
This displaces the tail (1) + // of the protected segment back into probationary. + ASSERT_EQ(cache.read(4), "d"); + ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {1, 3, 5}, {4, 2})); + EXPECT_EQ(cache.lfu_not_promoted(), 2); +} + +TEST_F(LfuCacheTest, lfu_gates_probationary_inserts_on_write_through) { + cache, size>> cache(m, -1, 0); + cache.maxElements(2, 0); + cache.set_frequency_sketch_size(2); + ASSERT_EQ(cache.read(2), "b"); // ==> freq 1 + ASSERT_EQ(cache.read(2), "b"); // ==> freq 2 + cache.write(7, "zoid"); // OK; capacity < max elems + ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {7, 2}, {})); + // 8 is not more popular than 2, so this insertion does not displace it + cache.write(8, "berg"); + ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {7, 2}, {})); + // LFU is not updated from writes + cache.write(8, "hello"); + cache.write(8, "world"); + ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {7, 2}, {})); + EXPECT_EQ(cache.lfu_dropped(), 3); +} + +TEST_F(LfuCacheTest, lfu_gating_considers_capacity_bytes) { + cache>> cache(m, 200, 0); + cache.maxElements(10, 0); // will be capacity bytes-bound + cache.set_frequency_sketch_size(10); + cache.write(100, "foo"); + ASSERT_EQ(cache.read(100), "foo"); // Freq => 1 + ASSERT_NO_FATAL_FAILURE(assert_segment_size_bytes(cache, 180, 0)); + // Inserting new element 50 would displace more popular 100 + cache.write(50, "bar"); + ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {100}, {})); + ASSERT_NO_FATAL_FAILURE(assert_segment_size_bytes(cache, 180, 0)); + ASSERT_EQ(cache.read(50), "bar"); // Freq => 1, still no displacement + ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {100}, {})); + ASSERT_NO_FATAL_FAILURE(assert_segment_size_bytes(cache, 180, 0)); + ASSERT_EQ(cache.read(50), "bar"); // Freq => 2, rise and shine + ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {50}, {})); + ASSERT_NO_FATAL_FAILURE(assert_segment_size_bytes(cache, 130, 0)); +} + +TEST_F(LfuCacheTest, resetting_sketch_initializes_new_sketch_with_cached_elems) { + cache, size>> cache(m, -1, -1); + cache.maxElements(2, 1); + cache.set_frequency_sketch_size(0); + ASSERT_EQ(cache.read(1), "a"); + ASSERT_EQ(cache.read(2), "b"); + ASSERT_EQ(cache.read(1), "a"); // => protected + ASSERT_EQ(cache.read(3), "c"); + ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {3, 2}, {1})); + cache.set_frequency_sketch_size(10); + EXPECT_EQ(cache.lfu_dropped(), 0); + ASSERT_EQ(cache.read(4), "d"); // Not more popular than 2 => not inserted + ASSERT_NO_FATAL_FAILURE(assert_segment_lru_keys(cache, {3, 2}, {1})); + EXPECT_EQ(cache.lfu_dropped(), 1); +} + GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/tests/stllike/lrucache.cpp b/vespalib/src/tests/stllike/lrucache.cpp index 0e0249134e0..a735a42008f 100644 --- a/vespalib/src/tests/stllike/lrucache.cpp +++ b/vespalib/src/tests/stllike/lrucache.cpp @@ -317,4 +317,43 @@ TEST(LruCacheMapTest, implicit_lru_trimming_on_oversized_insert_does_not_remove_ EXPECT_EQ(lru_key_order(cache), "2"); } +TEST(LruCacheMapTest, can_get_iter_to_last_element) { + using Cache = lrucache_map>; + Cache cache(5); + // Returned iterator is end() if the map is empty + EXPECT_TRUE(cache.iter_to_last() == cache.end()); + cache.insert(1, "a"); + ASSERT_TRUE(cache.iter_to_last() != cache.end()); + EXPECT_EQ(cache.iter_to_last().key(), 1); + cache.insert(2, "b"); + ASSERT_TRUE(cache.iter_to_last() != cache.end()); + EXPECT_EQ(cache.iter_to_last().key(), 1); // LRU tail is still 1 + cache.insert(3, "c"); + 
cache.insert(4, "d"); + ASSERT_TRUE(cache.iter_to_last() != cache.end()); + EXPECT_EQ(cache.iter_to_last().key(), 1); // ... and still 1. + // Move 1 to LRU head. Tail is now 2. + ASSERT_TRUE(cache.find_and_ref(1)); + ASSERT_TRUE(cache.iter_to_last() != cache.end()); + EXPECT_EQ(cache.iter_to_last().key(), 2); + // Move 3 to LRU head. Tail is still 2. + ASSERT_TRUE(cache.find_and_ref(3)); + ASSERT_TRUE(cache.iter_to_last() != cache.end()); + EXPECT_EQ(cache.iter_to_last().key(), 2); + // Move 2 to LRU head. Tail is now 4. + ASSERT_TRUE(cache.find_and_ref(2)); + ASSERT_TRUE(cache.iter_to_last() != cache.end()); + EXPECT_EQ(cache.iter_to_last().key(), 4); + + EXPECT_EQ(lru_key_order(cache), "2 3 1 4"); + + cache.erase(4); + ASSERT_TRUE(cache.iter_to_last() != cache.end()); + EXPECT_EQ(cache.iter_to_last().key(), 1); + cache.erase(3); + cache.erase(2); + cache.erase(1); + ASSERT_TRUE(cache.iter_to_last() == cache.end()); +} + GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/vespa/vespalib/stllike/cache.h b/vespalib/src/vespa/vespalib/stllike/cache.h index 95afed0e3f7..1a983ca509f 100644 --- a/vespalib/src/vespa/vespalib/stllike/cache.h +++ b/vespalib/src/vespa/vespalib/stllike/cache.h @@ -3,6 +3,7 @@ #include "lrucache_map.h" #include +#include #include #include #include @@ -77,6 +78,15 @@ enum class CacheSegment { * * Note that the regular non-SLRU cache is implemented to reside entirely within the probationary * segment. + * + * Optionally, the cache can be configured to use a probabilistic LFU frequency sketch for + * gating insertions to its segments. An element that is a candidate for insertion will only + * be allowed into a segment if it is estimated to be more frequently accessed than the element + * it would displace. LFU gating works in both LRU and SLRU modes. In both modes, initial + * insertion into the probationary segment is gated (for cache read-through and write-through). + * In SLRU mode, promotion from probationary to protected is also gated. In the case that + * promotion is denied, the candidate element is placed at the LRU head of the probationary + * segment instead, giving it another chance. */ template class cache { @@ -109,6 +119,8 @@ class cache { // Fetches an existing key from the cache _without_ updating the LRU ordering. [[nodiscard]] const typename P::Value& get_existing(const KeyT& key) const; + const KeyT* last_key_or_nullptr() const noexcept; + // Returns true iff `key` existed in the mapping prior to the call, which also // implies the mapping has been updated by consuming `value` (i.e. its contents // has been std::move()'d away and it is now in a logically empty state). @@ -127,6 +139,10 @@ class cache { // size_bytes() <= capacity() && size() <= maxElements() using Lru::trim; + // Invokes functor `fn` for each segment key in LRU order (new to old) + template + void for_each_key(F fn); + [[nodiscard]] std::vector dump_segment_keys_in_lru_order(); using Lru::empty; @@ -248,6 +264,22 @@ class cache { [[nodiscard]] virtual MemoryUsage getStaticMemoryUsage() const; + /** + * Sets the size (in number of elements) of a probabilistic LFU frequency sketch + * used by the cache to gate insertions into its segments. The element count should + * be at least as large as the maximum _expected_ number of elements that the cache + * can hold at once. + * + * Setting the size to 0 disables the LFU functionality and frees allocated memory + * associated with any previous frequency sketch. + * + * Setting the size to >0 will always create a new sketch. 
The sketch will be + * initialized with the cache keys that are currently present in the cache segments, + * giving each existing entry an estimated frequency of 1. All preexisting frequency + * information about entries _not_ currently in the cache will be lost. + */ + void set_frequency_sketch_size(size_t cache_max_elem_count); + /** * Listeners for insertion and removal events that may be overridden by a subclass. * Important: implementations should never directly or indirectly modify the cache @@ -294,14 +326,16 @@ class cache { [[nodiscard]] virtual CacheStats get_stats() const; - size_t getHit() const noexcept { return _hit.load(std::memory_order_relaxed); } - size_t getMiss() const noexcept { return _miss.load(std::memory_order_relaxed); } - size_t getNonExisting() const noexcept { return _non_existing.load(std::memory_order_relaxed); } - size_t getRace() const noexcept { return _race.load(std::memory_order_relaxed); } - size_t getInsert() const noexcept { return _insert.load(std::memory_order_relaxed); } - size_t getWrite() const noexcept { return _write.load(std::memory_order_relaxed); } - size_t getInvalidate() const noexcept { return _invalidate.load(std::memory_order_relaxed); } - size_t getLookup() const noexcept { return _lookup.load(std::memory_order_relaxed); } + size_t getHit() const noexcept { return _hit.load(std::memory_order_relaxed); } + size_t getMiss() const noexcept { return _miss.load(std::memory_order_relaxed); } + size_t getNonExisting() const noexcept { return _non_existing.load(std::memory_order_relaxed); } + size_t getRace() const noexcept { return _race.load(std::memory_order_relaxed); } + size_t getInsert() const noexcept { return _insert.load(std::memory_order_relaxed); } + size_t getWrite() const noexcept { return _write.load(std::memory_order_relaxed); } + size_t getInvalidate() const noexcept { return _invalidate.load(std::memory_order_relaxed); } + size_t getLookup() const noexcept { return _lookup.load(std::memory_order_relaxed); } + size_t lfu_dropped() const noexcept { return _lfu_dropped.load(std::memory_order_relaxed); } + size_t lfu_not_promoted() const noexcept { return _lfu_not_promoted.load(std::memory_order_relaxed); } /** * Returns the number of bytes that are always implicitly added for each element @@ -322,9 +356,16 @@ class cache { private: // Implicitly updates LRU segment(s) on hit. // Precondition: _hashLock is held. 
- [[nodiscard]] bool try_fill_from_cache(const K& key, V& val_out); + [[nodiscard]] bool try_fill_from_cache(const K& key, V& val_out, const std::lock_guard& guard); [[nodiscard]] bool multi_segment() const noexcept { return _protected_segment.capacity_bytes() != 0; } + void lfu_add(const K& key) noexcept; + [[nodiscard]] uint8_t lfu_add_and_count(const K& key) noexcept; + [[nodiscard]] bool lfu_accepts_insertion(const K& key, const V& value, + const SizeConstrainedLru& segment, + uint8_t candidate_freq) const noexcept; + [[nodiscard]] bool lfu_accepts_insertion(const K& key, const V& value, const SizeConstrainedLru& segment); + void trim_segments(); void verifyHashLock(const UniqueLock& guard) const; [[nodiscard]] size_t calcSize(const K& k, const V& v) const noexcept { @@ -344,6 +385,8 @@ class cache { v.store(v.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); } + using SketchType = RelativeFrequencySketch; + [[no_unique_address]] Hash _hasher; [[no_unique_address]] SizeK _sizeK; [[no_unique_address]] SizeV _sizeV; @@ -357,7 +400,10 @@ class cache { mutable std::atomic _update; mutable std::atomic _invalidate; mutable std::atomic _lookup; + mutable std::atomic _lfu_dropped; + mutable std::atomic _lfu_not_promoted; BackingStore& _store; + std::unique_ptr _sketch; ProbationarySegmentLru _probationary_segment; ProtectedSegmentLru _protected_segment; diff --git a/vespalib/src/vespa/vespalib/stllike/cache.hpp b/vespalib/src/vespa/vespalib/stllike/cache.hpp index 23c3d12314b..b7ec01f0e03 100644 --- a/vespalib/src/vespa/vespalib/stllike/cache.hpp +++ b/vespalib/src/vespa/vespalib/stllike/cache.hpp @@ -4,6 +4,7 @@ #include "cache.h" #include "cache_stats.h" #include "lrucache_map.hpp" +#include namespace vespalib { @@ -80,6 +81,16 @@ cache

::SizeConstrainedLru::get_existing(const KeyT& key) const { return Lru::get(key); } +template +const typename P::Key* +cache

::SizeConstrainedLru::last_key_or_nullptr() const noexcept { + // There is no const_iterator on the LRU base class, so do an awkward const_cast instead. + // We don't do any direct or indirect mutations, so should be fully well-defined. + auto* mut_self = const_cast(this); + auto iter = mut_self->iter_to_last(); + return (iter != mut_self->end()) ? &iter.key() : nullptr; +} + template bool cache

::SizeConstrainedLru::try_get_and_ref(const KeyT& key, ValueT& val_out) { @@ -91,14 +102,23 @@ cache

::SizeConstrainedLru::try_get_and_ref(const KeyT& key, ValueT& val_out) return false; } +template +template +void +cache

::SizeConstrainedLru::for_each_key(F fn) { + for (auto it = Lru::begin(); it != Lru::end(); ++it) { + fn(it.key()); + } +} + template std::vector cache

::SizeConstrainedLru::dump_segment_keys_in_lru_order() { std::vector lru_keys; lru_keys.reserve(size()); - for (auto it = Lru::begin(); it != Lru::end(); ++it) { - lru_keys.emplace_back(it.key()); - } + for_each_key([&lru_keys](const KeyT& k) { + lru_keys.emplace_back(k); + }); return lru_keys; } @@ -220,7 +240,10 @@ cache

::cache(BackingStore& backing_store, _update(0), _invalidate(0), _lookup(0), + _lfu_dropped(0), + _lfu_not_promoted(0), _store(backing_store), + _sketch(), _probationary_segment(*this, max_probationary_bytes), _protected_segment(*this, max_protected_bytes) {} @@ -230,6 +253,64 @@ cache

::cache(BackingStore& backing_store, size_t max_bytes) : cache(backing_store, max_bytes, 0) {} +template +void cache

::set_frequency_sketch_size(size_t cache_max_elem_count) { + std::lock_guard guard(_hashLock); + if (cache_max_elem_count > 0) { + _sketch = std::make_unique(cache_max_elem_count, _hasher); + // (Re)setting the sketch loses all frequency knowledge, but we can at the + // very least pre-seed it with the information we _do_ have, which is that + // all elements already in the cache have an estimated frequency of >= 1. + auto pre_seed_sketch = [this](const K& key) { _sketch->add(key); }; + _probationary_segment.for_each_key(pre_seed_sketch); + _protected_segment.for_each_key(pre_seed_sketch); // no-op unless SLRU + } else { + _sketch.reset(); + } +} + +template +void +cache

::lfu_add(const K& key) noexcept { + if (_sketch) { + _sketch->add(key); + } +} + +template +uint8_t +cache

::lfu_add_and_count(const K& key) noexcept { + return _sketch ? _sketch->add_and_count(key) : 0; +} + +template +bool +cache

::lfu_accepts_insertion(const K& key, const V& value, + const SizeConstrainedLru& segment, + uint8_t candidate_freq) const noexcept +{ + if (!_sketch) { + return true; // Trivially accepts insertion, since there's no LFU policy + } + // TODO > capacity_bytes() instead of >=, this uses >= to be symmetric with removeOldest() + const bool would_displace = ((segment.size() >= segment.capacity()) || + (segment.size_bytes() + calcSize(key, value)) >= segment.capacity_bytes()); + const K* victim; + if (would_displace && (victim = segment.last_key_or_nullptr()) != nullptr) { + const auto existing_freq = _sketch->count_min(*victim); + // Frequency > instead of >= (i.e. must be _more_ popular, not just _as_ popular) + // empirically shows significantly better hit rates in our cache trace simulations. + return (candidate_freq > existing_freq); + } + return true; // No displacement, no reason to deny insertion. +} + +template +bool +cache

::lfu_accepts_insertion(const K& key, const V& value, const SizeConstrainedLru& segment) { + return !_sketch || lfu_accepts_insertion(key, value, segment, _sketch->count_min(key)); +} + template MemoryUsage cache

::getStaticMemoryUsage() const { @@ -254,7 +335,7 @@ cache

::read(const K& key, BackingStoreArgs&&... backing_store_args) V value; { std::lock_guard guard(_hashLock); - if (try_fill_from_cache(key, value)) { + if (try_fill_from_cache(key, value, guard)) { increment_stat(_hit, guard); return value; } else { @@ -265,34 +346,50 @@ cache

::read(const K& key, BackingStoreArgs&&... backing_store_args) std::lock_guard store_guard(getLock(key)); { std::lock_guard guard(_hashLock); - if (try_fill_from_cache(key, value)) { + if (try_fill_from_cache(key, value, guard)) { increment_stat(_race, guard); // Somebody else just fetched it ahead of me. return value; } } if (_store.read(key, value, std::forward(backing_store_args)...)) { std::lock_guard guard(_hashLock); - _probationary_segment.insert_and_update_size(key, value); - onInsert(key); - increment_stat(_insert, guard); + const auto new_freq = lfu_add_and_count(key); + if (lfu_accepts_insertion(key, value, _probationary_segment, new_freq)) { + _probationary_segment.insert_and_update_size(key, value); + onInsert(key); + increment_stat(_insert, guard); + } else { + increment_stat(_lfu_dropped, guard); + } } else { - _non_existing.fetch_add(1, std::memory_order_relaxed); + _non_existing.fetch_add(1, std::memory_order_relaxed); // Atomic since we're outside _hashLock } return value; } template bool -cache

::try_fill_from_cache(const K& key, V& val_out) { +cache

::try_fill_from_cache(const K& key, V& val_out, const std::lock_guard& guard) { if (_probationary_segment.try_get_and_ref(key, val_out)) { + // Hitting the cache bumps the sketch count regardless of LRU vs SLRU mode. + const auto new_freq = lfu_add_and_count(key); if (multi_segment()) { - // Hit on probationary item; move to protected segment - const bool erased = _probationary_segment.try_erase_and_update_size(key); - assert(erased); - _protected_segment.insert_and_update_size(key, val_out); + if (lfu_accepts_insertion(key, val_out, _protected_segment, new_freq)) { + // Hit on probationary item; move to protected segment + const bool erased = _probationary_segment.try_erase_and_update_size(key); + assert(erased); + _protected_segment.insert_and_update_size(key, val_out); + } else { + // Probationary element is not admitted to the VIP section of the protected segment, + // but _has_ been put at the head of the probationary segment, allowing it another + // chance to party with the stars. + increment_stat(_lfu_not_promoted, guard); + return true; + } } return true; } else if (multi_segment() && _protected_segment.try_get_and_ref(key, val_out)) { + lfu_add(key); return true; } return false; @@ -306,15 +403,20 @@ cache

::write(const K& key, V value) _store.write(key, value); { std::lock_guard guard(_hashLock); + // We do not update the frequency sketch on writes, only on reads. We _do_ consult + // the sketch when determining if a new element should displace an existing element. + // Important: `try_replace_and_update_size()` consumes `value` if replacing took place if (_probationary_segment.try_replace_and_update_size(key, value)) { increment_stat(_update, guard); } else if (multi_segment() && _protected_segment.try_replace_and_update_size(key, value)) { increment_stat(_update, guard); - } else { + } else if (lfu_accepts_insertion(key, value, _probationary_segment)) { // Always insert into probationary first _probationary_segment.insert_and_update_size(key, std::move(value)); onInsert(key); + } else { + increment_stat(_lfu_dropped, guard); } increment_stat(_write, guard); // TODO only increment when not updating? } diff --git a/vespalib/src/vespa/vespalib/stllike/lrucache_map.h b/vespalib/src/vespa/vespalib/stllike/lrucache_map.h index 7d8968dfcf3..78943415fd6 100644 --- a/vespalib/src/vespa/vespalib/stllike/lrucache_map.h +++ b/vespalib/src/vespa/vespalib/stllike/lrucache_map.h @@ -115,6 +115,12 @@ class lrucache_map : private P::HashTable */ const V & get(const K & key) const { return HashTable::find(key)->second._value; } + /** + * Returns an iterator to the tail of the LRU, i.e. the oldest element, or end() + * iff the mapping is empty. Note: this is not a reverse iterator. + */ + iterator iter_to_last() noexcept; + /** * This simply erases the object. */ diff --git a/vespalib/src/vespa/vespalib/stllike/lrucache_map.hpp b/vespalib/src/vespa/vespalib/stllike/lrucache_map.hpp index 944dd53a588..14ff93c0050 100644 --- a/vespalib/src/vespa/vespalib/stllike/lrucache_map.hpp +++ b/vespalib/src/vespa/vespalib/stllike/lrucache_map.hpp @@ -108,6 +108,12 @@ lrucache_map

::move(next_t from, next_t to) { } } +template +typename lrucache_map

::iterator +lrucache_map

::iter_to_last() noexcept { + return iterator(this, _tail); // If _tail is npos, this is implicitly == end() +} + template< typename P > void lrucache_map

::erase(const K & key) { From dd4dc36c4a977057f26d405b72f71ced790a56c1 Mon Sep 17 00:00:00 2001 From: Tor Brede Vekterli Date: Tue, 10 Dec 2024 16:39:07 +0000 Subject: [PATCH 2/2] Refactor LFU insertion check and add lower bound for sketch size --- vespalib/src/vespa/vespalib/stllike/cache.h | 4 +++- vespalib/src/vespa/vespalib/stllike/cache.hpp | 21 ++++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/vespalib/src/vespa/vespalib/stllike/cache.h b/vespalib/src/vespa/vespalib/stllike/cache.h index 1a983ca509f..ade1a2b96f6 100644 --- a/vespalib/src/vespa/vespalib/stllike/cache.h +++ b/vespalib/src/vespa/vespalib/stllike/cache.h @@ -276,7 +276,9 @@ class cache { * Setting the size to >0 will always create a new sketch. The sketch will be * initialized with the cache keys that are currently present in the cache segments, * giving each existing entry an estimated frequency of 1. All preexisting frequency - * information about entries _not_ currently in the cache will be lost. + * information about entries _not_ currently in the cache will be lost. To avoid + * pathological frequency estimates for existing entries, the sketch has a lower + * bound size of max(existing cache element count, cache_max_elem_count). */ void set_frequency_sketch_size(size_t cache_max_elem_count); diff --git a/vespalib/src/vespa/vespalib/stllike/cache.hpp b/vespalib/src/vespa/vespalib/stllike/cache.hpp index b7ec01f0e03..a3ff1a29c7c 100644 --- a/vespalib/src/vespa/vespalib/stllike/cache.hpp +++ b/vespalib/src/vespa/vespalib/stllike/cache.hpp @@ -257,7 +257,9 @@ template void cache

::set_frequency_sketch_size(size_t cache_max_elem_count) { std::lock_guard guard(_hashLock); if (cache_max_elem_count > 0) { - _sketch = std::make_unique(cache_max_elem_count, _hasher); + // Ensure we can count our existing cached elements, if any. + size_t effective_elem_count = std::max(size(), cache_max_elem_count); + _sketch = std::make_unique(effective_elem_count, _hasher); // (Re)setting the sketch loses all frequency knowledge, but we can at the // very least pre-seed it with the information we _do_ have, which is that // all elements already in the cache have an estimated frequency of >= 1. @@ -295,14 +297,17 @@ cache

::lfu_accepts_insertion(const K& key, const V& value, // TODO > capacity_bytes() instead of >=, this uses >= to be symmetric with removeOldest() const bool would_displace = ((segment.size() >= segment.capacity()) || (segment.size_bytes() + calcSize(key, value)) >= segment.capacity_bytes()); - const K* victim; - if (would_displace && (victim = segment.last_key_or_nullptr()) != nullptr) { - const auto existing_freq = _sketch->count_min(*victim); - // Frequency > instead of >= (i.e. must be _more_ popular, not just _as_ popular) - // empirically shows significantly better hit rates in our cache trace simulations. - return (candidate_freq > existing_freq); + if (!would_displace) { + return true; // No displacement, no reason to deny insertion } - return true; // No displacement, no reason to deny insertion. + const K* victim = segment.last_key_or_nullptr(); + if (!victim) { + return true; // Cache segment is empty, allow at least one entry + } + const auto existing_freq = _sketch->count_min(*victim); + // Frequency > instead of >= (i.e. must be _more_ popular, not just _as_ popular) + // empirically shows significantly better hit rates in our cache trace simulations. + return (candidate_freq > existing_freq); } template
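
As a usage-level illustration of the API this patch introduces: a cache owner only needs the new set_frequency_sketch_size() call, sized to at least the maximum expected element count, plus the two new counters for observing the gating. The snippet below is a sketch only; `CacheTypes` and `MyBackingStore` are hypothetical stand-ins for a concrete CacheParam specialization and its backing store, and the byte and element limits are arbitrary. Only the member functions themselves come from this patch.

    // Hypothetical wiring; CacheTypes and MyBackingStore are illustrative placeholders,
    // not types defined by this patch.
    MyBackingStore store;
    vespalib::cache<CacheTypes> c(store,
                                  64u * 1024 * 1024,  // probationary segment bytes
                                  16u * 1024 * 1024); // protected segment bytes (> 0 => SLRU mode)
    c.maxElements(100'000, 25'000);
    c.set_frequency_sketch_size(125'000); // >= max expected elements; 0 disables LFU gating

    auto value = c.read(42);                    // read-through; probationary insertion is LFU-gated
    size_t dropped      = c.lfu_dropped();      // candidates denied entry to a segment
    size_t not_promoted = c.lfu_not_promoted(); // SLRU only: denied probationary -> protected moves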
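
The admission rule itself is compact: a candidate may only displace the LRU tail of a full segment if the sketch estimates it to be strictly more frequent than that tail element; if the segment still has room, or is empty, insertion is always allowed. The following self-contained restatement of the rule uses an exact std::unordered_map count in place of the probabilistic sketch and is illustrative only; the struct and function names are invented for this example rather than taken from the patch.

    #include <cstddef>
    #include <cstdint>
    #include <optional>
    #include <unordered_map>

    // Exact counts stand in for the probabilistic frequency estimates used by the cache.
    using FrequencyMap = std::unordered_map<uint64_t, uint8_t>;

    struct SegmentState {
        size_t size = 0;                      // current element count
        size_t capacity = 0;                  // max element count
        size_t size_bytes = 0;                // current memory footprint
        size_t capacity_bytes = 0;            // max memory footprint
        std::optional<uint64_t> lru_tail_key; // displacement victim, if any
    };

    // Mirrors the intent of lfu_accepts_insertion(): admit the candidate unless it would
    // displace a tail element that is estimated to be at least as popular as the candidate.
    bool accepts_insertion(const FrequencyMap& freq, const SegmentState& seg,
                           uint64_t candidate_key, size_t candidate_bytes) {
        const bool would_displace = (seg.size >= seg.capacity) ||
                                    (seg.size_bytes + candidate_bytes >= seg.capacity_bytes);
        if (!would_displace || !seg.lru_tail_key) {
            return true; // room left, or nothing to displace
        }
        auto count_of = [&freq](uint64_t key) -> uint8_t {
            auto it = freq.find(key);
            return (it != freq.end()) ? it->second : 0;
        };
        // Strict '>' rather than '>=': the candidate must be *more* popular than the victim,
        // matching the comment in the patch about better hit rates in trace simulations.
        return count_of(candidate_key) > count_of(*seg.lru_tail_key);
    }

In the patch, the equivalent check runs both for initial probationary insertion (read-through and write-through) and, in SLRU mode, for promotion into the protected segment, where a denial leaves the entry at the probationary LRU head instead.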
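
The frequency estimates come from vespalib's RelativeFrequencySketch, whose implementation is outside this diff; only its add(), add_and_count() and count_min() surface is visible here. For intuition only, a generic count-min-style estimator with saturating 8-bit counters could look roughly like the sketch below. This is an assumption about the general technique, not the actual vespalib data structure, and every name in it is invented for illustration.

    #include <algorithm>
    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <vector>

    // Generic count-min style estimator: a few hashed rows of saturating counters.
    // add() bumps one counter per row; count_min() returns the smallest of those
    // counters, which (probabilistically) upper-bounds the true access frequency.
    class TinyFrequencySketch {
    public:
        explicit TinyFrequencySketch(size_t width)
            : _rows{std::vector<uint8_t>(width), std::vector<uint8_t>(width)} {}

        void add(uint64_t key) { add_and_count(key); }

        uint8_t add_and_count(uint64_t key) {
            uint8_t min_after = UINT8_MAX;
            for (size_t row = 0; row < _rows.size(); ++row) {
                uint8_t& c = _rows[row][slot(key, row)];
                if (c < UINT8_MAX) {
                    ++c; // saturating increment
                }
                min_after = std::min(min_after, c);
            }
            return min_after;
        }

        uint8_t count_min(uint64_t key) const {
            uint8_t m = UINT8_MAX;
            for (size_t row = 0; row < _rows.size(); ++row) {
                m = std::min(m, _rows[row][slot(key, row)]);
            }
            return m;
        }

    private:
        size_t slot(uint64_t key, size_t row) const {
            // Cheap per-row mixing; a production sketch would use stronger hashing.
            return std::hash<uint64_t>{}(key ^ (0x9e3779b97f4a7c15ULL * (row + 1))) % _rows[row].size();
        }

        std::array<std::vector<uint8_t>, 2> _rows;
    };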