From 8380a9695c3cdb6af79868cf2dd84a7d3c4a7b54 Mon Sep 17 00:00:00 2001
From: sparknack <shawn.wang@zilliz.com>
Date: Tue, 7 Jan 2025 10:38:49 +0800
Subject: [PATCH] memory usage optimization for sparse vector (#1011)

* sparse: remove raw data cache in sparse inverted index

To reduce memory usage, remove the raw data cache in sparse inverted index.
Note that config param `drop_ratio_build` and GetVectorByIds() are also be
removed.

Signed-off-by: Shawn Wang <shawn.wang@zilliz.com>

* sparse: quantize float to uint16_t for BM25 inverted index

To reduce the memory usage of the inverted index, when BM25 metric is used,
quantize term frequency from float to uint16_t. All values that exceeds the
maximum of uint16_t is quantized to the maximum of uint16_t.

Signed-off-by: Shawn Wang <shawn.wang@zilliz.com>

---------

Signed-off-by: Shawn Wang <shawn.wang@zilliz.com>
---
 CMakeLists.txt                                |   4 +
 include/knowhere/sparse_utils.h               |  97 +++-
 python/setup.py                               |   1 +
 src/index/sparse/sparse_index_node.cc         |  89 ++--
 src/index/sparse/sparse_inverted_index.h      | 485 +++++++++---------
 .../sparse/sparse_inverted_index_config.h     |   1 +
 tests/ut/test_sparse.cc                       | 178 +------
 tests/ut/utils.h                              |  29 ++
 8 files changed, 440 insertions(+), 444 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9235e6e92..329c5c0b6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -89,6 +89,9 @@ include(cmake/libs/libhnsw.cmake)
 
 include_directories(thirdparty/faiss)
 
+find_package(Boost REQUIRED)
+include_directories(${Boost_INCLUDE_DIRS})
+
 find_package(OpenMP REQUIRED)
 
 find_package(folly REQUIRED)
@@ -177,6 +180,7 @@ endif()
 include_directories(src)
 include_directories(include)
 
+list(APPEND KNOWHERE_LINKER_LIBS Boost::boost)
 list(APPEND KNOWHERE_LINKER_LIBS faiss)
 list(APPEND KNOWHERE_LINKER_LIBS glog::glog)
 list(APPEND KNOWHERE_LINKER_LIBS nlohmann_json::nlohmann_json)
diff --git a/include/knowhere/sparse_utils.h b/include/knowhere/sparse_utils.h
index ba582a5a2..be069c559 100644
--- a/include/knowhere/sparse_utils.h
+++ b/include/knowhere/sparse_utils.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include <algorithm>
+#include <boost/iterator/iterator_facade.hpp>
 #include <cstddef>
 #include <cstdint>
 #include <cstring>
@@ -59,6 +60,33 @@ GetDocValueBM25Computer(float k1, float b, float avgdl) {
     };
 }
 
+// A docid filter that tests whether a given id is in the list of docids, which is regarded as another form of BitSet.
+// Note that all ids to be tested must be tested exactly once and in order.
+class DocIdFilterByVector {
+ public:
+    DocIdFilterByVector(std::vector<table_t>&& docids) : docids_(std::move(docids)) {
+        std::sort(docids_.begin(), docids_.end());
+    }
+
+    [[nodiscard]] bool
+    test(const table_t id) {
+        // find the first id that is greater than or equal to the specific id
+        while (pos_ < docids_.size() && docids_[pos_] < id) {
+            ++pos_;
+        }
+        return !(pos_ < docids_.size() && docids_[pos_] == id);
+    }
+
+    [[nodiscard]] bool
+    empty() const {
+        return docids_.empty();
+    }
+
+ private:
+    std::vector<table_t> docids_;
+    size_t pos_ = 0;
+};
+
 template <typename T>
 class SparseRow {
     static_assert(std::is_same_v<T, fp32>, "SparseRow supports float only");
@@ -72,6 +100,15 @@ class SparseRow {
     SparseRow(size_t count, uint8_t* data, bool own_data) : data_(data), count_(count), own_data_(own_data) {
     }
 
+    SparseRow(const std::vector<std::pair<table_t, T>>& data) : count_(data.size()), own_data_(true) {
+        data_ = new uint8_t[count_ * element_size()];
+        for (size_t i = 0; i < count_; ++i) {
+            auto* elem = reinterpret_cast<ElementProxy*>(data_) + i;
+            elem->index = data[i].first;
+            elem->value = data[i].second;
+        }
+    }
+
     // copy constructor and copy assignment operator perform deep copy
     SparseRow(const SparseRow<T>& other) : SparseRow(other.count_) {
         std::memcpy(data_, other.data_, data_byte_size());
@@ -147,6 +184,9 @@ class SparseRow {
 
     void
     set_at(size_t i, table_t index, T value) {
+        if (i >= count_) {
+            throw std::out_of_range("set_at on a SparseRow with invalid index");
+        }
         auto* elem = reinterpret_cast<ElementProxy*>(data_) + i;
         elem->index = index;
         elem->value = value;
@@ -300,12 +340,12 @@ class GrowableVectorView {
         mmap_element_count_ = 0;
     }
 
-    size_type
+    [[nodiscard]] size_type
     capacity() const {
         return mmap_byte_size_ / sizeof(T);
     }
 
-    size_type
+    [[nodiscard]] size_type
     size() const {
         return mmap_element_count_;
     }
@@ -346,6 +386,59 @@ class GrowableVectorView {
         return reinterpret_cast<const T*>(mmap_data_)[i];
     }
 
+    class iterator : public boost::iterator_facade<iterator, T, boost::random_access_traversal_tag, T&> {
+     public:
+        iterator() = default;
+        explicit iterator(T* ptr) : ptr_(ptr) {
+        }
+
+        friend class GrowableVectorView;
+        friend class boost::iterator_core_access;
+
+        T&
+        dereference() const {
+            return *ptr_;
+        }
+
+        void
+        increment() {
+            ++ptr_;
+        }
+
+        void
+        decrement() {
+            --ptr_;
+        }
+
+        void
+        advance(std::ptrdiff_t n) {
+            ptr_ += n;
+        }
+
+        std::ptrdiff_t
+        distance_to(const iterator& other) const {
+            return other.ptr_ - ptr_;
+        }
+
+        bool
+        equal(const iterator& other) const {
+            return ptr_ == other.ptr_;
+        }
+
+     private:
+        T* ptr_ = nullptr;
+    };
+
+    iterator
+    begin() const {
+        return iterator(reinterpret_cast<T*>(mmap_data_));
+    }
+
+    iterator
+    end() const {
+        return iterator(reinterpret_cast<T*>(mmap_data_) + mmap_element_count_);
+    }
+
  private:
     void* mmap_data_ = nullptr;
     size_type mmap_byte_size_ = 0;
diff --git a/python/setup.py b/python/setup.py
index 8893901fe..8a0f32140 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -51,6 +51,7 @@ def get_readme():
     get_numpy_include(),
     os.path.join("..", "include"),
     os.path.join("..", "thirdparty"),
+    get_thirdparty_prefix("boost-headers") + "/include",
     get_thirdparty_prefix("nlohmann_json") + "/include",
     get_thirdparty_prefix("libglog") + "/include",
     get_thirdparty_prefix("gflags") + "/include"
diff --git a/src/index/sparse/sparse_index_node.cc b/src/index/sparse/sparse_index_node.cc
index fa5897b3d..b3bccf3a7 100644
--- a/src/index/sparse/sparse_index_node.cc
+++ b/src/index/sparse/sparse_index_node.cc
@@ -19,7 +19,6 @@
 #include "knowhere/config.h"
 #include "knowhere/dataset.h"
 #include "knowhere/expected.h"
-#include "knowhere/feature.h"
 #include "knowhere/index/index_factory.h"
 #include "knowhere/index/index_node.h"
 #include "knowhere/log.h"
@@ -54,14 +53,12 @@ class SparseInvertedIndexNode : public IndexNode {
             LOG_KNOWHERE_ERROR_ << Type() << " only support metric_type IP or BM25";
             return Status::invalid_metric_type;
         }
-        auto drop_ratio_build = cfg.drop_ratio_build.value_or(0.0f);
         auto index_or = CreateIndex</*mmapped=*/false>(cfg);
         if (!index_or.has_value()) {
             return index_or.error();
         }
         auto index = index_or.value();
-        index->Train(static_cast<const sparse::SparseRow<T>*>(dataset->GetTensor()), dataset->GetRows(),
-                     drop_ratio_build);
+        index->Train(static_cast<const sparse::SparseRow<T>*>(dataset->GetTensor()), dataset->GetRows());
         if (index_ != nullptr) {
             LOG_KNOWHERE_WARNING_ << Type() << " has already been created, deleting old";
             DeleteExistingIndex();
@@ -127,7 +124,7 @@ class SparseInvertedIndexNode : public IndexNode {
      public:
         RefineIterator(const sparse::BaseInvertedIndex<T>* index, sparse::SparseRow<T>&& query,
                        std::shared_ptr<PrecomputedDistanceIterator> precomputed_it,
-                       const sparse::DocValueComputer<T>& computer, bool use_knowhere_search_pool = true,
+                       const sparse::DocValueComputer<float>& computer, bool use_knowhere_search_pool = true,
                        const float refine_ratio = 0.5f)
             : IndexIterator(true, use_knowhere_search_pool, refine_ratio),
               index_(index),
@@ -158,7 +155,7 @@ class SparseInvertedIndexNode : public IndexNode {
      private:
         const sparse::BaseInvertedIndex<T>* index_;
         sparse::SparseRow<T> query_;
-        const sparse::DocValueComputer<T> computer_;
+        const sparse::DocValueComputer<float> computer_;
         std::shared_ptr<PrecomputedDistanceIterator> precomputed_it_;
         bool first_return_ = true;
     };
@@ -185,7 +182,7 @@ class SparseInvertedIndexNode : public IndexNode {
         auto computer = computer_or.value();
         auto drop_ratio_search = cfg.drop_ratio_search.value_or(0.0f);
 
-        const bool approximated = index_->IsApproximated() || drop_ratio_search > 0;
+        const bool approximated = drop_ratio_search > 0;
 
         auto vec = std::vector<std::shared_ptr<IndexNode::iterator>>(nq, nullptr);
         try {
@@ -228,37 +225,17 @@ class SparseInvertedIndexNode : public IndexNode {
 
     [[nodiscard]] expected<DataSetPtr>
     GetVectorByIds(const DataSetPtr dataset) const override {
-        if (!index_) {
-            return expected<DataSetPtr>::Err(Status::empty_index, "index not loaded");
-        }
-
-        auto rows = dataset->GetRows();
-        auto ids = dataset->GetIds();
-
-        auto data = std::make_unique<sparse::SparseRow<T>[]>(rows);
-        int64_t dim = 0;
-        try {
-            for (int64_t i = 0; i < rows; ++i) {
-                auto& target = data[i];
-                index_->GetVectorById(ids[i], target);
-                dim = std::max(dim, target.dim());
-            }
-        } catch (std::exception& e) {
-            return expected<DataSetPtr>::Err(Status::invalid_args, "GetVectorByIds failed");
-        }
-        auto res = GenResultDataSet(rows, dim, data.release());
-        res->SetIsSparse(true);
-        return res;
+        return expected<DataSetPtr>::Err(Status::not_implemented, "GetVectorByIds not implemented");
     }
 
     [[nodiscard]] bool
     HasRawData(const std::string& metric_type) const override {
-        return true;
+        return false;
     }
 
     [[nodiscard]] expected<DataSetPtr>
     GetIndexMeta(std::unique_ptr<Config> cfg) const override {
-        throw std::runtime_error("GetIndexMeta not supported for current index type");
+        return expected<DataSetPtr>::Err(Status::not_implemented, "GetIndexMeta not supported for current index type");
     }
 
     Status
@@ -292,7 +269,7 @@ class SparseInvertedIndexNode : public IndexNode {
             return index_or.error();
         }
         index_ = index_or.value();
-        return index_->Load(reader);
+        return index_->Load(reader, 0, "");
     }
 
     Status
@@ -301,29 +278,36 @@ class SparseInvertedIndexNode : public IndexNode {
             LOG_KNOWHERE_WARNING_ << Type() << " has already been created, deleting old";
             DeleteExistingIndex();
         }
+
         auto cfg = static_cast<const knowhere::SparseInvertedIndexConfig&>(*config);
+        auto index_or = CreateIndex</*mmapped=*/true>(cfg);
+        if (!index_or.has_value()) {
+            return index_or.error();
+        }
+        index_ = index_or.value();
+
         auto reader = knowhere::FileReader(filename);
-        map_size_ = reader.size();
+        size_t map_size = reader.size();
         int map_flags = MAP_SHARED;
 #ifdef MAP_POPULATE
         if (cfg.enable_mmap_pop.has_value() && cfg.enable_mmap_pop.value()) {
             map_flags |= MAP_POPULATE;
         }
 #endif
-        map_ = static_cast<char*>(mmap(nullptr, map_size_, PROT_READ, map_flags, reader.descriptor(), 0));
-        if (map_ == MAP_FAILED) {
-            LOG_KNOWHERE_ERROR_ << "Failed to mmap file: " << strerror(errno);
+        void* mapped_memory = mmap(nullptr, map_size, PROT_READ, map_flags, reader.descriptor(), 0);
+        if (mapped_memory == MAP_FAILED) {
+            LOG_KNOWHERE_ERROR_ << "Failed to mmap file " << filename << ": " << strerror(errno);
             return Status::disk_file_error;
         }
-        if (madvise(map_, map_size_, MADV_RANDOM) != 0) {
-            LOG_KNOWHERE_WARNING_ << "Failed to madvise file: " << strerror(errno);
-        }
-        auto index_or = CreateIndex</*mmapped=*/true>(cfg);
-        if (!index_or.has_value()) {
-            return index_or.error();
-        }
-        index_ = index_or.value();
-        MemoryIOReader map_reader((uint8_t*)map_, map_size_);
+
+        auto cleanup_mmap = [map_size, filename](void* map_addr) {
+            if (munmap(map_addr, map_size) != 0) {
+                LOG_KNOWHERE_ERROR_ << "Failed to munmap file " << filename << ": " << strerror(errno);
+            }
+        };
+        std::unique_ptr<void, decltype(cleanup_mmap)> mmap_guard(mapped_memory, cleanup_mmap);
+
+        MemoryIOReader map_reader(reinterpret_cast<uint8_t*>(mapped_memory), map_size);
         auto supplement_target_filename = filename + ".knowhere_sparse_index_supplement";
         return index_->Load(map_reader, map_flags, supplement_target_filename);
     }
@@ -364,7 +348,8 @@ class SparseInvertedIndexNode : public IndexNode {
     expected<sparse::BaseInvertedIndex<T>*>
     CreateIndex(const SparseInvertedIndexConfig& cfg) const {
         if (IsMetricType(cfg.metric_type.value(), metric::BM25)) {
-            auto idx = new sparse::InvertedIndex<T, use_wand, true, mmapped>();
+            // quantize float to uint16_t when BM25 metric type is used.
+            auto idx = new sparse::InvertedIndex<T, uint16_t, use_wand, true, mmapped>();
             if (!cfg.bm25_k1.has_value() || !cfg.bm25_b.has_value() || !cfg.bm25_avgdl.has_value()) {
                 return expected<sparse::BaseInvertedIndex<T>*>::Err(
                     Status::invalid_args, "BM25 parameters k1, b, and avgdl must be set when building/loading");
@@ -376,7 +361,7 @@ class SparseInvertedIndexNode : public IndexNode {
             idx->SetBM25Params(k1, b, avgdl, max_score_ratio);
             return idx;
         } else {
-            return new sparse::InvertedIndex<T, use_wand, false, mmapped>();
+            return new sparse::InvertedIndex<T, T, use_wand, false, mmapped>();
         }
     }
 
@@ -386,23 +371,11 @@ class SparseInvertedIndexNode : public IndexNode {
             delete index_;
             index_ = nullptr;
         }
-        if (map_ != nullptr) {
-            auto res = munmap(map_, map_size_);
-            if (res != 0) {
-                LOG_KNOWHERE_ERROR_ << "Failed to munmap when trying to delete index: " << strerror(errno);
-            }
-            map_ = nullptr;
-            map_size_ = 0;
-        }
     }
 
     sparse::BaseInvertedIndex<T>* index_{};
     std::shared_ptr<ThreadPool> search_pool_;
     std::shared_ptr<ThreadPool> build_pool_;
-
-    // if map_ is not nullptr, it means the index is mmapped from disk.
-    char* map_ = nullptr;
-    size_t map_size_ = 0;
 };  // class SparseInvertedIndexNode
 
 // Concurrent version of SparseInvertedIndexNode
diff --git a/src/index/sparse/sparse_inverted_index.h b/src/index/sparse/sparse_inverted_index.h
index 06665894a..611b0dd36 100644
--- a/src/index/sparse/sparse_inverted_index.h
+++ b/src/index/sparse/sparse_inverted_index.h
@@ -20,13 +20,14 @@
 #include <filesystem>
 #include <fstream>
 #include <iostream>
-#include <queue>
+#include <memory>
 #include <unordered_map>
 #include <vector>
 
 #include "index/sparse/sparse_inverted_index_config.h"
 #include "io/memory_io.h"
 #include "knowhere/bitsetview.h"
+#include "knowhere/comp/index_param.h"
 #include "knowhere/expected.h"
 #include "knowhere/log.h"
 #include "knowhere/sparse_utils.h"
@@ -44,10 +45,10 @@ class BaseInvertedIndex {
     // supplement_target_filename: when in mmap mode, we need an extra file to store the mmaped index data structure.
     // this file will be created during loading and deleted in the destructor.
     virtual Status
-    Load(MemoryIOReader& reader, int map_flags = MAP_SHARED, const std::string& supplement_target_filename = "") = 0;
+    Load(MemoryIOReader& reader, int map_flags, const std::string& supplement_target_filename) = 0;
 
     virtual Status
-    Train(const SparseRow<T>* data, size_t rows, float drop_ratio_build) = 0;
+    Train(const SparseRow<T>* data, size_t rows) = 0;
 
     virtual Status
     Add(const SparseRow<T>* data, size_t rows, int64_t dim) = 0;
@@ -61,17 +62,11 @@ class BaseInvertedIndex {
                     const DocValueComputer<T>& computer) const = 0;
 
     virtual float
-    GetRawDistance(const label_t id, const SparseRow<T>& query, const DocValueComputer<T>& computer) const = 0;
-
-    virtual void
-    GetVectorById(const label_t id, SparseRow<T>& output) const = 0;
+    GetRawDistance(const label_t vec_id, const SparseRow<T>& query, const DocValueComputer<T>& computer) const = 0;
 
     virtual expected<DocValueComputer<T>>
     GetDocValueComputer(const SparseInvertedIndexConfig& cfg) const = 0;
 
-    virtual bool
-    IsApproximated() const = 0;
-
     [[nodiscard]] virtual size_t
     size() const = 0;
 
@@ -82,8 +77,8 @@ class BaseInvertedIndex {
     n_cols() const = 0;
 };
 
-template <typename T, bool use_wand = false, bool bm25 = false, bool mmapped = false>
-class InvertedIndex : public BaseInvertedIndex<T> {
+template <typename DType, typename QType, bool use_wand = false, bool bm25 = false, bool mmapped = false>
+class InvertedIndex : public BaseInvertedIndex<DType> {
  public:
     explicit InvertedIndex() {
     }
@@ -106,12 +101,15 @@ class InvertedIndex : public BaseInvertedIndex<T> {
         }
     }
 
+    template <typename U>
+    using Vector = std::conditional_t<mmapped, GrowableVectorView<U>, std::vector<U>>;
+
     void
     SetBM25Params(float k1, float b, float avgdl, float max_score_ratio) {
         bm25_params_ = std::make_unique<BM25Params>(k1, b, avgdl, max_score_ratio);
     }
 
-    expected<DocValueComputer<T>>
+    expected<DocValueComputer<float>>
     GetDocValueComputer(const SparseInvertedIndexConfig& cfg) const override {
         // if metric_type is set in config, it must match with how the index was built.
         auto metric_type = cfg.metric_type;
@@ -119,33 +117,34 @@ class InvertedIndex : public BaseInvertedIndex<T> {
             if (metric_type.has_value() && !IsMetricType(metric_type.value(), metric::IP)) {
                 auto msg =
                     "metric type not match, expected: " + std::string(metric::IP) + ", got: " + metric_type.value();
-                return expected<DocValueComputer<T>>::Err(Status::invalid_metric_type, msg);
+                return expected<DocValueComputer<float>>::Err(Status::invalid_metric_type, msg);
             }
-            return GetDocValueOriginalComputer<T>();
+            return GetDocValueOriginalComputer<float>();
         }
         if (metric_type.has_value() && !IsMetricType(metric_type.value(), metric::BM25)) {
             auto msg =
                 "metric type not match, expected: " + std::string(metric::BM25) + ", got: " + metric_type.value();
-            return expected<DocValueComputer<T>>::Err(Status::invalid_metric_type, msg);
+            return expected<DocValueComputer<float>>::Err(Status::invalid_metric_type, msg);
         }
         // avgdl must be supplied during search
         if (!cfg.bm25_avgdl.has_value()) {
-            return expected<DocValueComputer<T>>::Err(Status::invalid_args, "avgdl must be supplied during searching");
+            return expected<DocValueComputer<float>>::Err(Status::invalid_args,
+                                                          "avgdl must be supplied during searching");
         }
         auto avgdl = cfg.bm25_avgdl.value();
         if constexpr (use_wand) {
             // wand: search time k1/b must equal load time config.
             if ((cfg.bm25_k1.has_value() && cfg.bm25_k1.value() != bm25_params_->k1) ||
                 ((cfg.bm25_b.has_value() && cfg.bm25_b.value() != bm25_params_->b))) {
-                return expected<DocValueComputer<T>>::Err(
+                return expected<DocValueComputer<float>>::Err(
                     Status::invalid_args, "search time k1/b must equal load time config for WAND index.");
             }
-            return GetDocValueBM25Computer<T>(bm25_params_->k1, bm25_params_->b, avgdl);
+            return GetDocValueBM25Computer<float>(bm25_params_->k1, bm25_params_->b, avgdl);
         } else {
             // inverted index: search time k1/b may override load time config.
             auto k1 = cfg.bm25_k1.has_value() ? cfg.bm25_k1.value() : bm25_params_->k1;
             auto b = cfg.bm25_b.has_value() ? cfg.bm25_b.value() : bm25_params_->b;
-            return GetDocValueBM25Computer<T>(k1, b, avgdl);
+            return GetDocValueBM25Computer<float>(k1, b, avgdl);
         }
     }
 
@@ -156,35 +155,58 @@ class InvertedIndex : public BaseInvertedIndex<T> {
          *
          * 1. size_t rows
          * 2. size_t cols
-         * 3. T value_threshold_
+         * 3. DType value_threshold_ (deprecated)
          * 4. for each row:
          *     1. size_t len
          *     2. for each non-zero value:
          *        1. table_t idx
-         *        2. T val
+         *        2. DType val (when QType is different from DType, the QType value of val is stored as a DType with
+         *           precision loss)
          *
-         * inverted_lut_ and max_score_in_dim_ not serialized, they will be
-         * constructed dynamically during deserialization.
+         * inverted_index_ids_, inverted_index_vals_ and max_score_in_dim_ are
+         * not serialized, they will be constructed dynamically during
+         * deserialization.
          *
          * Data are densely packed in serialized bytes and no padding is added.
          */
-        writeBinaryPOD(writer, n_rows_internal());
-        writeBinaryPOD(writer, n_cols_internal());
-        writeBinaryPOD(writer, value_threshold_);
-        for (size_t i = 0; i < n_rows_internal(); ++i) {
-            auto& row = raw_data_[i];
-            writeBinaryPOD(writer, row.size());
-            if (row.size() == 0) {
+        DType deprecated_value_threshold = 0;
+        writeBinaryPOD(writer, n_rows_internal_);
+        writeBinaryPOD(writer, max_dim_);
+        writeBinaryPOD(writer, deprecated_value_threshold);
+        BitsetView bitset(nullptr, 0);
+
+        std::vector<Cursor<BitsetView>> cursors;
+        for (size_t i = 0; i < inverted_index_ids_.size(); ++i) {
+            cursors.emplace_back(inverted_index_ids_[i], inverted_index_vals_[i], n_rows_internal_, 0, 0, bitset);
+        }
+
+        auto dim_map_reverse = std::unordered_map<uint32_t, table_t>();
+        for (auto dim_it = dim_map_.begin(); dim_it != dim_map_.end(); ++dim_it) {
+            dim_map_reverse[dim_it->second] = dim_it->first;
+        }
+
+        for (table_t vec_id = 0; vec_id < n_rows_internal_; ++vec_id) {
+            std::vector<std::pair<table_t, DType>> vec_row;
+            for (size_t i = 0; i < inverted_index_ids_.size(); ++i) {
+                if (cursors[i].cur_vec_id_ == vec_id) {
+                    vec_row.emplace_back(dim_map_reverse[i], cursors[i].cur_vec_val());
+                    cursors[i].next();
+                }
+            }
+
+            SparseRow<DType> raw_row(vec_row);
+            writeBinaryPOD(writer, raw_row.size());
+            if (raw_row.size() == 0) {
                 continue;
             }
-            writer.write(row.data(), row.size() * SparseRow<T>::element_size());
+            writer.write(raw_row.data(), raw_row.size() * SparseRow<DType>::element_size());
         }
+
         return Status::success;
     }
-
     Status
-    Load(MemoryIOReader& reader, int map_flags = MAP_SHARED,
-         const std::string& supplement_target_filename = "") override {
+    Load(MemoryIOReader& reader, int map_flags, const std::string& supplement_target_filename) override {
+        DType deprecated_value_threshold;
         int64_t rows;
         readBinaryPOD(reader, rows);
         // previous versions used the signness of rows to indicate whether to
@@ -192,15 +214,11 @@ class InvertedIndex : public BaseInvertedIndex<T> {
         // take the absolute value of rows.
         rows = std::abs(rows);
         readBinaryPOD(reader, max_dim_);
-        readBinaryPOD(reader, value_threshold_);
-        if (value_threshold_ > 0) {
-            drop_during_build_ = true;
-        }
+        readBinaryPOD(reader, deprecated_value_threshold);
 
         if constexpr (mmapped) {
             RETURN_IF_ERROR(PrepareMmap(reader, rows, map_flags, supplement_target_filename));
         } else {
-            raw_data_.reserve(rows);
             if constexpr (bm25) {
                 bm25_params_->row_sums.reserve(rows);
             }
@@ -209,17 +227,21 @@ class InvertedIndex : public BaseInvertedIndex<T> {
         for (int64_t i = 0; i < rows; ++i) {
             size_t count;
             readBinaryPOD(reader, count);
+            SparseRow<DType> raw_row;
             if constexpr (mmapped) {
-                raw_data_.emplace_back(count, reader.data() + reader.tellg(), false);
-                reader.advance(count * SparseRow<T>::element_size());
+                raw_row = std::move(SparseRow<DType>(count, reader.data() + reader.tellg(), false));
+                reader.advance(count * SparseRow<DType>::element_size());
             } else {
-                raw_data_.emplace_back(count);
+                raw_row = std::move(SparseRow<DType>(count));
                 if (count > 0) {
-                    reader.read(raw_data_[i].data(), count * SparseRow<T>::element_size());
+                    reader.read(raw_row.data(), count * SparseRow<DType>::element_size());
                 }
             }
-            add_row_to_index(raw_data_[i], i);
+            add_row_to_index(raw_row, i);
         }
+
+        n_rows_internal_ = rows;
+
         return Status::success;
     }
 
@@ -227,7 +249,7 @@ class InvertedIndex : public BaseInvertedIndex<T> {
     Status
     PrepareMmap(MemoryIOReader& reader, size_t rows, int map_flags, const std::string& supplement_target_filename) {
         const auto initial_reader_location = reader.tellg();
-        const auto nnz = (reader.remaining() - (rows * sizeof(size_t))) / SparseRow<T>::element_size();
+        const auto nnz = (reader.remaining() - (rows * sizeof(size_t))) / SparseRow<DType>::element_size();
 
         // count raw vector idx occurrences
         std::unordered_map<table_t, size_t> idx_counts;
@@ -242,22 +264,23 @@ class InvertedIndex : public BaseInvertedIndex<T> {
                 readBinaryPOD(reader, idx);
                 idx_counts[idx]++;
                 // skip value
-                reader.advance(sizeof(T));
+                reader.advance(sizeof(DType));
             }
         }
         // reset reader to the beginning
         reader.seekg(initial_reader_location);
 
-        auto raw_data_byte_size = rows * sizeof(typename decltype(raw_data_)::value_type);
-        auto inverted_lut_byte_size = idx_counts.size() * sizeof(typename decltype(inverted_lut_)::value_type);
-        // actually due to drop_ratio_build, the number of non-zero values that will be added to the luts is
-        // less than nnz. but since the memory is mmapped, it is ok to still allocate some extra space for those
-        // dropped values.
-        auto luts_byte_size = nnz * sizeof(typename decltype(inverted_lut_)::value_type::value_type);
+        auto inverted_index_ids_byte_size =
+            idx_counts.size() * sizeof(typename decltype(inverted_index_ids_)::value_type);
+        auto inverted_index_vals_byte_size =
+            idx_counts.size() * sizeof(typename decltype(inverted_index_vals_)::value_type);
+        auto plists_ids_byte_size = nnz * sizeof(typename decltype(inverted_index_ids_)::value_type::value_type);
+        auto plists_vals_byte_size = nnz * sizeof(typename decltype(inverted_index_vals_)::value_type::value_type);
         auto max_score_in_dim_byte_size = idx_counts.size() * sizeof(typename decltype(max_score_in_dim_)::value_type);
         size_t row_sums_byte_size = 0;
 
-        map_byte_size_ = raw_data_byte_size + inverted_lut_byte_size + luts_byte_size;
+        map_byte_size_ =
+            inverted_index_ids_byte_size + inverted_index_vals_byte_size + plists_ids_byte_size + plists_vals_byte_size;
         if constexpr (use_wand) {
             map_byte_size_ += max_score_in_dim_byte_size;
         }
@@ -302,10 +325,10 @@ class InvertedIndex : public BaseInvertedIndex<T> {
         char* ptr = map_;
 
         // initialize containers memory.
-        raw_data_.initialize(ptr, raw_data_byte_size);
-        ptr += raw_data_byte_size;
-        inverted_lut_.initialize(ptr, inverted_lut_byte_size);
-        ptr += inverted_lut_byte_size;
+        inverted_index_ids_.initialize(ptr, inverted_index_ids_byte_size);
+        ptr += inverted_index_ids_byte_size;
+        inverted_index_vals_.initialize(ptr, inverted_index_vals_byte_size);
+        ptr += inverted_index_vals_byte_size;
 
         if constexpr (use_wand) {
             max_score_in_dim_.initialize(ptr, max_score_in_dim_byte_size);
@@ -317,15 +340,23 @@ class InvertedIndex : public BaseInvertedIndex<T> {
             ptr += row_sums_byte_size;
         }
 
+        for (const auto& [idx, count] : idx_counts) {
+            auto& plist_ids = inverted_index_ids_.emplace_back();
+            auto plist_ids_byte_size = count * sizeof(typename decltype(inverted_index_ids_)::value_type::value_type);
+            plist_ids.initialize(ptr, plist_ids_byte_size);
+            ptr += plist_ids_byte_size;
+        }
+        for (const auto& [idx, count] : idx_counts) {
+            auto& plist_vals = inverted_index_vals_.emplace_back();
+            auto plist_vals_byte_size = count * sizeof(typename decltype(inverted_index_vals_)::value_type::value_type);
+            plist_vals.initialize(ptr, plist_vals_byte_size);
+            ptr += plist_vals_byte_size;
+        }
         size_t dim_id = 0;
         for (const auto& [idx, count] : idx_counts) {
             dim_map_[idx] = dim_id;
-            auto& lut = inverted_lut_.emplace_back();
-            auto lut_byte_size = count * sizeof(typename decltype(inverted_lut_)::value_type::value_type);
-            lut.initialize(ptr, lut_byte_size);
-            ptr += lut_byte_size;
             if constexpr (use_wand) {
-                max_score_in_dim_.emplace_back(0);
+                max_score_in_dim_.emplace_back(0.0f);
             }
             ++dim_id;
         }
@@ -338,62 +369,39 @@ class InvertedIndex : public BaseInvertedIndex<T> {
     // Non zero drop ratio is only supported for static index, i.e. data should
     // include all rows that'll be added to the index.
     Status
-    Train(const SparseRow<T>* data, size_t rows, float drop_ratio_build) override {
+    Train(const SparseRow<DType>* data, size_t rows) override {
         if constexpr (mmapped) {
             throw std::invalid_argument("mmapped InvertedIndex does not support Train");
         } else {
-            if (drop_ratio_build == 0.0f) {
-                return Status::success;
-            }
-            // TODO: maybe i += 10 to down sample to speed up.
-            size_t amount = 0;
-            for (size_t i = 0; i < rows; ++i) {
-                amount += data[i].size();
-            }
-            if (amount == 0) {
-                return Status::success;
-            }
-            std::vector<T> vals;
-            vals.reserve(amount);
-            for (size_t i = 0; i < rows; ++i) {
-                for (size_t j = 0; j < data[i].size(); ++j) {
-                    vals.push_back(fabs(data[i][j].val));
-                }
-            }
-            value_threshold_ = get_threshold(vals, drop_ratio_build);
-            drop_during_build_ = true;
             return Status::success;
         }
     }
 
     Status
-    Add(const SparseRow<T>* data, size_t rows, int64_t dim) override {
+    Add(const SparseRow<DType>* data, size_t rows, int64_t dim) override {
         if constexpr (mmapped) {
             throw std::invalid_argument("mmapped InvertedIndex does not support Add");
         } else {
-            auto current_rows = n_rows_internal();
-            if (current_rows > 0 && drop_during_build_) {
-                LOG_KNOWHERE_ERROR_ << "Not allowed to add data to a built index with drop_ratio_build > 0.";
-                return Status::invalid_args;
-            }
+            auto current_rows = n_rows_internal_;
             if ((size_t)dim > max_dim_) {
                 max_dim_ = dim;
             }
 
-            raw_data_.insert(raw_data_.end(), data, data + rows);
             if constexpr (bm25) {
                 bm25_params_->row_sums.reserve(current_rows + rows);
             }
             for (size_t i = 0; i < rows; ++i) {
                 add_row_to_index(data[i], current_rows + i);
             }
+            n_rows_internal_ += rows;
+
             return Status::success;
         }
     }
 
     void
-    Search(const SparseRow<T>& query, size_t k, float drop_ratio_search, float* distances, label_t* labels,
-           size_t refine_factor, const BitsetView& bitset, const DocValueComputer<T>& computer) const override {
+    Search(const SparseRow<DType>& query, size_t k, float drop_ratio_search, float* distances, label_t* labels,
+           size_t refine_factor, const BitsetView& bitset, const DocValueComputer<float>& computer) const override {
         // initially set result distances to NaN and labels to -1
         std::fill(distances, distances + k, std::numeric_limits<float>::quiet_NaN());
         std::fill(labels, labels + k, -1);
@@ -401,18 +409,17 @@ class InvertedIndex : public BaseInvertedIndex<T> {
             return;
         }
 
-        std::vector<T> values(query.size());
+        std::vector<DType> values(query.size());
         for (size_t i = 0; i < query.size(); ++i) {
             values[i] = std::abs(query[i].val);
         }
         auto q_threshold = get_threshold(values, drop_ratio_search);
 
-        // if no data was dropped during both build and search, no refinement is
-        // needed.
-        if (!drop_during_build_ && drop_ratio_search == 0) {
+        // if no data was dropped during search, no refinement is needed.
+        if (drop_ratio_search == 0) {
             refine_factor = 1;
         }
-        MaxMinHeap<T> heap(k * refine_factor);
+        MaxMinHeap<float> heap(k * refine_factor);
         if constexpr (!use_wand) {
             search_brute_force(query, q_threshold, heap, bitset, computer);
         } else {
@@ -428,54 +435,68 @@ class InvertedIndex : public BaseInvertedIndex<T> {
 
     // Returned distances are inaccurate based on the drop_ratio.
     std::vector<float>
-    GetAllDistances(const SparseRow<T>& query, float drop_ratio_search, const BitsetView& bitset,
-                    const DocValueComputer<T>& computer) const override {
+    GetAllDistances(const SparseRow<DType>& query, float drop_ratio_search, const BitsetView& bitset,
+                    const DocValueComputer<float>& computer) const override {
         if (query.size() == 0) {
             return {};
         }
-        std::vector<T> values(query.size());
+        std::vector<DType> values(query.size());
         for (size_t i = 0; i < query.size(); ++i) {
             values[i] = std::abs(query[i].val);
         }
         auto q_threshold = get_threshold(values, drop_ratio_search);
         auto distances = compute_all_distances(query, q_threshold, computer);
-        for (size_t i = 0; i < distances.size(); ++i) {
-            if (bitset.empty() || !bitset.test(i)) {
-                continue;
+        if (!bitset.empty()) {
+            for (size_t i = 0; i < distances.size(); ++i) {
+                if (bitset.test(i)) {
+                    distances[i] = 0.0f;
+                }
             }
-            distances[i] = 0.0f;
         }
         return distances;
     }
 
     float
-    GetRawDistance(const label_t id, const SparseRow<T>& query, const DocValueComputer<T>& computer) const override {
-        T doc_sum = bm25 ? bm25_params_->row_sums.at(id) : 0;
-        return query.dot(raw_data_[id], computer, doc_sum);
-    }
+    GetRawDistance(const label_t vec_id, const SparseRow<DType>& query,
+                   const DocValueComputer<float>& computer) const override {
+        float distance = 0.0f;
 
-    void
-    GetVectorById(const label_t id, SparseRow<T>& output) const override {
-        output = raw_data_[id];
+        for (size_t i = 0; i < query.size(); ++i) {
+            auto [idx, val] = query[i];
+            auto dim_id = dim_map_.find(idx);
+            if (dim_id == dim_map_.end()) {
+                continue;
+            }
+            auto& plist_ids = inverted_index_ids_[dim_id->second];
+            auto it = std::lower_bound(plist_ids.begin(), plist_ids.end(), vec_id,
+                                       [](const auto& x, table_t y) { return x < y; });
+            if (it != plist_ids.end() && *it == vec_id) {
+                distance += val * computer(inverted_index_vals_[dim_id->second][it - plist_ids.begin()],
+                                           bm25 ? bm25_params_->row_sums.at(vec_id) : 0);
+            }
+        }
+
+        return distance;
     }
 
     [[nodiscard]] size_t
     size() const override {
         size_t res = sizeof(*this);
-        for (size_t i = 0; i < raw_data_.size(); ++i) {
-            res += raw_data_[i].memory_usage();
-        }
         res += dim_map_.size() *
                (sizeof(typename decltype(dim_map_)::key_type) + sizeof(typename decltype(dim_map_)::mapped_type));
 
         if constexpr (mmapped) {
             return res + map_byte_size_;
         } else {
-            res += sizeof(typename decltype(raw_data_)::value_type) * raw_data_.capacity();
-
-            res += sizeof(typename decltype(inverted_lut_)::value_type) * inverted_lut_.capacity();
-            for (size_t i = 0; i < inverted_lut_.size(); ++i) {
-                res += sizeof(typename decltype(inverted_lut_)::value_type::value_type) * inverted_lut_[i].capacity();
+            res += sizeof(typename decltype(inverted_index_ids_)::value_type) * inverted_index_ids_.capacity();
+            for (size_t i = 0; i < inverted_index_ids_.size(); ++i) {
+                res += sizeof(typename decltype(inverted_index_ids_)::value_type::value_type) *
+                       inverted_index_ids_[i].capacity();
+            }
+            res += sizeof(typename decltype(inverted_index_vals_)::value_type) * inverted_index_vals_.capacity();
+            for (size_t i = 0; i < inverted_index_vals_.size(); ++i) {
+                res += sizeof(typename decltype(inverted_index_vals_)::value_type::value_type) *
+                       inverted_index_vals_[i].capacity();
             }
             if constexpr (use_wand) {
                 res += sizeof(typename decltype(max_score_in_dim_)::value_type) * max_score_in_dim_.capacity();
@@ -486,25 +507,20 @@ class InvertedIndex : public BaseInvertedIndex<T> {
 
     [[nodiscard]] size_t
     n_rows() const override {
-        return n_rows_internal();
+        return n_rows_internal_;
     }
 
     [[nodiscard]] size_t
     n_cols() const override {
-        return n_cols_internal();
-    }
-
-    [[nodiscard]] virtual bool
-    IsApproximated() const override {
-        return drop_during_build_;
+        return max_dim_;
     }
 
  private:
     // Given a vector of values, returns the threshold value.
     // All values strictly smaller than the threshold will be ignored.
     // values will be modified in this function.
-    inline T
-    get_threshold(std::vector<T>& values, float drop_ratio) const {
+    inline DType
+    get_threshold(std::vector<DType>& values, float drop_ratio) const {
         // drop_ratio is in [0, 1) thus drop_count is guaranteed to be less
         // than values.size().
         auto drop_count = static_cast<size_t>(drop_ratio * values.size());
@@ -516,125 +532,117 @@ class InvertedIndex : public BaseInvertedIndex<T> {
         return *pos;
     }
 
-    size_t
-    n_rows_internal() const {
-        return raw_data_.size();
-    }
-
-    size_t
-    n_cols_internal() const {
-        return max_dim_;
-    }
-
     std::vector<float>
-    compute_all_distances(const SparseRow<T>& q_vec, T q_threshold, const DocValueComputer<T>& computer) const {
-        std::vector<float> scores(n_rows_internal(), 0.0f);
+    compute_all_distances(const SparseRow<DType>& q_vec, DType q_threshold,
+                          const DocValueComputer<float>& computer) const {
+        std::vector<float> scores(n_rows_internal_, 0.0f);
         for (size_t idx = 0; idx < q_vec.size(); ++idx) {
             auto [i, v] = q_vec[idx];
-            if (v < q_threshold || i >= n_cols_internal()) {
+            if (v < q_threshold || i >= max_dim_) {
                 continue;
             }
             auto dim_id = dim_map_.find(i);
             if (dim_id == dim_map_.end()) {
                 continue;
             }
-            auto& lut = inverted_lut_[dim_id->second];
+            auto& plist_ids = inverted_index_ids_[dim_id->second];
+            auto& plist_vals = inverted_index_vals_[dim_id->second];
             // TODO: improve with SIMD
-            for (size_t j = 0; j < lut.size(); j++) {
-                auto [doc_id, val] = lut[j];
-                T val_sum = bm25 ? bm25_params_->row_sums.at(doc_id) : 0;
+            for (size_t j = 0; j < plist_ids.size(); ++j) {
+                auto doc_id = plist_ids[j];
+                auto val = plist_vals[j];
+                float val_sum = bm25 ? bm25_params_->row_sums.at(doc_id) : 0;
                 scores[doc_id] += v * computer(val, val_sum);
             }
         }
         return scores;
     }
 
-    // find the top-k candidates using brute force search, k as specified by the capacity of the heap.
-    // any value in q_vec that is smaller than q_threshold and any value with dimension >= n_cols() will be ignored.
-    // TODO: may switch to row-wise brute force if filter rate is high. Benchmark needed.
-    void
-    search_brute_force(const SparseRow<T>& q_vec, T q_threshold, MaxMinHeap<T>& heap, const BitsetView& bitset,
-                       const DocValueComputer<T>& computer) const {
-        auto scores = compute_all_distances(q_vec, q_threshold, computer);
-        for (size_t i = 0; i < n_rows_internal(); ++i) {
-            if ((bitset.empty() || !bitset.test(i)) && scores[i] != 0) {
-                heap.push(i, scores[i]);
-            }
-        }
-    }
-
-    // LUT supports size() and operator[] which returns an SparseIdVal.
-    template <typename LUT>
+    template <typename DocIdFilter>
     struct Cursor {
      public:
-        Cursor(const LUT& lut, size_t num_vec, float max_score, float q_value, const BitsetView bitset)
-            : lut_(lut),
-              lut_size_(lut.size()),
+        Cursor(const Vector<table_t>& plist_ids, const Vector<QType>& plist_vals, size_t num_vec, float max_score,
+               float q_value, DocIdFilter filter)
+            : plist_ids_(plist_ids),
+              plist_vals_(plist_vals),
+              plist_size_(plist_ids.size()),
               total_num_vec_(num_vec),
               max_score_(max_score),
               q_value_(q_value),
-              bitset_(bitset) {
-            while (loc_ < lut_size_ && !bitset_.empty() && bitset_.test(lut_[loc_].id)) {
-                loc_++;
-            }
+              filter_(filter) {
+            skip_filtered_ids();
             update_cur_vec_id();
         }
         Cursor(const Cursor& rhs) = delete;
+        Cursor(Cursor&& rhs) noexcept = default;
 
         void
         next() {
-            next_internal();
+            ++loc_;
+            skip_filtered_ids();
             update_cur_vec_id();
         }
 
-        // advance loc until cur_vec_id_ >= vec_id
         void
         seek(table_t vec_id) {
-            while (loc_ < lut_size_ && lut_[loc_].id < vec_id) {
-                next_internal();
+            while (loc_ < plist_size_ && plist_ids_[loc_] < vec_id) {
+                ++loc_;
             }
+            skip_filtered_ids();
             update_cur_vec_id();
         }
 
-        T
+        QType
         cur_vec_val() const {
-            return lut_[loc_].val;
+            return plist_vals_[loc_];
         }
 
-        const LUT& lut_;
-        const size_t lut_size_;
+        const Vector<table_t>& plist_ids_;
+        const Vector<QType>& plist_vals_;
+        const size_t plist_size_;
         size_t loc_ = 0;
         size_t total_num_vec_ = 0;
         float max_score_ = 0.0f;
         float q_value_ = 0.0f;
-        const BitsetView bitset_;
+        DocIdFilter filter_;
         table_t cur_vec_id_ = 0;
 
      private:
         inline void
         update_cur_vec_id() {
-            if (loc_ >= lut_size_) {
-                cur_vec_id_ = total_num_vec_;
-            } else {
-                cur_vec_id_ = lut_[loc_].id;
-            }
+            cur_vec_id_ = (loc_ >= plist_size_) ? total_num_vec_ : plist_ids_[loc_];
         }
 
         inline void
-        next_internal() {
-            loc_++;
-            while (loc_ < lut_size_ && !bitset_.empty() && bitset_.test(lut_[loc_].id)) {
-                loc_++;
+        skip_filtered_ids() {
+            while (loc_ < plist_size_ && !filter_.empty() && filter_.test(plist_ids_[loc_])) {
+                ++loc_;
             }
         }
     };  // struct Cursor
 
+    // find the top-k candidates using brute force search, k as specified by the capacity of the heap.
+    // any value in q_vec that is smaller than q_threshold and any value with dimension >= n_cols() will be ignored.
+    // TODO: may switch to row-wise brute force if filter rate is high. Benchmark needed.
+    template <typename DocIdFilter>
+    void
+    search_brute_force(const SparseRow<DType>& q_vec, DType q_threshold, MaxMinHeap<float>& heap, DocIdFilter& filter,
+                       const DocValueComputer<float>& computer) const {
+        auto scores = compute_all_distances(q_vec, q_threshold, computer);
+        for (size_t i = 0; i < n_rows_internal_; ++i) {
+            if ((filter.empty() || !filter.test(i)) && scores[i] != 0) {
+                heap.push(i, scores[i]);
+            }
+        }
+    }
+
     // any value in q_vec that is smaller than q_threshold will be ignored.
+    template <typename DocIdFilter>
     void
-    search_wand(const SparseRow<T>& q_vec, T q_threshold, MaxMinHeap<T>& heap, const BitsetView& bitset,
-                const DocValueComputer<T>& computer) const {
+    search_wand(const SparseRow<DType>& q_vec, DType q_threshold, MaxMinHeap<float>& heap, DocIdFilter& filter,
+                const DocValueComputer<float>& computer) const {
         auto q_dim = q_vec.size();
-        std::vector<std::shared_ptr<Cursor<const typename decltype(inverted_lut_)::value_type&>>> cursors(q_dim);
+        std::vector<std::shared_ptr<Cursor<DocIdFilter>>> cursors(q_dim);
         size_t valid_q_dim = 0;
         for (size_t i = 0; i < q_dim; ++i) {
             auto [idx, val] = q_vec[i];
@@ -642,9 +650,10 @@ class InvertedIndex : public BaseInvertedIndex<T> {
             if (dim_id == dim_map_.end() || std::abs(val) < q_threshold) {
                 continue;
             }
-            auto& lut = inverted_lut_[dim_id->second];
-            cursors[valid_q_dim++] = std::make_shared<Cursor<decltype(lut)>>(
-                lut, n_rows_internal(), max_score_in_dim_[dim_id->second] * val, val, bitset);
+            auto& plist_ids = inverted_index_ids_[dim_id->second];
+            auto& plist_vals = inverted_index_vals_[dim_id->second];
+            cursors[valid_q_dim++] = std::make_shared<Cursor<DocIdFilter>>(
+                plist_ids, plist_vals, n_rows_internal_, max_score_in_dim_[dim_id->second] * val, val, filter);
         }
         if (valid_q_dim == 0) {
             return;
@@ -660,7 +669,7 @@ class InvertedIndex : public BaseInvertedIndex<T> {
             size_t pivot;
             bool found_pivot = false;
             for (pivot = 0; pivot < valid_q_dim; ++pivot) {
-                if (cursors[pivot]->loc_ >= cursors[pivot]->lut_size_) {
+                if (cursors[pivot]->loc_ >= cursors[pivot]->plist_size_) {
                     break;
                 }
                 upper_bound += cursors[pivot]->max_score_;
@@ -679,7 +688,7 @@ class InvertedIndex : public BaseInvertedIndex<T> {
                     if (cursor->cur_vec_id_ != pivot_id) {
                         break;
                     }
-                    T cur_vec_sum = bm25 ? bm25_params_->row_sums.at(cursor->cur_vec_id_) : 0;
+                    float cur_vec_sum = bm25 ? bm25_params_->row_sums.at(cursor->cur_vec_id_) : 0;
                     score += cursor->q_value_ * computer(cursor->cur_vec_val(), cur_vec_sum);
                     cursor->next();
                 }
@@ -701,23 +710,22 @@ class InvertedIndex : public BaseInvertedIndex<T> {
     }
 
     void
-    refine_and_collect(const SparseRow<T>& q_vec, MaxMinHeap<T>& inaccurate, size_t k, float* distances,
-                       label_t* labels, const DocValueComputer<T>& computer) const {
-        std::priority_queue<SparseIdVal<T>, std::vector<SparseIdVal<T>>, std::greater<SparseIdVal<T>>> heap;
+    refine_and_collect(const SparseRow<DType>& q_vec, MaxMinHeap<float>& inacc_heap, size_t k, float* distances,
+                       label_t* labels, const DocValueComputer<float>& computer) const {
+        std::vector<table_t> docids;
+        MaxMinHeap<float> heap(k);
 
-        while (!inaccurate.empty()) {
-            auto [u, d] = inaccurate.top();
-            inaccurate.pop();
-
-            T u_sum = bm25 ? bm25_params_->row_sums.at(u) : 0;
+        docids.reserve(inacc_heap.size());
+        while (!inacc_heap.empty()) {
+            table_t u = inacc_heap.pop();
+            docids.emplace_back(u);
+        }
 
-            auto dist_acc = q_vec.dot(raw_data_[u], computer, u_sum);
-            if (heap.size() < k) {
-                heap.emplace(u, dist_acc);
-            } else if (heap.top().val < dist_acc) {
-                heap.pop();
-                heap.emplace(u, dist_acc);
-            }
+        DocIdFilterByVector filter(std::move(docids));
+        if (use_wand) {
+            search_wand(q_vec, 0, heap, filter, computer);
+        } else {
+            search_brute_force(q_vec, 0, heap, filter, computer);
         }
         collect_result(heap, distances, labels);
     }
@@ -734,8 +742,8 @@ class InvertedIndex : public BaseInvertedIndex<T> {
     }
 
     inline void
-    add_row_to_index(const SparseRow<T>& row, table_t id) {
-        [[maybe_unused]] T row_sum = 0;
+    add_row_to_index(const SparseRow<DType>& row, table_t vec_id) {
+        [[maybe_unused]] float row_sum = 0;
         for (size_t j = 0; j < row.size(); ++j) {
             auto [idx, val] = row[j];
             if constexpr (bm25) {
@@ -743,7 +751,7 @@ class InvertedIndex : public BaseInvertedIndex<T> {
             }
             // Skip values equals to or close enough to zero(which contributes
             // little to the total IP score).
-            if (val == 0 || (drop_during_build_ && fabs(val) < value_threshold_)) {
+            if (val == 0) {
                 continue;
             }
             auto dim_it = dim_map_.find(idx);
@@ -752,14 +760,16 @@ class InvertedIndex : public BaseInvertedIndex<T> {
                     throw std::runtime_error("unexpected vector dimension in mmaped InvertedIndex");
                 }
                 dim_it = dim_map_.insert({idx, next_dim_id_++}).first;
-                inverted_lut_.emplace_back();
+                inverted_index_ids_.emplace_back();
+                inverted_index_vals_.emplace_back();
                 if constexpr (use_wand) {
-                    max_score_in_dim_.emplace_back(0);
+                    max_score_in_dim_.emplace_back(0.0f);
                 }
             }
-            inverted_lut_[dim_it->second].emplace_back(id, val);
+            inverted_index_ids_[dim_it->second].emplace_back(vec_id);
+            inverted_index_vals_[dim_it->second].emplace_back(get_quant_val(val));
             if constexpr (use_wand) {
-                auto score = val;
+                auto score = static_cast<float>(val);
                 if constexpr (bm25) {
                     score = bm25_params_->max_score_ratio * bm25_params_->wand_max_score_computer(val, row_sum);
                 }
@@ -771,24 +781,31 @@ class InvertedIndex : public BaseInvertedIndex<T> {
         }
     }
 
+    inline QType
+    get_quant_val(DType val) const {
+        if constexpr (!std::is_same_v<QType, DType>) {
+            const DType max_val = static_cast<DType>(std::numeric_limits<QType>::max());
+            if (val >= max_val) {
+                return std::numeric_limits<QType>::max();
+            } else if (val <= std::numeric_limits<QType>::min()) {
+                return std::numeric_limits<QType>::min();
+            } else {
+                return static_cast<QType>(val);
+            }
+        } else {
+            return val;
+        }
+    }
+
     // key is raw sparse vector dim/idx, value is the mapped dim/idx id in the index.
     std::unordered_map<table_t, uint32_t> dim_map_;
 
-    template <typename U>
-    using Vector = std::conditional_t<mmapped, GrowableVectorView<U>, std::vector<U>>;
-
     // reserve, [], size, emplace_back
-    Vector<SparseRow<T>> raw_data_;
-
-    Vector<Vector<SparseIdVal<T>>> inverted_lut_;
-    // If we want to drop small values during build, we must first train the
-    // index with all the data to compute value_threshold_.
-    bool drop_during_build_ = false;
-    // when drop_during_build_ is true, any value smaller than value_threshold_
-    // will not be added to inverted_lut_. value_threshold_ is set to the
-    // drop_ratio_build-th percentile of all absolute values in the index.
-    T value_threshold_ = 0.0f;
-    Vector<T> max_score_in_dim_;
+    Vector<Vector<table_t>> inverted_index_ids_;
+    Vector<Vector<QType>> inverted_index_vals_;
+    Vector<float> max_score_in_dim_;
+
+    size_t n_rows_internal_ = 0;
     size_t max_dim_ = 0;
     uint32_t next_dim_id_ = 0;
 
@@ -801,17 +818,17 @@ class InvertedIndex : public BaseInvertedIndex<T> {
         float b;
         // row_sums is used to cache the sum of values of each row, which
         // corresponds to the document length of each doc in the BM25 formula.
-        Vector<T> row_sums;
+        Vector<float> row_sums;
 
         // below are used only for WAND index.
         float max_score_ratio;
-        DocValueComputer<T> wand_max_score_computer;
+        DocValueComputer<float> wand_max_score_computer;
 
         BM25Params(float k1, float b, float avgdl, float max_score_ratio)
             : k1(k1),
               b(b),
               max_score_ratio(max_score_ratio),
-              wand_max_score_computer(GetDocValueBM25Computer<T>(k1, b, avgdl)) {
+              wand_max_score_computer(GetDocValueBM25Computer<float>(k1, b, avgdl)) {
         }
     };  // struct BM25Params
 
diff --git a/src/index/sparse/sparse_inverted_index_config.h b/src/index/sparse/sparse_inverted_index_config.h
index 2d416e43a..7c56494eb 100644
--- a/src/index/sparse/sparse_inverted_index_config.h
+++ b/src/index/sparse/sparse_inverted_index_config.h
@@ -24,6 +24,7 @@ class SparseInvertedIndexConfig : public BaseConfig {
     CFG_INT refine_factor;
     CFG_FLOAT wand_bm25_max_score_ratio;
     KNOHWERE_DECLARE_CONFIG(SparseInvertedIndexConfig) {
+        // NOTE: drop_ratio_build has been deprecated, it won't change anything
         KNOWHERE_CONFIG_DECLARE_FIELD(drop_ratio_build)
             .description("drop ratio for build")
             .set_default(0.0f)
diff --git a/tests/ut/test_sparse.cc b/tests/ut/test_sparse.cc
index 85341bb05..c84886b32 100644
--- a/tests/ut/test_sparse.cc
+++ b/tests/ut/test_sparse.cc
@@ -45,12 +45,10 @@ TEST_CASE("Test Mem Sparse Index With Float Vector", "[float metrics]") {
     auto topk = 5;
     int64_t nq = 10;
 
-    auto [drop_ratio_build, drop_ratio_search] = GENERATE(table<float, float>({
-        {0.0, 0.0},
-        {0.15, 0.3},
-    }));
-
     auto metric = GENERATE(knowhere::metric::IP, knowhere::metric::BM25);
+
+    auto drop_ratio_search = metric == knowhere::metric::BM25 ? GENERATE(0.0, 0.1) : GENERATE(0.0, 0.3);
+
     auto version = GenTestVersionList();
 
     auto base_gen = [=, dim = dim]() {
@@ -64,17 +62,22 @@ TEST_CASE("Test Mem Sparse Index With Float Vector", "[float metrics]") {
         return json;
     };
 
-    auto sparse_inverted_index_gen = [base_gen, drop_ratio_build = drop_ratio_build,
-                                      drop_ratio_search = drop_ratio_search]() {
+    auto sparse_inverted_index_gen = [base_gen, drop_ratio_search = drop_ratio_search]() {
         knowhere::Json json = base_gen();
-        json[knowhere::indexparam::DROP_RATIO_BUILD] = drop_ratio_build;
         json[knowhere::indexparam::DROP_RATIO_SEARCH] = drop_ratio_search;
         return json;
     };
 
-    const auto train_ds = GenSparseDataSet(nb, dim, doc_sparsity);
-    // it is possible the query has more dims than the train dataset.
-    const auto query_ds = GenSparseDataSet(nq, dim + 20, query_sparsity);
+    auto sparse_dataset_gen = [&](int nr, int dim, float sparsity) -> knowhere::DataSetPtr {
+        if (metric == knowhere::metric::BM25) {
+            return GenSparseDataSetWithMaxVal(nr, dim, sparsity, 256, true);
+        } else {
+            return GenSparseDataSet(nr, dim, sparsity);
+        }
+    };
+
+    auto train_ds = sparse_dataset_gen(nb, dim, doc_sparsity);
+    auto query_ds = sparse_dataset_gen(nq, dim + 20, query_sparsity);
 
     const knowhere::Json conf = {
         {knowhere::meta::METRIC_TYPE, metric}, {knowhere::meta::TOPK, topk},      {knowhere::meta::BM25_K1, 1.2},
@@ -144,9 +147,8 @@ TEST_CASE("Test Mem Sparse Index With Float Vector", "[float metrics]") {
             REQUIRE(results.has_value());
             float recall = GetKNNRecall(*gt.value(), *results.value());
             check_distance_decreasing(*results.value());
-            auto drop_ratio_build = json[knowhere::indexparam::DROP_RATIO_BUILD].get<float>();
             auto drop_ratio_search = json[knowhere::indexparam::DROP_RATIO_SEARCH].get<float>();
-            if (drop_ratio_build == 0 && drop_ratio_search == 0) {
+            if (drop_ratio_search == 0) {
                 REQUIRE(recall == 1);
             } else {
                 // most test cases are above 0.95, only a few between 0.9 and 0.95
@@ -189,9 +191,8 @@ TEST_CASE("Test Mem Sparse Index With Float Vector", "[float metrics]") {
         float recall = GetKNNRecall(*filter_gt.value(), *results.value());
         check_distance_decreasing(*results.value());
 
-        auto drop_ratio_build = json[knowhere::indexparam::DROP_RATIO_BUILD].get<float>();
         auto drop_ratio_search = json[knowhere::indexparam::DROP_RATIO_SEARCH].get<float>();
-        if (drop_ratio_build == 0 && drop_ratio_search == 0) {
+        if (drop_ratio_search == 0) {
             REQUIRE(recall == 1);
         } else {
             REQUIRE(recall >= 0.8);
@@ -258,10 +259,15 @@ TEST_CASE("Test Mem Sparse Index With Float Vector", "[float metrics]") {
         REQUIRE(idx.Size() > 0);
         REQUIRE(idx.Count() == nb);
 
-        auto [radius, range_filter] = GENERATE(table<float, float>({
-            {0.5, 1},
-            {1, 1.5},
-        }));
+        auto [radius, range_filter] = metric == knowhere::metric::BM25 ? GENERATE(table<float, float>({
+                                                                             {80.0, 100.0},
+                                                                             {100.0, 200.0},
+                                                                         }))
+                                                                       : GENERATE(table<float, float>({
+                                                                             {0.5, 1},
+                                                                             {1, 1.5},
+                                                                         }));
+
         json[knowhere::meta::RADIUS] = radius;
         json[knowhere::meta::RANGE_FILTER] = range_filter;
 
@@ -311,108 +317,6 @@ TEST_CASE("Test Mem Sparse Index With Float Vector", "[float metrics]") {
     }
 }
 
-TEST_CASE("Test Mem Sparse Index GetVectorByIds", "[float metrics]") {
-    auto [nb, dim, doc_sparsity, query_sparsity] = GENERATE(table<int32_t, int32_t, float, float>({
-        // 300 dim, avg doc nnz 12, avg query nnz 9
-        {2000, 300, 0.95, 0.97},
-        // 300 dim, avg doc nnz 9, avg query nnz 3
-        {2000, 300, 0.97, 0.99},
-        // 3000 dim, avg doc nnz 90, avg query nnz 30
-        {20000, 3000, 0.97, 0.99},
-    }));
-    int64_t nq = GENERATE(10, 100);
-
-    auto [drop_ratio_build, drop_ratio_search] = GENERATE(table<float, float>({
-        {0.0, 0.0},
-        {0.32, 0.0},
-    }));
-
-    auto metric = knowhere::metric::IP;
-    auto version = GenTestVersionList();
-
-    auto base_gen = [=, dim = dim]() {
-        knowhere::Json json;
-        json[knowhere::meta::DIM] = dim;
-        json[knowhere::meta::METRIC_TYPE] = metric;
-        json[knowhere::meta::TOPK] = 1;
-        return json;
-    };
-
-    auto sparse_inverted_index_gen = [base_gen, drop_ratio_build = drop_ratio_build,
-                                      drop_ratio_search = drop_ratio_search]() {
-        knowhere::Json json = base_gen();
-        json[knowhere::indexparam::DROP_RATIO_BUILD] = drop_ratio_build;
-        json[knowhere::indexparam::DROP_RATIO_SEARCH] = drop_ratio_search;
-        return json;
-    };
-
-    const auto train_ds = GenSparseDataSet(nb, dim, doc_sparsity);
-
-    SECTION("Test GetVectorByIds") {
-        using std::make_tuple;
-        auto [name, gen] = GENERATE_REF(table<std::string, std::function<knowhere::Json()>>({
-            make_tuple(knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX, sparse_inverted_index_gen),
-            make_tuple(knowhere::IndexEnum::INDEX_SPARSE_WAND, sparse_inverted_index_gen),
-        }));
-        auto use_mmap = GENERATE(true, false);
-        auto tmp_file = "/tmp/knowhere_sparse_inverted_index_test";
-        {
-            auto idx = knowhere::IndexFactory::Instance().Create<knowhere::fp32>(name, version).value();
-            auto cfg_json = gen().dump();
-            CAPTURE(name, cfg_json);
-            knowhere::Json json = knowhere::Json::parse(cfg_json);
-
-            auto ids_ds = GenIdsDataSet(nb, nq);
-            REQUIRE(idx.Type() == name);
-            auto res = idx.Build(train_ds, json);
-            REQUIRE(idx.HasRawData(metric) ==
-                    knowhere::IndexStaticFaced<knowhere::fp32>::HasRawData(name, version, json));
-            if (!idx.HasRawData(metric)) {
-                return;
-            }
-            REQUIRE(res == knowhere::Status::success);
-            knowhere::BinarySet bs;
-            idx.Serialize(bs);
-
-            auto idx_new = knowhere::IndexFactory::Instance().Create<knowhere::fp32>(name, version).value();
-            if (use_mmap) {
-                WriteBinaryToFile(tmp_file, bs.GetByName(idx.Type()));
-                REQUIRE(idx_new.DeserializeFromFile(tmp_file, json) == knowhere::Status::success);
-            } else {
-                REQUIRE(idx_new.Deserialize(bs, json) == knowhere::Status::success);
-            }
-
-            auto retrieve_task = [&]() {
-                auto results = idx_new.GetVectorByIds(ids_ds);
-                REQUIRE(results.has_value());
-                auto xb = (knowhere::sparse::SparseRow<float>*)train_ds->GetTensor();
-                auto res_data = (knowhere::sparse::SparseRow<float>*)results.value()->GetTensor();
-                for (int i = 0; i < nq; ++i) {
-                    const auto id = ids_ds->GetIds()[i];
-                    const auto& truth_row = xb[id];
-                    const auto& res_row = res_data[i];
-                    REQUIRE(truth_row.size() == res_row.size());
-                    for (size_t j = 0; j < truth_row.size(); ++j) {
-                        REQUIRE(truth_row[j] == res_row[j]);
-                    }
-                }
-            };
-
-            std::vector<std::future<void>> retrieve_task_list;
-            for (int i = 0; i < 20; i++) {
-                retrieve_task_list.push_back(std::async(std::launch::async, [&] { return retrieve_task(); }));
-            }
-            for (auto& task : retrieve_task_list) {
-                task.wait();
-            }
-            // idx/idx_new to destroy and munmap
-        }
-        if (use_mmap) {
-            REQUIRE(std::remove(tmp_file) == 0);
-        }
-    }
-}
-
 TEST_CASE("Test Mem Sparse Index Handle Empty Vector", "[float metrics]") {
     auto [base_data, has_first_result] = GENERATE(table<std::vector<std::map<int32_t, float>>, bool>(
         {{std::vector<std::map<int32_t, float>>{
@@ -431,14 +335,9 @@ TEST_CASE("Test Mem Sparse Index Handle Empty Vector", "[float metrics]") {
     auto metric = GENERATE(knowhere::metric::IP, knowhere::metric::BM25);
     auto version = GenTestVersionList();
 
-    auto [drop_ratio_build, drop_ratio_search] = GENERATE(table<float, float>({
-        {0.0, 0.0},
-        {0.32, 0.0},
-        {0.32, 0.6},
-        {0.0, 0.6},
-    }));
+    auto drop_ratio_search = GENERATE(0.0, 0.6);
 
-    auto base_gen = [=, dim = dim, drop_ratio_build = drop_ratio_build, drop_ratio_search = drop_ratio_search]() {
+    auto base_gen = [=, dim = dim, drop_ratio_search = drop_ratio_search]() {
         knowhere::Json json;
         json[knowhere::meta::DIM] = dim;
         json[knowhere::meta::METRIC_TYPE] = metric;
@@ -446,7 +345,6 @@ TEST_CASE("Test Mem Sparse Index Handle Empty Vector", "[float metrics]") {
         json[knowhere::meta::BM25_K1] = 1.2;
         json[knowhere::meta::BM25_B] = 0.75;
         json[knowhere::meta::BM25_AVGDL] = 100;
-        json[knowhere::indexparam::DROP_RATIO_BUILD] = drop_ratio_build;
         json[knowhere::indexparam::DROP_RATIO_SEARCH] = drop_ratio_search;
         return json;
     };
@@ -537,22 +435,6 @@ TEST_CASE("Test Mem Sparse Index Handle Empty Vector", "[float metrics]") {
         REQUIRE(results.has_value());
         check_result(*results.value());
     }
-
-    SECTION("Test GetVectorByIds") {
-        std::vector<int64_t> ids = {0, 1, 2};
-        auto results = idx.GetVectorByIds(GenIdsDataSet(3, ids));
-        REQUIRE(results.has_value());
-        auto xb = (knowhere::sparse::SparseRow<float>*)train_ds->GetTensor();
-        auto res_data = (knowhere::sparse::SparseRow<float>*)results.value()->GetTensor();
-        for (int i = 0; i < 3; ++i) {
-            const auto& truth_row = xb[i];
-            const auto& res_row = res_data[i];
-            REQUIRE(truth_row.size() == res_row.size());
-            for (size_t j = 0; j < truth_row.size(); ++j) {
-                REQUIRE(truth_row[j] == res_row[j]);
-            }
-        }
-    }
 }
 
 TEST_CASE("Test Mem Sparse Index CC", "[float metrics]") {
@@ -578,8 +460,6 @@ TEST_CASE("Test Mem Sparse Index CC", "[float metrics]") {
 
     auto query_ds = doc_vector_gen(nq, dim);
 
-    // drop ratio build is not supported in CC index
-    auto drop_ratio_build = 0.0;
     auto drop_ratio_search = GENERATE(0.0, 0.3);
 
     auto metric = GENERATE(knowhere::metric::IP);
@@ -596,10 +476,8 @@ TEST_CASE("Test Mem Sparse Index CC", "[float metrics]") {
         return json;
     };
 
-    auto sparse_inverted_index_gen = [base_gen, drop_ratio_build = drop_ratio_build,
-                                      drop_ratio_search = drop_ratio_search]() {
+    auto sparse_inverted_index_gen = [base_gen, drop_ratio_search = drop_ratio_search]() {
         knowhere::Json json = base_gen();
-        json[knowhere::indexparam::DROP_RATIO_BUILD] = drop_ratio_build;
         json[knowhere::indexparam::DROP_RATIO_SEARCH] = drop_ratio_search;
         return json;
     };
diff --git a/tests/ut/utils.h b/tests/ut/utils.h
index 3bbc3d794..3b62ba7ef 100644
--- a/tests/ut/utils.h
+++ b/tests/ut/utils.h
@@ -380,6 +380,35 @@ GenSparseDataSet(int32_t rows, int32_t cols, float sparsity, int seed = 42) {
     return GenSparseDataSet(data, cols);
 }
 
+// Generate a sparse dataset with given sparsity and max value.
+inline knowhere::DataSetPtr
+GenSparseDataSetWithMaxVal(int32_t rows, int32_t cols, float sparsity, float max_val, bool use_bm25 = false,
+                           int seed = 42) {
+    int32_t num_elements = static_cast<int32_t>(rows * cols * (1.0f - sparsity));
+
+    std::mt19937 rng(seed);
+    auto real_distrib = std::uniform_real_distribution<float>(0, max_val);
+    auto row_distrib = std::uniform_int_distribution<int32_t>(0, rows - 1);
+    auto col_distrib = std::uniform_int_distribution<int32_t>(0, cols - 1);
+
+    std::vector<std::map<int32_t, float>> data(rows);
+
+    for (int32_t i = 0; i < num_elements; ++i) {
+        auto row = row_distrib(rng);
+        while (data[row].size() == (size_t)cols) {
+            row = row_distrib(rng);
+        }
+        auto col = col_distrib(rng);
+        while (data[row].find(col) != data[row].end()) {
+            col = col_distrib(rng);
+        }
+        auto val = use_bm25 ? static_cast<float>(static_cast<int32_t>(real_distrib(rng))) : real_distrib(rng);
+        data[row][col] = val;
+    }
+
+    return GenSparseDataSet(data, cols);
+}
+
 // a timer
 struct StopWatch {
     using timepoint_t = std::chrono::time_point<std::chrono::steady_clock>;