Skip to content

Commit

Permalink
Optimize benchmark for SCANN & HNSW (#1010)
Browse files Browse the repository at this point in the history
Signed-off-by: Cai Yudong <[email protected]>
  • Loading branch information
cydrain authored Dec 27, 2024
1 parent 6a691f6 commit 1cb3f0e
Show file tree
Hide file tree
Showing 6 changed files with 191 additions and 43 deletions.
9 changes: 6 additions & 3 deletions benchmark/hdf5/benchmark_binary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,8 @@ TEST_F(Benchmark_binary, TEST_BINARY_IDMAP) {
index_type_ = knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP;

knowhere::Json conf = cfg_;
std::string index_file_name = get_index_name<knowhere::bin1>({});
std::vector<int32_t> params = {};
std::string index_file_name = get_index_name<knowhere::bin1>(params);
create_index<knowhere::bin1>(index_file_name, conf);
test_binary_idmap(conf);
}
Expand All @@ -187,7 +188,8 @@ TEST_F(Benchmark_binary, TEST_BINARY_IVF_FLAT) {
knowhere::Json conf = cfg_;
for (auto nlist : NLISTs_) {
conf[knowhere::indexparam::NLIST] = nlist;
std::string index_file_name = get_index_name<knowhere::bin1>({nlist});
std::vector<int32_t> params = {nlist};
std::string index_file_name = get_index_name<knowhere::bin1>(params);
create_index<knowhere::bin1>(index_file_name, conf);
test_binary_ivf(conf);
}
Expand All @@ -201,7 +203,8 @@ TEST_F(Benchmark_binary, TEST_BINARY_HNSW) {
conf[knowhere::indexparam::HNSW_M] = M;
for (auto efc : EFCONs_) {
conf[knowhere::indexparam::EFCONSTRUCTION] = efc;
std::string index_file_name = get_index_name<knowhere::bin1>({M, efc});
std::vector<int32_t> params = {M, efc};
std::string index_file_name = get_index_name<knowhere::bin1>(params);
create_index<knowhere::bin1>(index_file_name, conf);
test_binary_hnsw(conf);
}
Expand Down
9 changes: 6 additions & 3 deletions benchmark/hdf5/benchmark_binary_range.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,8 @@ TEST_F(Benchmark_binary_range, TEST_BINARY_IDMAP) {
index_type_ = knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP;

knowhere::Json conf = cfg_;
std::string index_file_name = get_index_name<knowhere::bin1>({});
std::vector<int32_t> params = {};
std::string index_file_name = get_index_name<knowhere::bin1>(params);
create_index<knowhere::bin1>(index_file_name, conf);
test_binary_idmap(conf);
}
Expand All @@ -193,7 +194,8 @@ TEST_F(Benchmark_binary_range, TEST_BINARY_IVF_FLAT) {
knowhere::Json conf = cfg_;
for (auto nlist : NLISTs_) {
conf[knowhere::indexparam::NLIST] = nlist;
std::string index_file_name = get_index_name<knowhere::bin1>({nlist});
std::vector<int32_t> params = {nlist};
std::string index_file_name = get_index_name<knowhere::bin1>(params);
create_index<knowhere::bin1>(index_file_name, conf);
test_binary_ivf(conf);
}
Expand All @@ -207,7 +209,8 @@ TEST_F(Benchmark_binary_range, TEST_BINARY_HNSW) {
conf[knowhere::indexparam::HNSW_M] = M;
for (auto efc : EFCONs_) {
conf[knowhere::indexparam::EFCONSTRUCTION] = efc;
std::string index_file_name = get_index_name<knowhere::bin1>({M, efc});
std::vector<int32_t> params = {M, efc};
std::string index_file_name = get_index_name<knowhere::bin1>(params);
create_index<knowhere::bin1>(index_file_name, conf);
test_binary_hnsw(conf);
}
Expand Down
183 changes: 152 additions & 31 deletions benchmark/hdf5/benchmark_float.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,16 +110,48 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {
test_scann(const knowhere::Json& cfg) {
auto conf = cfg;

const auto reorder_k = conf[knowhere::indexparam::REORDER_K].get<int32_t>();
const auto with_raw_data = conf[knowhere::indexparam::WITH_RAW_DATA].get<bool>();
auto nlist = conf[knowhere::indexparam::NLIST].get<int32_t>();
std::string data_type_str = get_data_type_name<T>();

printf("\n[%0.3f s] %s | %s(%s) | nlist=%d, reorder_k=%d\n", get_time_diff(), ann_test_name_.c_str(),
index_type_.c_str(), data_type_str.c_str(), nlist, reorder_k);
printf("\n[%0.3f s] %s | %s(%s) | nlist=%d\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(),
data_type_str.c_str(), nlist);
printf("================================================================================\n");
for (auto nprobe : NPROBEs_) {
conf[knowhere::indexparam::NPROBE] = nprobe;
for (auto reorder_k : SCANN_REORDER_Ks) {
conf[knowhere::indexparam::REORDER_K] = reorder_k;
for (auto nprobe : NPROBEs_) {
conf[knowhere::indexparam::NPROBE] = nprobe;
for (auto nq : NQs_) {
auto ds_ptr = knowhere::GenDataSet(nq, dim_, xq_);
auto query = knowhere::ConvertToDataTypeIfNeeded<T>(ds_ptr);
for (auto k : TOPKs_) {
conf[knowhere::meta::TOPK] = k;
CALC_TIME_SPAN(auto result = index_.value().Search(query, conf, nullptr));
auto ids = result.value()->GetIds();
float recall = CalcRecall(ids, nq, k);
printf(" reorder_k = %4d, nprobe = %4d, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n",
reorder_k, nprobe, nq, k, TDIFF_, recall);
std::fflush(stdout);
}
}
}
}
printf("================================================================================\n");
printf("[%.3f s] Test '%s/%s' done\n\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str());
}

template <typename T>
void
test_hnsw(const knowhere::Json& cfg) {
auto conf = cfg;
auto M = conf[knowhere::indexparam::HNSW_M].get<int64_t>();
auto efConstruction = conf[knowhere::indexparam::EFCONSTRUCTION].get<int64_t>();

std::string data_type_str = get_data_type_name<T>();
printf("\n[%0.3f s] %s | %s(%s) | M=%ld | efc=%ld\n", get_time_diff(), ann_test_name_.c_str(),
index_type_.c_str(), data_type_str.c_str(), M, efConstruction);
printf("================================================================================\n");
for (auto ef : EFs_) {
conf[knowhere::indexparam::EF] = ef;
for (auto nq : NQs_) {
auto ds_ptr = knowhere::GenDataSet(nq, dim_, xq_);
auto query = knowhere::ConvertToDataTypeIfNeeded<T>(ds_ptr);
Expand All @@ -128,8 +160,7 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {
CALC_TIME_SPAN(auto result = index_.value().Search(query, conf, nullptr));
auto ids = result.value()->GetIds();
float recall = CalcRecall(ids, nq, k);
printf(" nprobe = %4d, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", nprobe, nq, k, TDIFF_,
recall);
printf(" ef = %4d, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", ef, nq, k, TDIFF_, recall);
std::fflush(stdout);
}
}
Expand All @@ -140,17 +171,20 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {

template <typename T>
void
test_hnsw(const knowhere::Json& cfg) {
test_hnsw_refine(const knowhere::Json& cfg) {
auto conf = cfg;
auto M = conf[knowhere::indexparam::HNSW_M].get<int64_t>();
auto efConstruction = conf[knowhere::indexparam::EFCONSTRUCTION].get<int64_t>();
auto hnsw_M = conf[knowhere::indexparam::HNSW_M].get<int64_t>();
auto efc = conf[knowhere::indexparam::EFCONSTRUCTION].get<int64_t>();

auto ef = EFs_[0];
conf[knowhere::indexparam::EF] = ef;

std::string data_type_str = get_data_type_name<T>();
printf("\n[%0.3f s] %s | %s(%s) | M=%ld | efConstruction=%ld\n", get_time_diff(), ann_test_name_.c_str(),
index_type_.c_str(), data_type_str.c_str(), M, efConstruction);
printf("\n[%0.3f s] %s | %s(%s) | hnsw_M=%ld, efc=%ld, ef=%ld\n", get_time_diff(), ann_test_name_.c_str(),
index_type_.c_str(), data_type_str.c_str(), hnsw_M, efc, ef);
printf("================================================================================\n");
for (auto ef : EFs_) {
conf[knowhere::indexparam::EF] = ef;
for (auto refine_k : HNSW_REFINE_Ks_) {
conf[knowhere::indexparam::HNSW_REFINE_K] = refine_k;
for (auto nq : NQs_) {
auto ds_ptr = knowhere::GenDataSet(nq, dim_, xq_);
auto query = knowhere::ConvertToDataTypeIfNeeded<T>(ds_ptr);
Expand All @@ -159,7 +193,8 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {
CALC_TIME_SPAN(auto result = index_.value().Search(query, conf, nullptr));
auto ids = result.value()->GetIds();
float recall = CalcRecall(ids, nq, k);
printf(" ef = %4d, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", ef, nq, k, TDIFF_, recall);
printf(" refine_k = %3d, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", refine_k, nq, k, TDIFF_,
recall);
std::fflush(stdout);
}
}
Expand Down Expand Up @@ -267,13 +302,14 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test {
const int32_t NBITS_ = 8;

// SCANN index params
const std::vector<int32_t> SCANN_REORDER_K = {256, 512, 1024};
const std::vector<bool> SCANN_WITH_RAW_DATA = {true};
const std::vector<int32_t> SCANN_REORDER_Ks = {128, 256, 512};

// HNSW index params
const std::vector<int32_t> HNSW_Ms_ = {16};
const std::vector<int32_t> EFCONs_ = {200};
const std::vector<int32_t> EFs_ = {128, 256, 512};
const std::vector<std::string> HNSW_SQ_TYPEs_ = {"SQ8", "FP16"};
const std::vector<int32_t> HNSW_REFINE_Ks_ = {1, 2, 4, 8, 16};

// DISKANN index params
const std::vector<int32_t> SEARCH_LISTs_ = {100, 200, 400};
Expand Down Expand Up @@ -383,23 +419,18 @@ TEST_F(Benchmark_float, TEST_SCANN) {

std::string index_file_name;
knowhere::Json conf = cfg_;
for (auto reorder_k : SCANN_REORDER_K) {
conf[knowhere::indexparam::REORDER_K] = reorder_k;
for (auto nlist : NLISTs_) {
conf[knowhere::indexparam::NLIST] = nlist;
for (const auto with_raw_data : SCANN_WITH_RAW_DATA) {
conf[knowhere::indexparam::WITH_RAW_DATA] = with_raw_data;
std::vector<int32_t> params = {nlist, reorder_k, with_raw_data};
conf[knowhere::indexparam::WITH_RAW_DATA] = true;
for (auto nlist : NLISTs_) {
conf[knowhere::indexparam::NLIST] = nlist;
std::vector<int32_t> params = {nlist};

TEST_SCANN(knowhere::fp32, params);
TEST_SCANN(knowhere::fp16, params);
TEST_SCANN(knowhere::bf16, params);
}
}
TEST_SCANN(knowhere::fp32, params);
TEST_SCANN(knowhere::fp16, params);
TEST_SCANN(knowhere::bf16, params);
}
}

TEST_F(Benchmark_float, TEST_HNSW) {
TEST_F(Benchmark_float, TEST_HNSW_FLAT) {
index_type_ = knowhere::IndexEnum::INDEX_HNSW;

#define TEST_HNSW(T, X) \
Expand All @@ -422,6 +453,96 @@ TEST_F(Benchmark_float, TEST_HNSW) {
}
}

// Benchmarks the HNSW_SQ index with FLAT refinement enabled.
// For every (M, efConstruction, sq_type) combination, and for each of the
// fp32 / fp16 / bf16 data types, the index file name is derived from the
// parameters, create_index is invoked, and test_hnsw_refine runs the searches.
TEST_F(Benchmark_float, TEST_HNSW_SQ) {
    index_type_ = knowhere::IndexEnum::INDEX_HNSW_SQ;

// TEST_HNSW is defined by other tests in this file as well. Drop any earlier
// definition first so that this one can never be an incompatible redefinition.
#undef TEST_HNSW
#define TEST_HNSW(T, X)                     \
    index_file_name = get_index_name<T>(X); \
    create_index<T>(index_file_name, conf); \
    test_hnsw_refine<T>(conf);

    std::string index_file_name;
    knowhere::Json conf = cfg_;

    // Refinement settings are shared by every parameter combination below.
    conf[knowhere::indexparam::HNSW_REFINE] = true;
    conf[knowhere::indexparam::HNSW_REFINE_TYPE] = "FLAT";

    for (auto M : HNSW_Ms_) {
        conf[knowhere::indexparam::HNSW_M] = M;
        for (auto efc : EFCONs_) {
            conf[knowhere::indexparam::EFCONSTRUCTION] = efc;
            // Bind by const reference: the elements are std::string, no copy is needed.
            for (const auto& sq_type : HNSW_SQ_TYPEs_) {
                conf[knowhere::indexparam::SQ_TYPE] = sq_type;
                // sq_type is not numeric, so the file-name parameters are passed as strings.
                std::vector<std::string> params = {std::to_string(M), std::to_string(efc), sq_type};

                TEST_HNSW(knowhere::fp32, params);
                TEST_HNSW(knowhere::fp16, params);
                TEST_HNSW(knowhere::bf16, params);
            }
        }
    }
}

// Benchmarks the HNSW_PQ index with FLAT refinement enabled.
// For every (HNSW M, efConstruction, PQ m) combination, and for each of the
// fp32 / fp16 / bf16 data types, the index file name is derived from the
// parameters, create_index is invoked, and test_hnsw_refine runs the searches.
TEST_F(Benchmark_float, TEST_HNSW_PQ) {
    index_type_ = knowhere::IndexEnum::INDEX_HNSW_PQ;

// TEST_HNSW is defined by other tests in this file as well. Drop any earlier
// definition first so that this one can never be an incompatible redefinition.
#undef TEST_HNSW
#define TEST_HNSW(T, X)                     \
    index_file_name = get_index_name<T>(X); \
    create_index<T>(index_file_name, conf); \
    test_hnsw_refine<T>(conf);

    std::string index_file_name;
    knowhere::Json conf = cfg_;

    // Settings shared by every parameter combination below.
    conf[knowhere::indexparam::HNSW_REFINE] = true;
    conf[knowhere::indexparam::HNSW_REFINE_TYPE] = "FLAT";
    conf[knowhere::indexparam::NBITS] = NBITS_;
    // indexparam::M is not preset here: it is assigned from Ms_ in the
    // innermost loop before every use, so a default would be a dead store.
    for (auto hnsw_m : HNSW_Ms_) {
        conf[knowhere::indexparam::HNSW_M] = hnsw_m;
        for (auto efc : EFCONs_) {
            conf[knowhere::indexparam::EFCONSTRUCTION] = efc;
            for (auto pq_m : Ms_) {
                conf[knowhere::indexparam::M] = pq_m;
                std::vector<int32_t> params = {hnsw_m, efc, pq_m};

                TEST_HNSW(knowhere::fp32, params);
                TEST_HNSW(knowhere::fp16, params);
                TEST_HNSW(knowhere::bf16, params);
            }
        }
    }
}

// Benchmarks the HNSW_PRQ index with FLAT refinement enabled.
// NBITS, M and PRQ_NUM are fixed for the whole test; only HNSW M and
// efConstruction vary. Each combination is run for fp32, fp16 and bf16.
TEST_F(Benchmark_float, TEST_HNSW_PRQ) {
    index_type_ = knowhere::IndexEnum::INDEX_HNSW_PRQ;

#define TEST_HNSW(T, X)                     \
    index_file_name = get_index_name<T>(X); \
    create_index<T>(index_file_name, conf); \
    test_hnsw_refine<T>(conf);

    knowhere::Json conf = cfg_;
    std::string index_file_name;

    // Quantizer settings that stay constant across all combinations.
    conf[knowhere::indexparam::NBITS] = NBITS_;
    conf[knowhere::indexparam::M] = 8;
    conf[knowhere::indexparam::PRQ_NUM] = 2;

    // Refinement settings.
    conf[knowhere::indexparam::HNSW_REFINE] = true;
    conf[knowhere::indexparam::HNSW_REFINE_TYPE] = "FLAT";

    for (auto hnsw_m : HNSW_Ms_) {
        conf[knowhere::indexparam::HNSW_M] = hnsw_m;
        for (auto ef_construction : EFCONs_) {
            conf[knowhere::indexparam::EFCONSTRUCTION] = ef_construction;

            // Only the varying build parameters go into the index file name.
            std::vector<int32_t> params = {hnsw_m, ef_construction};

            TEST_HNSW(knowhere::fp32, params);
            TEST_HNSW(knowhere::fp16, params);
            TEST_HNSW(knowhere::bf16, params);
        }
    }
}

#ifdef KNOWHERE_WITH_DISKANN
TEST_F(Benchmark_float, TEST_DISKANN) {
index_type_ = knowhere::IndexEnum::INDEX_DISKANN;
Expand Down
14 changes: 12 additions & 2 deletions benchmark/hdf5/benchmark_knowhere.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,10 @@ class Benchmark_knowhere : public Benchmark_hdf5 {
template <typename T>
static std::string
get_index_name(const std::string& ann_test_name, const std::string& index_type,
const std::vector<int32_t>& params) {
const std::vector<std::string>& params) {
std::string params_str = "";
for (size_t i = 0; i < params.size(); i++) {
params_str += "_" + std::to_string(params[i]);
params_str += "_" + params[i];
}
if constexpr (std::is_same_v<T, knowhere::fp32>) {
return ann_test_name + "_" + index_type + params_str + "_fp32" + ".index";
Expand All @@ -131,6 +131,16 @@ class Benchmark_knowhere : public Benchmark_hdf5 {
// Builds the index file name for the current test name and index type from
// integer parameters: each value is converted with std::to_string and the
// resulting strings are forwarded to the three-argument get_index_name overload.
template <typename T>
std::string
get_index_name(const std::vector<int32_t>& params) {
    std::vector<std::string> as_text;
    for (size_t i = 0; i < params.size(); ++i) {
        as_text.push_back(std::to_string(params[i]));
    }
    return this->get_index_name<T>(ann_test_name_, index_type_, as_text);
}

// Builds the index file name for the current test name and index type from
// parameters that are already strings, by forwarding them unchanged to the
// three-argument get_index_name overload.
template <typename T>
std::string
get_index_name(const std::vector<std::string>& params) {
    std::string index_name = this->get_index_name<T>(ann_test_name_, index_type_, params);
    return index_name;
}

Expand Down
16 changes: 12 additions & 4 deletions benchmark/hdf5/ref_logs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ test_binary_range_hnsw:

###################################################################################################
# Test Knowhere float index
test_float: test_float_brute_force test_float_idmap test_float_ivf_flat test_float_ivf_sq8 test_float_ivf_pq test_float_scann test_float_hnsw test_float_diskann
test_float: test_float_brute_force test_float_idmap test_float_ivf_flat test_float_ivf_sq8 test_float_ivf_pq test_float_scann \
test_float_hnsw_flat test_float_hnsw_sq test_float_hnsw_pq test_float_diskann
test_float_raft: test_float_raft_brute_force test_float_raft_ivf_flat test_float_raft_ivf_pq test_float_raft_cagra
test_float_ivf: test_float_ivf_flat test_float_ivf_pq
test_float_ivf: test_float_ivf_flat test_float_ivf_sq8 test_float_ivf_pq
test_float_hnsw: test_float_hnsw_flat test_float_hnsw_sq test_float_hnsw_pq test_float_hnsw_prq

test_float_brute_force:
./benchmark_float --gtest_filter="Benchmark_float.TEST_BRUTE_FORCE" | tee test_float_brute_force.log
Expand All @@ -38,8 +40,14 @@ test_float_ivf_pq:
./benchmark_float --gtest_filter="Benchmark_float.TEST_IVF_PQ" | tee test_float_ivf_pq.log
test_float_scann:
./benchmark_float --gtest_filter="Benchmark_float.TEST_SCANN" | tee test_float_scann.log
test_float_hnsw:
./benchmark_float --gtest_filter="Benchmark_float.TEST_HNSW" | tee test_float_hnsw.log
test_float_hnsw_flat:
./benchmark_float --gtest_filter="Benchmark_float.TEST_HNSW_FLAT" | tee test_float_hnsw_flat.log
test_float_hnsw_sq:
./benchmark_float --gtest_filter="Benchmark_float.TEST_HNSW_SQ" | tee test_float_hnsw_sq.log
test_float_hnsw_pq:
./benchmark_float --gtest_filter="Benchmark_float.TEST_HNSW_PQ" | tee test_float_hnsw_pq.log
test_float_hnsw_prq:
./benchmark_float --gtest_filter="Benchmark_float.TEST_HNSW_PRQ" | tee test_float_hnsw_prq.log
test_float_diskann:
./benchmark_float --gtest_filter="Benchmark_float.TEST_DISKANN" | tee test_float_diskann.log

Expand Down
3 changes: 3 additions & 0 deletions include/knowhere/comp/index_param.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ constexpr const char* SEARCH_CACHE_BUDGET_GB = "search_cache_budget_gb";
constexpr const char* SEARCH_LIST_SIZE = "search_list_size";

// FAISS additional Params
// String names of the refine-related parameters.
constexpr const char* HNSW_REFINE = "refine";
constexpr const char* HNSW_REFINE_K = "refine_k";
constexpr const char* HNSW_REFINE_TYPE = "refine_type";
constexpr const char* SQ_TYPE = "sq_type"; // for IVF_SQ and HNSW_SQ
constexpr const char* PRQ_NUM = "nrq"; // for PRQ, number of residual quantizers

Expand Down

0 comments on commit 1cb3f0e

Please sign in to comment.