diff --git a/benchmark/hdf5/benchmark_float.cpp b/benchmark/hdf5/benchmark_float.cpp index c40b6a096..cb6e68649 100644 --- a/benchmark/hdf5/benchmark_float.cpp +++ b/benchmark/hdf5/benchmark_float.cpp @@ -14,6 +14,7 @@ #include #include "benchmark_knowhere.h" +#include "knowhere/comp/brute_force.h" #include "knowhere/comp/index_param.h" #include "knowhere/comp/knowhere_config.h" #include "knowhere/comp/local_file_manager.h" @@ -21,12 +22,42 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test { public: + template + void + test_brute_force(const knowhere::Json& cfg) { + auto conf = cfg; + std::string data_type_str = get_data_type_name(); + + auto base_ds_ptr = knowhere::GenDataSet(nb_, dim_, xb_); + auto base = knowhere::ConvertToDataTypeIfNeeded(base_ds_ptr); + + printf("\n[%0.3f s] %s | %s(%s) \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), + data_type_str.c_str()); + printf("================================================================================\n"); + for (auto nq : NQs_) { + auto ds_ptr = knowhere::GenDataSet(nq, dim_, xq_); + auto query = knowhere::ConvertToDataTypeIfNeeded(ds_ptr); + for (auto k : TOPKs_) { + conf[knowhere::meta::TOPK] = k; + CALC_TIME_SPAN(auto result = knowhere::BruteForce::Search(base, query, conf, nullptr)); + auto ids = result.value()->GetIds(); + float recall = CalcRecall(ids, nq, k); + printf(" nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", nq, k, TDIFF_, recall); + std::fflush(stdout); + } + } + printf("================================================================================\n"); + printf("[%.3f s] Test '%s/%s' done\n\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str()); + } + template void test_idmap(const knowhere::Json& cfg) { auto conf = cfg; - printf("\n[%0.3f s] %s | %s \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str()); + std::string data_type_str = get_data_type_name(); + printf("\n[%0.3f s] %s | %s(%s) \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), + data_type_str.c_str()); printf("================================================================================\n"); for (auto nq : NQs_) { auto ds_ptr = knowhere::GenDataSet(nq, dim_, xq_); @@ -50,8 +81,9 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test { auto conf = cfg; auto nlist = conf[knowhere::indexparam::NLIST].get(); - printf("\n[%0.3f s] %s | %s | nlist=%ld\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), - nlist); + std::string data_type_str = get_data_type_name(); + printf("\n[%0.3f s] %s | %s(%s) | nlist=%ld\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), + data_type_str.c_str(), nlist); printf("================================================================================\n"); for (auto nprobe : NPROBEs_) { conf[knowhere::indexparam::NPROBE] = nprobe; @@ -80,8 +112,9 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test { auto M = conf[knowhere::indexparam::HNSW_M].get(); auto efConstruction = conf[knowhere::indexparam::EFCONSTRUCTION].get(); - printf("\n[%0.3f s] %s | %s | M=%ld | efConstruction=%ld\n", get_time_diff(), ann_test_name_.c_str(), - index_type_.c_str(), M, efConstruction); + std::string data_type_str = get_data_type_name(); + printf("\n[%0.3f s] %s | %s(%s) | M=%ld | efConstruction=%ld\n", get_time_diff(), ann_test_name_.c_str(), + index_type_.c_str(), data_type_str.c_str(), M, efConstruction); printf("================================================================================\n"); for (auto ef : EFs_) { conf[knowhere::indexparam::EF] = ef; @@ -108,7 +141,9 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test { test_diskann(const knowhere::Json& cfg) { auto conf = cfg; - printf("\n[%0.3f s] %s | %s \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str()); + std::string data_type_str = get_data_type_name(); + printf("\n[%0.3f s] %s | %s(%s) \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), + data_type_str.c_str()); printf("================================================================================\n"); for (auto search_list_size : SEARCH_LISTs_) { conf["search_list_size"] = search_list_size; @@ -137,7 +172,9 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test { test_raft_cagra(const knowhere::Json& cfg) { auto conf = cfg; - printf("\n[%0.3f s] %s | %s \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str()); + std::string data_type_str = get_data_type_name(); + printf("\n[%0.3f s] %s | %s(%s) \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), + data_type_str.c_str()); printf("================================================================================\n"); for (auto itopk_size : ITOPK_SIZEs_) { conf[knowhere::indexparam::ITOPK_SIZE] = itopk_size; @@ -209,6 +246,15 @@ class Benchmark_float : public Benchmark_knowhere, public ::testing::Test { const std::vector ITOPK_SIZEs_ = {128, 192, 256}; }; +TEST_F(Benchmark_float, TEST_BRUTE_FORCE) { + index_type_ = "BruteForce"; + + knowhere::Json conf = cfg_; + test_brute_force(conf); + test_brute_force(conf); + test_brute_force(conf); +} + TEST_F(Benchmark_float, TEST_IDMAP) { index_type_ = knowhere::IndexEnum::INDEX_FAISS_IDMAP; diff --git a/benchmark/hdf5/benchmark_float_bitset.cpp b/benchmark/hdf5/benchmark_float_bitset.cpp index aab8a6731..1738f4b37 100644 --- a/benchmark/hdf5/benchmark_float_bitset.cpp +++ b/benchmark/hdf5/benchmark_float_bitset.cpp @@ -34,7 +34,9 @@ class Benchmark_float_bitset : public Benchmark_knowhere, public ::testing::Test test_ivf(const knowhere::Json& cfg) { auto conf = cfg; - printf("\n[%0.3f s] %s | %s \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str()); + std::string data_type_str = get_data_type_name(); + printf("\n[%0.3f s] %s | %s(%s) \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), + data_type_str.c_str()); printf("================================================================================\n"); for (auto per : PERCENTs_) { auto bitset_data = GenRandomBitset(nb_, nb_ * per / 100); @@ -65,7 +67,9 @@ class Benchmark_float_bitset : public Benchmark_knowhere, public ::testing::Test test_hnsw(const knowhere::Json& cfg) { auto conf = cfg; - printf("\n[%0.3f s] %s | %s \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str()); + std::string data_type_str = get_data_type_name(); + printf("\n[%0.3f s] %s | %s(%s) \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), + data_type_str.c_str()); printf("================================================================================\n"); for (auto per : PERCENTs_) { auto bitset_data = GenRandomBitset(nb_, nb_ * per / 100); @@ -97,7 +101,9 @@ class Benchmark_float_bitset : public Benchmark_knowhere, public ::testing::Test test_diskann(const knowhere::Json& cfg) { auto conf = cfg; - printf("\n[%0.3f s] %s | %s \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str()); + std::string data_type_str = get_data_type_name(); + printf("\n[%0.3f s] %s | %s(%s) \n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), + data_type_str.c_str()); printf("================================================================================\n"); for (auto per : PERCENTs_) { auto bitset_data = GenRandomBitset(nb_, nb_ * per / 100); diff --git a/benchmark/hdf5/benchmark_float_qps.cpp b/benchmark/hdf5/benchmark_float_qps.cpp index a1d94935e..ea3b23788 100644 --- a/benchmark/hdf5/benchmark_float_qps.cpp +++ b/benchmark/hdf5/benchmark_float_qps.cpp @@ -32,8 +32,9 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { float expected_recall = 1.0f; conf[knowhere::meta::TOPK] = topk_; - printf("\n[%0.3f s] %s | %s | k=%d, R@=%.4f\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), - topk_, expected_recall); + std::string data_type_str = get_data_type_name(); + printf("\n[%0.3f s] %s | %s(%s) | k=%d, R@=%.4f\n", get_time_diff(), ann_test_name_.c_str(), + index_type_.c_str(), data_type_str.c_str(), topk_, expected_recall); printf("================================================================================\n"); for (auto thread_num : THREAD_NUMs_) { CALC_TIME_SPAN(task(conf, thread_num, nq_)); @@ -49,6 +50,7 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { test_ivf(const knowhere::Json& cfg) { auto conf = cfg; auto nlist = conf[knowhere::indexparam::NLIST].get(); + std::string data_type_str = get_data_type_name(); auto find_smallest_nprobe = [&](float expected_recall) -> int32_t { conf[knowhere::meta::TOPK] = topk_; @@ -83,8 +85,9 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { conf[knowhere::indexparam::NPROBE] = nprobe; conf[knowhere::meta::TOPK] = topk_; - printf("\n[%0.3f s] %s | %s | nlist=%d, nprobe=%d, k=%d, R@=%.4f\n", get_time_diff(), - ann_test_name_.c_str(), index_type_.c_str(), nlist, nprobe, topk_, expected_recall); + printf("\n[%0.3f s] %s | %s(%s) | nlist=%d, nprobe=%d, k=%d, R@=%.4f\n", get_time_diff(), + ann_test_name_.c_str(), index_type_.c_str(), data_type_str.c_str(), nlist, nprobe, topk_, + expected_recall); printf("================================================================================\n"); for (auto thread_num : THREAD_NUMs_) { CALC_TIME_SPAN(task(conf, thread_num, nq_)); @@ -100,6 +103,7 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { void test_raft_cagra(const knowhere::Json& cfg) { auto conf = cfg; + std::string data_type_str = get_data_type_name(); auto find_smallest_max_iters = [&](float expected_recall) -> int32_t { auto ds_ptr = knowhere::GenDataSet(nq_, dim_, xq_); @@ -134,8 +138,8 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { conf[knowhere::meta::TOPK] = topk_; conf[knowhere::indexparam::MAX_ITERATIONS] = find_smallest_max_iters(expected_recall); - printf("\n[%0.3f s] %s | %s | k=%d, R@=%.4f\n", get_time_diff(), ann_test_name_.c_str(), - index_type_.c_str(), topk_, expected_recall); + printf("\n[%0.3f s] %s | %s(%s) | k=%d, R@=%.4f\n", get_time_diff(), ann_test_name_.c_str(), + index_type_.c_str(), data_type_str.c_str(), topk_, expected_recall); printf("================================================================================\n"); for (auto thread_num : THREAD_NUMs_) { CALC_TIME_SPAN(task(conf, thread_num, nq_)); @@ -153,6 +157,7 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { auto conf = cfg; auto M = conf[knowhere::indexparam::HNSW_M].get(); auto efConstruction = conf[knowhere::indexparam::EFCONSTRUCTION].get(); + std::string data_type_str = get_data_type_name(); auto find_smallest_ef = [&](float expected_recall) -> int32_t { conf[knowhere::meta::TOPK] = topk_; @@ -187,8 +192,9 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { conf[knowhere::indexparam::EF] = ef; conf[knowhere::meta::TOPK] = topk_; - printf("\n[%0.3f s] %s | %s | M=%d | efConstruction=%d, ef=%d, k=%d, R@=%.4f\n", get_time_diff(), - ann_test_name_.c_str(), index_type_.c_str(), M, efConstruction, ef, topk_, expected_recall); + printf("\n[%0.3f s] %s | %s(%s) | M=%d | efConstruction=%d, ef=%d, k=%d, R@=%.4f\n", get_time_diff(), + ann_test_name_.c_str(), index_type_.c_str(), data_type_str.c_str(), M, efConstruction, ef, topk_, + expected_recall); printf("================================================================================\n"); for (auto thread_num : THREAD_NUMs_) { CALC_TIME_SPAN(task(conf, thread_num, nq_)); @@ -208,6 +214,7 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { const auto reorder_k = conf[knowhere::indexparam::REORDER_K].get(); const auto with_raw_data = conf[knowhere::indexparam::WITH_RAW_DATA].get(); auto nlist = conf[knowhere::indexparam::NLIST].get(); + std::string data_type_str = get_data_type_name(); auto find_smallest_nprobe = [&](float expected_recall) -> int32_t { conf[knowhere::meta::TOPK] = topk_; @@ -244,9 +251,9 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { conf[knowhere::indexparam::NPROBE] = nprobe; conf[knowhere::meta::TOPK] = topk_; - printf("\n[%0.3f s] %s | %s | nlist=%d, nprobe=%d, reorder_k=%d, with_raw_data=%d, k=%d, R@=%.4f\n", - get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), nlist, nprobe, reorder_k, - with_raw_data ? 1 : 0, topk_, expected_recall); + printf("\n[%0.3f s] %s | %s(%s) | nlist=%d, nprobe=%d, reorder_k=%d, with_raw_data=%d, k=%d, R@=%.4f\n", + get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), data_type_str.c_str(), nlist, nprobe, + reorder_k, with_raw_data ? 1 : 0, topk_, expected_recall); printf("================================================================================\n"); for (auto thread_num : THREAD_NUMs_) { CALC_TIME_SPAN(task(conf, thread_num, nq_)); @@ -263,6 +270,7 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { void test_diskann(const knowhere::Json& cfg) { auto conf = cfg; + std::string data_type_str = get_data_type_name(); auto find_smallest_search_list_size = [&](float expected_recall) -> int32_t { conf[knowhere::meta::TOPK] = topk_; @@ -298,8 +306,9 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { conf[knowhere::indexparam::SEARCH_LIST_SIZE] = search_list_size; conf[knowhere::meta::TOPK] = topk_; - printf("\n[%0.3f s] %s | %s | search_list_size=%d, k=%d, R@=%.4f\n", get_time_diff(), - ann_test_name_.c_str(), index_type_.c_str(), search_list_size, topk_, expected_recall); + printf("\n[%0.3f s] %s | %s(%s) | search_list_size=%d, k=%d, R@=%.4f\n", get_time_diff(), + ann_test_name_.c_str(), index_type_.c_str(), data_type_str.c_str(), search_list_size, topk_, + expected_recall); printf("================================================================================\n"); for (auto thread_num : THREAD_NUMs_) { CALC_TIME_SPAN(task(conf, thread_num, nq_)); diff --git a/benchmark/hdf5/benchmark_float_range.cpp b/benchmark/hdf5/benchmark_float_range.cpp index b2c86a37d..7f92b3a90 100644 --- a/benchmark/hdf5/benchmark_float_range.cpp +++ b/benchmark/hdf5/benchmark_float_range.cpp @@ -26,8 +26,9 @@ class Benchmark_float_range : public Benchmark_knowhere, public ::testing::Test auto conf = cfg; auto radius = conf.at(knowhere::meta::RADIUS).get(); - printf("\n[%0.3f s] %s | %s, radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), - radius); + std::string data_type_str = get_data_type_name(); + printf("\n[%0.3f s] %s | %s(%s) | radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), + data_type_str.c_str(), radius); printf("================================================================================\n"); for (auto nq : NQs_) { auto ds_ptr = knowhere::GenDataSet(nq, dim_, xq_); @@ -54,8 +55,9 @@ class Benchmark_float_range : public Benchmark_knowhere, public ::testing::Test auto nlist = conf[knowhere::indexparam::NLIST].get(); auto radius = conf.at(knowhere::meta::RADIUS).get(); - printf("\n[%0.3f s] %s | %s | nlist=%ld, radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(), - index_type_.c_str(), nlist, radius); + std::string data_type_str = get_data_type_name(); + printf("\n[%0.3f s] %s | %s(%s) | nlist=%ld, radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(), + index_type_.c_str(), data_type_str.c_str(), nlist, radius); printf("================================================================================\n"); for (auto nprobe : NPROBEs_) { conf[knowhere::indexparam::NPROBE] = nprobe; @@ -84,8 +86,9 @@ class Benchmark_float_range : public Benchmark_knowhere, public ::testing::Test auto efc = conf[knowhere::indexparam::EFCONSTRUCTION].get(); auto radius = conf.at(knowhere::meta::RADIUS).get(); - printf("\n[%0.3f s] %s | %s | M=%ld | efc=%ld, radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(), - index_type_.c_str(), M, efc, radius); + std::string data_type_str = get_data_type_name(); + printf("\n[%0.3f s] %s | %s(%s) | M=%ld | efc=%ld, radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(), + index_type_.c_str(), data_type_str.c_str(), M, efc, radius); printf("================================================================================\n"); for (auto ef : EFs_) { conf[knowhere::indexparam::EF] = ef; @@ -113,8 +116,9 @@ class Benchmark_float_range : public Benchmark_knowhere, public ::testing::Test auto conf = cfg; auto radius = conf.at(knowhere::meta::RADIUS).get(); - printf("\n[%0.3f s] %s | %s, radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), - radius); + std::string data_type_str = get_data_type_name(); + printf("\n[%0.3f s] %s | %s(%s) | radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), + data_type_str.c_str(), radius); printf("================================================================================\n"); for (auto search_list_size : SEARCH_LISTs_) { conf["search_list_size"] = search_list_size; diff --git a/benchmark/hdf5/benchmark_float_range_bitset.cpp b/benchmark/hdf5/benchmark_float_range_bitset.cpp index 00673f2fb..108006c49 100644 --- a/benchmark/hdf5/benchmark_float_range_bitset.cpp +++ b/benchmark/hdf5/benchmark_float_range_bitset.cpp @@ -35,8 +35,9 @@ class Benchmark_float_range_bitset : public Benchmark_knowhere, public ::testing auto conf = cfg; auto radius = conf[knowhere::meta::RADIUS].get(); - printf("\n[%0.3f s] %s | %s | radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), - radius); + std::string data_type_str = get_data_type_name(); + printf("\n[%0.3f s] %s | %s(%s) | radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), + data_type_str.c_str(), radius); printf("================================================================================\n"); for (auto per : PERCENTs_) { auto bitset_data = GenRandomBitset(nb_, nb_ * per / 100); @@ -68,8 +69,9 @@ class Benchmark_float_range_bitset : public Benchmark_knowhere, public ::testing auto conf = cfg; auto radius = conf[knowhere::meta::RADIUS].get(); - printf("\n[%0.3f s] %s | %s | radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), - radius); + std::string data_type_str = get_data_type_name(); + printf("\n[%0.3f s] %s | %s(%s) | radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), + data_type_str.c_str(), radius); printf("================================================================================\n"); for (auto per : PERCENTs_) { auto bitset_data = GenRandomBitset(nb_, nb_ * per / 100); @@ -102,8 +104,9 @@ class Benchmark_float_range_bitset : public Benchmark_knowhere, public ::testing auto conf = cfg; auto radius = conf[knowhere::meta::RADIUS].get(); - printf("\n[%0.3f s] %s | %s | radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), - radius); + std::string data_type_str = get_data_type_name(); + printf("\n[%0.3f s] %s | %s(%s) | radius=%.3f\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), + data_type_str.c_str(), radius); printf("================================================================================\n"); for (auto per : PERCENTs_) { auto bitset_data = GenRandomBitset(nb_, nb_ * per / 100); diff --git a/benchmark/hdf5/benchmark_knowhere.h b/benchmark/hdf5/benchmark_knowhere.h index 25331b5d0..cad82fb35 100644 --- a/benchmark/hdf5/benchmark_knowhere.h +++ b/benchmark/hdf5/benchmark_knowhere.h @@ -93,6 +93,22 @@ class Benchmark_knowhere : public Benchmark_hdf5 { index.Deserialize(binary_set, conf); } + template + static std::string + get_data_type_name() { + if constexpr (std::is_same_v) { + return "FP32"; + } else if constexpr (std::is_same_v) { + return "FP16"; + } else if constexpr (std::is_same_v) { + return "BF16"; + } else if constexpr (std::is_same_v) { + return "INT8"; + } else { + return ""; + } + } + template static std::string get_index_name(const std::string& ann_test_name, const std::string& index_type,