Skip to content

Commit

Permalink
Merge branch 'merge_baseline' into faiss_174_upgrade_with_my_files
Browse files Browse the repository at this point in the history
  • Loading branch information
alexanderguzhva committed Sep 20, 2023
2 parents 4d7d7e2 + e0a44d8 commit 8382969
Show file tree
Hide file tree
Showing 55 changed files with 534 additions and 198 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ venv/
**/knowhere/swigknowhere.py
wheelhouse/*

**/thirdparty/cardinal


*.bin

Expand Down
3 changes: 2 additions & 1 deletion benchmark/hdf5/benchmark_float_bitset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,8 @@ TEST_F(Benchmark_float_bitset, TEST_DISKANN) {
std::shared_ptr<knowhere::FileManager> file_manager = std::make_shared<knowhere::LocalFileManager>();
auto diskann_index_pack = knowhere::Pack(file_manager);

index_ = knowhere::IndexFactory::Instance().Create(index_type_, diskann_index_pack);
auto version = knowhere::Version::GetCurrentVersion().VersionCode();
index_ = knowhere::IndexFactory::Instance().Create(index_type_, version, diskann_index_pack);
printf("[%.3f s] Building all on %d vectors\n", get_time_diff(), nb_);
knowhere::DataSetPtr ds_ptr = nullptr;
index_.Build(*ds_ptr, conf);
Expand Down
3 changes: 2 additions & 1 deletion benchmark/hdf5/benchmark_float_range_bitset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,8 @@ TEST_F(Benchmark_float_range_bitset, TEST_DISKANN) {
std::shared_ptr<knowhere::FileManager> file_manager = std::make_shared<knowhere::LocalFileManager>();
auto diskann_index_pack = knowhere::Pack(file_manager);

index_ = knowhere::IndexFactory::Instance().Create(index_type_, diskann_index_pack);
auto version = knowhere::Version::GetCurrentVersion().VersionCode();
index_ = knowhere::IndexFactory::Instance().Create(index_type_, version, diskann_index_pack);
printf("[%.3f s] Building all on %d vectors\n", get_time_diff(), nb_);
knowhere::DataSetPtr ds_ptr = nullptr;
index_.Build(*ds_ptr, conf);
Expand Down
7 changes: 5 additions & 2 deletions benchmark/hdf5/benchmark_knowhere.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "knowhere/config.h"
#include "knowhere/factory.h"
#include "knowhere/index.h"
#include "knowhere/version.h"

class Benchmark_knowhere : public Benchmark_hdf5 {
public:
Expand Down Expand Up @@ -93,8 +94,9 @@ class Benchmark_knowhere : public Benchmark_hdf5 {

knowhere::Index<knowhere::IndexNode>
create_index(const std::string& index_file_name, const knowhere::Json& conf) {
auto version = knowhere::Version::GetCurrentVersion().VersionCode();
printf("[%.3f s] Creating index \"%s\"\n", get_time_diff(), index_type_.c_str());
index_ = knowhere::IndexFactory::Instance().Create(index_type_);
index_ = knowhere::IndexFactory::Instance().Create(index_type_, version);

try {
printf("[%.3f s] Reading index file: %s\n", get_time_diff(), index_file_name.c_str());
Expand All @@ -112,11 +114,12 @@ class Benchmark_knowhere : public Benchmark_hdf5 {

knowhere::Index<knowhere::IndexNode>
create_golden_index(const knowhere::Json& conf) {
auto version = knowhere::Version::GetCurrentVersion().VersionCode();
golden_index_type_ = knowhere::IndexEnum::INDEX_FAISS_IDMAP;

std::string golden_index_file_name = ann_test_name_ + "_" + golden_index_type_ + "_GOLDEN" + ".index";
printf("[%.3f s] Creating golden index \"%s\"\n", get_time_diff(), golden_index_type_.c_str());
golden_index_ = knowhere::IndexFactory::Instance().Create(golden_index_type_);
golden_index_ = knowhere::IndexFactory::Instance().Create(golden_index_type_, version);

try {
printf("[%.3f s] Reading golden index file: %s\n", get_time_diff(), golden_index_file_name.c_str());
Expand Down
4 changes: 2 additions & 2 deletions ci/docker/builder/cpu/ubuntu20.04/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM ubuntu:20.04

ENV CMAKE_VERSION="v3.23"
ENV CMAKE_TAR="cmake-3.23.0-linux-x86_64.tar.gz"
ENV CMAKE_VERSION="v3.27"
ENV CMAKE_TAR="cmake-3.27.5-linux-x86_64.tar.gz"
RUN apt-get update && apt-get install -y --no-install-recommends wget curl g++ gcc ca-certificates\
make ccache python3-dev gfortran python3-setuptools swig libopenblas-dev pip \
&& apt-get remove --purge -y \
Expand Down
4 changes: 2 additions & 2 deletions ci/docker/builder/gpu/ubuntu20.04/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM nvidia/cuda:11.6.0-devel-ubuntu20.04

ENV CMAKE_VERSION="v3.23"
ENV CMAKE_TAR="cmake-3.23.1-linux-x86_64.tar.gz"
ENV CMAKE_VERSION="v3.27"
ENV CMAKE_TAR="cmake-3.27.5-linux-x86_64.tar.gz"
RUN apt-get update && apt-get install -y --no-install-recommends wget curl g++ gcc ca-certificates\
make ccache python3-dev gfortran python3-setuptools swig libopenblas-dev pip \
&& apt-get remove --purge -y \
Expand Down
4 changes: 4 additions & 0 deletions include/knowhere/comp/index_param.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ constexpr const char* INDEX_DISKANN = "DISKANN";
namespace meta {
constexpr const char* INDEX_TYPE = "index_type";
constexpr const char* METRIC_TYPE = "metric_type";
constexpr const char* DATA_PATH = "data_path";
constexpr const char* INDEX_PREFIX = "index_prefix";
constexpr const char* INDEX_ENGINE_VERSION = "index_engine_version";
constexpr const char* RETRIEVE_FRIENDLY = "retrieve_friendly";
constexpr const char* DIM = "dim";
constexpr const char* TENSOR = "tensor";
constexpr const char* ROWS = "rows";
Expand Down
6 changes: 6 additions & 0 deletions include/knowhere/comp/knowhere_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@ class KnowhereConfig {
static bool
SetAioContextPool(size_t num_ctx);

static void
SetBuildThreadPoolSize(size_t num_threads);

static void
SetSearchThreadPoolSize(size_t num_threads);

/**
* init GPU Resource
*/
Expand Down
10 changes: 10 additions & 0 deletions include/knowhere/comp/local_file_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@

#include <unordered_set>

// Select the filesystem library and expose it under the short alias `fs`.
// The standard header is preferred; <experimental/filesystem> is the fallback
// for toolchains that only ship the pre-standard implementation.
#if __has_include(<filesystem>)
#include <filesystem>
namespace fs = std::filesystem;
#elif __has_include(<experimental/filesystem>)
#include <experimental/filesystem>
namespace fs = std::experimental::filesystem;
#else
// Neither header exists: abort the build with an explicit diagnostic.
// This must be a real `#error` directive; a bare `error "..."` token sequence is
// not a directive and only fails with an unrelated syntax error.
#error "Missing the <filesystem> header."
#endif

#include "knowhere/file_manager.h"
namespace knowhere {
/**
Expand Down
16 changes: 16 additions & 0 deletions include/knowhere/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -501,13 +501,29 @@ class BaseConfig : public Config {
CFG_STRING metric_type;
CFG_INT k;
CFG_INT num_build_thread;
CFG_BOOL retrieve_friendly;
CFG_STRING data_path;
CFG_STRING index_prefix;
CFG_FLOAT radius;
CFG_FLOAT range_filter;
CFG_BOOL trace_visit;
CFG_BOOL enable_mmap;
CFG_BOOL for_tuning;
KNOHWERE_DECLARE_CONFIG(BaseConfig) {
KNOWHERE_CONFIG_DECLARE_FIELD(metric_type).set_default("L2").description("metric type").for_train_and_search();
KNOWHERE_CONFIG_DECLARE_FIELD(retrieve_friendly)
.description("whether the index holds raw data for fast retrieval")
.set_default(false)
.for_train();
KNOWHERE_CONFIG_DECLARE_FIELD(data_path)
.description("raw data path.")
.allow_empty_without_default()
.for_train();
KNOWHERE_CONFIG_DECLARE_FIELD(index_prefix)
.description("path prefix to load or save index.")
.allow_empty_without_default()
.for_train()
.for_deserialize();
KNOWHERE_CONFIG_DECLARE_FIELD(k)
.set_default(10)
.description("search for top k similar vector.")
Expand Down
46 changes: 45 additions & 1 deletion include/knowhere/expected.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,57 @@ enum class Status {
hnsw_inner_error = 12,
malloc_error = 13,
diskann_inner_error = 14,
diskann_file_error = 15,
disk_file_error = 15,
invalid_value_in_json = 16,
arithmetic_overflow = 17,
raft_inner_error = 18,
invalid_binary_set = 19,
};

/**
 * Translate a knowhere::Status code into a short, human-readable description.
 *
 * @param status status code to describe.
 * @return the description for `status`; "unexpected status" is returned for any
 *         code that has no dedicated entry in the table below.
 */
inline std::string
Status2String(knowhere::Status status) {
    using Code = knowhere::Status;

    // Start from the fallback text and overwrite it when the code is recognised.
    const char* text = "unexpected status";
    switch (status) {
        case Code::invalid_args:
            text = "invalid args";
            break;
        case Code::invalid_param_in_json:
            text = "invalid param in json";
            break;
        case Code::out_of_range_in_json:
            text = "out of range in json";
            break;
        case Code::type_conflict_in_json:
            text = "type conflict in json";
            break;
        case Code::invalid_metric_type:
            text = "invalid metric type";
            break;
        case Code::empty_index:
            text = "empty index";
            break;
        case Code::not_implemented:
            text = "not implemented";
            break;
        case Code::index_not_trained:
            text = "index not trained";
            break;
        case Code::index_already_trained:
            text = "index already trained";
            break;
        case Code::faiss_inner_error:
            text = "faiss inner error";
            break;
        case Code::hnsw_inner_error:
            text = "hnsw inner error";
            break;
        case Code::malloc_error:
            text = "malloc error";
            break;
        case Code::diskann_inner_error:
            text = "diskann inner error";
            break;
        case Code::disk_file_error:
            text = "disk file error";
            break;
        case Code::invalid_value_in_json:
            text = "invalid value in json";
            break;
        case Code::arithmetic_overflow:
            text = "arithmetic overflow";
            break;
        case Code::raft_inner_error:
            text = "raft inner error";
            break;
        case Code::invalid_binary_set:
            text = "invalid binary set";
            break;
        default:
            // Keep the fallback text for every other code.
            break;
    }
    return text;
}

template <typename T>
class expected {
public:
Expand Down
6 changes: 3 additions & 3 deletions include/knowhere/factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ namespace knowhere {
class IndexFactory {
public:
Index<IndexNode>
Create(const std::string& name, const Object& object = nullptr);
Create(const std::string& name, const std::string& version, const Object& object = nullptr);
const IndexFactory&
Register(const std::string& name, std::function<Index<IndexNode>(const Object&)> func);
Register(const std::string& name, std::function<Index<IndexNode>(const std::string& version, const Object&)> func);
static IndexFactory&
Instance();

private:
typedef std::map<std::string, std::function<Index<IndexNode>(const Object&)>> FuncMap;
typedef std::map<std::string, std::function<Index<IndexNode>(const std::string&, const Object&)>> FuncMap;
IndexFactory();
static FuncMap&
MapInstance();
Expand Down
10 changes: 10 additions & 0 deletions include/knowhere/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,16 @@ class Index {
return *this;
}

// Mutable access to the pointer stored in `node`.
// NOTE(review): the pointer's lifetime presumably stays tied to this Index handle;
// confirm against the ownership logic elsewhere in this class before storing it.
T1*
Node() {
    return this->node;
}

// Read-only access to the pointer stored in `node`, for use on const handles.
// NOTE(review): the pointer's lifetime presumably stays tied to this Index handle;
// confirm against the ownership logic elsewhere in this class before storing it.
const T1*
Node() const {
    return this->node;
}

template <typename T2>
Index<T2>
Cast() {
Expand Down
3 changes: 3 additions & 0 deletions include/knowhere/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,7 @@ round_down(const T value, const T align) {
extern void
ConvertIVFFlatIfNeeded(const BinarySet& binset, const uint8_t* raw_data, const size_t raw_size);

bool
UseDiskLoad(const std::string& index_type, const std::string& /*version*/);

} // namespace knowhere
96 changes: 96 additions & 0 deletions include/knowhere/version.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright (C) 2019-2023 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.

#pragma once

#include <regex>
#include <string>

#include "log.h"

namespace knowhere {
// Version-code constants used by the Version class below.
//
// These are C++17 inline variables rather than members of an anonymous namespace.
// In a header, an anonymous namespace gives every translation unit its own copy
// (including a separately constructed std::regex), and the inline member functions
// of Version would then refer to a different object in each translation unit.

// Accepted format: the literal prefix "knowhere-v" followed by one or more decimal
// digits; the digits are captured as group 1.
inline const std::regex version_regex(R"(^knowhere-v(\d+)$)");
// Version code returned by Version::GetDefaultVersion().
inline constexpr const char* default_version = "knowhere-v0";
// Version code returned by Version::GetMinimalSupport().
// NOTE(review): the identifier is misspelled ("vesion"); it is kept as-is so that
// existing references keep compiling.
inline constexpr const char* minimal_vesion = "knowhere-v0";
// Version code returned by Version::GetCurrentVersion().
inline constexpr const char* current_version = "knowhere-v0";

/**
 * A knowhere index version code of the form "knowhere-v<N>".
 *
 * The original string is stored verbatim. Its numeric part is parsed once at
 * construction and stays at `unexpected_version_num` when parsing fails, in which
 * case Valid() returns false.
 */
class Version {
 public:
    /**
     * Parse `version_code_`.
     *
     * Parse failures are logged rather than propagated: a code that does not match
     * `version_regex`, or whose number std::stoi rejects (e.g. out of range), leaves
     * the object in the invalid state.
     */
    explicit Version(const std::string& version_code_) : version_code(version_code_) {
        try {
            std::smatch matches;
            if (std::regex_match(version_code_, matches, version_regex)) {
                // Group 1 holds the decimal digits that follow "knowhere-v".
                version_ = std::stoi(matches[1].str());
            } else {
                LOG_KNOWHERE_ERROR_ << "unexpected version code : " << version_code_;
            }
        } catch (const std::exception& e) {
            LOG_KNOWHERE_ERROR_ << "version code " << version_code_ << " parse failed : " << e.what();
        }
    }

    /** @return true when the numeric part of the version code was parsed successfully. */
    bool
    Valid() const {
        return version_ != unexpected_version_num;
    }

    /** @return the version code string exactly as it was passed to the constructor. */
    const std::string&
    VersionCode() const {
        return version_code;
    }

    /**
     * Check whether `version` is a well-formed version code.
     *
     * @return true when the whole string matches `version_regex`; false otherwise,
     *         including when the regex engine reports an error.
     */
    static bool
    VersionCheck(const std::string& version) {
        try {
            // Match the complete std::string, exactly as the constructor does.
            // Matching version.c_str() would stop at an embedded NUL and accept
            // strings with trailing bytes that the constructor rejects.
            return std::regex_match(version, version_regex);
        } catch (const std::regex_error&) {
            LOG_KNOWHERE_ERROR_ << "unexpected index version : " << version;
        }
        return false;
    }

    /** Version to assume when the caller did not specify one. */
    static Version
    GetDefaultVersion() {
        return Version(default_version);
    }

    /** Newest version that is supported. */
    static Version
    GetCurrentVersion() {
        return Version(current_version);
    }

    /** Oldest version that is still supported. */
    static Version
    GetMinimalSupport() {
        return Version(minimal_vesion);
    }

    /**
     * @return true when `version` carries a well-formed code whose number lies in the
     *         inclusive range [GetMinimalSupport(), GetCurrentVersion()].
     */
    static bool
    VersionSupport(const Version& version) {
        return VersionCheck(version.version_code) && GetMinimalSupport() <= version && version <= GetCurrentVersion();
    }

    /**
     * Order two versions by their parsed numbers only. An unparsed version holds
     * `unexpected_version_num` and is compared using that value.
     */
    friend bool
    operator<=(const Version& lhs, const Version& rhs) {
        return lhs.version_ <= rhs.version_;
    }

 private:
    // Value of `version_` until a version code has been parsed successfully.
    static constexpr int32_t unexpected_version_num = -1;
    // Version code as supplied by the caller; never modified after construction.
    const std::string version_code;
    // Number parsed from the version code, or `unexpected_version_num`.
    int32_t version_ = unexpected_version_num;
};

} // namespace knowhere
4 changes: 2 additions & 2 deletions python/knowhere/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from .swigknowhere import GetBinarySet, GetNullDataSet, GetNullBitSetView
import numpy as np

def CreateIndex(name):
return swigknowhere.IndexWrap(name)
def CreateIndex(name, version):
    """Create and return a swigknowhere.IndexWrap for index `name` at `version`."""
    index = swigknowhere.IndexWrap(name, version)
    return index


def CreateBitSet(bits_num):
Expand Down
10 changes: 6 additions & 4 deletions python/knowhere/knowhere.i
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ typedef uint64_t size_t;
#endif
#include <knowhere/expected.h>
#include <knowhere/factory.h>
#include <knowhere/version.h>
#include <knowhere/utils.h>
#include <knowhere/comp/local_file_manager.h>
using namespace knowhere;
%}
Expand Down Expand Up @@ -108,14 +110,14 @@ public:

class IndexWrap {
public:
IndexWrap(const std::string& name) {
IndexWrap(const std::string& name, const std::string& version) {
GILReleaser rel;
if (name == std::string("DISKANN")) {
if (knowhere::UseDiskLoad(name, version)) {
std::shared_ptr<knowhere::FileManager> file_manager = std::make_shared<knowhere::LocalFileManager>();
auto diskann_pack = knowhere::Pack(file_manager);
idx = IndexFactory::Instance().Create(name, diskann_pack);
idx = IndexFactory::Instance().Create(name, version, diskann_pack);
} else {
idx = IndexFactory::Instance().Create(name);
idx = IndexFactory::Instance().Create(name, version);
}
}

Expand Down
Loading

0 comments on commit 8382969

Please sign in to comment.