From 98a121e6bb6d55b411e9c972ea4dc906014f17d4 Mon Sep 17 00:00:00 2001 From: wxd Date: Thu, 1 Apr 2021 15:06:54 +0800 Subject: [PATCH] v3.2.7 (#46) Unified storage --- .gitignore | 13 + CMakeLists.txt | 18 +- c_api/api_data/gamma_config.cc | 40 +- c_api/api_data/gamma_config.h | 34 + c_api/api_data/gamma_doc.cc | 6 +- c_api/gamma_api.cc | 16 + c_api/gamma_api.h | 16 + idl/fbs/config.fbs | 6 + index/impl/gamma_index_ivfpq.cc | 3 +- index/impl/gpu/gamma_index_ivfpq_gpu.cc | 13 +- index/retrieval_model.h | 7 +- io/memory_raw_vector_io.h | 5 - io/mmap_raw_vector_io.cc | 18 +- io/mmap_raw_vector_io.h | 4 - io/table_io.cc | 6 +- io/table_io.h | 5 - search/gamma_engine.cc | 67 +- search/gamma_engine.h | 8 +- storage/async_writer.cc | 151 ++++ storage/async_writer.h | 54 ++ storage/block.cc | 187 +++++ storage/block.h | 91 +++ storage/compress/compressor.h | 45 + storage/compress/compressor_zfp.h | 136 +++ storage/compress/compressor_zstd.h | 94 +++ storage/lru_cache.h | 395 +++++++++ storage/segment.cc | 343 ++++++++ storage/segment.h | 117 +++ storage/storage_manager.cc | 315 +++++++ storage/storage_manager.h | 106 +++ storage/string_block.cc | 107 +++ storage/string_block.h | 52 ++ storage/table_block.cc | 58 ++ storage/table_block.h | 35 + storage/vector_block.cc | 151 ++++ storage/vector_block.h | 46 ++ table/field_range_index.cc | 25 +- table/field_range_index.h | 3 +- table/table.cc | 580 +++++-------- table/table.h | 97 +-- table/table_data.cc | 435 ---------- table/table_data.h | 110 --- table/table_define.h | 11 - tests/test.h | 3 +- tests/test_dump.cc | 773 ++++++++++++++++++ tests/test_files.cc | 187 +++-- .../concurrentqueue/blockingconcurrentqueue.h | 435 +--------- third_party/concurrentqueue/concurrentqueue.h | 350 +++++--- .../concurrentqueue/lightweightsemaphore.h | 411 ++++++++++ vector/mmap_raw_vector.cc | 118 ++- vector/mmap_raw_vector.h | 11 +- vector/raw_vector.cc | 24 +- vector/raw_vector.h | 11 +- vector/raw_vector_factory.h | 6 - vector/rocksdb_raw_vector.cc | 25 +- vector/vector_file_mapper.cc | 101 --- vector/vector_file_mapper.h | 49 -- vector/vector_manager.cc | 39 +- vector/vector_manager.h | 5 + 59 files changed, 4607 insertions(+), 1970 deletions(-) create mode 100644 .gitignore create mode 100644 storage/async_writer.cc create mode 100644 storage/async_writer.h create mode 100644 storage/block.cc create mode 100644 storage/block.h create mode 100644 storage/compress/compressor.h create mode 100644 storage/compress/compressor_zfp.h create mode 100644 storage/compress/compressor_zstd.h create mode 100644 storage/lru_cache.h create mode 100644 storage/segment.cc create mode 100644 storage/segment.h create mode 100644 storage/storage_manager.cc create mode 100644 storage/storage_manager.h create mode 100644 storage/string_block.cc create mode 100644 storage/string_block.h create mode 100644 storage/table_block.cc create mode 100644 storage/table_block.h create mode 100644 storage/vector_block.cc create mode 100644 storage/vector_block.h delete mode 100644 table/table_data.cc delete mode 100644 table/table_data.h create mode 100644 tests/test_dump.cc create mode 100644 third_party/concurrentqueue/lightweightsemaphore.h delete mode 100644 vector/vector_file_mapper.cc delete mode 100644 vector/vector_file_mapper.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bad0693 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +release +debug +build +gtest +flatbuffers +flatbuffers-1.11.0 +fbs-gen +.vscode +.idea +GPATH +GRTAGS +GTAGS +third_party/faiss 
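// Illustrative usage of the new cache-size configuration API introduced by this
// patch (see c_api/gamma_api.h, c_api/api_data/gamma_config.h and
// idl/fbs/config.fbs below). This is a minimal sketch, not part of the patch
// itself: the `engine` pointer, the chosen cache sizes, and the include paths
// are assumptions that depend on how the library is built and initialized.
#include <cstdlib>

#include "gamma_api.h"     // SetConfig / GetConfig C API
#include "gamma_config.h"  // tig_gamma::Config, CacheInfo

// Resize the table/string LRU caches of a running engine, then read the
// effective configuration back. Cache sizes are given in MB (see the logging
// in StorageManager::Init below).
void AdjustCaches(void *engine) {
  tig_gamma::Config conf;
  conf.AddCacheInfo("table", 512);
  conf.AddCacheInfo("string", 256);

  char *buf = nullptr;
  int len = 0;
  conf.Serialize(&buf, &len);   // flatbuffer-serialized Config, malloc'ed
  SetConfig(engine, buf, len);  // returns 0 on success
  free(buf);

  char *out = nullptr;
  int out_len = 0;
  if (GetConfig(engine, &out, &out_len) == 0) {
    tig_gamma::Config current;
    current.Deserialize(out, out_len);
    // current.CacheInfos() now holds the "table"/"string" entries plus any
    // per-field vector caches reported by the vector manager.
    free(out);
  }
}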
diff --git a/CMakeLists.txt b/CMakeLists.txt index d664e07..7dc3881 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,14 +2,13 @@ # GAMMA ENGINE #-------------------------------------------- project(gamma_engine C CXX) -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 3.17) list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules) option(BUILD_TEST "Build tests" off) option(BUILD_WITH_GPU "Build gamma with gpu index support" off) option(BUILD_TOOLS "Build tools" off) -option(TABLE_STR_INT64 "table string int64 address" off) exec_program( "sh" @@ -28,7 +27,10 @@ set(GAMMA_SEARCH ${CMAKE_CURRENT_SOURCE_DIR}/search) set(GAMMA_INDEX ${CMAKE_CURRENT_SOURCE_DIR}/index) set(GAMMA_INDEX_IMPL ${CMAKE_CURRENT_SOURCE_DIR}/index/impl) set(GAMMA_INDEX_IMPL_HNSWLIB ${CMAKE_CURRENT_SOURCE_DIR}/index/impl/hnswlib) +set(GAMMA_INDEX_IMPL_SSG ${CMAKE_CURRENT_SOURCE_DIR}/index/impl/ssg) set(GAMMA_VECTOR ${CMAKE_CURRENT_SOURCE_DIR}/vector) +set(STORAGE ${CMAKE_CURRENT_SOURCE_DIR}/storage) +set(STORAGE_COMPRESS ${CMAKE_CURRENT_SOURCE_DIR}/storage/compress) set(GAMMA_REALTIME ${CMAKE_CURRENT_SOURCE_DIR}/realtime) set(GAMMA_TABLE ${CMAKE_CURRENT_SOURCE_DIR}/table) set(GAMMA_C_API ${CMAKE_CURRENT_SOURCE_DIR}/c_api) @@ -67,11 +69,6 @@ ELSE (DEFINED ENV{ZFP_HOME}) MESSAGE(STATUS "ZFP home isn't set, so COMPRESS is not supported! ") ENDIF (DEFINED ENV{ZFP_HOME}) -if(TABLE_STR_INT64) - MESSAGE(STATUS "TABLE_STR_INT64 is on") - ADD_DEFINITIONS(-DTABLE_STR_INT64) -endif(TABLE_STR_INT64) - #INCLUDE DIR include_directories( ${THIRDPARTY} @@ -86,7 +83,10 @@ include_directories( ${GAMMA_INDEX} ${GAMMA_INDEX_IMPL} ${GAMMA_INDEX_IMPL_HNSWLIB} + ${GAMMA_INDEX_IMPL_SSG} ${GAMMA_VECTOR} + ${STORAGE} + ${STORAGE_COMPRESS} ${GAMMA_REALTIME} ${GAMMA_C_API} ${GAMMA_IO} @@ -108,6 +108,7 @@ set(LIBRARIES -lcrypto -lzstd -lopenblas + -ltbb ) #ADDITIONAL SOURCE DIR @@ -119,8 +120,11 @@ aux_source_directory(${UTIL} DIR_SRCS) aux_source_directory(${COMMON} DIR_SRCS) aux_source_directory(${GAMMA_SEARCH} DIR_SRCS) aux_source_directory(${GAMMA_INDEX} DIR_SRCS) +aux_source_directory(${STORAGE} DIR_SRCS) +aux_source_directory(${STORAGE_COMPRESS} DIR_SRCS) aux_source_directory(${GAMMA_INDEX_IMPL} DIR_SRCS) aux_source_directory(${GAMMA_INDEX_IMPL_HNSWLIB} DIR_SRCS) +aux_source_directory(${GAMMA_INDEX_IMPL_SSG} DIR_SRCS) aux_source_directory(${GAMMA_VECTOR} DIR_SRCS) aux_source_directory(${GAMMA_REALTIME} DIR_SRCS) aux_source_directory(${GAMMA_TABLE} DIR_SRCS) diff --git a/c_api/api_data/gamma_config.cc b/c_api/api_data/gamma_config.cc index dd4e76f..f822a1d 100644 --- a/c_api/api_data/gamma_config.cc +++ b/c_api/api_data/gamma_config.cc @@ -11,9 +11,22 @@ namespace tig_gamma { int Config::Serialize(char **out, int *out_len) { flatbuffers::FlatBufferBuilder builder; + + std::vector> + cache_vector(cache_infos_.size()); + int i = 0; + for (auto &c : cache_infos_) { + auto cache = gamma_api::CreateCacheInfo(builder, + builder.CreateString(c.field_name), + c.cache_size); + cache_vector[i++] = cache; + } + auto cache_vec = builder.CreateVector(cache_vector); auto config = gamma_api::CreateConfig(builder, builder.CreateString(path_), - builder.CreateString(log_dir_)); + builder.CreateString(log_dir_), + cache_vec); + builder.Finish(config); *out_len = builder.GetSize(); *out = (char *)malloc(*out_len * sizeof(char)); @@ -26,6 +39,16 @@ void Config::Deserialize(const char *data, int len) { path_ = config_->path()->str(); log_dir_ = config_->log_dir()->str(); + + size_t cache_num = config_->cache_infos()->size(); + 
cache_infos_.resize(cache_num); + for (size_t i = 0; i < cache_num; ++i) { + auto c = config_->cache_infos()->Get(i); + struct CacheInfo cache_info; + cache_info.field_name = c->field_name()->str(); + cache_info.cache_size = c->cache_size(); + cache_infos_[i] = cache_info; + } } const std::string &Config::Path() { @@ -42,4 +65,19 @@ const std::string &Config::LogDir() { void Config::SetLogDir(std::string &log_dir) { log_dir_ = log_dir; } +void Config::AddCacheInfo(const struct CacheInfo &cache) { + cache_infos_.push_back(cache); +} + +void Config::AddCacheInfo(struct CacheInfo &&cache) { + cache_infos_.emplace_back(std::forward(cache)); +} + +void Config::AddCacheInfo(std::string name, int cache_size) { + struct CacheInfo c; + c.field_name = name; + c.cache_size = cache_size; + cache_infos_.push_back(c); +} + } // namespace tig_gamma \ No newline at end of file diff --git a/c_api/api_data/gamma_config.h b/c_api/api_data/gamma_config.h index 669008f..4c09b4c 100644 --- a/c_api/api_data/gamma_config.h +++ b/c_api/api_data/gamma_config.h @@ -12,8 +12,31 @@ #include "config_generated.h" #include "gamma_raw_data.h" + namespace tig_gamma { +struct CacheInfo { + std::string field_name; + int cache_size; + CacheInfo() {} + + CacheInfo(const CacheInfo &other) { *this = other; } + + CacheInfo &operator=(const CacheInfo &other) { + field_name = other.field_name; + cache_size = other.cache_size; + return *this; + } + + CacheInfo(CacheInfo &&other) { *this = std::move(other); } + + CacheInfo &operator=(CacheInfo &&other) { + field_name = std::move(other.field_name); + cache_size = other.cache_size; + return *this; + } +}; + class Config : public RawData { public: Config() { config_ = nullptr; } @@ -30,11 +53,22 @@ class Config : public RawData { void SetLogDir(std::string &log_dir); + void AddCacheInfo(const struct CacheInfo &cache); + + void AddCacheInfo(struct CacheInfo &&cache); + + void AddCacheInfo(std::string name, int cache_size); + + std::vector &CacheInfos() { return cache_infos_; } + + void ClearCacheInfos() { cache_infos_.resize(0); } + private: gamma_api::Config *config_; std::string path_; std::string log_dir_; + std::vector cache_infos_; }; } // namespace tig_gamma diff --git a/c_api/api_data/gamma_doc.cc b/c_api/api_data/gamma_doc.cc index f1a0c15..9f91fe6 100644 --- a/c_api/api_data/gamma_doc.cc +++ b/c_api/api_data/gamma_doc.cc @@ -53,9 +53,9 @@ void Doc::Deserialize(const char *data, int len) { size_t fields_num = doc_->fields()->size(); if (fields_num != table_field_num + vector_field_num) { - LOG(ERROR) << "Add Doc fields num [" << fields_num - << "], not equal to table_field_num [" << table_field_num - << "] + vector_field_num [" << vector_field_num << "]"; + LOG(WARNING) << "Add Doc fields num [" << fields_num + << "], not equal to table_field_num [" << table_field_num + << "] + vector_field_num [" << vector_field_num << "]"; return; } diff --git a/c_api/gamma_api.cc b/c_api/gamma_api.cc index 707dcfc..69e2001 100644 --- a/c_api/gamma_api.cc +++ b/c_api/gamma_api.cc @@ -211,3 +211,19 @@ int DelDocByQuery(void *engine, const char *request_str, int len) { static_cast(engine)->DelDocByQuery(request); return ret; } + +int SetConfig(void *engine, const char *config_str, int len) { + tig_gamma::Config config; + config.Deserialize(config_str, len); + int ret = + static_cast(engine)->SetConfig(config); + return ret; +} + +int GetConfig(void *engine, char **config_str, int *len) { + tig_gamma::Config config; + int res = + static_cast(engine)->GetConfig(config); + if (res == 0) { res = 
config.Serialize(config_str, len); } + return res; +} \ No newline at end of file diff --git a/c_api/gamma_api.h b/c_api/gamma_api.h index 7c85b08..88f3e07 100644 --- a/c_api/gamma_api.h +++ b/c_api/gamma_api.h @@ -161,6 +161,22 @@ int Search(void *engine, const char *request_str, int req_len, */ int DelDocByQuery(void *engine, const char *request_str, int len); +/** alter all cache size by query + * + * @param engine search engine pointer + * @param cache_str caches' serialized string + * @return 0 successed, 1 failed + */ +int SetConfig(void *engine, const char *config_str, int len); + +/** get all cache size by query + * + * @param engine search engine pointer + * @param cache_str caches' serialized string + * @return 0 successed, 1 failed + */ +int GetConfig(void *engine, char **config_str, int *len); + #ifdef __cplusplus } #endif diff --git a/idl/fbs/config.fbs b/idl/fbs/config.fbs index ce3e643..e41a729 100644 --- a/idl/fbs/config.fbs +++ b/idl/fbs/config.fbs @@ -1,8 +1,14 @@ namespace gamma_api; +table CacheInfo { + field_name:string; + cache_size:int; +} + table Config { path:string; log_dir:string; + cache_infos:[CacheInfo]; } root_type Config; diff --git a/index/impl/gamma_index_ivfpq.cc b/index/impl/gamma_index_ivfpq.cc index 4c963fc..0dec116 100644 --- a/index/impl/gamma_index_ivfpq.cc +++ b/index/impl/gamma_index_ivfpq.cc @@ -1357,7 +1357,8 @@ int GammaIVFPQIndex::Load(const std::string &index_dir) { READ1(indexed_vec_count_); if (indexed_vec_count_ < 0 || indexed_vec_count_ > vector_->MetaInfo()->size_) { - LOG(ERROR) << "invalid indexed count=" << indexed_vec_count_; + LOG(ERROR) << "invalid indexed count [" << indexed_vec_count_ + << "] vector size [" << vector_->MetaInfo()->size_ << "]"; return INTERNAL_ERR; } // precomputed table not stored. 
It is cheaper to recompute it diff --git a/index/impl/gpu/gamma_index_ivfpq_gpu.cc b/index/impl/gpu/gamma_index_ivfpq_gpu.cc index e2bc058..a9d532c 100644 --- a/index/impl/gpu/gamma_index_ivfpq_gpu.cc +++ b/index/impl/gpu/gamma_index_ivfpq_gpu.cc @@ -676,7 +676,11 @@ int ParseFilters(GammaSearchCondition *condition, template bool IsInRange(Table *table, RangeFilter &range, long docid) { T value = 0; - table->GetField(docid, range.field, value); + std::string field_value; + int field_id = table->GetAttrIdx(range.field); + table->GetFieldRawValue(docid, field_id, field_value); + memcpy(&value, field_value.c_str(), sizeof(value)); + T lower_value, upper_value; memcpy(&lower_value, range.lower_value.c_str(), range.lower_value.size()); memcpy(&upper_value, range.upper_value.c_str(), range.upper_value.size()); @@ -718,11 +722,10 @@ bool FilteredByTermFilter(GammaSearchCondition *condition, auto term = condition->term_filters[i]; std::string field_value; - table::DecompressStr decompress_str; - int len = condition->table->GetFieldString(docid, term.field, field_value, - decompress_str); + int field_id = condition->table->GetAttrIdx(term.field); + condition->table->GetFieldRawValue(docid, field_id, field_value); vector field_items; - if (len >= 0) field_items = utils::split(field_value, kDelim); + if (field_value.size() >= 0) field_items = utils::split(field_value, kDelim); bool all_in_field_items; if (term.is_union == static_cast(FilterOperator::Or)) diff --git a/index/retrieval_model.h b/index/retrieval_model.h index 9e7f651..92fcb6c 100644 --- a/index/retrieval_model.h +++ b/index/retrieval_model.h @@ -8,8 +8,10 @@ #pragma once #include +#include -#include "concurrentqueue/concurrentqueue.h" + +// #include "concurrentqueue/concurrentqueue.h" #include "reflector.h" #include "utils.h" @@ -178,7 +180,6 @@ class ScopeVectors { size_t Size() { return ptr_.size(); } - private: std::vector ptr_; std::vector deletable_; }; @@ -292,7 +293,7 @@ class RetrievalModel { virtual int Load(const std::string &dir) = 0; VectorReader *vector_; - moodycamel::ConcurrentQueue updated_vids_; + tbb::concurrent_bounded_queue updated_vids_; // warining: indexed_count_ is only used by framework, sub-class cann't use it int indexed_count_; }; diff --git a/io/memory_raw_vector_io.h b/io/memory_raw_vector_io.h index 4271e45..efca374 100644 --- a/io/memory_raw_vector_io.h +++ b/io/memory_raw_vector_io.h @@ -1,8 +1,5 @@ #ifdef WITH_ROCKSDB -#ifndef MEMORY_RAW_VECTOR_IO_H_ -#define MEMORY_RAW_VECTOR_IO_H_ - #pragma once #include @@ -32,6 +29,4 @@ struct MemoryRawVectorIO : public RawVectorIO, public AsyncFlusher { } // namespace tig_gamma -#endif - #endif // WITH_ROCKSDB diff --git a/io/mmap_raw_vector_io.cc b/io/mmap_raw_vector_io.cc index f8f3ea7..128ac82 100644 --- a/io/mmap_raw_vector_io.cc +++ b/io/mmap_raw_vector_io.cc @@ -7,26 +7,14 @@ namespace tig_gamma { int MmapRawVectorIO::Init() { return 0; } int MmapRawVectorIO::Dump(int start_vid, int end_vid) { - for (int i = start_vid / raw_vector->segment_size_; - i < end_vid / raw_vector->segment_size_; i++) { - int ret = raw_vector->file_mappers_[i]->Sync(); - if (ret) return ret; - } return 0; } int MmapRawVectorIO::Load(int vec_num) { - int seg_num = vec_num / raw_vector->segment_size_ + 1; - int offset = vec_num % raw_vector->segment_size_; - for (int i = 1; i < seg_num; ++i) { - int ret = raw_vector->Extend(); - if (ret) { - LOG(ERROR) << "load extend error, i=" << i << ", ret=" << ret; - return ret; - } + if (raw_vector->storage_mgr_->Truncate(vec_num)) { + LOG(ERROR) 
<< "truncate gamma db error, vec_num=" << vec_num; + return INTERNAL_ERR; } - assert(raw_vector->nsegment_ == seg_num); - raw_vector->file_mappers_[seg_num - 1]->SetCurrIdx(offset); raw_vector->MetaInfo()->size_ = vec_num; LOG(INFO) << "mmap load success! vec num=" << vec_num; return 0; diff --git a/io/mmap_raw_vector_io.h b/io/mmap_raw_vector_io.h index 7cc7190..d99dccd 100644 --- a/io/mmap_raw_vector_io.h +++ b/io/mmap_raw_vector_io.h @@ -1,6 +1,3 @@ -#ifndef MMAP_RAW_VECTOR_IO_H_ -#define MMAP_RAW_VECTOR_IO_H_ - #pragma once #include @@ -24,4 +21,3 @@ struct MmapRawVectorIO : public RawVectorIO { } // namespace tig_gamma -#endif diff --git a/io/table_io.cc b/io/table_io.cc index eeaeee4..cc81599 100644 --- a/io/table_io.cc +++ b/io/table_io.cc @@ -3,8 +3,6 @@ #include "io_common.h" namespace tig_gamma { -using std::string; -using std::vector; int TableIO::Init() { // do nothing @@ -12,8 +10,8 @@ int TableIO::Init() { } int TableIO::Dump(int start_docid, int end_docid) { - int ret = table->Sync(); - return ret; + // int ret = table->Sync(); + return 0; } int TableIO::Load(int &doc_num) { diff --git a/io/table_io.h b/io/table_io.h index 94eb1ff..ba32a75 100644 --- a/io/table_io.h +++ b/io/table_io.h @@ -1,6 +1,3 @@ -#ifndef TABLE_IO_H_ -#define TABLE_IO_H_ - #pragma once #include @@ -27,5 +24,3 @@ class TableIO : public AsyncFlusher { }; } // namespace tig_gamma - -#endif diff --git a/search/gamma_engine.cc b/search/gamma_engine.cc index 8fab006..9c4ce12 100644 --- a/search/gamma_engine.cc +++ b/search/gamma_engine.cc @@ -178,9 +178,9 @@ GammaEngine::GammaEngine(const string &index_root_path) vec_manager_ = nullptr; index_status_ = IndexStatus::UNINDEXED; delete_num_ = 0; - b_running_ = false; + b_running_ = 0; b_field_running_ = false; - dump_docid_ = 0; + dump_docid_ = -1; bitmap_bytes_size_ = 0; field_range_index_ = nullptr; created_table_ = false; @@ -193,7 +193,7 @@ GammaEngine::GammaEngine(const string &index_root_path) GammaEngine::~GammaEngine() { if (b_running_) { - b_running_ = false; + b_running_ = 0; std::mutex running_mutex; std::unique_lock lk(running_mutex); running_cv_.wait(lk); @@ -892,7 +892,8 @@ int GammaEngine::GetDoc(int docid, Doc &doc) { vec_manager_->VectorNames(index_names); table::DecompressStr decompress_str; - table_->GetDocInfo(docid, doc, decompress_str); + std::vector table_fields; + table_->GetDocInfo(docid, doc, table_fields, decompress_str); std::vector> vec_fields_ids; for (size_t i = 0; i < index_names.size(); ++i) { @@ -914,14 +915,14 @@ int GammaEngine::GetDoc(int docid, Doc &doc) { } int GammaEngine::BuildIndex() { - if (b_running_) { + int running = __sync_fetch_and_add(&b_running_, 1); + if (running) { if (vec_manager_->Indexing() != 0) { LOG(ERROR) << "Create index failed!"; return -1; } return 0; } - b_running_ = true; auto func_indexing = std::bind(&GammaEngine::Indexing, this); std::thread t(func_indexing); @@ -932,7 +933,7 @@ int GammaEngine::BuildIndex() { int GammaEngine::Indexing() { if (vec_manager_->Indexing() != 0) { LOG(ERROR) << "Create index failed!"; - b_running_ = false; + b_running_ = 0; return -1; } @@ -1343,21 +1344,7 @@ int GammaEngine::PackResultItem(const VectorDoc *vec_doc, Request &request, std::vector vec; int ret = vec_manager_->GetVector(vec_fields_ids, vec, true); - int table_fields_num = 0; - - if (table_fields.size() == 0) { - table_fields_num = table_->FieldsNum(); - - table_->GetDocInfo(docid, doc, decompress_str); - } else { - table_fields_num = table_fields.size(); - - for (int i = 0; i < table_fields_num; ++i) { - 
struct tig_gamma::Field field; - table_->GetFieldInfo(docid, table_fields[i], field, decompress_str); - doc.AddField(std::move(field)); - } - } + table_->GetDocInfo(docid, doc, table_fields, decompress_str); if (ret == 0 && vec.size() == vec_fields_ids.size()) { for (size_t i = 0; i < vec_fields_ids.size(); ++i) { @@ -1371,7 +1358,8 @@ int GammaEngine::PackResultItem(const VectorDoc *vec_doc, Request &request, ; } } else { - table_->GetDocInfo(docid, doc, decompress_str); + std::vector table_fields; + table_->GetDocInfo(docid, doc, table_fields, decompress_str); } std::vector &fields = doc.TableFields(); @@ -1407,4 +1395,37 @@ int GammaEngine::PackResultItem(const VectorDoc *vec_doc, Request &request, return 0; } +int GammaEngine::GetConfig(Config &conf) { + conf.ClearCacheInfos(); + vec_manager_->GetAllCacheSize(conf); + uint32_t table_cache_size = 0; + uint32_t str_cache_size = 0; + table_->GetCacheSize(table_cache_size, str_cache_size); + if (table_cache_size > 0) { + conf.AddCacheInfo("table", (int)table_cache_size); + } + if (str_cache_size > 0) { + conf.AddCacheInfo("string", (int)str_cache_size); + } + return 0; +} + +int GammaEngine::SetConfig(Config &conf) { + uint32_t table_cache_size = 0; + uint32_t str_cache_size = 0; + for (auto &c : conf.CacheInfos()) { + if (c.field_name == "table" && c.cache_size > 0) { + table_cache_size = (uint32_t)c.cache_size; + } + else if (c.field_name == "string" && c.cache_size > 0) { + str_cache_size = (uint32_t)c.cache_size; + } else { + vec_manager_->AlterCacheSize(c); + } + } + table_->AlterCacheSize(table_cache_size, str_cache_size); + GetConfig(conf); + return 0; +} + } // namespace tig_gamma diff --git a/search/gamma_engine.h b/search/gamma_engine.h index 754c823..83a2129 100644 --- a/search/gamma_engine.h +++ b/search/gamma_engine.h @@ -101,9 +101,15 @@ class GammaEngine { char **BatchDocsStr() { return batch_docs_.data(); } + int GetConfig(Config &config); + + int SetConfig(Config &config); + private: GammaEngine(const std::string &index_root_path); + int CreateTableFromLocal(std::string &table_name); + int Indexing(); private: @@ -123,7 +129,7 @@ class GammaEngine { std::atomic delete_num_; - bool b_running_; + int b_running_; // 0 not run, not 0 running bool b_field_running_; std::condition_variable running_cv_; diff --git a/storage/async_writer.cc b/storage/async_writer.cc new file mode 100644 index 0000000..ca46fe1 --- /dev/null +++ b/storage/async_writer.cc @@ -0,0 +1,151 @@ +/** + * Copyright 2019 The Gamma Authors. + * + * This source code is licensed under the Apache License, Version 2.0 license + * found in the LICENSE file in the root directory of this source tree. 
+ */ + +#include "async_writer.h" + +#include + +#include "log.h" + +namespace tig_gamma { +namespace disk_io { + +AsyncWriter::AsyncWriter() { + running_ = true; + writer_q_ = new WriterQueue; + auto func_operate = std::bind(&AsyncWriter::WriterHandler, this); + handler_thread_ = std::thread(func_operate); +} + +AsyncWriter::~AsyncWriter() { + running_ = false; + handler_thread_.join(); + delete writer_q_; + writer_q_ = nullptr; +} + +static uint32_t WritenSize(int fd) { + uint32_t size; + pread(fd, &size, sizeof(size), sizeof(uint8_t) + sizeof(uint32_t)); + return size; +} + +static void UpdateSize(int fd, int num) { + uint32_t cur_size = WritenSize(fd) + num; + pwrite(fd, &cur_size, sizeof(cur_size), sizeof(uint8_t) + sizeof(uint32_t)); +} + +int AsyncWriter::WriterHandler() { + int bulk_size = 1000; + int bulk_bytes = 64 * 1024 * 2048; // TODO check overflow + uint8_t *buffer = new uint8_t[bulk_bytes]; + + while (running_) { + struct WriterStruct *writer_structs[bulk_size]; + + int size = 0; + + while (size == 0 && running_) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + int times = 0; + while(not writer_q_->empty() && size < bulk_size && times < 100) { + struct WriterStruct *pop_val = nullptr; + bool ret = writer_q_->try_pop(pop_val); + if (ret) writer_structs[size++] = pop_val; + ++times; + } + } + + if (size > 1) { + int fd = -1; + int prev_fd = writer_structs[0]->fd; + + uint32_t buffered_size = 0; + uint32_t buffered_start = writer_structs[0]->start; + + for (size_t i = 0; i < size; ++i) { + fd = writer_structs[i]->fd; + uint8_t *data = writer_structs[i]->data; + uint32_t len = writer_structs[i]->len; + uint32_t start = writer_structs[i]->start; + + if (prev_fd != fd) { + // flush prev data + pwrite(prev_fd, buffer, buffered_size, buffered_start); + UpdateSize(prev_fd, buffered_size / item_length_); + prev_fd = fd; + buffered_start = start; + buffered_size = 0; + // TODO check buffered_size + len < bulk_bytes + memcpy(buffer + buffered_size, data, len); + buffered_size += len; + } else { + if (buffered_size + len < bulk_bytes) { + memcpy(buffer + buffered_size, data, len); + buffered_size += len; + } else { + buffered_size += len; + pwrite(fd, buffer, buffered_size, buffered_start); + UpdateSize(fd, buffered_size / item_length_); + buffered_size = 0; + buffered_start = start; + } + } + + delete[] data; + delete writer_structs[i]; + } + pwrite(fd, buffer, buffered_size, buffered_start); + UpdateSize(fd, buffered_size / item_length_); + buffered_size = 0; + } else if (size == 1) { + int fd = writer_structs[0]->fd; + uint8_t *data = writer_structs[0]->data; + uint32_t start = writer_structs[0]->start; + uint32_t len = writer_structs[0]->len; + + pwrite(fd, data, len, start); + UpdateSize(fd, len / item_length_); + + delete[] data; + delete writer_structs[0]; + } + // if (size < bulk_size) { + // std::this_thread::sleep_for(std::chrono::milliseconds(10)); + // } + } + delete buffer; + return 0; +} + +int AsyncWriter::AsyncWrite(struct WriterStruct *writer_struct) { + auto qu_size = writer_q_->size(); + while (qu_size > 10000) { + LOG(INFO) << "AsyncWriter queue size[" << qu_size << "] > 10000, sleep 10ms"; + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + qu_size = writer_q_->size(); + } + writer_q_->push(writer_struct); + return 0; +} + +int AsyncWriter::SyncWrite(struct WriterStruct *writer_struct) { + int fd = writer_struct->fd; + uint8_t *data = writer_struct->data; + uint32_t start = writer_struct->start; + uint32_t len = writer_struct->len; + + 
pwrite(fd, data, len, start); + UpdateSize(fd, len / item_length_); + + delete data; + delete writer_struct; + return 0; +} + +} // namespace disk_io +} // namespace tig_gamma \ No newline at end of file diff --git a/storage/async_writer.h b/storage/async_writer.h new file mode 100644 index 0000000..4c0d334 --- /dev/null +++ b/storage/async_writer.h @@ -0,0 +1,54 @@ +/** + * Copyright 2019 The Gamma Authors. + * + * This source code is licensed under the Apache License, Version 2.0 license + * found in the LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include "concurrentqueue/blockingconcurrentqueue.h" + +namespace tig_gamma { +namespace disk_io { + +struct WriterStruct { + int fd; + uint8_t *data; + uint32_t start; + uint32_t len; +}; + +//typedef moodycamel::BlockingConcurrentQueue WriterQueue; +typedef tbb::concurrent_bounded_queue WriterQueue; + +class AsyncWriter { + public: + AsyncWriter(); + ~AsyncWriter(); + + int AsyncWrite(struct WriterStruct *writer_struct); + + int SyncWrite(struct WriterStruct *writer_struct); + + void Set(uint32_t header_size, int item_length) { + header_size_ = header_size; + item_length_ = item_length; + } + + private: + int WriterHandler(); + + WriterQueue *writer_q_; + + bool running_; + std::thread handler_thread_; + + uint32_t header_size_; + int item_length_; +}; + +} // namespace disk_io +} \ No newline at end of file diff --git a/storage/block.cc b/storage/block.cc new file mode 100644 index 0000000..08e7526 --- /dev/null +++ b/storage/block.cc @@ -0,0 +1,187 @@ +/** + * Copyright 2019 The Gamma Authors. + * + * This source code is licensed under the Apache License, Version 2.0 license + * found in the LICENSE file in the root directory of this source tree. 
+ */ + +#include "block.h" + +#include + +namespace tig_gamma { + +Block::Block(int fd, int per_block_size, int length, uint32_t header_size) + : fd_(fd), + per_block_size_(per_block_size), + item_length_(length), + header_size_(header_size) { + compressor_ = nullptr; + size_ = 0; + block_pos_fp_ = nullptr; +} + +Block::~Block() { + lru_cache_ = nullptr; + compressor_ = nullptr; + if (block_pos_fp_ != nullptr) { + fclose(block_pos_fp_); + block_pos_fp_ = nullptr; + } +} + +void Block::Init(void *lru, Compressor *compressor) { + lru_cache_ = + (LRUCache, ReadFunParameter *> *)lru; + compressor_ = compressor; + InitSubclass(); +} + +int Block::LoadIndex(const std::string &file_path) { + FILE *file = fopen(file_path.c_str(), "rb"); + if (file != nullptr) { + size_t read_num = 0; + do { + uint32_t pos; + read_num = fread(&pos, sizeof(pos), 1, file); + if (read_num == 0) { + break; + } + block_pos_.push_back(pos); + } while (read_num != 0); + + fclose(file); + } + + block_pos_fp_ = fopen(file_path.c_str(), "ab+"); + if (block_pos_fp_ == nullptr) { + LOG(ERROR) << "open block pos file error, path=" << file_path; + return -1; + } + block_pos_file_path_ = file_path; + return 0; +} + +int Block::AddBlockPos(uint32_t block_pos) { + block_pos_.push_back(block_pos); + bool is_close = false; + if (block_pos_fp_ == nullptr) { + block_pos_fp_ = fopen(block_pos_file_path_.c_str(), "ab+"); + if (block_pos_fp_ == nullptr) { + LOG(ERROR) << "open block pos file error, path=" + << block_pos_file_path_; + return -1; + } + is_close = true; + } + fwrite(&block_pos, sizeof(block_pos), 1, block_pos_fp_); + fflush(block_pos_fp_); + if (is_close) { + CloseBlockPosFile(); + } + return 0; +} + +int Block::Write(const uint8_t *value, int n_bytes, uint32_t start, + disk_io::AsyncWriter *disk_io) { + if (size_ / per_block_size_ >= block_pos_.size()) { + AddBlockPos(start); + // compress prev + } + size_ += n_bytes; + WriteContent(value, n_bytes, start, disk_io); + return 0; +} + +static uint32_t WritenSize(int fd) { + uint32_t size; + pread(fd, &size, sizeof(size), sizeof(uint8_t) + sizeof(uint32_t)); + return size; +} + +int Block::Read(uint8_t *value, uint32_t n_bytes, uint32_t start) { + int read_num = 0; + while (n_bytes) { + int len = n_bytes; + if (len > per_block_size_) len = per_block_size_; + + uint32_t block_id = start / per_block_size_; + uint32_t block_pos = block_pos_[block_id]; + uint32_t block_offset = start % per_block_size_; + + if (len > per_block_size_ - block_offset) + len = per_block_size_ - block_offset; + + uint32_t cur_size = WritenSize(fd_); + uint32_t b = cur_size * item_length_ / per_block_size_; + // TODO needn't read last block's disk if it is not in last segment + if (b <= block_id) { + pread(fd_, value + read_num, len, + block_pos + header_size_ + block_offset); + } else { + std::shared_ptr> block; + uint64_t uni_block_id = block_id; + uni_block_id = uni_block_id << 32; + uni_block_id |= fd_; + bool res = lru_cache_->Get(uni_block_id, block); + if (not res) { + ReadFunParameter parameter; + parameter.len = per_block_size_; + parameter.offset = block_pos; + parameter.fd = fd_; // TODO remove + parameter.cmprs = (void*)compressor_; + GetReadFunParameter(parameter); + res = lru_cache_->SetOrGet(uni_block_id, block, ¶meter); + } + + if (not res) { + LOG(ERROR) << "Read block fails from disk_file, block_id[" << block_id + << "]"; + return -1; + } + memcpy(value + read_num, block->data() + block_offset, len); + } + + start += len; + read_num += len; + n_bytes -= len; + } + return 0; +} + +int 
Block::Update(const uint8_t *data, int n_bytes, uint32_t offset) { + int res = SubclassUpdate(data, n_bytes, offset); + if (res != 0) return res; + + while (n_bytes) { + int len = n_bytes; + if (len > per_block_size_) len = per_block_size_; + + uint32_t block_id = offset / per_block_size_; + uint32_t block_offset = offset % per_block_size_; + + if (len > per_block_size_ - block_offset) + len = per_block_size_ - block_offset; + + uint64_t uni_block_id = block_id; + uni_block_id = uni_block_id << 32; + uni_block_id |= fd_; + lru_cache_->Evict(uni_block_id); + + offset += len; + n_bytes -= len; + } + return res; +} + + +int Block::CloseBlockPosFile() { + if (block_pos_fp_ != nullptr) { + fclose(block_pos_fp_); + block_pos_fp_ = nullptr; + return 0; + } + return -1; +} + +} // namespace tig_gamma \ No newline at end of file diff --git a/storage/block.h b/storage/block.h new file mode 100644 index 0000000..34be852 --- /dev/null +++ b/storage/block.h @@ -0,0 +1,91 @@ +/** + * Copyright 2019 The Gamma Authors. + * + * This source code is licensed under the Apache License, Version 2.0 license + * found in the LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include + +#include "async_writer.h" +#include "lru_cache.h" +#include "compress/compressor_zfp.h" +#include "compress/compressor_zstd.h" + +typedef uint32_t str_offset_t; +typedef uint16_t str_len_t; + +namespace tig_gamma { + +// struct ReadFunParameter{ +// int fd; +// uint32_t len; +// uint32_t offset; +// }; + +enum class BlockType : uint8_t {TableBlockType = 0, StringBlockType, VectorBlockType}; + +class Block { + public: + Block(int fd, int per_block_size, int length, uint32_t header_size); + + virtual ~Block(); + + void Init(void *lru, Compressor *compressor = nullptr); + + int Write(const uint8_t *data, int len, uint32_t offset, + disk_io::AsyncWriter *disk_io); + + int Read(uint8_t *value, uint32_t len, uint32_t offset); + + int LoadIndex(const std::string &file_path); + + int Update(const uint8_t *data, int n_bytes, uint32_t offset); + + int CloseBlockPosFile(); + // virtual const uint8_t *Get(int id) = 0; + + protected: + // virtual int Compress() = 0; + + // virtual int Uncompress() = 0; + + virtual void InitSubclass() = 0; + + virtual int WriteContent(const uint8_t *data, int len, uint32_t offset, + disk_io::AsyncWriter *disk_io) = 0; + + virtual int GetReadFunParameter(ReadFunParameter ¶meter) = 0; + + virtual int ReadContent(uint8_t *value, uint32_t len, uint32_t offset) = 0; + + virtual int SubclassUpdate(const uint8_t *data, int len, uint32_t offset) = 0; + + int AddBlockPos(uint32_t block_pos); + + LRUCache, ReadFunParameter *> *lru_cache_; + + int fd_; + + Compressor *compressor_; + + uint32_t per_block_size_; + + uint32_t size_; + + int item_length_; + + FILE *block_pos_fp_; + tbb::concurrent_vector block_pos_; // + + uint32_t header_size_; + + std::string block_pos_file_path_; +}; + +} // namespace tig_gamma diff --git a/storage/compress/compressor.h b/storage/compress/compressor.h new file mode 100644 index 0000000..d76a6b7 --- /dev/null +++ b/storage/compress/compressor.h @@ -0,0 +1,45 @@ +/** + * Copyright 2019 The Gamma Authors. + * + * This source code is licensed under the Apache License, Version 2.0 license + * found in the LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +namespace tig_gamma { + +#define DEFAULT_RATE 16 +enum class CompressType : uint8_t { NotCompress, Zfp, Zstd }; + +class Compressor { + public: + Compressor(CompressType type) { + type_ = type; + } + + virtual ~Compressor() {} + + virtual void Init(int d, double r = DEFAULT_RATE, int t = 0) = 0; + + virtual size_t GetCompressLen(int data_len = 0) = 0; + + virtual int GetRawLen() = 0; + + virtual size_t Compress(char* data, char* output, int data_len) = 0; + + virtual size_t Decompress(char* data, char* output, int data_len) = 0; + + virtual size_t CompressBatch(char* datum, char* output, int n, + int data_len) = 0; + + virtual size_t DecompressBatch(char* datum, char* output, int n, + int data_len) = 0; + + CompressType GetCompressType() { return type_; } + + private: + CompressType type_; +}; + +} // namespace tig_gamma diff --git a/storage/compress/compressor_zfp.h b/storage/compress/compressor_zfp.h new file mode 100644 index 0000000..355e0e0 --- /dev/null +++ b/storage/compress/compressor_zfp.h @@ -0,0 +1,136 @@ +/** + * Copyright 2019 The Gamma Authors. + * + * This source code is licensed under the Apache License, Version 2.0 license + * found in the LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#ifdef WITH_ZFP + +#include +#include +#include + +#include "compressor.h" +#include "log.h" +#include "zfp.h" + +namespace tig_gamma { + +class CompressorZFP : public Compressor { + public: + CompressorZFP(CompressType type) : Compressor(type) { + LOG(INFO) << "CompressorZFP construction!"; + } + + ~CompressorZFP() { LOG(INFO) << "CompressorZFP destroyed successfully!"; } + + void Init(int d, double r = DEFAULT_RATE, int t = 0) { + dims = d; + threads = t; + int n = 4; + int remain = d % 4 == 0 ? 24 : 16; + int blocks = floor((dims + n - 1) / n); + int bits = floor(n * r + 0.5); + bits = bits > 9 ? 
bits : 9; + rate = (double)bits / n; + zfpsize = ((ZFP_HEADER_MAX_BITS + blocks * bits + stream_word_bits - 1) & + ~(stream_word_bits - 1)) / + CHAR_BIT - + remain; + raw_len = n * d; + } + + size_t GetCompressLen(int data_len = 0) { return zfpsize; } + + int GetRawLen() { return raw_len; } + + size_t Compress(char* data, char* output, int data_len) { + zfp_field *field = zfp_field_1d(data, type, dims); + zfp_stream *zfp = zfp_stream_open(NULL); + zfp_stream_set_rate(zfp, rate, type, 1, 0); + + bitstream *b_stream; + b_stream = stream_open(output, zfpsize); + zfp_stream_set_bit_stream(zfp, b_stream); + // zfp_stream_rewind(zfp); + size_t size = (size_t)zfp_compress(zfp, field); + zfp_field_free(field); + zfp_stream_close(zfp); + stream_close(b_stream); + return size; + } + + size_t CompressBatch(char* datum, char* output, int n, int data_len) { + size_t flag = n * zfpsize; + int size; + + if (!threads) threads = omp_get_max_threads(); + int chunks = (n + threads - 1) / threads; + +#pragma omp parallel for num_threads(threads) + for (int i = 0; i < threads; i++) { + for (int j = 0; j < chunks; j++) { + if (j + i * chunks > n - 1) break; + size = Compress(datum + sizeof(float) * dims * (j + i * chunks), + output + zfpsize * (j + i * chunks), 0); + if (size == 0) { + flag = 0; + } + } + } + return flag; + } + + size_t Decompress(char* data, char* output, int data_len) { + zfp_field *field = zfp_field_1d(output, type, dims); + zfp_stream *zfp = zfp_stream_open(NULL); + zfp_stream_set_rate(zfp, rate, type, 1, 0); + /* zfp_stream_set_execution(zfp, zfp_exec_omp); */ + /* zfp_stream_set_reversible(zfp); */ + bitstream *b_stream; + zfp_field_set_pointer(field, output); + b_stream = stream_open(data, zfpsize); + zfp_stream_set_bit_stream(zfp, b_stream); + // zfp_stream_rewind(zfp); + size_t size = (size_t)zfp_decompress(zfp, field); + zfp_field_free(field); + zfp_stream_close(zfp); + stream_close(b_stream); + return size; + } + + size_t DecompressBatch(char* datum, char* output, int n, int data_len) { + size_t flag = n * zfpsize; + int size; + if (!threads) threads = omp_get_max_threads(); + int chunks = (n + threads - 1) / threads; + +#pragma omp parallel for num_threads(threads) + for (int i = 0; i < threads; i++) { + for (int j = 0; j < chunks; j++) { + if (j + i * chunks > n - 1) break; + size = Decompress(datum + zfpsize * (j + i * chunks), + output + sizeof(float) * dims * (j + i * chunks), 0); + if (size == 0) { + flag = 0; + } + } + } + return flag; + } + + private: + int dims; // the dims of 1D_array + double rate; // the rate of compress, default is 16 + int threads; + size_t zfpsize; + int raw_len; + zfp_type type = zfp_type_float; +}; + +} // namespace tig_gamma + +#endif // WITH_ZFP diff --git a/storage/compress/compressor_zstd.h b/storage/compress/compressor_zstd.h new file mode 100644 index 0000000..e82b7b8 --- /dev/null +++ b/storage/compress/compressor_zstd.h @@ -0,0 +1,94 @@ +/** + * Copyright 2019 The Gamma Authors. + * + * This source code is licensed under the Apache License, Version 2.0 license + * found in the LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include + +#include "compressor.h" +#include "log.h" + +namespace tig_gamma { + +class CompressorZSTD : public Compressor { + public: + CompressorZSTD(CompressType type) : Compressor(type) { + LOG(INFO) << "CompressorZSTD construction!"; + } + + ~CompressorZSTD() { LOG(INFO) << "CompressorZSTD destroyed successfully!"; } + + void Init(int d, double r = DEFAULT_RATE, int t = 0) { } + + size_t GetCompressLen(int data_len = 0) { return ZSTD_compressBound(data_len); } + + size_t Compress(char* data, char* output, int data_len) { + if (data == nullptr || output == nullptr || 0 == data_len) { + LOG(ERROR) << "data is nullptr or output is nullptr or data_len is 0"; + return 0; + } + size_t dst_capacity = ZSTD_compressBound(data_len); + + auto len = ZSTD_compress(output, dst_capacity, data, data_len, 1); + size_t ret = ZSTD_isError(len); + if (ret != 0) { + LOG(ERROR) << "ZSTD_compress error"; + delete[] output; + output = nullptr; + return 0; + } + // StatisticCompressRate((float)data_len / len); + // PrintCompressRate(10000); + return len; + } + + size_t Decompress(char* data, char* output, int data_len) { + if (data == nullptr || output == nullptr || 0 == data_len) { + LOG(ERROR) << "data is NULL or output is NULL or data_len is 0"; + return 0; + } + auto de_capacity = ZSTD_getDecompressedSize(data, data_len); + + auto len = ZSTD_decompress(output, de_capacity, data, data_len); + size_t ret = ZSTD_isError(len); + + if (ret != 0) { + LOG(ERROR) << "ZSTD_decompress error"; + return 0; + } + return len; + } + + size_t CompressBatch(char* datum, char* output, int n, int data_len) { + return 0; + } + + size_t DecompressBatch(char* datum, char* output, int n, int data_len) { + return 0; + } + + int GetRawLen() { return 0; } + + private: + // void StatisticCompressRate(float rate) { + // avg_cmprs_rate_ = + // (avg_cmprs_rate_ * compress_num_ + rate) / (compress_num_ + 1); + // ++compress_num_; + // } + + // void PrintCompressRate(int interval) { + // if (interval > 0 && compress_num_ % interval == 0) { + // LOG(INFO) << "CompressorZSTD compress_num[" << compress_num_ + // << "], avg_cmprs_rate[" << avg_cmprs_rate_ << "]"; + // } + // } + + // uint64_t compress_num_ = 0; + // double avg_cmprs_rate_ = 0; +}; + +} // namespace tig_gamma diff --git a/storage/lru_cache.h b/storage/lru_cache.h new file mode 100644 index 0000000..b6a0d82 --- /dev/null +++ b/storage/lru_cache.h @@ -0,0 +1,395 @@ +/** + * Copyright 2019 The Gamma Authors. + * + * This source code is licensed under the Apache License, Version 2.0 license + * found in the LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "log.h" + +struct ReadFunParameter { + int fd; + uint32_t len; + uint32_t offset; + void *cmprs; +}; + +struct ReadStrFunParameter { + int fd; + uint32_t block_id; + uint32_t in_block_pos; + uint32_t len; + void *str_block; +}; + +#define THRESHOLD_OF_SWAP 250 +#define THRESHOLD_TYPE uint8_t +#define MAP_GROUP_NUM 100 + +template +class CacheQueue { + public: + struct Node { + Value val; + Node *prev; + Node *next; + }; + CacheQueue() { + head_ = nullptr; + tail_ = nullptr; + } + ~CacheQueue() { + while (head_) { + Node *del = head_; + head_ = del->next; + delete del; + } + tail_ = nullptr; + } + + void Init() { + head_ = new Node; + tail_ = head_; + head_->next = nullptr; + head_->prev = nullptr; + } + + void Erase(void *n) { + if (!n) { + return; + } + Node *del = (Node *)n; + del->prev->next = del->next; + if (del->next) { + del->next->prev = del->prev; + } else { + tail_ = del->prev; + } + delete del; + } + + void MoveToTail(void *n) { + Node *node = (Node *)n; + if (!node || node->prev == nullptr || node->next == NULL || node == tail_) { + return; + } + node->prev->next = node->next; + node->next->prev = node->prev; + tail_->next = node; + node->prev = tail_; + node->next = nullptr; + tail_ = node; + } + + void *Insert(Value value) { + tail_->next = new Node; + tail_->next->prev = tail_; + tail_ = tail_->next; + tail_->val = value; + tail_->next = nullptr; + + return (void *)tail_; + } + + bool Pop(Value &value) { + if (head_ != tail_) { + Node *del = head_->next; + value = del->val; + head_->next = del->next; + if (del->next) { + del->next->prev = head_; + } else { + tail_ = head_; + } + delete del; + return true; + } + return false; + } + + private: + Node *head_; + Node *tail_; +}; + +template > +class LRUCache { + public: + using LoadFunc = bool (*)(Key, std::shared_ptr &, FuncToken); + + struct Cell { + std::shared_ptr value; + void *queue_ite; + THRESHOLD_TYPE hits; + }; + + struct InsertInfo { + explicit InsertInfo(LRUCache &cache) : lru_cache_(cache) {} + std::mutex mtx_; + bool is_clean_ = false; + bool is_product_ = false; + std::shared_ptr value_; + LRUCache &lru_cache_; + }; + + class CellsGroup { + public: + std::unordered_map cells_; + std::unordered_map, HashFunction> + insert_infos_; + }; + + private: + size_t max_size_; + std::unordered_map, HashFunction> + insert_infos_; + size_t max_overflow_ = 0; + size_t last_show_log_ = 1; + std::atomic cur_size_{0}; + std::atomic hits_{0}; + std::atomic misses_{0}; + std::atomic set_hits_{0}; + std::unordered_map cells_; + + CacheQueue queue_; + LoadFunc load_func_; + // std::mutex mtx_; + pthread_rwlock_t rw_lock_; + + public: + LRUCache(size_t max_size, LoadFunc func) + : max_size_(std::max(static_cast(1), max_size)) { + max_overflow_ = max_size / 20; + if(max_overflow_ > 1000) { + max_overflow_ = 1000; + } + load_func_ = func; + LOG(INFO) << "LruCache open! 
Max_size[" << max_size_ << "], max_overflow[" + << max_overflow_ << "]"; + } + + virtual ~LRUCache() { + pthread_rwlock_destroy(&rw_lock_); + LOG(INFO) << "Lrucache destroyed successfully!"; + } + + int Init() { + queue_.Init(); + int ret = pthread_rwlock_init(&rw_lock_, nullptr); + if (ret != 0) { + LOG(ERROR) << "init read-write lock error, ret=" << ret; + return 2; + } + return 0; + } + + bool Get(Key key, std::shared_ptr &mapped) { + pthread_rwlock_rdlock(&rw_lock_); + bool res = GetImpl(key, mapped); + pthread_rwlock_unlock(&rw_lock_); + + if (res) + ++hits_; + else + ++misses_; + if (hits_ % 1000000 == 0 && hits_ != last_show_log_) { + LOG(INFO) << "LruCache cur_size[" << cur_size_ << "] cells_size[" + << cells_.size() << "] hits[" << hits_ << "] set_hits[" + << set_hits_ << "] misses[" << misses_ << "]"; + last_show_log_ = hits_; + } + return res; + } + + void Set(Key key, std::shared_ptr &mapped) { + // std::lock_guard lock(mtx_); + pthread_rwlock_wrlock(&rw_lock_); + SetImpl(key, mapped); + pthread_rwlock_unlock(&rw_lock_); + } + + bool SetOrGet(Key key, std::shared_ptr &load_mapped, + FuncToken token) { + std::shared_ptr insert_info; + + // std::lock_guard cache_lck(mtx_); + pthread_rwlock_wrlock(&rw_lock_); + bool res = GetImpl2(key, load_mapped); + if (res) { + pthread_rwlock_unlock(&rw_lock_); + ++set_hits_; + return true; + } + auto res_ite = insert_infos_.find(key); + if (res_ite == insert_infos_.end()) { + insert_info.reset(new InsertInfo(*this)); + insert_infos_.insert(std::make_pair(key, insert_info)); + } else { + insert_info = res_ite->second; + } + pthread_rwlock_unlock(&rw_lock_); + + InsertInfo *insert = insert_info.get(); + std::lock_guard insert_lck(insert->mtx_); + + if (insert->is_product_) { + ++set_hits_; + load_mapped = insert->value_; + return true; + } + res = load_func_(key, load_mapped, token); + if (res) { + insert->value_ = load_mapped; + insert->is_product_ = true; + } + + // std::lock_guard cache_lck(mtx_); + pthread_rwlock_wrlock(&rw_lock_); + auto ite = insert_infos_.find(key); + if (res && ite != insert_infos_.end() && ite->second.get() == insert) { + SetImpl(key, insert->value_); + } + + if (!insert_info->is_clean_) { + insert->is_clean_ = true; + insert_infos_.erase(key); + } + pthread_rwlock_unlock(&rw_lock_); + return res; + } + + void Evict(Key key) { + // std::lock_guard lock(mtx_); + pthread_rwlock_wrlock(&rw_lock_); + auto ite = cells_.find(key); + if (ite == cells_.end()) { + pthread_rwlock_unlock(&rw_lock_); + return; + } + auto que_ite = ite->second.queue_ite; + cells_.erase(ite); + --cur_size_; + + queue_.Erase(que_ite); + pthread_rwlock_unlock(&rw_lock_); + } + + void AlterMaxSize(size_t max_size) { + max_size_ = max_size; + max_overflow_ = max_size / 20; + if(max_overflow_ > 1000) { + max_overflow_ = 1000; + } + LOG(INFO) << "LruCache Max_size[" << max_size_ << "], max_overflow[" + << max_overflow_ << "]"; + } + + size_t GetMaxSize() { + return max_size_; + } + + size_t Count() const { return cur_size_; } + + size_t GetHits() { return hits_; } + + size_t GetSetHits() { return set_hits_; } + + size_t GetMisses() { return misses_; } + + private: + bool GetImpl(const Key &key, std::shared_ptr &mapped) { + auto ite = cells_.find(key); + if (ite == cells_.end()) { + return false; + } + Cell &cell = ite->second; + mapped = cell.value; + + if (cell.hits >= THRESHOLD_OF_SWAP) { + pthread_rwlock_unlock(&rw_lock_); + pthread_rwlock_wrlock(&rw_lock_); + queue_.MoveToTail(cell.queue_ite); + } else { + ++cell.hits; + } + return true; + } + + bool 
GetImpl2(const Key &key, std::shared_ptr &mapped) { + auto ite = cells_.find(key); + if (ite == cells_.end()) { + return false; + } + Cell &cell = ite->second; + mapped = cell.value; + + if (cur_size_ >= max_size_) { + if (cell.hits >= THRESHOLD_OF_SWAP) { + queue_.MoveToTail(cell.queue_ite); + } else { + ++cell.hits; + } + } + return true; + } + + void SetImpl(const Key &key, const std::shared_ptr &add_mapped) { + auto res = + cells_.emplace(std::piecewise_construct, std::forward_as_tuple(key), + std::forward_as_tuple()); + Cell &cell = res.first->second; + + bool is_emplace = res.second; + if (is_emplace) { + cell.queue_ite = queue_.Insert(key); + cell.hits = 0; + ++cur_size_; + EvictOverflow(); + } else { + if (cell.hits >= THRESHOLD_OF_SWAP) { + queue_.MoveToTail(cell.queue_ite); + } else { + ++cell.hits; + } + } + cell.value = add_mapped; + } + + void EvictOverflow() { + if (cur_size_ >= max_size_ + max_overflow_) { + int evict_num = cur_size_ - max_size_; + cur_size_ -= evict_num; + + int fail_pop_num = 0; + Key key; + for (int i = 0; i < evict_num; ++i) { + if (!queue_.Pop(key)) { + ++fail_pop_num; + continue; + } + auto ite = cells_.find(key); + if (ite == cells_.end()) { + LOG(ERROR) << "error, LruCache queue and map is inconsistent."; + abort(); + } + cells_.erase(ite); + } + cur_size_ += fail_pop_num; + } + } +}; diff --git a/storage/segment.cc b/storage/segment.cc new file mode 100644 index 0000000..aeded7f --- /dev/null +++ b/storage/segment.cc @@ -0,0 +1,343 @@ +/** + * Copyright 2019 The Gamma Authors. + * + * This source code is licensed under the Apache License, Version 2.0 license + * found in the LICENSE file in the root directory of this source tree. + */ + +#include "segment.h" + +#include +#include +#include +#include +#include +#include + +#include "error_code.h" +#include "log.h" +#include "table_block.h" +#include "vector_block.h" +#include "thread_util.h" +#include "utils.h" + +namespace tig_gamma { + +namespace { + +inline size_t CapacityOff() { return sizeof(uint8_t); } + +inline size_t SizeOff() { + uint32_t capacity; + return CapacityOff() + sizeof(capacity); +} + +inline size_t StrCapacityOff() { + uint32_t size; + return SizeOff() + sizeof(size); +} + +inline size_t StrSizeOff() { + uint64_t str_capacity; + return StrCapacityOff() + sizeof(str_capacity); +} + +inline size_t StrBlocksSizeOff() { + uint64_t str_capacity; + return StrSizeOff() + sizeof(str_capacity); +} + +inline size_t StrCompressedOff() { + str_offset_t str_size; + return StrBlocksSizeOff() + sizeof(str_size); +} + +inline size_t BCompressedOff() { + str_offset_t str_compressed_size; + return StrCompressedOff() + sizeof(str_compressed_size); +} + +} // namespace + +Segment::Segment(const std::string &file_path, int max_size, int vec_byte_size, + disk_io::AsyncWriter *disk_io, void *table_cache, + void *str_cache) + : file_path_(file_path), + max_size_(max_size), + item_length_(vec_byte_size), + disk_io_(disk_io), + cache_(table_cache), + str_cache_(str_cache) { + base_fd_ = -1; + str_fd_ = -1; + + cur_size_ = 0; + capacity_ = 0; + version_ = 0; + uint32_t capacity; + uint32_t size; + uint64_t str_capacity; + str_offset_t str_size; + uint32_t str_blocks_size; + str_offset_t str_compressed_size; + uint8_t b_compressed; + + seg_header_size_ = sizeof(version_) + sizeof(capacity) + sizeof(size) + + sizeof(str_capacity) + sizeof(str_size) + + sizeof(str_blocks_size) + sizeof(str_compressed_size) + + sizeof(b_compressed); + mapped_byte_size_ = (size_t)max_size * item_length_ + seg_header_size_; + + 
per_block_size_ = ((64 * 1024) / item_length_) * item_length_; // block~=64k + buffered_size_ = 0; +} + +Segment::~Segment() { + if (base_fd_ != -1) { + close(base_fd_); + base_fd_ = -1; + } + + if (str_fd_ != -1) { + close(str_fd_); + str_fd_ = -1; + } + + if (blocks_ != nullptr) { + delete blocks_; + blocks_ = nullptr; + } + + if (str_blocks_ != nullptr) { + delete str_blocks_; + str_blocks_ = nullptr; + } +} + +uint8_t Segment::Version() { + uint8_t version = 0; + pread(base_fd_, &version, sizeof(version), 0); + return version; +} + +void Segment::SetVersion(uint8_t version) { + pwrite(base_fd_, &version, sizeof(version), 0); +} + +uint32_t Segment::BufferedSize() { return buffered_size_; } + +void Segment::PersistentedSize() { + uint32_t capacity; + uint32_t size = 0; + pread(base_fd_, &size, sizeof(size), sizeof(version_) + sizeof(capacity)); + cur_size_ = size; +} + +void Segment::SetSize(uint32_t size) { + uint32_t capacity; + pwrite(base_fd_, &size, sizeof(size), sizeof(version_) + sizeof(capacity)); +} + +uint64_t Segment::StrCapacity() { + uint64_t str_capacity; + pread(base_fd_, &str_capacity, sizeof(str_capacity), StrCapacityOff()); + return str_capacity; +} + +void Segment::SetStrCapacity(uint64_t str_capacity) { + pwrite(base_fd_, &str_capacity, sizeof(str_capacity), StrCapacityOff()); +} + +uint32_t Segment::StrBlocksSize() { + uint32_t str_blocks_size; + pread(base_fd_, &str_blocks_size, sizeof(str_blocks_size), + StrBlocksSizeOff()); + return str_blocks_size; +} + +void Segment::SetStrBlocksSize(uint32_t str_blocks_size) { + pwrite(base_fd_, &str_blocks_size, sizeof(str_blocks_size), + StrBlocksSizeOff()); +} + +str_offset_t Segment::StrSize() { + str_offset_t str_size; + pread(base_fd_, &str_size, sizeof(str_size), StrSizeOff()); + return str_size; +} + +void Segment::SetStrSize(str_offset_t str_size) { + pwrite(base_fd_, &str_size, sizeof(str_size), StrSizeOff()); +} + +uint8_t Segment::BCompressed() { + uint8_t b_compressed; + pread(base_fd_, &b_compressed, sizeof(b_compressed), BCompressedOff()); + return b_compressed; +} + +void Segment::SetCompressed(uint8_t compressed) { + pwrite(base_fd_, &compressed, sizeof(compressed), BCompressedOff()); +} + +str_offset_t Segment::StrCompressedSize() { + str_offset_t str_compressed_size; + pread(base_fd_, &str_compressed_size, sizeof(str_compressed_size), + StrCompressedOff()); + return str_compressed_size; +} + +void Segment::SetStrCompressedSize(str_offset_t str_compressed_size) { + pwrite(base_fd_, &str_compressed_size, sizeof(str_compressed_size), + StrCompressedOff()); +} + +int Segment::Init(BlockType block_type, Compressor *compressor) { + OpenFile(); + if (ftruncate(base_fd_, seg_header_size_ + item_length_ * max_size_)) { + close(base_fd_); + LOG(ERROR) << "truncate file error:" << strerror(errno); + return IO_ERR; + } + + uint64_t str_capacity = seg_header_size_ + max_size_ * 4; + SetStrCapacity(str_capacity); + SetStrSize(0); + int ret = ftruncate(str_fd_, StrCapacity()); + if (ret != 0) { + return -1; + } + InitBlock(block_type, compressor); + + return 0; +} + +int Segment::OpenFile() { + base_fd_ = open(file_path_.c_str(), O_RDWR | O_CREAT, 0666); + if (-1 == base_fd_) { + LOG(ERROR) << "open vector file error, path=" << file_path_; + return IO_ERR; + } + + str_fd_ = open((file_path_ + "_str").c_str(), O_RDWR | O_CREAT, 0666); + if (-1 == str_fd_) { + LOG(ERROR) << "open vector file error, path=" << (file_path_ + "_str"); + return -1; + } + return 0; +} + +int Segment::InitBlock(BlockType block_type, Compressor 
*compressor) { + switch (block_type) + { + case BlockType::TableBlockType: + blocks_ = + new TableBlock(base_fd_, per_block_size_, item_length_, seg_header_size_); + break; + case BlockType::VectorBlockType: + blocks_ = + new VectorBlock(base_fd_, per_block_size_, item_length_, seg_header_size_); + break; + default: + LOG(ERROR) << "BlockType is error"; + break; + } + + blocks_->Init(cache_, compressor); + blocks_->LoadIndex(file_path_ + ".idx"); + + str_blocks_ = + new StringBlock(str_fd_, 1024 * 1024, item_length_, seg_header_size_); + str_blocks_->LoadIndex(file_path_ + "_str.idx"); + str_blocks_->InitStrBlock(str_cache_); + if (BufferedSize() == max_size_) { + blocks_->CloseBlockPosFile(); + str_blocks_->CloseBlockPosFile(); + } + return 0; +} + +// TODO: Load compressor +int Segment::Load(BlockType block_type, Compressor *compressor) { + OpenFile(); + InitBlock(block_type, compressor); + + uint64_t str_capacity = StrCapacity(); + PersistentedSize(); + return cur_size_; +} + +int Segment::Add(const uint8_t *data, int len) { + size_t offset = (size_t)buffered_size_ * item_length_; + blocks_->Write(data, len, offset, disk_io_); + ++buffered_size_; + return 0; +} + +str_offset_t Segment::AddString(const char *str, int len, uint32_t &block_id, + uint32_t &in_block_pos) { + str_offset_t str_size = StrSize(); + uint64_t str_capacity = StrCapacity(); + if (str_size + len >= str_capacity) { + uint64_t extend_capacity = str_capacity << 1; + while (str_size + len >= extend_capacity) { + extend_capacity = extend_capacity << 1; + } + + int ret = 0; + SetStrCapacity(extend_capacity); + + ret = ftruncate(str_fd_, StrCapacity()); + if (ret != 0) { + return -1; + } + } + + str_blocks_->WriteString(str, len, str_size, block_id, in_block_pos); + + SetStrSize(str_size + len); + return str_size; +} + +int Segment::GetValue(uint8_t *value, int id) { + return GetValues(value, id, 1); +} + +int Segment::GetValues(uint8_t *value, int id, int n) { + int start = id * item_length_; + int n_bytes = n * item_length_; + // TODO read from buffer queue + while (id >= (int)cur_size_) { + PersistentedSize(); + if (id < (int)cur_size_) break; + LOG(INFO) << "Data not brushed disk, wait 10ms."; + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + blocks_->Read(value, n_bytes, start); + return 0; +} + +std::string Segment::GetString(uint32_t block_id, uint32_t in_block_pos, + str_len_t len) { + std::string str; + str_blocks_->Read(block_id, in_block_pos, len, str); + return str; +} + +bool Segment::IsFull() { + if (BufferedSize() == max_size_) { + blocks_->CloseBlockPosFile(); + str_blocks_->CloseBlockPosFile(); + return true; + } else { + return false; + } +} + +int Segment::Update(int id, uint8_t *data, int len) { + size_t offset = (size_t)id * item_length_; + blocks_->Update(data, len, offset); + return 0; +} + +} // namespace tig_gamma diff --git a/storage/segment.h b/storage/segment.h new file mode 100644 index 0000000..a4e5de5 --- /dev/null +++ b/storage/segment.h @@ -0,0 +1,117 @@ +/** + * Copyright 2019 The Gamma Authors. + * + * This source code is licensed under the Apache License, Version 2.0 license + * found in the LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include + +#include + +#include "block.h" +#include "string_block.h" + +namespace tig_gamma { + +const static int MAX_SEGMENT_NUM = 102400; // max segment num + +class Segment { + public: + Segment(const std::string &file_path, int max_size, int vec_byte_size, + disk_io::AsyncWriter *disk_io, void *table_cache, void *str_cache); + + ~Segment(); + + int Init(BlockType block_type, Compressor *compressor = nullptr); + + int Load(BlockType block_type, Compressor *compressor = nullptr); + + int Add(const uint8_t *vec, int len); + + str_offset_t AddString(const char *vec, int len, uint32_t &block_id, + uint32_t &in_block_pos); + + int GetValue(uint8_t *value, int id); + + int GetValues(uint8_t *value, int id, int size); + + std::string GetString(uint32_t block_id, uint32_t in_block_pos, + str_len_t len); + + bool IsFull(); + + void SetCurrIdx(int curr_idx) { SetSize(curr_idx); } + + int Update(int id, uint8_t *vec, int len); + + private: + uint8_t Version(); + + void SetVersion(uint8_t version); + + uint32_t BufferedSize(); + + void PersistentedSize(); + + void SetSize(uint32_t size); + + uint64_t StrCapacity(); + + void SetStrCapacity(uint64_t str_capacity); + + uint32_t StrBlocksSize(); + + void SetStrBlocksSize(uint32_t str_blocks_size); + + void SetBlocksStrSize(uint32_t str_blocks_size); + + str_offset_t StrSize(); + + void SetStrSize(str_offset_t str_size); + + uint8_t BCompressed(); + + void SetCompressed(uint8_t compressed); + + str_offset_t StrCompressedSize(); + + void SetStrCompressedSize(str_offset_t str_compressed_size); + + int OpenFile(); + + int InitBlock(BlockType block_type, Compressor *compressor); + + private: + std::string file_path_; + size_t mapped_byte_size_; + + int max_size_; + uint32_t cur_size_; + + uint32_t buffered_size_; + + uint32_t capacity_; + + uint64_t seg_header_size_; + uint8_t version_; + + uint32_t item_length_; + + int base_fd_; + int str_fd_; + + Block *blocks_; + + StringBlock *str_blocks_; + + uint32_t per_block_size_; + disk_io::AsyncWriter *disk_io_; + + void *cache_; + void *str_cache_; +}; + +} // namespace tig_gamma diff --git a/storage/storage_manager.cc b/storage/storage_manager.cc new file mode 100644 index 0000000..d477989 --- /dev/null +++ b/storage/storage_manager.cc @@ -0,0 +1,315 @@ +/** + * Copyright 2019 The Gamma Authors. + * + * This source code is licensed under the Apache License, Version 2.0 license + * found in the LICENSE file in the root directory of this source tree. 
+ */ + +#include "storage_manager.h" + +#include "error_code.h" +#include "log.h" +#include "table_block.h" +#include "vector_block.h" +#include "utils.h" + +namespace tig_gamma { + +StorageManager::StorageManager(const std::string &root_path, + BlockType block_type, + const StorageManagerOptions &options) + : root_path_(root_path), block_type_(block_type), options_(options) { + size_ = 0; + cache_ = nullptr; + str_cache_ = nullptr; + compressor_ = nullptr; +} + +StorageManager::~StorageManager() { + for (size_t i = 0; i < segments_.size(); i++) { + CHECK_DELETE(segments_[i]); + } + delete disk_io_; + disk_io_ = nullptr; + CHECK_DELETE(str_cache_); + CHECK_DELETE(cache_); + CHECK_DELETE(compressor_); +} + +std::string StorageManager::NextSegmentFilePath() { + char buf[7]; + snprintf(buf, 7, "%06d", (int)segments_.size()); + std::string file_path = root_path_ + "/" + buf; + return file_path; +} + +int StorageManager::UseCompress(CompressType type, int d, double rate) { + if(type == CompressType::Zfp) { +#ifdef WITH_ZFP + if(d > 0) { + compressor_ = new CompressorZFP(type); + compressor_->Init(d); + } +#endif + } + return (compressor_? 0 : -1); +} + +bool StorageManager::AlterCacheSize(uint32_t cache_size, + uint32_t str_cache_size) { + if (cache_size > 0) { + if (cache_ != nullptr) { + uint32_t cache_max_size = (cache_size * 1024) / 64; //cache_max unit: M + cache_->AlterMaxSize((size_t)cache_max_size); + } else { + LOG(WARNING) << "Alter cache_ failure, cache_ is nullptr."; + } + } + if (str_cache_size > 0) { + if (str_cache_ != nullptr) { + uint32_t cache_max_size = (str_cache_size * 1024) / 64; //cache_max unit: M + str_cache_->AlterMaxSize((size_t)cache_max_size); + } else { + LOG(WARNING) << "Alter str_cache_ failure, str_cache_ is nullptr."; + } + } + return true; +} + +void StorageManager::GetCacheSize(uint32_t &cache_size, uint32_t &str_cache_size) { + if (cache_ != nullptr) { + size_t max_size = cache_->GetMaxSize(); + cache_size = (uint32_t)(max_size * 64 / 1024); + } + if (str_cache_ != nullptr) { + size_t max_size = str_cache_->GetMaxSize(); + str_cache_size = (uint32_t)(max_size * 64 / 1024); + } +} + +int StorageManager::Init(int cache_size, int str_cache_size) { + int cache_max_size = (cache_size * 1024) / 64; //cache_max unit: M + int str_cache_max_size = (str_cache_size * 1024) / 64; + LOG(INFO) << "lrucache cache_size[" << cache_size + << "M], string lrucache cache_size[" << str_cache_size << "M]"; + auto fun = &TableBlock::ReadBlock; + if (block_type_ == BlockType::VectorBlockType) { + fun = &VectorBlock::ReadBlock; + } + cache_ = + new LRUCache, ReadFunParameter *>( + cache_max_size, fun); + cache_->Init(); + + if (str_cache_max_size > 0) { + str_cache_ = + new LRUCache, ReadStrFunParameter *>( + str_cache_max_size, &StringBlock::ReadString); + str_cache_->Init(); + } + + disk_io_ = new disk_io::AsyncWriter(); + if (!options_.IsValid()) { + LOG(ERROR) << "invalid options=" << options_.ToStr(); + return PARAM_ERR; + } + if (utils::make_dir(root_path_.c_str())) { + LOG(ERROR) << "mkdir error, path=" << root_path_; + return IO_ERR; + } + + Load(); + // init the first segment + if (segments_.size() == 0 && Extend()) { + return INTERNAL_ERR; + } + LOG(INFO) << "init gamma storage success! 
options=" << options_.ToStr() + << ", segment num=" << segments_.size(); + return 0; +} + +int StorageManager::Load() { + // load existed segments + while (utils::file_exist(NextSegmentFilePath())) { + Segment *segment = new Segment(NextSegmentFilePath(), options_.segment_size, + options_.fixed_value_bytes, disk_io_, + (void *)cache_, (void *)str_cache_); + int ret = segment->Load(block_type_, compressor_); + if (ret < 0) { + LOG(ERROR) << "extend file segment error, ret=" << ret; + return ret; + } + size_ += ret; + segments_.push_back(segment); + } + + LOG(INFO) << "init gamma storage success! options=" << options_.ToStr() + << ", segment num=" << segments_.size(); + return size_; +} + +int StorageManager::Extend() { + Segment *segment = new Segment(NextSegmentFilePath(), options_.segment_size, + options_.fixed_value_bytes, disk_io_, + (void *)cache_, (void *)str_cache_); + int ret = segment->Init(block_type_, compressor_); + if (ret) { + LOG(ERROR) << "extend file segment error, ret=" << ret; + return ret; + } + segments_.push_back(segment); + return 0; +} + +int StorageManager::Add(const uint8_t *value, int len) { + if (len != options_.fixed_value_bytes) { + LOG(ERROR) << "Add len error [" << len << "] != options_.fixed_value_bytes[" + << options_.fixed_value_bytes << "]"; + return PARAM_ERR; + } + + Segment *segment = segments_.back(); + int ret = segment->Add(value, len); + if (ret) { + LOG(ERROR) << "segment add error [" << ret << "]"; + return ret; + } + ++size_; + + if (segment->IsFull() && Extend()) { + LOG(ERROR) << "extend error"; + return INTERNAL_ERR; + } + return 0; +} + +str_offset_t StorageManager::AddString(const char *value, int len, + uint32_t &block_id, + uint32_t &in_block_pos) { + Segment *segment = segments_.back(); + str_offset_t ret = segment->AddString(value, len, block_id, in_block_pos); + return ret; +} + +int StorageManager::GetHeaders(int start, int n, + std::vector &vecs, + std::vector &lens) { + if ((size_t)start + n > size_) { + LOG(ERROR) << "start [" << start << "] + n [" << n << "] > size_ [" << size_ + << "]"; + return PARAM_ERR; + } + while (n) { + int offset = start % options_.segment_size; + int len = options_.segment_size - offset; + if (len > n) len = n; + lens.push_back(len); + Segment *Segment = segments_[start / options_.segment_size]; + uint8_t *value = new uint8_t[len * options_.fixed_value_bytes]; + Segment->GetValues(value, offset, len); + // std::stringstream ss; + // for (int i = 0; i < 100; ++i) { + // float a; + // memcpy(&a, value + i * 4, 4); + // ss << a << " "; + // } + // std::string aa = ss.str(); + vecs.push_back(value); + start += len; + n -= len; + } + return 0; +} + +int StorageManager::Update(int id, uint8_t *v, int len) { + if ((size_t)id >= size_ || id < 0 || len != options_.fixed_value_bytes) { + LOG(ERROR) << "id [" << id << "] size_ [" << size_ << "]"; + return PARAM_ERR; + } + return segments_[id / options_.segment_size]->Update( + id % options_.segment_size, v, len); +} + +str_offset_t StorageManager::UpdateString(int id, const char *value, int len, + uint32_t &block_id, uint32_t &in_block_pos) { + if ((size_t)id >= size_ || id < 0) { + LOG(ERROR) << "id [" << id << "] size_ [" << size_ << "]"; + return PARAM_ERR; + } + int seg_id = id / options_.segment_size; + while (seg_id >= segments_.size()) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + LOG(INFO) << "Get() ,seg_id:" << seg_id << " segments_.size():" << segments_.size(); + } + Segment *segment = segments_[seg_id]; + str_offset_t ret = 
segment->AddString(value, len, block_id, in_block_pos); + return ret; +} + +int StorageManager::Get(long id, const uint8_t *&value) { + if ((size_t)id >= size_ || id < 0) { + LOG(WARNING) << "id [" << id << "] size_ [" << size_ << "]"; + return PARAM_ERR; + } + + int seg_id = id / options_.segment_size; + while (seg_id >= segments_.size()) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + LOG(INFO) << "Get() ,seg_id:" << seg_id << " segments_.size():" << segments_.size(); + } + Segment *segment = segments_[seg_id]; + + uint8_t *value2 = new uint8_t[options_.fixed_value_bytes]; + segment->GetValue(value2, id % options_.segment_size); + value = value2; + return 0; +} + +int StorageManager::GetString(long id, std::string &value, uint32_t block_id, + uint32_t in_block_pos, str_len_t len) { + if ((size_t)id >= size_ || id < 0) { + LOG(ERROR) << "id [" << id << "] size_ [" << size_ << "]"; + return PARAM_ERR; + } + // TODO wait while seg_id >= segments_.size() + int seg_id = id / options_.segment_size; + while (seg_id >= segments_.size()) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + LOG(INFO) << "GetString(), seg_id:" << seg_id << " segments_.size():" << segments_.size(); + } + + value = segments_[seg_id]->GetString(block_id, in_block_pos, len); + return 0; +} + +int StorageManager::Truncate(size_t size) { + size_t seg_num = size / options_.segment_size; + size_t offset = size % options_.segment_size; + if (offset > 0) ++seg_num; + if (seg_num > segments_.size()) { + LOG(ERROR) << "gamma storage only has " << segments_.size() + << " segments, but expect " << seg_num + << ", trucate size=" << size; + return PARAM_ERR; + } + + for (int i = (int)segments_.size() - 1; i >= (int)seg_num; --i) { + delete segments_[i]; + segments_[i] = nullptr; + } + segments_.resize(seg_num); + if (offset > 0) { + segments_.back()->SetCurrIdx((int)offset); + } + size_ = size; + + if (seg_num == 0 && Extend()) { + return INTERNAL_ERR; + } + LOG(INFO) << "gamma storage truncate to size=" << size + << ", current segment num=" << segments_.size() + << ", last offset=" << offset; + return 0; +} + +} // namespace tig_gamma diff --git a/storage/storage_manager.h b/storage/storage_manager.h new file mode 100644 index 0000000..8b18bea --- /dev/null +++ b/storage/storage_manager.h @@ -0,0 +1,106 @@ +/** + * Copyright 2019 The Gamma Authors. + * + * This source code is licensed under the Apache License, Version 2.0 license + * found in the LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include + +#include "async_writer.h" +#include "compress/compressor_zfp.h" +#include "compress/compressor_zstd.h" +#include "lru_cache.h" +#include "segment.h" +#include "vector_buffer_queue.h" + +namespace tig_gamma { + +struct StorageManagerOptions { + int segment_size; + int fixed_value_bytes; + + StorageManagerOptions() { + segment_size = -1; + fixed_value_bytes = -1; + } + + StorageManagerOptions(const StorageManagerOptions &options) { + segment_size = options.segment_size; + fixed_value_bytes = options.fixed_value_bytes; + } + + bool IsValid() { + if (segment_size == -1 || fixed_value_bytes == -1) return false; + return true; + } + + std::string ToStr() { + std::stringstream ss; + ss << "{segment_size=" << segment_size + << ", fixed_value_bytes=" << fixed_value_bytes << "}"; + return ss.str(); + } +}; + +class StorageManager { + public: + StorageManager(const std::string &root_path, BlockType block_type, + const StorageManagerOptions &options); + ~StorageManager(); + int Init(int cache_size, int str_cache_size = 0); + + int Add(const uint8_t *value, int len); + + str_offset_t AddString(const char *value, int len, uint32_t &block_id, + uint32_t &in_block_pos); + + int Update(int id, uint8_t *value, int len); + + str_offset_t UpdateString(int id, const char *value, int len, + uint32_t &block_id, uint32_t &in_block_pos); + + // warning: vec can't be free + int Get(long id, const uint8_t *&value); + + int GetString(long id, std::string &value, uint32_t blocck_id, + uint32_t in_block_pos, str_len_t len); + + int GetHeaders(int start, int n, std::vector &values, + std::vector &lens); + + // currently it must call truncate after loading to set size of gamma db + int Truncate(size_t size); + + int Size() { return size_; } + + int UseCompress(CompressType type, int d = -1, double rate = -1); + + bool AlterCacheSize(uint32_t cache_size, uint32_t str_cache_size); + + void GetCacheSize(uint32_t &cache_size, uint32_t &str_cache_size); + + private: + int Load(); + + int Extend(); + + std::string NextSegmentFilePath(); + + private: + std::string root_path_; + StorageManagerOptions options_; + size_t size_; + tbb::concurrent_vector segments_; + disk_io::AsyncWriter *disk_io_; + BlockType block_type_; + LRUCache, ReadFunParameter *> *cache_; + LRUCache, ReadStrFunParameter *> *str_cache_; + Compressor *compressor_; +}; + +} // namespace tig_gamma diff --git a/storage/string_block.cc b/storage/string_block.cc new file mode 100644 index 0000000..918e380 --- /dev/null +++ b/storage/string_block.cc @@ -0,0 +1,107 @@ +/** + * Copyright 2019 The Gamma Authors. + * + * This source code is licensed under the Apache License, Version 2.0 license + * found in the LICENSE file in the root directory of this source tree. 
+ */ + +#include "string_block.h" + +#include +// #include + +namespace tig_gamma { + +const static int MAX_STR_BLOCK_SIZE = 102400; + +StringBlock::StringBlock(int fd, int per_block_size, int length, + uint32_t header_size) + : Block(fd, per_block_size, length, header_size) {} + +void StringBlock::InitStrBlock(void *lru) { + lru_cache_ = + (LRUCache, ReadStrFunParameter *> *)lru; +} + +int StringBlock::WriteContent(const uint8_t *data, int len, uint32_t offset, + disk_io::AsyncWriter *disk_io) { + return 0; +} + +int StringBlock::ReadContent(uint8_t *value, uint32_t len, uint32_t offset) { + return 0; +} + +int StringBlock::WriteString(const char *data, str_len_t len, + str_offset_t offset, uint32_t &block_id, + uint32_t &in_block_pos) { + if (block_pos_.size() == 0) { + AddBlockPos(0); + } + uint32_t cur_pos = block_pos_.back(); + in_block_pos = offset - cur_pos; + block_id = block_pos_.size() - 1; + if (in_block_pos + len > MAX_STR_BLOCK_SIZE) { + pwrite(fd_, data, len, offset); + AddBlockPos(offset + len); + } else { + pwrite(fd_, data, len, offset); + } + return 0; +} + +int StringBlock::Read(uint32_t block_id, uint32_t in_block_pos, str_len_t len, + std::string &str_out) { + // uint32_t off = block_pos_[block_id] + in_block_pos; + // std::vector str(len); + // pread(fd_, str.data(), str.size(), off); + // str_out = std::move(std::string(str.data(), len)); + + char *str = new char[len]; + uint32_t block_pos = block_pos_[block_id]; + + uint32_t last_block_id = block_pos_.size() - 1; + // TODO needn't read last block's disk if it is not in last segment + if (block_id >= last_block_id - 1) { + pread(fd_, str, len, block_pos + in_block_pos); + } else { + std::shared_ptr> block; + uint64_t uni_block_id = block_id; + uni_block_id = uni_block_id << 32; + uni_block_id |= fd_; + bool res = lru_cache_->Get(uni_block_id, block); + if (not res) { + ReadStrFunParameter parameter; + parameter.str_block = this; + parameter.block_id = block_id; + parameter.in_block_pos = in_block_pos; + parameter.fd = fd_; // TODO remove + res = lru_cache_->SetOrGet(uni_block_id, block, ¶meter); + } + + if (not res) { + LOG(ERROR) << "Read block fails from disk_file, block_id[" << block_id + << "]"; + return -1; + } + memcpy(str, block->data() + in_block_pos, len); + } + + str_out = std::string(str, len); + delete[] str; + return 0; +} + +bool StringBlock::ReadString(uint64_t key, + std::shared_ptr> &block, + ReadStrFunParameter *param) { + StringBlock *str_block = reinterpret_cast(param->str_block); + uint32_t len = str_block->block_pos_[param->block_id + 1] - + str_block->block_pos_[param->block_id]; + block = std::make_shared>(len); + uint32_t cur_pos = str_block->block_pos_[param->block_id]; // TODO check size + pread(param->fd, block->data(), len, cur_pos); + return true; +} + +} // namespace tig_gamma \ No newline at end of file diff --git a/storage/string_block.h b/storage/string_block.h new file mode 100644 index 0000000..a6b88a8 --- /dev/null +++ b/storage/string_block.h @@ -0,0 +1,52 @@ +/** + * Copyright 2019 The Gamma Authors. + * + * This source code is licensed under the Apache License, Version 2.0 license + * found in the LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include + +#include "block.h" + +namespace tig_gamma { + +class StringBlock : public Block { + public: + StringBlock(int fd, int max_size, int length, uint32_t header_size); + + void InitSubclass() {}; + + int GetReadFunParameter(ReadFunParameter ¶meter) {}; + + int AddStrBlockPos(uint32_t block_pos); + + int WriteContent(const uint8_t *data, int len, uint32_t offset, + disk_io::AsyncWriter *disk_io); + + void InitStrBlock(void *lru); + + int Add(const uint8_t *data, int len); + + int ReadContent(uint8_t *value, uint32_t len, uint32_t offset); + + int WriteString(const char *data, str_len_t len, str_offset_t offset, + uint32_t &block_id, uint32_t &in_block_pos); + + int Read(uint32_t block_id, uint32_t in_block_pos, str_len_t len, + std::string &str_out); + + static bool ReadString(uint64_t key, + std::shared_ptr> &block, + ReadStrFunParameter *param); + + int SubclassUpdate(const uint8_t *data, int len, uint32_t offset) { + return 0; + }; + private: + LRUCache, ReadStrFunParameter *> *lru_cache_; +}; + +} // namespace tig_gamma \ No newline at end of file diff --git a/storage/table_block.cc b/storage/table_block.cc new file mode 100644 index 0000000..7e1034b --- /dev/null +++ b/storage/table_block.cc @@ -0,0 +1,58 @@ +/** + * Copyright 2019 The Gamma Authors. + * + * This source code is licensed under the Apache License, Version 2.0 license + * found in the LICENSE file in the root directory of this source tree. + */ + +#include "table_block.h" + +#include +#include +// #include + +namespace tig_gamma { + +TableBlock::TableBlock(int fd, int per_block_size, int length, + uint32_t header_size) + : Block(fd, per_block_size, length, header_size) {} + +int TableBlock::GetReadFunParameter(ReadFunParameter ¶meter) { + parameter.offset += header_size_; + return 0; +} + + +int TableBlock::WriteContent(const uint8_t *data, int len, uint32_t offset, + disk_io::AsyncWriter *disk_io) { + disk_io->Set(header_size_, item_length_); + struct disk_io::WriterStruct *write_struct = new struct disk_io::WriterStruct; + write_struct->fd = fd_; + write_struct->data = new uint8_t[len]; + memcpy(write_struct->data, data, len); + write_struct->start = header_size_ + offset; + write_struct->len = len; + disk_io->AsyncWrite(write_struct); + // disk_io->SyncWrite(write_struct); + return 0; +} + +bool TableBlock::ReadBlock(uint64_t key, + std::shared_ptr> &block, + ReadFunParameter *param) { + block = std::make_shared>(param->len); + pread(param->fd, block->data(), param->len, param->offset); + return true; +} + +int TableBlock::ReadContent(uint8_t *value, uint32_t len, uint32_t offset) { + pread(fd_, value, len, header_size_ + offset); + return 0; +} + +int TableBlock::SubclassUpdate(const uint8_t *data, int len, uint32_t offset) { + pwrite(fd_, data, len, header_size_ + offset); + return 0; +} + +} // namespace tig_gamma diff --git a/storage/table_block.h b/storage/table_block.h new file mode 100644 index 0000000..a63ce04 --- /dev/null +++ b/storage/table_block.h @@ -0,0 +1,35 @@ +/** + * Copyright 2019 The Gamma Authors. + * + * This source code is licensed under the Apache License, Version 2.0 license + * found in the LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include "block.h" + +namespace tig_gamma { + +class TableBlock : public Block { + public: + TableBlock(int fd, int max_size, int length, uint32_t header_size); + + void InitSubclass() {}; + + int GetReadFunParameter(ReadFunParameter ¶meter); + + static bool ReadBlock(uint64_t key, std::shared_ptr> &block, + ReadFunParameter *param); + + int WriteContent(const uint8_t *data, int len, uint32_t offset, + disk_io::AsyncWriter *disk_io) override; + + int Add(const uint8_t *data, int len); + + int ReadContent(uint8_t *value, uint32_t len, uint32_t offset) override; + + int SubclassUpdate(const uint8_t *data, int len, uint32_t offset) override; +}; + +} // namespace tig_gamma \ No newline at end of file diff --git a/storage/vector_block.cc b/storage/vector_block.cc new file mode 100644 index 0000000..17f6ad8 --- /dev/null +++ b/storage/vector_block.cc @@ -0,0 +1,151 @@ +/** + * Copyright 2019 The Gamma Authors. + * + * This source code is licensed under the Apache License, Version 2.0 license + * found in the LICENSE file in the root directory of this source tree. + */ + +#include "vector_block.h" + +#include + +namespace tig_gamma { + +VectorBlock::VectorBlock(int fd, int per_block_size, int length, + uint32_t header_size) + : Block(fd, per_block_size, length, header_size) { + vec_item_len_ = item_length_; + LOG(INFO) << "VectorBlock construction!"; +} + +void VectorBlock::InitSubclass() { + if(compressor_) { + vec_item_len_ = compressor_->GetCompressLen(); + LOG(INFO) << "Vector block use compress. vec_item_len_[" + << vec_item_len_ << "]"; + if (compressor_->GetCompressType() != CompressType::Zfp) { + LOG(ERROR) << "The compression method used by vec_block is not ZFP."; + } + } +} + +int VectorBlock::GetReadFunParameter(ReadFunParameter ¶meter) { +#ifdef WITH_ZFP + if (compressor_) { + int raw_len = compressor_->GetRawLen(); + parameter.offset = (parameter.offset / raw_len) * vec_item_len_; + parameter.len = (parameter.len / raw_len) * vec_item_len_; + } +#endif + parameter.offset += header_size_; + return 0; +} + +bool VectorBlock::ReadBlock(uint64_t key, + std::shared_ptr> &block, + ReadFunParameter *param) { +#ifdef WITH_ZFP + Compressor *compressor = (Compressor *)param->cmprs; + if (compressor) { + char *cmprs_data = new char[param->len]; + int cmprs_len = compressor->GetCompressLen(); + int batch_num = param->len / cmprs_len; + block = std::make_shared>(batch_num * compressor->GetRawLen()); + pread(param->fd, cmprs_data, param->len, param->offset); + if (batch_num == 1) { + compressor->Decompress((char *)cmprs_data, (char *)(block->data()), param->len); + } else { + compressor->DecompressBatch((char *)cmprs_data, (char *)(block->data()), + batch_num, param->len); + } + delete[] cmprs_data; + } else +#endif + { + block = std::make_shared>(param->len); + pread(param->fd, block->data(), param->len, param->offset); + } + return true; +} + +int VectorBlock::WriteContent(const uint8_t *data, int len, uint32_t offset, + disk_io::AsyncWriter *disk_io) { +#ifdef WITH_ZFP + const uint8_t *raw_val = data; + std::vector cmprs_val; + if (compressor_) { + int raw_len = len; + len = vec_item_len_; + offset = (offset / raw_len) * len; + cmprs_val.resize(len); + compressor_->Compress((char *)raw_val, (char *)cmprs_val.data(), + raw_len); + data = (const uint8_t *)cmprs_val.data(); + } +#endif + + disk_io->Set(header_size_, vec_item_len_); + struct disk_io::WriterStruct *write_struct = new struct disk_io::WriterStruct; + write_struct->fd = fd_; + write_struct->data = new 
uint8_t[len]; + memcpy(write_struct->data, data, len); + write_struct->start = header_size_ + offset; + write_struct->len = len; + disk_io->AsyncWrite(write_struct); + // disk_io->SyncWrite(write_struct); + return 0; +} + +int VectorBlock::ReadContent(uint8_t *value, uint32_t len, uint32_t offset) { + +#ifdef WITH_ZFP + if (compressor_) { + int raw_len = compressor_->GetRawLen(); + int batch_num = len / raw_len; + int cmprs_data_len = batch_num * vec_item_len_; + char *cmprs_data = new char[cmprs_data_len]; + offset = (offset / raw_len) * vec_item_len_; + pread(fd_, cmprs_data, cmprs_data_len, header_size_ + offset); + + if (batch_num == 1) { + compressor_->Decompress((char *)cmprs_data, (char *)value, len); + } else { + compressor_->DecompressBatch((char *)cmprs_data, (char *)value, batch_num, + len); + } + delete[] cmprs_data; + } else +#endif + { + pread(fd_, value, len, header_size_ + offset); + } + return 0; +} + +int VectorBlock::SubclassUpdate(const uint8_t *data, int len, uint32_t offset) { +#ifdef WITH_ZFP + if (compressor_) { + int raw_len = compressor_->GetRawLen(); + int batch_num = len / raw_len; + int cmprs_data_len = batch_num * vec_item_len_; + char *cmprs_data = new char[cmprs_data_len]; + offset = (offset / raw_len) * vec_item_len_; + + if (batch_num == 1) { + compressor_->Compress((char *)data, (char *)cmprs_data, len); + } else { + compressor_->CompressBatch((char *)data, (char *)cmprs_data, batch_num, + len); + } + pwrite(fd_, cmprs_data, cmprs_data_len, header_size_ + offset); + delete[] cmprs_data; + cmprs_data = nullptr; + } else +#endif + { + pwrite(fd_, data, len, header_size_ + offset); + } + return 0; +} + +} // namespace tig_gamma \ No newline at end of file diff --git a/storage/vector_block.h b/storage/vector_block.h new file mode 100644 index 0000000..371c337 --- /dev/null +++ b/storage/vector_block.h @@ -0,0 +1,46 @@ +/** + * Copyright 2019 The Gamma Authors. + * + * This source code is licensed under the Apache License, Version 2.0 license + * found in the LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include + +#include "block.h" +#include "lru_cache.h" + + + +typedef uint32_t str_offset_t; +typedef uint16_t str_len_t; + +namespace tig_gamma { + + +class VectorBlock : public Block { + public: + VectorBlock(int fd, int per_block_size, int length, uint32_t header_size); + + void InitSubclass() override; + + int GetReadFunParameter(ReadFunParameter ¶meter) override; + + static bool ReadBlock(uint64_t key, std::shared_ptr> &block, + ReadFunParameter *param); + + int WriteContent(const uint8_t *data, int len, uint32_t offset, + disk_io::AsyncWriter *disk_io) override; + + int ReadContent(uint8_t *value, uint32_t len, uint32_t offset) override; + + int SubclassUpdate(const uint8_t *data, int len, uint32_t offset) override; + private: + int vec_item_len_; +}; + +} // namespace tig_gamma diff --git a/table/field_range_index.cc b/table/field_range_index.cc index fcdf31c..447dbce 100644 --- a/table/field_range_index.cc +++ b/table/field_range_index.cc @@ -930,6 +930,9 @@ MultiFieldsRangeIndex::MultiFieldsRangeIndex(std::string &path, MultiFieldsRangeIndex::~MultiFieldsRangeIndex() { b_running_ = false; + while (field_operate_q_->size() > 0) { + std::this_thread::sleep_for(std::chrono::seconds(1)); + } for (size_t i = 0; i < fields_.size(); i++) { if (fields_[i]) { delete fields_[i]; @@ -971,8 +974,10 @@ void MultiFieldsRangeIndex::FieldOperateWorker() { bool ret = false; while (b_running_ || ret) { FieldOperate *field_op = nullptr; - ret = field_operate_q_->wait_dequeue_timed(field_op, 1000); + ret = field_operate_q_->try_pop(field_op); + if (not ret) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); continue; } @@ -999,12 +1004,7 @@ int MultiFieldsRangeIndex::Add(int docid, int field) { } FieldOperate *field_op = new FieldOperate(FieldOperate::ADD, docid, field); - bool ret = field_operate_q_->enqueue(field_op); - - if (not ret) { - LOG(ERROR) << "Add failed!"; - return -1; - } + field_operate_q_->push(field_op); return 0; } @@ -1017,12 +1017,7 @@ int MultiFieldsRangeIndex::Delete(int docid, int field) { FieldOperate *field_op = new FieldOperate(FieldOperate::DELETE, docid, field); table_->GetFieldRawValue(docid, field, field_op->value); - bool ret = field_operate_q_->enqueue(field_op); - - if (not ret) { - LOG(ERROR) << "Delete failed!"; - return -1; - } + field_operate_q_->push(field_op); return 0; } @@ -1059,11 +1054,11 @@ int MultiFieldsRangeIndex::Search(const std::vector &origin_filters, for (const auto &filter : origin_filters) { if (filter.field < 0) { - return PARAM_ERR; + return -1; } FieldRangeIndex *index = fields_[filter.field]; if (index == nullptr) { - return PARAM_ERR; + return -1; } if (not index->IsNumeric() && (filter.is_union == FilterOperator::And)) { // type is string and operator is "and", split this filter diff --git a/table/field_range_index.h b/table/field_range_index.h index 44fbb00..79536dc 100644 --- a/table/field_range_index.h +++ b/table/field_range_index.h @@ -11,6 +11,7 @@ #include #include #include +#include #include "concurrentqueue/blockingconcurrentqueue.h" #include "range_query_result.h" @@ -63,7 +64,7 @@ class FieldOperate { }; typedef moodycamel::BlockingConcurrentQueue ResourceQueue; -typedef moodycamel::BlockingConcurrentQueue FieldOperateQueue; +typedef tbb::concurrent_bounded_queue FieldOperateQueue; class FieldRangeIndex; class MultiFieldsRangeIndex { diff --git a/table/table.cc b/table/table.cc index 9ebf7e4..c69eae6 100644 --- a/table/table.cc +++ b/table/table.cc @@ -23,8 +23,6 @@ using 
std::vector; namespace tig_gamma { namespace table { -const static string kTableDumpedNum = "profile_dumped_num"; - Table::Table(const string &root_path, bool b_compress) { item_length_ = 0; field_num_ = 0; @@ -33,57 +31,25 @@ Table::Table(const string &root_path, bool b_compress) { root_path_ = root_path + "/table"; seg_num_ = 0; b_compress_ = b_compress; - compressed_num_ = 0; - - // TODO : there is a failure. - // if (!item_to_docid_.reserve(max_doc_size)) { - // LOG(ERROR) << "item_to_docid reserve failed, max_doc_size [" << - // max_doc_size - // << "]"; - // } table_created_ = false; - last_docid_ = 0; + last_docid_ = -1; table_params_ = nullptr; LOG(INFO) << "Table created success!"; } Table::~Table() { -#ifdef USE_BTREE - if (cache_mgr_) { - bt_mgrclose(cache_mgr_); - cache_mgr_ = nullptr; - } - if (main_mgr_) { - bt_mgrclose(main_mgr_); - main_mgr_ = nullptr; - } -#endif - - for (int i = 0; i < seg_num_; ++i) { - delete main_file_[i]; - } CHECK_DELETE(table_params_); LOG(INFO) << "Table deleted."; } int Table::Load(int &num) { - std::string file_name = - root_path_ + "/" + std::to_string(seg_num_) + ".profile"; - int doc_num = 0; - while (utils::file_exist(file_name)) { - main_file_[seg_num_] = new TableData(item_length_); - main_file_[seg_num_]->Load(seg_num_, root_path_); - doc_num += main_file_[seg_num_]->Size(); - ++seg_num_; - if (doc_num >= num) { - doc_num = num; - break; - } - file_name = root_path_ + "/" + std::to_string(seg_num_) + ".profile"; - } + int doc_num = storage_mgr_->Size(); + storage_mgr_->Truncate(num); + LOG(INFO) << "Load doc_num [" << doc_num << "] truncate to [" << num << "]"; + doc_num = num; - const string str_id = "_id"; + const std::string str_id = "_id"; const auto &iter = attr_idx_map_.find(str_id); if (iter == attr_idx_map_.end()) { LOG(ERROR) << "cannot find field [" << str_id << "]"; @@ -91,28 +57,28 @@ int Table::Load(int &num) { } int idx = iter->second; -#pragma omp parallel for - for (int i = 0; i < doc_num; ++i) { - if (id_type_ == 0) { + if (id_type_ == 0) { + for (int i = 0; i < doc_num; ++i) { std::string key; - DecompressStr decompress_str; - GetFieldString(i, idx, key, decompress_str); + GetFieldRawValue(i, idx, key); int64_t k = utils::StringToInt64(key); item_to_docid_.insert(k, i); - } else { + } + } else { + for (int i = 0; i < doc_num; ++i) { long key = -1; - GetField(i, idx, key); + std::string key_str; + GetFieldRawValue(i, idx, key_str); + memcpy(&key, key_str.c_str(), sizeof(key)); item_to_docid_.insert(key, i); } } LOG(INFO) << "Table load successed! 
doc num=" << doc_num; - last_docid_ = doc_num; + last_docid_ = doc_num - 1; return 0; } -int Table::Sync() { return 0; } - int Table::CreateTable(TableInfo &table, TableParams &table_params) { if (table_created_) { return -10; @@ -145,34 +111,27 @@ int Table::CreateTable(TableInfo &table, TableParams &table_params) { mkdir(root_path_.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); } -#ifdef USE_BTREE - uint mainleafxtra = 0; - uint maxleaves = 1000000; - uint poolsize = 500; - uint leafxtra = 0; - uint mainpool = 500; - uint mainbits = 16; - uint bits = 16; - - string cache_file = root_path_ + string("/cache_") + ".dis"; - string main_file = root_path_ + string("/main_") + ".dis"; - - remove(cache_file.c_str()); - remove(main_file.c_str()); - - cache_mgr_ = - bt_mgr(const_cast(cache_file.c_str()), bits, leafxtra, poolsize); - cache_mgr_->maxleaves = maxleaves; - main_mgr_ = bt_mgr(const_cast(main_file.c_str()), mainbits, - mainleafxtra, mainpool); - main_mgr_->maxleaves = maxleaves; -#endif - table_params_ = new TableParams("table"); table_created_ = true; LOG(INFO) << "Create table " << name_ << " success! item length=" << item_length_ << ", field num=" << (int)field_num_; + + StorageManagerOptions options; + options.segment_size = 102400; + options.fixed_value_bytes = item_length_; + storage_mgr_ = + new StorageManager(root_path_, BlockType::TableBlockType, options); + int cache_size = 512; // unit : M + int str_cache_size = 512; + int ret = storage_mgr_->Init(cache_size, str_cache_size); + if (ret) { + LOG(ERROR) << "init gamma db error, ret=" << ret; + return ret; + } + + LOG(INFO) << "init storageManager success! vector byte size=" + << options.fixed_value_bytes << ", path=" << root_path_; return 0; } @@ -187,46 +146,12 @@ int Table::FTypeSize(DataType fType) { } else if (fType == DataType::DOUBLE) { length = sizeof(double); } else if (fType == DataType::STRING) { - length = sizeof(str_offset_t) + sizeof(str_len_t); + // block_id, in_block_pos, str_len + length = sizeof(uint32_t) + sizeof(uint32_t) + sizeof(str_len_t); } return length; } -void Table::SetFieldValue(int docid, const std::string &field, int field_id, - const char *value, str_len_t len) { - size_t offset = idx_attr_offset_[field_id]; - DataType attr = attrs_[field_id]; - - if (attr != DataType::STRING) { - int type_size = FTypeSize(attr); - - int seg_pos; - size_t in_seg_pos; - int ret = GetSegPos(docid, 0, seg_pos, in_seg_pos, false); - if (ret != 0) { - return; - } - offset += in_seg_pos * item_length_; - TableData *seg_file = main_file_[seg_pos]; - seg_file->Write(value, offset, type_size); - } else { - size_t ofst = sizeof(str_offset_t); - int seg_pos; - size_t in_seg_pos; - int ret = GetSegPos(docid, 0, seg_pos, in_seg_pos, false); - if (ret != 0) { - return; - } - offset += in_seg_pos * item_length_; - TableData *seg_file = main_file_[seg_pos]; - str_offset_t str_offset = seg_file->StrOffset(); - seg_file->Write((char *)&str_offset, offset, sizeof(str_offset)); - seg_file->Write((char *)&len, offset + ofst, sizeof(len)); - - seg_file->WriteStr(value, sizeof(char) * len); - } -} - int Table::AddField(const string &name, DataType ftype, bool is_index) { if (attr_idx_map_.find(name) != attr_idx_map_.end()) { LOG(ERROR) << "Duplicate field " << name; @@ -237,6 +162,7 @@ int Table::AddField(const string &name, DataType ftype, bool is_index) { id_type_ = ftype == DataType::STRING ? 
0 : 1; } if (ftype == DataType::STRING) { + str_field_id_.insert(std::make_pair(field_num_, string_field_num_)); ++string_field_num_; } idx_attr_offset_.push_back(item_length_); @@ -251,16 +177,6 @@ int Table::AddField(const string &name, DataType ftype, bool is_index) { } int Table::GetDocIDByKey(std::string &key, int &docid) { -#ifdef USE_BTREE - BtDb *bt = bt_open(cache_mgr_, main_mgr_); - int ret = bt_findkey(bt, reinterpret_cast(&key), sizeof(key), - (unsigned char *)&docid, sizeof(int)); - bt_close(bt); - - if (ret >= 0) { - return 0; - } -#else if (id_type_ == 0) { int64_t k = utils::StringToInt64(key); if (item_to_docid_.find(k, docid)) { @@ -274,8 +190,6 @@ int Table::GetDocIDByKey(std::string &key, int &docid) { return 0; } } - -#endif return -1; } @@ -291,17 +205,6 @@ int Table::Add(const std::string &key, const std::vector &fields, return -3; } -#ifdef USE_BTREE - BtDb *bt = bt_open(cache_mgr_, main_mgr_); - - BTERR bterr = bt_insertkey( - bt->main, reinterpret_cast(&key.data()), sizeof(key), 0, - static_cast(&docid), sizeof(int), Unique); - if (bterr) { - LOG(ERROR) << "Error " << bt->mgr->err; - } - bt_close(bt); -#else if (id_type_ == 0) { int64_t k = utils::StringToInt64(key); item_to_docid_.insert(k, docid); @@ -311,24 +214,36 @@ int Table::Add(const std::string &key, const std::vector &fields, item_to_docid_.insert(key_long, docid); } -#endif + + uint8_t doc_value[item_length_]; for (size_t i = 0; i < fields.size(); ++i) { const auto &field_value = fields[i]; - const string &name = field_value.name; + const std::string &name = field_value.name; + size_t offset = idx_attr_offset_[i]; - SetFieldValue(docid, name, i, field_value.value.c_str(), - field_value.value.size()); - } + DataType attr = attrs_[i]; - TableData *seg_file = main_file_[docid / DOCNUM_PER_SEGMENT]; - seg_file->SetSize(seg_file->Size() + 1); - // if (PutToDB(docid)) { - // LOG(ERROR) << "Put to rocksdb error, docid [" << docid << "]"; - // return -2; - // } + if (attr != DataType::STRING) { + int type_size = FTypeSize(attr); + memcpy(doc_value + offset, field_value.value.c_str(), type_size); + } else { + size_t ofst = sizeof(str_offset_t); + str_len_t len = field_value.value.size(); + int str_field_id = str_field_id_[attr_idx_map_[name]]; + uint32_t block_id, in_block_pos; + str_offset_t str_offset = storage_mgr_->AddString( + field_value.value.c_str(), len, block_id, in_block_pos); + + memcpy(doc_value + offset, &block_id, sizeof(block_id)); + memcpy(doc_value + offset + sizeof(block_id), &in_block_pos, + sizeof(in_block_pos)); + memcpy(doc_value + offset + sizeof(block_id) + sizeof(in_block_pos), &len, + sizeof(len)); + } + } - Compress(); + storage_mgr_->Add((const uint8_t *)doc_value, item_length_); if (docid % 10000 == 0) { if (id_type_ == 0) { @@ -377,13 +292,34 @@ int Table::BatchAdd(int start_id, int batch_size, int docid, int id = docid + i; Doc &doc = doc_vec[start_id + i]; std::vector &fields = doc.TableFields(); - for (size_t j = 0; j < attr_idx_map_.size(); ++j) { + uint8_t doc_value[item_length_]; + + for (size_t j = 0; j < fields.size(); ++j) { const auto &field_value = fields[j]; const string &name = field_value.name; + size_t offset = idx_attr_offset_[j]; - SetFieldValue(id, name, j, field_value.value.c_str(), - field_value.value.size()); + DataType attr = attrs_[j]; + + if (attr != DataType::STRING) { + int type_size = FTypeSize(attr); + memcpy(doc_value + offset, field_value.value.c_str(), type_size); + } else { + size_t ofst = sizeof(str_offset_t); + str_len_t len = 
field_value.value.size(); + uint32_t block_id, in_block_pos; + str_offset_t str_offset = storage_mgr_->AddString( + field_value.value.c_str(), len, block_id, in_block_pos); + + memcpy(doc_value + offset, &block_id, sizeof(block_id)); + memcpy(doc_value + offset + sizeof(block_id), &in_block_pos, + sizeof(in_block_pos)); + memcpy(doc_value + offset + sizeof(block_id) + sizeof(in_block_pos), + &len, sizeof(len)); + } } + + storage_mgr_->Add((const uint8_t *)doc_value, item_length_); if (id % 10000 == 0) { std::string &key = doc_vec[i].Key(); if (id_type_ == 0) { @@ -394,16 +330,9 @@ int Table::BatchAdd(int start_id, int batch_size, int docid, LOG(INFO) << "Add item _id [" << key_long << "], num [" << id << "]"; } } - TableData *seg_file = main_file_[id / DOCNUM_PER_SEGMENT]; - seg_file->SetSize(seg_file->Size() + 1); } - // if (BatchPutToDB(docid, batch_size)) { - // LOG(ERROR) << "put to rocksdb error, docid=" << docid; - // return -2; - // } - - Compress(); + // Compress(); #ifdef PERFORMANCE_TESTING double end = utils::getmillisecs(); if (docid % 10000 == 0) { @@ -417,6 +346,13 @@ int Table::BatchAdd(int start_id, int batch_size, int docid, int Table::Update(const std::vector &fields, int docid) { if (fields.size() == 0) return 0; + const uint8_t *ori_doc_value; + storage_mgr_->Get(docid, ori_doc_value); + + uint8_t doc_value[item_length_]; + + memcpy(doc_value, ori_doc_value, item_length_); + for (size_t i = 0; i < fields.size(); ++i) { const struct Field &field_value = fields[i]; const string &name = field_value.name; @@ -427,51 +363,28 @@ int Table::Update(const std::vector &fields, int docid) { } int field_id = it->second; + int offset = idx_attr_offset_[field_id]; if (field_value.datatype == DataType::STRING) { int offset = idx_attr_offset_[field_id]; - int seg_pos; - size_t in_seg_pos; - int ret = GetSegPos(docid, 0, seg_pos, in_seg_pos, false); - if (ret != 0) { - return ret; - } - offset += in_seg_pos * item_length_; - TableData *seg_file = main_file_[seg_pos]; - char *base = seg_file->Base(); - - str_offset_t str_offset = 0; - memcpy(&str_offset, base + offset, sizeof(str_offset)); - str_len_t len; - memcpy(&len, base + offset + sizeof(str_offset), sizeof(len)); - - size_t value_len = field_value.value.size(); - if (len >= value_len) { - seg_file->Write((char *)&value_len, offset + sizeof(str_offset), - sizeof(len)); - seg_file->WriteStr(field_value.value.data(), str_offset, value_len); - } else { - len = value_len; - int ofst = sizeof(str_offset); - str_offset = seg_file->StrOffset(); - seg_file->Write((char *)&str_offset, offset, sizeof(str_offset)); - seg_file->Write((char *)&len, offset + ofst, sizeof(len)); - seg_file->WriteStr(field_value.value.data(), sizeof(char) * len); - } + str_len_t len = field_value.value.size(); + uint32_t block_id, in_block_pos; + str_offset_t res = storage_mgr_->UpdateString(docid, field_value.value.c_str(), + len, block_id, in_block_pos); + memcpy(doc_value + offset, &block_id, sizeof(block_id)); + memcpy(doc_value + offset + sizeof(block_id), &in_block_pos, + sizeof(in_block_pos)); + memcpy(doc_value + offset + sizeof(block_id) + sizeof(in_block_pos), + &len, sizeof(len)); } else { - SetFieldValue(docid, name, field_id, field_value.value.data(), - field_value.value.size()); + memcpy(doc_value + offset, field_value.value.data(), + field_value.value.size()); } } - // if (PutToDB(docid)) { - // LOG(ERROR) << "update to rocksdb error, docid=" << docid; - // return -2; - // } - - Compress(); - + storage_mgr_->Update(docid, doc_value, item_length_); + 
delete[] ori_doc_value; return 0; } @@ -488,225 +401,118 @@ int Table::Delete(std::string &key) { return 0; } -int Table::GetRawDoc(int docid, vector &raw_doc) { - int len = item_length_; - raw_doc.resize(len, 0); - int seg_pos; - size_t in_seg_pos; - int ret = GetSegPos(docid, 0, seg_pos, in_seg_pos); - if (ret != 0) { - return ret; - } - size_t offset = in_seg_pos * item_length_; - TableData *seg_file = main_file_[seg_pos]; - char *base = seg_file->Base(); - memcpy((void *)raw_doc.data(), base + offset, item_length_); - DecompressStr decompress_str; - - for (int i = 0; i < (int)idx_attr_offset_.size(); i++) { - if (attrs_[i] != DataType::STRING) continue; - - char *field = base + offset + idx_attr_offset_[i]; - str_len_t str_len = 0; - memcpy((void *)&str_len, field + sizeof(str_offset_t), sizeof(str_len)); - if (str_len == 0) continue; - - raw_doc.resize(len + str_len, 0); - str_offset_t str_offset = 0; - memcpy((void *)&str_offset, field, sizeof(str_offset)); - std::string str; - int ret = seg_file->GetStr(str_offset, str_len, str, decompress_str); - if (ret != 0) { - LOG(ERROR) << "Get str error [" << docid << "] len [" << (int)str_len - << "]"; - } - memcpy((void *)(raw_doc.data() + len), str.c_str(), str_len); - len += str_len; - } - return 0; -} - -int Table::GetSegPos(IN int32_t docid, IN int32_t field_id, OUT int &seg_pos, - OUT size_t &in_seg_pos, bool bRead) { - seg_pos = docid / DOCNUM_PER_SEGMENT; - if (seg_pos >= seg_num_) { - if (bRead) { - LOG(ERROR) << "Pos [" << seg_pos << "] out of bound [" << seg_num_ << "]"; - return -1; - } - int ret = Extend(); - if (ret != 0) { - LOG(ERROR) << "docid [" << docid << "], main_file [" << seg_pos - << "] is NULL"; - return -1; - } - } - in_seg_pos = docid % DOCNUM_PER_SEGMENT; - return 0; -} - -int Table::Extend() { - main_file_[seg_num_] = new TableData(item_length_); - main_file_[seg_num_]->Init(seg_num_, root_path_, string_field_num_); - ++seg_num_; - return 0; -} - -void Table::Compress() { - if (b_compress_) { - if (seg_num_ < 2) return; - - for (int i = compressed_num_; i < seg_num_ - 1; ++i) { - main_file_[i]->Compress(); - } - - compressed_num_ = seg_num_ - 1; - } -} - long Table::GetMemoryBytes() { long total_mem_bytes = 0; - for (int i = 0; i < seg_num_; ++i) { - total_mem_bytes += main_file_[i]->GetMemoryBytes(); - } + // for (int i = 0; i < seg_num_; ++i) { + // total_mem_bytes += main_file_[i]->GetMemoryBytes(); + // } return total_mem_bytes; } int Table::GetDocInfo(std::string &id, Doc &doc, + std::vector &fields, DecompressStr &decompress_str) { int doc_id = 0; int ret = GetDocIDByKey(id, doc_id); if (ret < 0) { return ret; } - return GetDocInfo(doc_id, doc, decompress_str); + return GetDocInfo(doc_id, doc, fields, decompress_str); } int Table::GetDocInfo(const int docid, Doc &doc, + std::vector &fields, DecompressStr &decompress_str) { if (docid > last_docid_) { LOG(ERROR) << "doc [" << docid << "] in front of [" << last_docid_ << "]"; return -1; } - int i = 0; + const uint8_t *doc_value; + storage_mgr_->Get(docid, doc_value); std::vector &table_fields = doc.TableFields(); - table_fields.resize(attr_type_map_.size()); - - for (const auto &it : attr_type_map_) { - const string &attr = it.first; - GetFieldInfo(docid, attr, table_fields[i], decompress_str); - ++i; - } - return 0; -} - -void Table::GetFieldInfo(const int docid, const string &field_name, - struct Field &field, DecompressStr &decompress_str) { - const auto &it = attr_type_map_.find(field_name); - if (it == attr_type_map_.end()) { - LOG(ERROR) << "Cannot find 
field [" << field_name << "]"; - return; - } - - DataType type = it->second; - std::string source; - field.name = field_name; - field.source = source; - field.datatype = type; - - if (type == DataType::STRING) { - GetFieldString(docid, field_name, field.value, decompress_str); + if (fields.size() == 0) { + int i = 0; + table_fields.resize(attr_type_map_.size()); + + for (const auto &it : attr_idx_map_) { + DataType type = attr_type_map_[it.first]; + std::string source; + table_fields[i].name = it.first; + table_fields[i].source = source; + table_fields[i].datatype = type; + GetFieldRawValue(docid, it.second, table_fields[i].value, doc_value); + ++i; + } } else { - int value_len = FTypeSize(type); - - std::string str_value; - if (type == DataType::INT) { - int value = 0; - GetField(docid, field_name, value); - str_value = std::string(reinterpret_cast(&value), value_len); - } else if (type == DataType::LONG) { - long value = 0; - GetField(docid, field_name, value); - str_value = std::string(reinterpret_cast(&value), value_len); - } else if (type == DataType::FLOAT) { - float value = 0; - GetField(docid, field_name, value); - str_value = std::string(reinterpret_cast(&value), value_len); - } else if (type == DataType::DOUBLE) { - double value = 0; - GetField(docid, field_name, value); - str_value = std::string(reinterpret_cast(&value), value_len); + table_fields.resize(fields.size()); + int i = 0; + for (std::string &f : fields) { + const auto &iter = attr_idx_map_.find(f); + if (iter == attr_idx_map_.end()) { + LOG(ERROR) << "Cannot find field [" << f << "]"; + } + int field_idx = iter->second; + DataType type = attr_type_map_[f]; + std::string source; + table_fields[i].name = f; + table_fields[i].source = source; + table_fields[i].datatype = type; + GetFieldRawValue(docid, field_idx, table_fields[i].value, doc_value); + ++i; } - field.value = std::move(str_value); } + delete[] doc_value; + return 0; } -int Table::GetFieldString(int docid, const std::string &field, - std::string &value, DecompressStr &decompress_str) { - const auto &iter = attr_idx_map_.find(field); +int Table::GetFieldRawValue(int docid, const std::string &field_name, + std::string &value, const uint8_t *doc_v) { + const auto iter = attr_idx_map_.find(field_name); if (iter == attr_idx_map_.end()) { - LOG(ERROR) << "docid " << docid << " field " << field; + LOG(ERROR) << "Cannot find field [" << field_name << "]"; return -1; } - int idx = iter->second; - return GetFieldString(docid, idx, value, decompress_str); + GetFieldRawValue(docid, iter->second, value, doc_v); } -int Table::GetFieldString(int docid, int field_id, std::string &value, - DecompressStr &decompress_str) { - size_t offset = idx_attr_offset_[field_id]; - str_offset_t str_offset = 0; +int Table::GetFieldRawValue(int docid, int field_id, std::string &value, + const uint8_t *doc_v) { + if ((docid < 0) or (field_id < 0 || field_id >= field_num_)) return -1; - int seg_pos; - size_t in_seg_pos; - int ret = GetSegPos(docid, 0, seg_pos, in_seg_pos); - if (ret != 0) { - return ret; - } - offset += in_seg_pos * item_length_; - TableData *seg_file = main_file_[seg_pos]; - if (seg_pos == decompress_str.SegID()) { - decompress_str.SetHit(true); - } else { - decompress_str.SetHit(false); + const uint8_t *doc_value = doc_v; + bool free = false; + if (doc_value == nullptr) { + free = true; + storage_mgr_->Get(docid, doc_value); } - decompress_str.SetSegID(seg_pos); - char *base = seg_file->Base(); + DataType data_type = attrs_[field_id]; + size_t offset = 
idx_attr_offset_[field_id]; - memcpy(&str_offset, base + offset, sizeof(str_offset)); + if (data_type == DataType::STRING) { + uint32_t block_id = 0; + memcpy(&block_id, doc_value + offset, sizeof(block_id)); - str_len_t len; - memcpy(&len, base + offset + sizeof(str_offset), sizeof(len)); - ret = seg_file->GetStr(str_offset, len, value, decompress_str); - if (ret != 0) { - decompress_str.SetHit(false); - } - return ret; -} + uint32_t in_block_pos = 0; + memcpy(&in_block_pos, doc_value + offset + sizeof(block_id), + sizeof(in_block_pos)); -int Table::GetFieldRawValue(int docid, int field_id, std::string &value) { - if ((docid < 0) or (field_id < 0 || field_id >= field_num_)) return -1; - - DataType data_type = attrs_[field_id]; - if (data_type != DataType::STRING) { - size_t offset = idx_attr_offset_[field_id]; - int data_len = FTypeSize(data_type); - int seg_pos; - size_t in_seg_pos; - int ret = GetSegPos(docid, 0, seg_pos, in_seg_pos); - if (ret != 0) { - return ret; - } - offset += in_seg_pos * item_length_; - TableData *seg_file = main_file_[seg_pos]; - char *base = seg_file->Base(); - value = std::string(base + offset, data_len); + str_len_t len; + memcpy(&len, doc_value + offset + sizeof(block_id) + sizeof(in_block_pos), + sizeof(len)); + std::string str; + storage_mgr_->GetString(docid, str, block_id, in_block_pos, len); + value = std::move(str); } else { - DecompressStr decompress_str; - GetFieldString(docid, field_id, value, decompress_str); + int value_len = FTypeSize(data_type); + value = std::string((const char *)(doc_value + offset), value_len); + } + + if (free) { + delete[] doc_value; } + return 0; } @@ -739,33 +545,13 @@ int Table::GetAttrIdx(const std::string &field) const { return (iter != attr_idx_map_.end()) ? iter->second : -1; } -int Table::AddRawDoc(int docid, const char *raw_doc, int doc_size) { - int seg_pos; - size_t in_seg_pos; - int ret = GetSegPos(docid, 0, seg_pos, in_seg_pos); - if (ret != 0) { - return ret; - } - size_t offset = in_seg_pos * item_length_; - TableData *seg_file = main_file_[seg_pos]; - char *base = seg_file->Base(); - uint64_t str_offset = seg_file->StrOffset(); - - memcpy((void *)(base + offset), raw_doc, item_length_); - raw_doc += item_length_; - seg_file->WriteStr(raw_doc, doc_size - item_length_); - - for (size_t field_id = 0; field_id < idx_attr_offset_.size(); ++field_id) { - if (attrs_[field_id] != DataType::STRING) continue; +bool Table::AlterCacheSize(uint32_t cache_size, + uint32_t str_cache_size) { + return storage_mgr_->AlterCacheSize(cache_size, str_cache_size); +} - int field_offset = idx_attr_offset_[field_id]; - char *field = base + offset + field_offset; // TODO base is read only - memcpy((void *)field, (void *)&str_offset, sizeof(str_offset)); - str_len_t field_len = 0; - memcpy((void *)&field_len, (field + sizeof(str_offset)), sizeof(field_len)); - str_offset += field_len; - } - return 0; +void Table::GetCacheSize(uint32_t &cache_size, uint32_t &str_cache_size) { + storage_mgr_->GetCacheSize(cache_size, str_cache_size); } } // namespace table diff --git a/table/table.h b/table/table.h index 4f46d20..8cd42c1 100755 --- a/table/table.h +++ b/table/table.h @@ -17,13 +17,9 @@ #include "api_data/gamma_table.h" #include "io_common.h" #include "log.h" -#include "table_data.h" +#include "storage_manager.h" #include "table_define.h" -#ifdef USE_BTREE -#include "threadskv10h.h" -#endif - using namespace tig_gamma::table; namespace tig_gamma { @@ -93,47 +89,16 @@ class Table { long GetMemoryBytes(); - int GetDocInfo(std::string &id, 
Doc &doc, DecompressStr &decompress_str); - int GetDocInfo(const int docid, Doc &doc, DecompressStr &decompress_str); - - void GetFieldInfo(const int docid, const std::string &field_name, - struct Field &field, DecompressStr &decompress_str); - - template - bool GetField(const int docid, const int field_id, T &value) { - if ((docid < 0) or (field_id < 0 || field_id >= field_num_)) return false; - - size_t offset = idx_attr_offset_[field_id]; - - int seg_pos; - size_t in_seg_pos; - int ret = GetSegPos(docid, field_id, seg_pos, in_seg_pos); - if (ret != 0) { - return false; - } - offset += in_seg_pos * item_length_; - TableData *seg_file = main_file_[seg_pos]; - char *base = seg_file->Base(); - memcpy(&value, base + offset, sizeof(T)); - return true; - } + int GetDocInfo(std::string &id, Doc &doc, std::vector &fields, + DecompressStr &decompress_str); + int GetDocInfo(const int docid, Doc &doc, std::vector &fields, + DecompressStr &decompress_str); - template - void GetField(int docid, const std::string &field, T &value) { - const auto &iter = attr_idx_map_.find(field); - if (iter == attr_idx_map_.end()) { - return; - } - GetField(docid, iter->second, value); - } + int GetFieldRawValue(int docid, const std::string &field_name, std::string &value, + const uint8_t *doc_v = nullptr); - int GetFieldString(int docid, const std::string &field, std::string &value, - DecompressStr &decompress_str); - - int GetFieldString(int docid, int field_id, std::string &value, - DecompressStr &decompress_str); - - int GetFieldRawValue(int docid, int field_id, std::string &value); + int GetFieldRawValue(int docid, int field_id, std::string &value, + const uint8_t *doc_v = nullptr); int GetFieldType(const std::string &field, DataType &type); @@ -147,7 +112,7 @@ class Table { int Load(int &doc_num); - int Sync(); + // int Sync(); int FieldsNum() { return attrs_.size(); } @@ -155,66 +120,44 @@ class Table { DumpConfig *GetDumpConfig() { return table_params_; } - int GetRawDoc(int docid, std::vector &raw_doc); - bool IsCompress() { return b_compress_; } + bool AlterCacheSize(uint32_t cache_size, uint32_t str_cache_size); + + void GetCacheSize(uint32_t &cache_size, uint32_t &str_cache_size); + std::string root_path_; int last_docid_; private: int FTypeSize(DataType fType); - void SetFieldValue(int docid, const std::string &field, int field_id, - const char *value, str_len_t len); - int AddField(const std::string &name, DataType ftype, bool is_index); - // void ToRowKey(int id, std::string &key) const; - - int AddRawDoc(int docid, const char *raw_doc, int doc_size); - - int GetSegPos(IN int32_t docid, IN int32_t field_id, OUT int &seg_pos, - OUT size_t &in_seg_pos, bool bRead = true); - - // int PutToDB(int docid); - - int BatchPutToDB(int docid, int batch_size); - - int Extend(); - - void Compress(); - - void BufferQueueWorker(); - std::string name_; // table name int item_length_; // every doc item length uint8_t field_num_; // field number uint8_t string_field_num_; int key_idx_; // key postion - std::map idx_attr_map_; - std::map attr_idx_map_; - std::map attr_type_map_; - std::map attr_is_index_map_; + std::map idx_attr_map_; // + std::map attr_idx_map_; // + std::map attr_type_map_; // + std::map attr_is_index_map_; // std::vector idx_attr_offset_; std::vector attrs_; + std::map str_field_id_; // uint8_t id_type_; // 0 string, 1 long, default 1 bool b_compress_; cuckoohash_map item_to_docid_; - TableData *main_file_[MAX_SEGMENT_NUM]; int seg_num_; // cur segment num - int compressed_num_; bool table_created_; 
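For reference, the fixed-length row layout that Table::Add and Table::Update write for STRING fields in this patch is (block_id, in_block_pos, len), with the string bytes themselves stored by StorageManager::AddString in the segment's string file. A minimal, self-contained sketch of that field encoding follows; StrRef, EncodeStrRef and DecodeStrRef are illustrative helper names and are not part of this patch, and the offsets simply mirror the memcpy sequence used in table.cc.

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    // str_len_t matches the typedef introduced in this patch (uint16_t).
    using str_len_t = uint16_t;

    // Hypothetical helper struct: one STRING field reference inside a row.
    struct StrRef {
      uint32_t block_id;
      uint32_t in_block_pos;
      str_len_t len;
    };

    // Write (block_id, in_block_pos, len) at the field's offset in the row,
    // in the same order Table::Add copies them.
    void EncodeStrRef(uint8_t *row, size_t offset, const StrRef &ref) {
      std::memcpy(row + offset, &ref.block_id, sizeof(ref.block_id));
      std::memcpy(row + offset + sizeof(ref.block_id), &ref.in_block_pos,
                  sizeof(ref.in_block_pos));
      std::memcpy(row + offset + sizeof(ref.block_id) + sizeof(ref.in_block_pos),
                  &ref.len, sizeof(ref.len));
    }

    // Read the reference back, as Table::GetFieldRawValue does before calling
    // StorageManager::GetString.
    StrRef DecodeStrRef(const uint8_t *row, size_t offset) {
      StrRef ref;
      std::memcpy(&ref.block_id, row + offset, sizeof(ref.block_id));
      std::memcpy(&ref.in_block_pos, row + offset + sizeof(ref.block_id),
                  sizeof(ref.in_block_pos));
      std::memcpy(&ref.len,
                  row + offset + sizeof(ref.block_id) + sizeof(ref.in_block_pos),
                  sizeof(ref.len));
      return ref;
    }

    int main() {
      std::vector<uint8_t> row(64, 0);            // stand-in for one table row
      EncodeStrRef(row.data(), 8, {3, 512, 11});  // field at offset 8
      StrRef back = DecodeStrRef(row.data(), 8);
      assert(back.block_id == 3 && back.in_block_pos == 512 && back.len == 11);
      return 0;
    }

This is why FTypeSize(DataType::STRING) returns sizeof(uint32_t) + sizeof(uint32_t) + sizeof(str_len_t) in the new table.cc: the row stores only the reference, never the string payload.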
-#ifdef USE_BTREE - BtMgr *main_mgr_; - BtMgr *cache_mgr_; -#endif TableParams *table_params_; + StorageManager *storage_mgr_; }; } // namespace table diff --git a/table/table_data.cc b/table/table_data.cc deleted file mode 100644 index 4532c98..0000000 --- a/table/table_data.cc +++ /dev/null @@ -1,435 +0,0 @@ -/** - * Copyright 2019 The Gamma Authors. - * - * This source code is licensed under the Apache License, Version 2.0 license - * found in the LICENSE file in the root directory of this source tree. - */ - -#include "table_data.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "error_code.h" -#include "log.h" -#include "thread_util.h" -#include "utils.h" - -namespace tig_gamma { -namespace table { - -namespace { - -inline size_t CapacityOff() { return sizeof(uint8_t); } - -inline size_t SizeOff() { - uint32_t capacity; - return CapacityOff() + sizeof(capacity); -} - -inline size_t StrCapacityOff() { - uint32_t size; - return SizeOff() + sizeof(size); -} - -inline size_t StrSizeOff() { - uint64_t str_capacity; - return StrCapacityOff() + sizeof(str_capacity); -} - -inline size_t StrCompressedOff() { - str_offset_t str_size; - return StrSizeOff() + sizeof(str_size); -} - -inline size_t BCompressedOff() { - str_offset_t str_compressed_size; - return StrCompressedOff() + sizeof(str_compressed_size); -} - -} // namespace - -TableData::TableData(int item_length) { - base_fd_ = -1; - base_str_fd_ = -1; - base_ = nullptr; - base_str_ = nullptr; - item_length_ = item_length; - capacity_ = 0; - version_ = 0; - seg_header_backup_ = 20; - uint32_t capacity; - uint32_t size; - uint64_t str_capacity; - str_offset_t str_size; - str_offset_t str_compressed_size; - uint8_t b_compressed; - mode_ = TABLE_LOAD_MODE::MODE_MEMORY_DISK; - seg_header_size_ = sizeof(version_) + sizeof(capacity) + sizeof(size) + - sizeof(str_capacity) + sizeof(str_size) + - sizeof(str_compressed_size) + sizeof(b_compressed) + - seg_header_backup_; -} - -TableData::~TableData() { - if (base_fd_ != -1) { - close(base_fd_); - base_fd_ = -1; - } - - if (base_str_fd_ != -1) { - close(base_str_fd_); - base_str_fd_ = -1; - } - - if (base_) { - delete[] base_; - base_ = nullptr; - } - if (base_str_) { - delete[] base_str_; - base_str_ = nullptr; - } - - pthread_rwlock_destroy(&shared_mutex_); -} - -uint8_t TableData::Version() { - uint8_t version = 0; - memcpy(&version, base_, sizeof(version)); - return version; -} - -void TableData::SetVersion(uint8_t version) { - memcpy(base_, &version, sizeof(version)); - pwrite(base_fd_, &version, sizeof(version), 0); -} - -uint32_t TableData::Size() { - uint32_t capacity; - uint32_t size; - memcpy(&size, base_ + sizeof(version_) + sizeof(capacity), sizeof(size)); - return size; -} - -void TableData::SetSize(uint32_t size) { - uint32_t capacity; - memcpy(base_ + sizeof(version_) + sizeof(capacity), &size, sizeof(size)); - pwrite(base_fd_, &size, sizeof(size), sizeof(version_) + sizeof(capacity)); -} - -uint64_t TableData::StrCapacity() { - uint64_t str_capacity; - memcpy(&str_capacity, base_ + StrCapacityOff(), sizeof(str_capacity)); - return str_capacity; -} - -void TableData::SetStrCapacity(uint64_t str_capacity) { - memcpy(base_ + StrCapacityOff(), &str_capacity, sizeof(str_capacity)); - pwrite(base_fd_, &str_capacity, sizeof(str_capacity), StrCapacityOff()); -} - -str_offset_t TableData::StrSize() { - str_offset_t str_size; - memcpy(&str_size, base_ + StrSizeOff(), sizeof(str_size)); - return str_size; -} - -void 
TableData::SetStrSize(str_offset_t str_size) { - memcpy(base_ + StrSizeOff(), &str_size, sizeof(str_size)); - pwrite(base_fd_, &str_size, sizeof(str_size), StrSizeOff()); -} - -uint8_t TableData::BCompressed() { - uint8_t b_compressed; - memcpy(&b_compressed, base_ + BCompressedOff(), sizeof(b_compressed)); - return b_compressed; -} - -void TableData::SetCompressed(uint8_t compressed) { - memcpy(base_ + BCompressedOff(), &compressed, sizeof(compressed)); - pwrite(base_fd_, &compressed, sizeof(compressed), BCompressedOff()); -} - -str_offset_t TableData::StrCompressedSize() { - str_offset_t str_compressed_size; - memcpy(&str_compressed_size, base_ + StrCompressedOff(), - sizeof(str_compressed_size)); - return str_compressed_size; -} - -void TableData::SetStrCompressedSize(str_offset_t str_compressed_size) { - memcpy(base_ + StrCompressedOff(), &str_compressed_size, - sizeof(str_compressed_size)); - pwrite(base_fd_, &str_compressed_size, sizeof(str_compressed_size), - StrCompressedOff()); -} - -long TableData::GetMemoryBytes() { - return StrCapacity() + seg_header_size_ + item_length_ * DOCNUM_PER_SEGMENT; -} - -int TableData::Init(int id, const std::string &path, uint8_t string_field_num) { - base_ = new ( - std::nothrow) char[seg_header_size_ + item_length_ * DOCNUM_PER_SEGMENT]; - memset(base_, 0, seg_header_size_ + item_length_ * DOCNUM_PER_SEGMENT); - if (base_ == nullptr) { - LOG(ERROR) << "Cannot init table data, not enough memory!"; - return -1; - } - uint64_t str_capacity = DOCNUM_PER_SEGMENT * 32 * string_field_num + 1; - base_str_ = new (std::nothrow) char[str_capacity]; - if (base_str_ == nullptr) return -1; - - id_ = id; - file_name_ = path + "/" + std::to_string(id_) + ".profile"; - base_fd_ = open(file_name_.c_str(), O_RDWR | O_CREAT, 00666); - if (-1 == base_fd_) { - LOG(ERROR) << "open vector file error, path=" << file_name_; - return -1; - } - - int ret = Truncate(file_name_, - seg_header_size_ + item_length_ * DOCNUM_PER_SEGMENT); - if (ret != 0) { - return -1; - } - SetStrCapacity(str_capacity); - - str_file_name_ = path + "/" + std::to_string(id_) + ".str.profile"; - base_str_fd_ = open(str_file_name_.c_str(), O_RDWR | O_CREAT, 00666); - if (-1 == base_str_fd_) { - LOG(ERROR) << "open vector file error, path=" << str_file_name_; - return -1; - } - - ret = Truncate(str_file_name_, StrCapacity()); - if (ret != 0) { - return -1; - } - - ret = pthread_rwlock_init(&shared_mutex_, NULL); - if (ret != 0) { - LOG(ERROR) << "Mutex init failed"; - } - - SetVersion(version_); - return ret; -} - -int TableData::Truncate(std::string &path, off_t length) { - if (truncate(path.c_str(), length)) { - LOG(ERROR) << "truncate feature file=" << path << " to " << length - << ", error:" << strerror(errno); - return -1; - } - return 0; -} - -char *TableData::Base() { - char *base = base_ + seg_header_size_; - return base; -} - -int TableData::Write(const char *value, uint64_t offset, int len) { - WriteThreadLock write_lock(shared_mutex_); - memcpy(base_ + seg_header_size_ + offset, value, len); - pwrite(base_fd_, value, len, seg_header_size_ + offset); - return 0; -} - -int TableData::GetStr(IN str_offset_t offset, IN str_len_t len, - OUT std::string &str, DecompressStr &decompress_str) { - if (offset > StrSize()) { - LOG(ERROR) << "offset [" << offset << "] out of range [" << StrSize() - << "]"; - return -1; - } - ReadThreadLock read_lock(shared_mutex_); - char *base_str = base_str_; - str_offset_t compressed_size = StrCompressedSize(); - if (BCompressed() == 0) { - str = std::string(base_str 
+ offset, len); - } else { - if (decompress_str.Hit()) { - str = std::string(decompress_str.Str().c_str() + offset, len); - } else { - auto de_size = ZSTD_getDecompressedSize(base_str, compressed_size); - char *de_char = new char[de_size]; - size_t size = - ZSTD_decompress(de_char, de_size, base_str, compressed_size); - size_t ret = ZSTD_isError(size); - if (ret != 0) { - LOG(ERROR) << "ZSTD_decompress error"; - delete[] de_char; - return -1; - } - str = std::string(de_char + offset, len); - decompress_str.SetStr(std::string(de_char, size)); - delete[] de_char; - } - } - return 0; -} - -int TableData::WriteStr(IN const char *str, IN str_len_t len) { - WriteThreadLock write_lock(shared_mutex_); - auto str_size = StrSize(); - if (BCompressed() == 0) { - uint64_t str_capacity = StrCapacity(); - if (str_size + len >= str_capacity) { - char *new_base_str = new (std::nothrow) char[str_capacity << 1]; - memcpy(new_base_str, base_str_, str_capacity); - char *old = base_str_; - base_str_ = new_base_str; - delete[] old; - - int ret = Truncate(str_file_name_, str_capacity << 1); - if (ret != 0) { - return -1; - } - SetStrCapacity(str_capacity << 1); - } - - memcpy(base_str_ + str_size, str, len); - pwrite(base_str_fd_, str, len, str_size); - - SetStrSize(str_size + len); - } else { - str_offset_t compressed_size = StrCompressedSize(); - auto de_size = ZSTD_getDecompressedSize(base_str_, compressed_size); - char *de_char = new char[de_size + len]; - size_t size = ZSTD_decompress(de_char, de_size, base_str_, compressed_size); - size_t ret = ZSTD_isError(size); - if (ret != 0) { - LOG(ERROR) << "ZSTD_decompress error"; - delete[] de_char; - return -1; - } - memcpy(de_char + de_size, str, len); - SetStrSize(str_size + len); - SetStrCapacity(str_size + len); - Compress(de_char); - delete[] de_char; - } - return 0; -} - -int TableData::WriteStr(IN const char *str, IN str_offset_t offset, - IN str_len_t len) { - WriteThreadLock write_lock(shared_mutex_); - if (BCompressed() == 0) { - memcpy(base_str_ + offset, str, len); - pwrite(base_str_fd_, str, len, offset); - } else { - str_offset_t compressed_size = StrCompressedSize(); - auto de_size = ZSTD_getDecompressedSize(base_str_, compressed_size); - char *de_char = new char[de_size]; - size_t size = ZSTD_decompress(de_char, de_size, base_str_, compressed_size); - size_t ret = ZSTD_isError(size); - if (ret != 0) { - LOG(ERROR) << "ZSTD_decompress error"; - delete[] de_char; - return -1; - } - memcpy(de_char + offset, str, len); - Compress(de_char); - delete[] de_char; - } - return 0; -} - -void TableData::Compress(IN char *str) { - auto str_size = StrSize(); - size_t dstCapacity = ZSTD_compressBound(str_size); - char *compress_str = new char[dstCapacity]; - size_t size = ZSTD_compress(compress_str, dstCapacity, str, str_size, 1); - size_t ret = ZSTD_isError(size); - if (ret != 0) { - LOG(ERROR) << "ZSTD_compress error"; - delete[] compress_str; - return; - } - str_offset_t str_compressed_size = size; - char *new_str = new char[str_compressed_size]; - memcpy(new_str, compress_str, str_compressed_size); - char *old = base_str_; - base_str_ = new_str; - delete[] old; - - pwrite(base_str_fd_, compress_str, str_compressed_size, 0); - SetStrCompressedSize(str_compressed_size); - delete[] compress_str; -} - -int TableData::Compress() { - WriteThreadLock write_lock(shared_mutex_); - Compress(base_str_); - SetStrCapacity(StrSize()); - SetCompressed(1); - return 0; -} - -int TableData::Load(int id, const std::string &path) { - id_ = id; - file_name_ = path + "/" + 
std::to_string(id_) + ".profile"; - base_fd_ = open(file_name_.c_str(), O_RDWR, 00666); - if (-1 == base_fd_) { - LOG(ERROR) << "open vector file error, path=" << file_name_; - return -1; - } - - str_file_name_ = path + "/" + std::to_string(id_) + ".str.profile"; - base_str_fd_ = open(str_file_name_.c_str(), O_RDWR, 00666); - if (-1 == base_str_fd_) { - LOG(WARNING) << "No string file" << str_file_name_; - } - - int ret = pthread_rwlock_init(&shared_mutex_, NULL); - if (ret != 0) { - LOG(ERROR) << "Mutex init failed"; - } - - size_t base_size = seg_header_size_ + item_length_ * DOCNUM_PER_SEGMENT; - base_ = new (std::nothrow) char[base_size]; - memset(base_, 0, base_size); - if (base_ == nullptr) { - LOG(ERROR) << "Cannot init table data, not enough memory!"; - return -1; - } - - FILE *p_file = fopen(file_name_.c_str(), "rb"); - if (p_file == nullptr) { - LOG(ERROR) << "open vector file error, path=" << file_name_; - return IO_ERR; - } - fread(base_, base_size, 1, p_file); - fclose(p_file); - - uint64_t str_capacity = StrCapacity(); - base_str_ = new (std::nothrow) char[str_capacity]; - if (base_str_ == nullptr) return -1; - - p_file = fopen(str_file_name_.c_str(), "rb"); - if (p_file == nullptr) { - LOG(ERROR) << "open vector file error, path=" << str_file_name_; - return IO_ERR; - } - fread(base_str_, str_capacity, 1, p_file); - fclose(p_file); - - return ret; -} - -} // namespace table -} // namespace tig_gamma diff --git a/table/table_data.h b/table/table_data.h deleted file mode 100644 index f351eb6..0000000 --- a/table/table_data.h +++ /dev/null @@ -1,110 +0,0 @@ -/** - * Copyright 2019 The Gamma Authors. - * - * This source code is licensed under the Apache License, Version 2.0 license - * found in the LICENSE file in the root directory of this source tree. 
- */ - -#pragma once - -#include -#include - -#include -#include - -#include "table_define.h" - -namespace tig_gamma { -namespace table { - -enum class TABLE_LOAD_MODE : std::uint16_t { - MODE_FULL_MEMORY = 1, - MODE_DISK = 2, - MODE_MEMORY_DISK = 3 -}; - -class TableData { - public: - TableData(int item_length); - ~TableData(); - - int Init(int id, const std::string &path, uint8_t string_field_num); - - char *Base(); - - int Write(const char *value, uint64_t offset, int len); - - int GetStr(IN str_offset_t offset, IN str_len_t len, OUT std::string &str, - DecompressStr &decompress_str); - - int WriteStr(IN const char *str, IN str_len_t len); - - int WriteStr(IN const char *str, IN str_offset_t offset, IN str_len_t len); - - str_offset_t StrOffset() { return StrSize(); } - - int Compress(); - - int Load(int id, const std::string &path); - - uint8_t Version(); - - void SetVersion(uint8_t version); - - uint32_t Size(); - - void SetSize(uint32_t size); - - uint64_t StrCapacity(); - - void SetStrCapacity(uint64_t str_capacity); - - str_offset_t StrSize(); - - void SetStrSize(str_offset_t str_size); - - uint8_t BCompressed(); - - void SetCompressed(uint8_t compressed); - - str_offset_t StrCompressedSize(); - - void SetStrCompressedSize(str_offset_t str_compressed_size); - - long GetMemoryBytes(); - - private: - - int Truncate(std::string &path, off_t length); - - void Compress(IN char *str); - - int Close(); - - protected: - - uint32_t capacity_; - - uint64_t seg_header_size_; - uint32_t seg_header_backup_; - uint8_t version_; - - uint32_t item_length_; - - char *base_; - char *base_str_; - - int base_fd_; - int base_str_fd_; - - std::string file_name_; - std::string str_file_name_; - int id_; - - TABLE_LOAD_MODE mode_; - pthread_rwlock_t shared_mutex_; -}; - -} // namespace table -} \ No newline at end of file diff --git a/table/table_define.h b/table/table_define.h index 8089142..9ffdda8 100644 --- a/table/table_define.h +++ b/table/table_define.h @@ -18,20 +18,9 @@ namespace table { #define OUT #endif -#define TABLE_MAIN "table.main" -#define TABLE_EXT "table.ext" - const static int DOCNUM_PER_SEGMENT = 1 << 20; // 1048576 const static int MAX_SEGMENT_NUM = 102400; // max segment num -#ifdef TABLE_STR_INT64 -typedef uint64_t str_offset_t; -typedef uint16_t str_len_t; -#else -typedef uint32_t str_offset_t; -typedef uint8_t str_len_t; -#endif - class DecompressStr { public: DecompressStr() { diff --git a/tests/test.h b/tests/test.h index d9255c4..237e3dc 100644 --- a/tests/test.h +++ b/tests/test.h @@ -65,7 +65,7 @@ struct Options { log_dir = "log"; model_id = "model"; retrieval_type = "IVFPQ"; - store_type = "MemoryOnly"; + store_type = "MMap"; // store_type = "RocksDB"; profiles.resize(max_doc_size * fields_vec.size()); engine = nullptr; @@ -173,6 +173,7 @@ float *fvecs_read(const char *fname, size_t *d_out, size_t *n_out) { } int d; fread(&d, 1, sizeof(int), f); + LOG(INFO) << "assert" << d; assert((d > 0 && d < 1000000) || !"unreasonable dimension"); fseek(f, 0, SEEK_SET); struct stat st; diff --git a/tests/test_dump.cc b/tests/test_dump.cc new file mode 100644 index 0000000..58da27a --- /dev/null +++ b/tests/test_dump.cc @@ -0,0 +1,773 @@ +/** + * Copyright 2019 The Gamma Authors. + * + * This source code is licensed under the Apache License, Version 2.0 license + * found in the LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include "c_api/api_data/gamma_engine_status.h" +#include "c_api/api_data/gamma_request.h" +#include "c_api/api_data/gamma_response.h" +#include "c_api/api_data/gamma_table.h" +#include "gamma_api.h" +#include "test.h" + +namespace Test { + +using namespace std; +using namespace tig_gamma; + +struct TestDumpOptions { + TestDumpOptions() { + nprobe = 10; + doc_id = 0; + d = 512; + max_doc_size = 10000 * 10; + search_num = 10000 * 10; + fields_vec = {"_id", "img_url", "cid1", "cid2", "cid3"}; + fields_type = {tig_gamma::DataType::LONG, tig_gamma::DataType::STRING, + tig_gamma::DataType::INT, tig_gamma::DataType::INT, + tig_gamma::DataType::INT}; + fields_do_index = {false, false, false, false, false}; + vector_name = "abc"; + path = "files"; + model_id = "model"; + retrieval_type = "IVFPQ"; // "HNSW"; // "FLAT"; + store_type = "Mmap"; + store_param = "{\"cache_size\": 256, \"segment_size\": 1000}"; + profiles.resize(search_num * fields_vec.size()); + feature = new float[d * search_num]; + print_doc = true; + log_dir = "test_dump_logs"; + utils::remove_dir(log_dir.c_str()); + } + ~TestDumpOptions() { + if (feature) { + delete[] feature; + } + } + + int nprobe; + int doc_id; + int d; + int max_doc_size; + int search_num; + std::vector fields_vec; + std::vector fields_type; + std::vector fields_do_index; + string path; + string log_dir; + string vector_name; + string model_id; + string retrieval_type; + string store_type; + string store_param; + bool print_doc; + + std::vector profiles; + float *feature; + + char *docids_bitmap_; +}; + +static struct TestDumpOptions opt; + +string profile_file = "./profile_10w.txt"; +string feature_file = "./feat_10w.dat"; + +int AddDoc(void *engine, int start_id, int end_id, int interval = 0, + long fet_offset = 0) { + FILE *fet_fp = fopen(feature_file.c_str(), "rb"); + if (fet_fp == nullptr) { + cerr << "open feature file error" << endl; + return -1; + } + if (fet_offset == 0) { + fet_offset = start_id * opt.d * sizeof(float); + } + if (fseek(fet_fp, fet_offset, SEEK_SET)) { + cerr << "fseek error, offset=" << fet_offset << endl; + return -1; + } + cerr << "add feature file offset=" << fet_offset << endl; + std::ifstream fin; + fin.open(profile_file.c_str()); + std::string str; + long docid = start_id; + + for (int i = 0; i < end_id; ++i) { + double start = utils::getmillisecs(); + if (fin.eof()) { + LOG(ERROR) << "profile is eof, i=" << i; + return -1; + } + std::getline(fin, str); + if (str == "") { + LOG(ERROR) << "profile get empty line, i=" << i; + return -1; + } + vector profile = std::move(utils::split(str, "\t")); + if (i < opt.search_num) { + for (size_t j = 0; j < opt.fields_vec.size(); j++) { + opt.profiles[i * opt.fields_vec.size() + j] = profile[j]; + } + } + if (i < start_id) { + continue; + } + + float vector[opt.d]; + size_t ret = fread((void *)vector, sizeof(float), opt.d, fet_fp); + assert(ret == (size_t)opt.d); + if (i < opt.search_num) { + memcpy((void *)(opt.feature + i * opt.d), (void *)vector, + sizeof(float) * opt.d); + } + tig_gamma::Doc doc; + for (size_t j = 0; j < opt.fields_vec.size(); ++j) { + tig_gamma::Field field; + field.name = opt.fields_vec[j]; + field.datatype = opt.fields_type[j]; + + string &data = opt.profiles[(uint64_t)i * opt.fields_vec.size() + j]; + if (opt.fields_vec[j] == "_id") { + field.value = std::string((char *)(&docid), sizeof(long)); + docid++; + } else if (opt.fields_type[j] == tig_gamma::DataType::INT) { + int v = 
atoi(data.c_str()); + field.value = std::string((char *)(&v), sizeof(v)); + } else if (opt.fields_type[j] == tig_gamma::DataType::LONG) { + long v = atol(data.c_str()); + field.value = std::string((char *)(&v), sizeof(v)); + } else { + // field.value = data + "\001all"; + field.value = data; + } + + field.source = ""; + doc.AddField(std::move(field)); + } + + tig_gamma::Field field; + field.name = opt.vector_name; + field.datatype = tig_gamma::DataType::VECTOR; + field.source = ""; + int len = opt.d * sizeof(float); + if (opt.retrieval_type == "BINARYIVF") { + len = opt.d * sizeof(char) / 8; + } + field.value = std::string((char *)(vector), len); + doc.AddField(std::move(field)); + + char *doc_str = nullptr; + int doc_len = 0; + doc.Serialize(&doc_str, &doc_len); + AddOrUpdateDoc(engine, doc_str, doc_len); + free(doc_str); + ++opt.doc_id; + double elap = utils::getmillisecs() - start; + if (i % 1000 == 0) { + cerr << "AddDoc use [" << elap << "]ms" << endl; + } + if (interval > 0) { + std::this_thread::sleep_for(std::chrono::milliseconds(interval)); + } + } + fin.close(); + fclose(fet_fp); + return 0; +} + +int DeleteDoc(void *engine, long start_id, long end_id) { + for (long i = start_id; i < end_id; ++i) { + int ret = ::DeleteDoc(engine, (const char *)&i, (int)sizeof(long)); + assert(ret == 0); + } + cerr << "delete start id=" << start_id << ", end id=" << end_id << endl; + return 0; +} + +void PrintDoc(struct tig_gamma::ResultItem &result_item, std::string &msg, + struct TestDumpOptions &opt) { + msg += string("score [") + std::to_string(result_item.score) + "], "; + for (size_t i = 0; i < result_item.names.size(); ++i) { + std::string &name = result_item.names[i]; + tig_gamma::DataType data_type; + for (size_t j = 0; j < opt.fields_vec.size(); ++j) { + if (name == opt.vector_name) { + data_type = tig_gamma::DataType::VECTOR; + break; + } + if (name == opt.fields_vec[j]) { + data_type = opt.fields_type[j]; + break; + } + } + + msg += "field name [" + name + "], type [" + + std::to_string(static_cast(data_type)) + "], value ["; + std::string &value = result_item.values[i]; + if (data_type == tig_gamma::DataType::INT) { + msg += std::to_string(*((int *)value.data())); + } else if (data_type == tig_gamma::DataType::LONG) { + msg += std::to_string(*((long *)value.data())); + } else if (data_type == tig_gamma::DataType::FLOAT) { + msg += std::to_string(*((float *)value.data())); + } else if (data_type == tig_gamma::DataType::DOUBLE) { + msg += std::to_string(*((double *)value.data())); + } else if (data_type == tig_gamma::DataType::STRING) { + msg += value; + } else if (data_type == tig_gamma::DataType::VECTOR) { + std::string str_vec; + int d = -1; + memcpy((void *)&d, value.data(), sizeof(int)); + + d /= sizeof(float); + int cur = sizeof(int); + + const float *feature = + reinterpret_cast(value.data() + cur); + + cur += d * sizeof(float); + int len = value.length(); + char source[len - cur]; + + memcpy(source, value.data() + cur, len - cur); + + for (int i = 0; i < d; ++i) { + str_vec += std::to_string(feature[i]) + ","; + } + str_vec.pop_back(); + + std::string source_str = std::string(source, len - cur); + msg += str_vec + "], source [" + source_str + "]"; + } + msg += "], "; + } +} + +int SearchThread(void *engine, int num, int start_id, long fet_offset = 0, + string retrieval_type="") { + FILE *fet_fp = fopen(feature_file.c_str(), "rb"); + if (fet_fp == nullptr) { + LOG(ERROR) << "open feature file error"; + return -1; + } + if (fet_offset == 0) { + fet_offset = start_id * opt.d * 
sizeof(float); + } + + if (fseek(fet_fp, fet_offset, SEEK_SET)) { + LOG(ERROR) << "fseek error, offset=" << fet_offset; + return -1; + } + LOG(INFO) << "search feature file offset=" << fet_offset; + int idx = start_id; + double time = 0; + int failed_count = 0; + int req_num = 1; + string error; + float *feature = new float[opt.d * req_num]; + int end_id = start_id + num; + while (idx < end_id) { + double start = utils::getmillisecs(); + struct tig_gamma::VectorQuery vector_query; + vector_query.name = opt.vector_name; + + int len = opt.d * sizeof(float) * req_num; + if (opt.retrieval_type == "BINARYIVF") { + len = opt.d * sizeof(char) / 8 * req_num; + } + int ret = + (int)fread((void *)feature, sizeof(float) * opt.d, req_num, fet_fp); + assert(ret == req_num); + char *value = reinterpret_cast(feature); + vector_query.value = std::string(value, len); + + vector_query.min_score = 0; + vector_query.max_score = 10000; + vector_query.boost = 0.1; + vector_query.has_boost = 0; + vector_query.retrieval_type = retrieval_type; + + tig_gamma::Request request; + request.SetTopN(10); + request.AddVectorQuery(vector_query); + request.SetReqNum(req_num); + request.SetBruteForceSearch(0); + request.SetHasRank(true); + std::string retrieval_params = + "{\"metric_type\" : \"InnerProduct\", \"recall_num\" : " + "10, \"nprobe\" : 10, \"ivf_flat\" : 0, \"efSearch\": 100}"; + request.SetRetrievalParams(retrieval_params); + // request.SetOnlineLogLevel(""); + request.SetMultiVectorRank(0); + request.SetL2Sqrt(false); + + char *request_str, *response_str; + int request_len, response_len; + + request.Serialize(&request_str, &request_len); + ret = + Search(engine, request_str, request_len, &response_str, &response_len); + + assert(ret == 0); + free(request_str); + + tig_gamma::Response response; + response.Deserialize(response_str, response_len); + + free(response_str); + + if (opt.print_doc) { + std::vector &results = response.Results(); + assert(results.size() > 0); + for (size_t i = 0; i < results.size(); ++i) { + int ii = idx + i; + string msg = std::to_string(ii) + ", "; + struct tig_gamma::SearchResult &result = results[i]; + + std::vector &result_items = + result.result_items; + assert(result_items.size() > 0); + msg += string("total [") + std::to_string(result.total) + "], "; + msg += string("result_num [") + std::to_string(result_items.size()) + + "], "; + for (size_t j = 0; j < result_items.size(); ++j) { + struct tig_gamma::ResultItem &result_item = result_items[j]; + PrintDoc(result_item, msg, opt); + msg += "\n"; + } + if (abs(result_items[0].score - 1.0) < 0.001) { + if (ii % 1000 == 0) { + LOG(INFO) << msg << endl; + } + } else { + LOG(ERROR) << msg; + error += std::to_string(ii) + ","; + failed_count++; + } + } + } + double elap = utils::getmillisecs() - start; + time += elap; + if (idx % 10000 == 0) { + LOG(INFO) << "search time [" << time / 10000 << "]ms"; + time = 0; + } + idx += req_num; + } + delete[] feature; + LOG(ERROR) << error; + return failed_count; +} + +void *CreateEngine(string &path) { + tig_gamma::Config config; + config.SetPath(path); + config.SetLogDir(opt.log_dir); + + char *config_str = nullptr; + int len = 0; + config.Serialize(&config_str, &len); + void *engine = Init(config_str, len); + free(config_str); + return engine; +} + +int CreateTable(void *engine, string &name, string store_type = "MemoryOnly", + bool multi_model = false) { + tig_gamma::TableInfo table; + table.SetName(name); + table.SetIndexingSize(10000); + if (multi_model) { + vector retrieval_types = {"IVFPQ", 
"HNSW"}; + vector retrieval_params = {kIVFPQParam, kHNSWParam_str}; + table.SetRetrievalTypes(retrieval_types); + table.SetRetrievalParams(retrieval_params); + } else { + table.SetRetrievalType(opt.retrieval_type); + table.SetRetrievalParam(kIVFPQParam); + } + + for (size_t i = 0; i < opt.fields_vec.size(); ++i) { + struct tig_gamma::FieldInfo field_info; + field_info.name = opt.fields_vec[i]; + + field_info.is_index = opt.fields_do_index[i]; + field_info.data_type = opt.fields_type[i]; + table.AddField(field_info); + } + + struct tig_gamma::VectorInfo vector_info; + vector_info.name = opt.vector_name; + vector_info.data_type = tig_gamma::DataType::FLOAT; + vector_info.is_index = true; + vector_info.dimension = opt.d; + vector_info.model_id = ""; + vector_info.store_type = store_type; + vector_info.store_param = "{\"cache_size\": 2048}"; + vector_info.has_source = false; + + table.AddVectorInfo(vector_info); + + char *table_str = nullptr; + int len = 0; + table.Serialize(&table_str, &len); + + int ret = ::CreateTable(engine, table_str, len); + + free(table_str); + + return ret; +} + +int MakeLastNotDone(string &path) { + std::map folders_map; + std::vector folders_tm; + string dump_path = path + "/retrieval_model_index"; + string date_time_format = "%Y-%m-%d-%H:%M:%S"; + std::vector folders = utils::ls_folder(dump_path); + for (const string &folder_name : folders) { + struct tm result; + strptime(folder_name.c_str(), date_time_format.c_str(), &result); + + std::time_t t = std::mktime(&result); + folders_tm.push_back(t); + folders_map.insert(std::make_pair(t, folder_name)); + } + + std::sort(folders_tm.begin(), folders_tm.end()); + folders.clear(); + for (const std::time_t t : folders_tm) { + folders.push_back(dump_path + "/" + folders_map[t]); + } + string folder_path = folders[folders.size() - 1]; + const string done_file = folder_path + "/dump.done"; + LOG(INFO) << "done_file=" << done_file; + if (utils::get_file_size(done_file.c_str()) >= 0) { + return remove(done_file.c_str()); + } + return 0; +} + +void BuildIdx(void *engine) { + LOG(INFO) << "begin to build index"; + ::BuildIndex(engine); + int n_index_status = -1; + do { + char *status = nullptr; + int len = 0; + GetEngineStatus(engine, &status, &len); + tig_gamma::EngineStatus engine_status; + engine_status.Deserialize(status, len); + free(status); + std::this_thread::sleep_for(std::chrono::seconds(1)); + n_index_status = engine_status.IndexStatus(); + } while (n_index_status != 2); +} + +void CreateMultiTable() { + string case_name = GetCurrentCaseName(); + string table_name = "test_table"; + // int max_doc_size = 10000 * 2000; + utils::remove_dir(case_name.c_str()); + utils::make_dir(case_name.c_str()); + string root_path = "./" + case_name; + + // Sleep(20 * 1000); + + for (int i = 0; i < 1; i++) { + LOG(INFO) << "------------------create table-------------------id=" << i; + void *engine = CreateEngine(root_path); + ASSERT_NE(nullptr, engine); + ASSERT_EQ(0, CreateTable(engine, table_name)); + // Sleep(10 * 1000); + LOG(INFO) << "------------------close--------------------id=" << i; + EXPECT_EQ(0, AddDoc(engine, 0, 1 * 10000)); + ASSERT_EQ(0, Dump(engine)); + Close(engine); + engine = nullptr; + // Sleep(10 * 1000); + } + // Sleep(1000 * 1000); +} + +void TestDumpNormal(const string &store_type) { + string case_name = GetCurrentCaseName(); + string table_name = "test_table"; + utils::remove_dir(case_name.c_str()); + utils::make_dir(case_name.c_str()); + string root_path = "./" + case_name; + + cout << "------------------create 
table and close--------------------" + << endl; + void *engine = CreateEngine(root_path); + ASSERT_NE(nullptr, engine); + ASSERT_EQ(0, CreateTable(engine, table_name, store_type)); + Close(engine); + engine = nullptr; + + cout << "------------------load no data--------------------" << endl; + engine = CreateEngine(root_path); + ASSERT_NE(nullptr, engine); + ASSERT_EQ(0, CreateTable(engine, table_name, store_type)); + ASSERT_EQ(0, Load(engine)); + + cout << "------------------add doc and dump--------------------" << endl; + EXPECT_EQ(0, AddDoc(engine, 0, 10000)); + BuildIdx(engine); + ASSERT_EQ(0, Dump(engine)); + Close(engine); + engine = nullptr; + + cout << "------------------load data--------------------" << endl; + engine = CreateEngine(root_path); + ASSERT_NE(nullptr, engine); + ASSERT_EQ(0, CreateTable(engine, table_name, store_type)); + ASSERT_EQ(0, Load(engine)); + BuildIdx(engine); + + LOG(INFO) << "------------------add_dump_add_dump-- ------------------"; + EXPECT_EQ(0, AddDoc(engine, 10000, 11000)); + Sleep(1000); + ASSERT_EQ(0, Dump(engine)); + EXPECT_EQ(0, AddDoc(engine, 11000, 12000)); + Sleep(1000); + ASSERT_EQ(0, Dump(engine)); + Close(engine); + engine = nullptr; + + LOG(INFO) << "------------------reload--------------------"; + engine = CreateEngine(root_path); + ASSERT_NE(nullptr, engine); + ASSERT_EQ(0, CreateTable(engine, table_name, store_type)); + ASSERT_EQ(0, Load(engine)); + BuildIdx(engine); + + ASSERT_EQ(0, SearchThread(engine, 12000, 0)); + ASSERT_EQ(0, Dump(engine)); + Close(engine); + engine = nullptr; +} + +TEST(Engine, DumpNormal_MemoryOnly) { TestDumpNormal("MemoryOnly"); } + +TEST(Engine, DumpNormal_Mmap) { TestDumpNormal("Mmap"); } + +TEST(Engine, DumpNormal_RocksDB) { TestDumpNormal("RocksDB"); } + +void TestDumpNotDone(const string &store_type) { + string case_name = GetCurrentCaseName(); + string table_name = "test_table"; + utils::remove_dir(case_name.c_str()); + utils::make_dir(case_name.c_str()); + string root_path = "./" + case_name; + + cerr << "------------------init--------------------"; + void *engine = CreateEngine(root_path); + ASSERT_NE(nullptr, engine); + ASSERT_EQ(0, CreateTable(engine, table_name, store_type)); + EXPECT_EQ(0, AddDoc(engine, 0, 10000)); + BuildIdx(engine); + + cerr << "------------------dump and close--------------------"; + ASSERT_EQ(0, Dump(engine)); + Close(engine); + engine = nullptr; + + cerr << "------------------remove dump.done file--------------------"; + ASSERT_EQ(0, MakeLastNotDone(root_path)); + + cerr << "------------------load--------------------"; + engine = CreateEngine(root_path); + ASSERT_NE(nullptr, engine); + ASSERT_EQ(0, CreateTable(engine, table_name, store_type)); + ASSERT_EQ(0, Load(engine)); + BuildIdx(engine); + + cerr << "------------------add_dump and close--------------------"; + EXPECT_EQ(0, AddDoc(engine, 10000, 11000)); + Sleep(1000); + ASSERT_EQ(0, Dump(engine)); + Close(engine); + engine = nullptr; + + cerr << "------------------reload--------------------"; + engine = CreateEngine(root_path); + ASSERT_NE(nullptr, engine); + ASSERT_EQ(0, CreateTable(engine, table_name, store_type)); + ASSERT_EQ(0, Load(engine)); + BuildIdx(engine); + + cerr << "------------------search--------------------"; + ASSERT_EQ(0, SearchThread(engine, 11000, 0)); + ASSERT_EQ(0, Dump(engine)); + Close(engine); + engine = nullptr; +} + +TEST(Engine, DumpNotDone_MemoryOnly) { TestDumpNotDone("MemoryOnly"); } + +TEST(Engine, DumpNotDone_Mmap) { TestDumpNotDone("Mmap"); } + +TEST(Engine, DumpNotDone_RocksDB) { 
TestDumpNotDone("RocksDB"); } + +TEST(Engine, CreateTableFromLocal) { + string case_name = GetCurrentCaseName(); + string table_name = "test_table"; + utils::remove_dir(case_name.c_str()); + utils::make_dir(case_name.c_str()); + string root_path = "./" + case_name; + + cerr << "------------------create table--------------------\n"; + void *engine = CreateEngine(root_path); + ASSERT_NE(nullptr, engine); + ASSERT_EQ(0, CreateTable(engine, table_name)); + + cerr << "------------------add doc--------------------\n"; + EXPECT_EQ(0, AddDoc(engine, 0, 1 * 10000)); + BuildIdx(engine); + + cerr << "------------------dump and close--------------------\n"; + ASSERT_EQ(0, Dump(engine)); + Close(engine); + engine = nullptr; + + cerr << "------------------load data and create table from " + "local--------------------\n"; + engine = CreateEngine(root_path); + ASSERT_NE(nullptr, engine); + ASSERT_EQ(0, Load(engine)); + + cerr << "------------------readd doc--------------------\n"; + EXPECT_EQ(0, AddDoc(engine, 1 * 10000, 11000)); + BuildIdx(engine); + + cerr << "------------------search--------------------\n"; + ASSERT_EQ(0, SearchThread(engine, 11000, 0)); + ASSERT_EQ(0, Dump(engine)); + Close(engine); + engine = nullptr; +} + +TEST(Engine, UpdateAndCompactIndex) { + string case_name = GetCurrentCaseName(); + string table_name = "test_compact_index"; + utils::remove_dir(case_name.c_str()); + utils::make_dir(case_name.c_str()); + string root_path = "./" + case_name; + + LOG(INFO) << "------------------create table--------------------"; + void *engine = CreateEngine(root_path); + ASSERT_NE(nullptr, engine); + ASSERT_EQ(0, CreateTable(engine, table_name, "MemoryOnly")); + + LOG(INFO) << "------------------add doc and build--------------------"; + ASSERT_EQ(0, AddDoc(engine, 0, 1 * 10000)); + BuildIdx(engine); + + LOG(INFO) << "------------------update docs--------------------"; + long fet_offset = (long)50000 * opt.d * 4; + ASSERT_EQ(0, AddDoc(engine, 3000, 5000, 0, fet_offset)); + + Sleep(1000 * 10); + ASSERT_EQ(0, SearchThread(engine, 2000, 3000, fet_offset)); + + LOG(INFO) << "------------------delete docs--------------------"; + ASSERT_EQ(0, DeleteDoc(engine, 0, 4000)); + + Sleep(1000 * 6); + LOG(INFO) << "------------------add docs--------------------"; + ASSERT_EQ(0, AddDoc(engine, 10000, 14000)); + + LOG(INFO) << "------------------dump and close--------------------"; + ASSERT_EQ(0, Dump(engine)); + Close(engine); + engine = nullptr; + + LOG(INFO) << "------------------load--------------------"; + engine = CreateEngine(root_path); + ASSERT_NE(nullptr, engine); + ASSERT_EQ(0, CreateTable(engine, table_name, "MemoryOnly")); + ASSERT_EQ(0, Load(engine)); + BuildIdx(engine); + + LOG(INFO) << "------------------final search--------------------"; + Sleep(1000 * 5); + fet_offset += (long)1000 * opt.d * sizeof(float); + ASSERT_EQ(0, SearchThread(engine, 1000, 4000, fet_offset)); + ASSERT_EQ(0, SearchThread(engine, 9000, 5000)); + + Close(engine); + engine = nullptr; +} + +TEST(Engine, MultiModel) { + string case_name = GetCurrentCaseName(); + string table_name = "test_table"; + utils::remove_dir(case_name.c_str()); + utils::make_dir(case_name.c_str()); + string root_path = "./" + case_name; + string store_type = "MemoryOnly"; + + cout << "------------------create table and close--------------------" + << endl; + void *engine = CreateEngine(root_path); + ASSERT_NE(nullptr, engine); + ASSERT_EQ(0, CreateTable(engine, table_name, store_type, true)); + // Close(engine); + // engine = nullptr; + + cout << 
"------------------add doc and dump--------------------" << endl; + EXPECT_EQ(0, AddDoc(engine, 0, 10000)); + BuildIdx(engine); + ASSERT_EQ(0, Dump(engine)); + Close(engine); + engine = nullptr; + + cout << "------------------load data--------------------" << endl; + engine = CreateEngine(root_path); + ASSERT_NE(nullptr, engine); + ASSERT_EQ(0, CreateTable(engine, table_name, store_type, true)); + ASSERT_EQ(0, Load(engine)); + BuildIdx(engine); + + LOG(INFO) << "------------------add_dump_add_dump-- ------------------"; + EXPECT_EQ(0, AddDoc(engine, 10000, 11000)); + Sleep(1000); + ASSERT_EQ(0, Dump(engine)); + EXPECT_EQ(0, AddDoc(engine, 11000, 12000)); + Sleep(1000); + ASSERT_EQ(0, Dump(engine)); + Close(engine); + engine = nullptr; + + LOG(INFO) << "------------------reload--------------------"; + engine = CreateEngine(root_path); + ASSERT_NE(nullptr, engine); + ASSERT_EQ(0, CreateTable(engine, table_name, store_type, true)); + ASSERT_EQ(0, Load(engine)); + BuildIdx(engine); + + RandomGenerator rg; + string retrieval_type = rg.Rand(2) == 0 ? "IVFPQ" : "HNSW"; + ASSERT_EQ(0, SearchThread(engine, 12000, 0, 0, retrieval_type)); + ASSERT_EQ(0, Dump(engine)); + Close(engine); + engine = nullptr; +} + +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +} // namespace Test diff --git a/tests/test_files.cc b/tests/test_files.cc index 2b7f0b4..6242e18 100644 --- a/tests/test_files.cc +++ b/tests/test_files.cc @@ -20,6 +20,7 @@ #include "c_api/api_data/gamma_request.h" #include "c_api/api_data/gamma_response.h" #include "c_api/api_data/gamma_table.h" +#include "c_api/api_data/gamma_config.h" #include "test.h" #include "utils.h" @@ -401,55 +402,110 @@ int GetVector(void *engine) { } void UpdateThread(void *engine) { - int doc_id = 0; - tig_gamma::Doc doc; - - for (size_t j = 0; j < opt.fields_vec.size(); ++j) { - tig_gamma::DataType data_type = opt.fields_type[j]; - std::string &name = opt.fields_vec[j]; - + auto DocAddField = [&](tig_gamma::Doc &doc, std::string name, + std::string source, std::string val, + tig_gamma::DataType data_type) { tig_gamma::Field field; field.name = name; - field.source = "abc"; + field.source = source; field.datatype = data_type; - - std::string &data = - opt.profiles[(uint64_t)doc_id * opt.fields_vec.size() + j]; - if (opt.fields_type[j] == tig_gamma::DataType::INT) { - char *value_str = static_cast(malloc(sizeof(int))); - int len = sizeof(int); - int v = atoi("88"); - memcpy(value_str, &v, len); - field.value = std::string(value_str, len); - free(value_str); - } else if (opt.fields_type[j] == tig_gamma::DataType::LONG) { - char *value_str = static_cast(malloc(sizeof(long))); - int len = sizeof(long); - long v = atol(data.c_str()); - memcpy(value_str, &v, len); - field.value = std::string(value_str, len); - free(value_str); - } else { - field.value = data; - } + field.value = val; doc.AddField(field); - } + }; + + auto DocInfoToString = [&](tig_gamma::Doc &doc, std::string &res_str) { + auto fields = doc.TableFields(); + std::stringstream ss; + for (auto &f : fields) { + if (f.datatype == tig_gamma::DataType::INT) { + int val = *(int*)(f.value.c_str()); + ss << val << ", "; + } else { + auto val = f.value; + ss << val << ", "; + } + } + res_str = ss.str(); + }; - tig_gamma::Field field; - field.name = opt.vector_name; - field.source = "abc"; - field.datatype = tig_gamma::DataType::VECTOR; - field.value = std::string( - reinterpret_cast(opt.feature + (uint64_t)doc_id * opt.d), - opt.d * sizeof(float)); - 
doc.AddField(field); + for (int i = 0; i < opt.add_doc_num; i+=1000) { + int doc_id = i; + std::string _id; + tig_gamma::Doc doc; + int size = opt.fields_vec.size(); - char *doc_str = nullptr; - int len = 0; - doc.Serialize(&doc_str, &len); + for (size_t j = 0; j < opt.fields_vec.size(); ++j) { + tig_gamma::DataType data_type = opt.fields_type[j]; + std::string &name = opt.fields_vec[j]; + std::string &data = + opt.profiles[(uint64_t)doc_id * opt.fields_vec.size() + j]; + if (name == "_id") { + _id = data; + } + std::string value; + if (opt.fields_type[j] == tig_gamma::DataType::INT) { + char *value_str = static_cast(malloc(sizeof(int))); + int v = atoi("88"); + memcpy(value_str, &v, sizeof(int)); + value = std::string(value_str, sizeof(int)); + free(value_str); + } else if (opt.fields_type[j] == tig_gamma::DataType::LONG) { + char *value_str = static_cast(malloc(sizeof(long))); + long v = atol(data.c_str()); + memcpy(value_str, &v, sizeof(long)); + value = std::string(value_str, sizeof(long)); + free(value_str); + } else { + if (name != "_id") { + value = "00000"; + } else { + value = data; + } + } + DocAddField(doc, name, "abc", value, data_type); + } + { + float val = 0; + std::string data((char*)&val, sizeof(val)); + DocAddField(doc, "float", "abc", data, + tig_gamma::DataType::FLOAT); + data = std::string((char *)(opt.feature + (uint64_t)doc_id * opt.d), + opt.d * sizeof(float)); + DocAddField(doc, opt.vector_name, "abc", data, + tig_gamma::DataType::VECTOR); + } - UpdateDoc(engine, doc_str, len); - free(doc_str); + { + char *str_doc = nullptr; + int str_len = 0; + GetDocByID(engine, _id.c_str(), _id.size(), &str_doc, &str_len); + tig_gamma::Doc old_doc; + old_doc.SetEngine((tig_gamma::GammaEngine*)engine); + old_doc.Deserialize(str_doc, str_len); + std::string get_res; + DocInfoToString(old_doc, get_res); + LOG(INFO) << "old doc info:" << get_res; + } + + char *doc_str = nullptr; + int len = 0; + doc.Serialize(&doc_str, &len); + AddOrUpdateDoc(engine, doc_str, len); + + char *str_doc = nullptr; + int str_len = 0; + GetDocByID(engine, _id.c_str(), _id.size(), &str_doc, &str_len); + tig_gamma::Doc get_doc; + get_doc.SetEngine((tig_gamma::GammaEngine*)engine); + get_doc.Deserialize(str_doc, str_len); + std::string get_res; + std::string correct_res; + DocInfoToString(get_doc, get_res); + DocInfoToString(doc, correct_res); + LOG(INFO) << "get_res: " << get_res; + LOG(INFO) << "correct_res: " << correct_res; + free(str_doc); + } } int InitEngine() { @@ -526,6 +582,7 @@ int Create() { vector_info.dimension = opt.d; vector_info.model_id = opt.model_id; vector_info.store_type = opt.store_type; + // vector_info.store_param = "{\"cache_size\": 2048, \"compress\": {\"rate\":16}}"; vector_info.store_param = "{\"cache_size\": 2048}"; vector_info.has_source = false; @@ -642,20 +699,38 @@ int Search() { double end = utils::getmillisecs(); LOG(INFO) << "Search cost [" << end - start << "] ms"; // add_thread.join(); + return 0; } -int DumpEngine() { - int ret = Dump(opt.engine); - - // ret = AddDocToEngine(opt.engine, opt.add_doc_num); +int AlterCacheSizeTest() { + tig_gamma::Config conf; + conf.AddCacheInfo("table", 1024); + conf.AddCacheInfo("string", 2048); + conf.AddCacheInfo(opt.vector_name, 4096); + char *buf = nullptr; + int len = 0; + conf.Serialize(&buf, &len); + return SetConfig(opt.engine, buf, len); +} - // std::this_thread::sleep_for(std::chrono::seconds(10)); +int GetCacheSizeTest() { + tig_gamma::Config config; + char *buf = nullptr; + int len = 0; + GetConfig(opt.engine, &buf, &len); 
+ config.Deserialize(buf, len); + for (auto &cache_info : config.CacheInfos()) { + LOG(INFO) << "TestGetCacheSize() field_name:" << cache_info.field_name + << ", cache_size:" << cache_info.cache_size; + } + delete[] buf; + return 0; +} - // ret = Dump(opt.engine); - Close(opt.engine); - opt.engine = nullptr; +int DumpEngine() { + int ret = Dump(opt.engine); return ret; } @@ -679,11 +754,6 @@ int LoadEngine() { return ret; } -int DumpAfterLoad() { - int ret = Dump(opt.engine); - return ret; -} - int CloseEngine() { Close(opt.engine); opt.engine = nullptr; @@ -723,13 +793,18 @@ int main(int argc, char **argv) { test::BuildEngineIndex(); // test::Add(); test::Search(); + + test::GetCacheSizeTest(); + test::AlterCacheSizeTest(); + test::GetCacheSizeTest(); + + test::UpdateThread(test::opt.engine); if (not bLoad) { test::DumpEngine(); } // test::LoadEngine(); // test::BuildEngineIndex(); // test::Search(); - // test::DumpAfterLoad(); test::CloseEngine(); return 0; diff --git a/third_party/concurrentqueue/blockingconcurrentqueue.h b/third_party/concurrentqueue/blockingconcurrentqueue.h index c855f9d..66579b6 100644 --- a/third_party/concurrentqueue/blockingconcurrentqueue.h +++ b/third_party/concurrentqueue/blockingconcurrentqueue.h @@ -1,419 +1,23 @@ // Provides an efficient blocking version of moodycamel::ConcurrentQueue. -// ©2015-2016 Cameron Desrochers. Distributed under the terms of the simplified +// ©2015-2020 Cameron Desrochers. Distributed under the terms of the simplified // BSD license, available at the top of concurrentqueue.h. +// Also dual-licensed under the Boost Software License (see LICENSE.md) // Uses Jeff Preshing's semaphore implementation (under the terms of its -// separate zlib license, embedded below). +// separate zlib license, see lightweightsemaphore.h). #pragma once #include "concurrentqueue.h" +#include "lightweightsemaphore.h" + #include #include #include #include #include -#if defined(_WIN32) -// Avoid including windows.h in a header; we only need a handful of -// items, so we'll redeclare them here (this is relatively safe since -// the API generally has to remain stable between Windows versions). -// I know this is an ugly hack but it still beats polluting the global -// namespace with thousands of generic names or adding a .cpp for nothing. -extern "C" { - struct _SECURITY_ATTRIBUTES; - __declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName); - __declspec(dllimport) int __stdcall CloseHandle(void* hObject); - __declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds); - __declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount); -} -#elif defined(__MACH__) -#include -#elif defined(__unix__) -#include -#endif - namespace moodycamel { -namespace details -{ - // Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's - // portable + lightweight semaphore implementations, originally from - // https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h - // LICENSE: - // Copyright (c) 2015 Jeff Preshing - // - // This software is provided 'as-is', without any express or implied - // warranty. In no event will the authors be held liable for any damages - // arising from the use of this software. 
- // - // Permission is granted to anyone to use this software for any purpose, - // including commercial applications, and to alter it and redistribute it - // freely, subject to the following restrictions: - // - // 1. The origin of this software must not be misrepresented; you must not - // claim that you wrote the original software. If you use this software - // in a product, an acknowledgement in the product documentation would be - // appreciated but is not required. - // 2. Altered source versions must be plainly marked as such, and must not be - // misrepresented as being the original software. - // 3. This notice may not be removed or altered from any source distribution. - namespace mpmc_sema - { -#if defined(_WIN32) - class Semaphore - { - private: - void* m_hSema; - - Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - - public: - Semaphore(int initialCount = 0) - { - assert(initialCount >= 0); - const long maxLong = 0x7fffffff; - m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr); - } - - ~Semaphore() - { - CloseHandle(m_hSema); - } - - void wait() - { - const unsigned long infinite = 0xffffffff; - WaitForSingleObject(m_hSema, infinite); - } - - bool try_wait() - { - const unsigned long RC_WAIT_TIMEOUT = 0x00000102; - return WaitForSingleObject(m_hSema, 0) != RC_WAIT_TIMEOUT; - } - - bool timed_wait(std::uint64_t usecs) - { - const unsigned long RC_WAIT_TIMEOUT = 0x00000102; - return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) != RC_WAIT_TIMEOUT; - } - - void signal(int count = 1) - { - ReleaseSemaphore(m_hSema, count, nullptr); - } - }; -#elif defined(__MACH__) - //--------------------------------------------------------- - // Semaphore (Apple iOS and OSX) - // Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html - //--------------------------------------------------------- - class Semaphore - { - private: - semaphore_t m_sema; - - Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - - public: - Semaphore(int initialCount = 0) - { - assert(initialCount >= 0); - semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount); - } - - ~Semaphore() - { - semaphore_destroy(mach_task_self(), m_sema); - } - - void wait() - { - semaphore_wait(m_sema); - } - - bool try_wait() - { - return timed_wait(0); - } - - bool timed_wait(std::uint64_t timeout_usecs) - { - mach_timespec_t ts; - ts.tv_sec = static_cast(timeout_usecs / 1000000); - ts.tv_nsec = (timeout_usecs % 1000000) * 1000; - - // added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html - kern_return_t rc = semaphore_timedwait(m_sema, ts); - - return rc != KERN_OPERATION_TIMED_OUT && rc != KERN_ABORTED; - } - - void signal() - { - semaphore_signal(m_sema); - } - - void signal(int count) - { - while (count-- > 0) - { - semaphore_signal(m_sema); - } - } - }; -#elif defined(__unix__) - //--------------------------------------------------------- - // Semaphore (POSIX, Linux) - //--------------------------------------------------------- - class Semaphore - { - private: - sem_t m_sema; - - Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; - - public: - Semaphore(int initialCount = 0) - { - 
assert(initialCount >= 0); - sem_init(&m_sema, 0, initialCount); - } - - ~Semaphore() - { - sem_destroy(&m_sema); - } - - void wait() - { - // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error - int rc; - do { - rc = sem_wait(&m_sema); - } while (rc == -1 && errno == EINTR); - } - - bool try_wait() - { - int rc; - do { - rc = sem_trywait(&m_sema); - } while (rc == -1 && errno == EINTR); - return !(rc == -1 && errno == EAGAIN); - } - - bool timed_wait(std::uint64_t usecs) - { - struct timespec ts; - const int usecs_in_1_sec = 1000000; - const int nsecs_in_1_sec = 1000000000; - clock_gettime(CLOCK_REALTIME, &ts); - ts.tv_sec += usecs / usecs_in_1_sec; - ts.tv_nsec += (usecs % usecs_in_1_sec) * 1000; - // sem_timedwait bombs if you have more than 1e9 in tv_nsec - // so we have to clean things up before passing it in - if (ts.tv_nsec >= nsecs_in_1_sec) { - ts.tv_nsec -= nsecs_in_1_sec; - ++ts.tv_sec; - } - - int rc; - do { - rc = sem_timedwait(&m_sema, &ts); - } while (rc == -1 && errno == EINTR); - return !(rc == -1 && errno == ETIMEDOUT); - } - - void signal() - { - sem_post(&m_sema); - } - - void signal(int count) - { - while (count-- > 0) - { - sem_post(&m_sema); - } - } - }; -#else -#error Unsupported platform! (No semaphore wrapper available) -#endif - - //--------------------------------------------------------- - // LightweightSemaphore - //--------------------------------------------------------- - class LightweightSemaphore - { - public: - typedef std::make_signed::type ssize_t; - - private: - std::atomic m_count; - Semaphore m_sema; - - bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1) - { - ssize_t oldCount; - // Is there a better way to set the initial spin count? - // If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC, - // as threads start hitting the kernel semaphore. - int spin = 10000; - while (--spin >= 0) - { - oldCount = m_count.load(std::memory_order_relaxed); - if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) - return true; - std::atomic_signal_fence(std::memory_order_acquire); // Prevent the compiler from collapsing the loop. - } - oldCount = m_count.fetch_sub(1, std::memory_order_acquire); - if (oldCount > 0) - return true; - if (timeout_usecs < 0) - { - m_sema.wait(); - return true; - } - if (m_sema.timed_wait((std::uint64_t)timeout_usecs)) - return true; - // At this point, we've timed out waiting for the semaphore, but the - // count is still decremented indicating we may still be waiting on - // it. So we have to re-adjust the count, but only if the semaphore - // wasn't signaled enough times for us too since then. If it was, we - // need to release the semaphore too. - while (true) - { - oldCount = m_count.load(std::memory_order_acquire); - if (oldCount >= 0 && m_sema.try_wait()) - return true; - if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed)) - return false; - } - } - - ssize_t waitManyWithPartialSpinning(ssize_t max, std::int64_t timeout_usecs = -1) - { - assert(max > 0); - ssize_t oldCount; - int spin = 10000; - while (--spin >= 0) - { - oldCount = m_count.load(std::memory_order_relaxed); - if (oldCount > 0) - { - ssize_t newCount = oldCount > max ? 
oldCount - max : 0; - if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) - return oldCount - newCount; - } - std::atomic_signal_fence(std::memory_order_acquire); - } - oldCount = m_count.fetch_sub(1, std::memory_order_acquire); - if (oldCount <= 0) - { - if (timeout_usecs < 0) - m_sema.wait(); - else if (!m_sema.timed_wait((std::uint64_t)timeout_usecs)) - { - while (true) - { - oldCount = m_count.load(std::memory_order_acquire); - if (oldCount >= 0 && m_sema.try_wait()) - break; - if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed)) - return 0; - } - } - } - if (max > 1) - return 1 + tryWaitMany(max - 1); - return 1; - } - - public: - LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount) - { - assert(initialCount >= 0); - } - - bool tryWait() - { - ssize_t oldCount = m_count.load(std::memory_order_relaxed); - while (oldCount > 0) - { - if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) - return true; - } - return false; - } - - void wait() - { - if (!tryWait()) - waitWithPartialSpinning(); - } - - bool wait(std::int64_t timeout_usecs) - { - return tryWait() || waitWithPartialSpinning(timeout_usecs); - } - - // Acquires between 0 and (greedily) max, inclusive - ssize_t tryWaitMany(ssize_t max) - { - assert(max >= 0); - ssize_t oldCount = m_count.load(std::memory_order_relaxed); - while (oldCount > 0) - { - ssize_t newCount = oldCount > max ? oldCount - max : 0; - if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) - return oldCount - newCount; - } - return 0; - } - - // Acquires at least one, and (greedily) at most max - ssize_t waitMany(ssize_t max, std::int64_t timeout_usecs) - { - assert(max >= 0); - ssize_t result = tryWaitMany(max); - if (result == 0 && max > 0) - result = waitManyWithPartialSpinning(max, timeout_usecs); - return result; - } - - ssize_t waitMany(ssize_t max) - { - ssize_t result = waitMany(max, -1); - assert(result > 0); - return result; - } - - void signal(ssize_t count = 1) - { - assert(count >= 0); - ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release); - ssize_t toRelease = -oldCount < count ? -oldCount : count; - if (toRelease > 0) - { - m_sema.signal((int)toRelease); - } - } - - ssize_t availableApprox() const - { - ssize_t count = m_count.load(std::memory_order_relaxed); - return count > 0 ? count : 0; - } - }; - } // end namespace mpmc_sema -} // end namespace details - - // This is a blocking version of the queue. It has an almost identical interface to // the normal non-blocking version, with the addition of various wait_dequeue() methods // and the removal of producer-specific dequeue methods. @@ -422,7 +26,7 @@ class BlockingConcurrentQueue { private: typedef ::moodycamel::ConcurrentQueue ConcurrentQueue; - typedef details::mpmc_sema::LightweightSemaphore LightweightSemaphore; + typedef ::moodycamel::LightweightSemaphore LightweightSemaphore; public: typedef typename ConcurrentQueue::producer_token_t producer_token_t; @@ -452,7 +56,7 @@ class BlockingConcurrentQueue // includes making the memory effects of construction visible, possibly with a // memory barrier). 
explicit BlockingConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) - : inner(capacity), sema(create(), &BlockingConcurrentQueue::template destroy) + : inner(capacity), sema(create(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy) { assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); if (!sema) { @@ -461,7 +65,7 @@ class BlockingConcurrentQueue } BlockingConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) - : inner(minCapacity, maxExplicitProducers, maxImplicitProducers), sema(create(), &BlockingConcurrentQueue::template destroy) + : inner(minCapacity, maxExplicitProducers, maxImplicitProducers), sema(create(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy) { assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); if (!sema) { @@ -754,7 +358,9 @@ class BlockingConcurrentQueue template inline void wait_dequeue(U& item) { - sema->wait(); + while (!sema->wait()) { + continue; + } while (!inner.try_dequeue(item)) { continue; } @@ -795,7 +401,9 @@ class BlockingConcurrentQueue template inline void wait_dequeue(consumer_token_t& token, U& item) { - sema->wait(); + while (!sema->wait()) { + continue; + } while (!inner.try_dequeue(token, item)) { continue; } @@ -943,18 +551,11 @@ class BlockingConcurrentQueue private: - template - static inline U* create() - { - auto p = (Traits::malloc)(sizeof(U)); - return p != nullptr ? new (p) U : nullptr; - } - - template - static inline U* create(A1&& a1) + template + static inline U* create(A1&& a1, A2&& a2) { - auto p = (Traits::malloc)(sizeof(U)); - return p != nullptr ? new (p) U(std::forward(a1)) : nullptr; + void* p = (Traits::malloc)(sizeof(U)); + return p != nullptr ? new (p) U(std::forward(a1), std::forward(a2)) : nullptr; } template diff --git a/third_party/concurrentqueue/concurrentqueue.h b/third_party/concurrentqueue/concurrentqueue.h index 453bacf..ff3156f 100644 --- a/third_party/concurrentqueue/concurrentqueue.h +++ b/third_party/concurrentqueue/concurrentqueue.h @@ -5,7 +5,7 @@ // http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue // Simplified BSD license: -// Copyright (c) 2013-2016, Cameron Desrochers. +// Copyright (c) 2013-2020, Cameron Desrochers. // All rights reserved. // // Redistribution and use in source and binary forms, with or without modification, @@ -27,6 +27,7 @@ // TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
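
Two things change in the hunks above: the constructors now forward the new Traits::MAX_SEMA_SPINS knob into the semaphore, and wait_dequeue() retries sema->wait() because the platform wait can now report failure instead of asserting. A short caller-side sketch of the affected API, using only names from the vendored headers (the traits override value of 100 is just an example):

#include "blockingconcurrentqueue.h"
#include <chrono>

// The spin budget for blocked consumers is now tunable through the traits parameter.
struct LowSpinTraits : public moodycamel::ConcurrentQueueDefaultTraits {
  static const int MAX_SEMA_SPINS = 100;  // spin less when consumers outnumber idle cores
};

moodycamel::BlockingConcurrentQueue<int, LowSpinTraits> q;

void producer() { q.enqueue(42); }

void consumer() {
  int item;
  q.wait_dequeue(item);  // blocks (spin, then kernel wait) until an item arrives
  // Bounded variant: returns false if nothing arrives within 5 ms.
  if (q.wait_dequeue_timed(item, std::chrono::milliseconds(5))) {
    // got a second item
  }
}
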
+// Also dual-licensed under the Boost Software License (see LICENSE.md) #pragma once @@ -42,6 +43,13 @@ #endif #endif +#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) +// VS2019 with /W4 warns about constant conditional expressions but unless /std=c++17 or higher +// does not support `if constexpr`, so we have no choice but to simply disable the warning +#pragma warning(push) +#pragma warning(disable: 4127) // conditional expression is constant +#endif + #if defined(__APPLE__) #include "TargetConditionals.h" #endif @@ -146,10 +154,21 @@ namespace moodycamel { namespace details { typedef std::uintptr_t thread_id_t; static const thread_id_t invalid_thread_id = 0; // Address can't be nullptr static const thread_id_t invalid_thread_id2 = 1; // Member accesses off a null pointer are also generally invalid. Plus it's not aligned. - static inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast(&x); } + inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast(&x); } } } #endif +// Constexpr if +#ifndef MOODYCAMEL_CONSTEXPR_IF +#if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || __cplusplus > 201402L +#define MOODYCAMEL_CONSTEXPR_IF if constexpr +#define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]] +#else +#define MOODYCAMEL_CONSTEXPR_IF if +#define MOODYCAMEL_MAYBE_UNUSED +#endif +#endif + // Exceptions #ifndef MOODYCAMEL_EXCEPTIONS_ENABLED #if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__)) @@ -162,8 +181,8 @@ namespace moodycamel { namespace details { #define MOODYCAMEL_RETHROW throw #define MOODYCAMEL_THROW(expr) throw (expr) #else -#define MOODYCAMEL_TRY if (true) -#define MOODYCAMEL_CATCH(...) else if (false) +#define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF (true) +#define MOODYCAMEL_CATCH(...) 
else MOODYCAMEL_CONSTEXPR_IF (false) #define MOODYCAMEL_RETHROW #define MOODYCAMEL_THROW(expr) #endif @@ -214,6 +233,44 @@ namespace moodycamel { namespace details { #endif #endif +namespace moodycamel { namespace details { +#ifndef MOODYCAMEL_ALIGNAS +// VS2013 doesn't support alignas or alignof, and align() requires a constant literal +#if defined(_MSC_VER) && _MSC_VER <= 1800 +#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment)) +#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj) +#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) typename details::Vs2013Aligned::value, T>::type + template struct Vs2013Aligned { }; // default, unsupported alignment + template struct Vs2013Aligned<1, T> { typedef __declspec(align(1)) T type; }; + template struct Vs2013Aligned<2, T> { typedef __declspec(align(2)) T type; }; + template struct Vs2013Aligned<4, T> { typedef __declspec(align(4)) T type; }; + template struct Vs2013Aligned<8, T> { typedef __declspec(align(8)) T type; }; + template struct Vs2013Aligned<16, T> { typedef __declspec(align(16)) T type; }; + template struct Vs2013Aligned<32, T> { typedef __declspec(align(32)) T type; }; + template struct Vs2013Aligned<64, T> { typedef __declspec(align(64)) T type; }; + template struct Vs2013Aligned<128, T> { typedef __declspec(align(128)) T type; }; + template struct Vs2013Aligned<256, T> { typedef __declspec(align(256)) T type; }; +#else + template struct identity { typedef T type; }; +#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment) +#define MOODYCAMEL_ALIGNOF(obj) alignof(obj) +#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) alignas(alignof(obj)) typename details::identity::type +#endif +#endif +} } + + +// TSAN can false report races in lock-free code. To enable TSAN to be used from projects that use this one, +// we can apply per-function compile-time suppression. +// See https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer +#define MOODYCAMEL_NO_TSAN +#if defined(__has_feature) + #if __has_feature(thread_sanitizer) + #undef MOODYCAMEL_NO_TSAN + #define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread"))) + #endif // TSAN +#endif // TSAN + // Compiler-specific likely/unlikely hints namespace moodycamel { namespace details { #if defined(__GNUC__) @@ -315,6 +372,12 @@ struct ConcurrentQueueDefaultTraits // that this limit is enforced at the block level (for performance reasons), i.e. // it's rounded up to the nearest block size. static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max::value; + + // The number of times to spin before sleeping when waiting on a semaphore. + // Recommended values are on the order of 1000-10000 unless the number of + // consumer threads exceeds the number of idle cores (in which case try 0-100). + // Only affects instances of the BlockingConcurrentQueue. + static const int MAX_SEMA_SPINS = 10000; #ifndef MCDBGQ_USE_RELACY @@ -785,7 +848,7 @@ class ConcurrentQueue } // Destroy implicit producer hash tables - if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) { auto hash = implicitProducerHash.load(std::memory_order_relaxed); while (hash != nullptr) { auto prev = hash->prev; @@ -910,8 +973,8 @@ class ConcurrentQueue // Thread-safe. 
inline bool enqueue(T const& item) { - if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; - return inner_enqueue(item); + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(item); } // Enqueues a single item (by moving it, if possible). @@ -921,8 +984,8 @@ class ConcurrentQueue // Thread-safe. inline bool enqueue(T&& item) { - if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; - return inner_enqueue(std::move(item)); + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(std::move(item)); } // Enqueues a single item (by copying it) using an explicit producer token. @@ -952,8 +1015,8 @@ class ConcurrentQueue template bool enqueue_bulk(It itemFirst, size_t count) { - if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; - return inner_enqueue_bulk(itemFirst, count); + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue_bulk(itemFirst, count); } // Enqueues several items using an explicit producer token. @@ -975,8 +1038,8 @@ class ConcurrentQueue // Thread-safe. inline bool try_enqueue(T const& item) { - if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; - return inner_enqueue(item); + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(item); } // Enqueues a single item (by moving it, if possible). @@ -986,8 +1049,8 @@ class ConcurrentQueue // Thread-safe. inline bool try_enqueue(T&& item) { - if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; - return inner_enqueue(std::move(item)); + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(std::move(item)); } // Enqueues a single item (by copying it) using an explicit producer token. @@ -1016,8 +1079,8 @@ class ConcurrentQueue template bool try_enqueue_bulk(It itemFirst, size_t count) { - if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; - return inner_enqueue_bulk(itemFirst, count); + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue_bulk(itemFirst, count); } // Enqueues several items using an explicit producer token. @@ -1485,7 +1548,7 @@ class ConcurrentQueue template inline bool is_empty() const { - if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { // Check flags for (size_t i = 0; i < BLOCK_SIZE; ++i) { if (!emptyFlags[i].load(std::memory_order_relaxed)) { @@ -1510,9 +1573,9 @@ class ConcurrentQueue // Returns true if the block is now empty (does not apply in explicit context) template - inline bool set_empty(index_t i) + inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i) { - if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { // Set flag assert(!emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].load(std::memory_order_relaxed)); emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].store(true, std::memory_order_release); @@ -1529,9 +1592,9 @@ class ConcurrentQueue // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0). 
// Returns true if the block is now empty (does not apply in explicit context). template - inline bool set_many_empty(index_t i, size_t count) + inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i, size_t count) { - if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { // Set flags std::atomic_thread_fence(std::memory_order_release); i = BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1)) - count + 1; @@ -1552,7 +1615,7 @@ class ConcurrentQueue template inline void set_all_empty() { - if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { // Set all flags for (size_t i = 0; i != BLOCK_SIZE; ++i) { emptyFlags[i].store(true, std::memory_order_relaxed); @@ -1567,7 +1630,7 @@ class ConcurrentQueue template inline void reset_empty() { - if (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { // Reset flags for (size_t i = 0; i != BLOCK_SIZE; ++i) { emptyFlags[i].store(false, std::memory_order_relaxed); @@ -1583,20 +1646,8 @@ class ConcurrentQueue inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); } private: - // IMPORTANT: This must be the first member in Block, so that if T depends on the alignment of - // addresses returned by malloc, that alignment will be preserved. Apparently clang actually - // generates code that uses this assumption for AVX instructions in some cases. Ideally, we - // should also align Block to the alignment of T in case it's higher than malloc's 16-byte - // alignment, but this is hard to do in a cross-platform way. Assert for this case: - static_assert(std::alignment_of::value <= std::alignment_of::value, "The queue does not support super-aligned types at this time"); - // Additionally, we need the alignment of Block itself to be a multiple of max_align_t since - // otherwise the appropriate padding will not be added at the end of Block in order to make - // arrays of Blocks all be properly aligned (not just the first one). We use a union to force - // this. - union { - char elements[sizeof(T) * BLOCK_SIZE]; - details::max_align_t dummy; - }; + static_assert(std::alignment_of::value <= sizeof(T), "The queue does not support types with an alignment greater than their size at this time"); + MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements; public: Block* next; std::atomic elementsCompletelyDequeued; @@ -1611,7 +1662,7 @@ class ConcurrentQueue void* owner; #endif }; - static_assert(std::alignment_of::value >= std::alignment_of::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping"); + static_assert(std::alignment_of::value >= std::alignment_of::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping"); #ifdef MCDBGQ_TRACKMEM @@ -1637,7 +1688,7 @@ class ConcurrentQueue { } - virtual ~ProducerBase() { }; + virtual ~ProducerBase() { } template inline bool dequeue(U& element) @@ -1818,7 +1869,10 @@ class ConcurrentQueue // to allocate a new index. 
Note pr_blockIndexRaw can only be nullptr if // the initial allocation failed in the constructor. - if (allocMode == CannotAlloc || !new_block_index(pr_blockIndexSlotsUsed)) { + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { + return false; + } + else if (!new_block_index(pr_blockIndexSlotsUsed)) { return false; } } @@ -1843,7 +1897,7 @@ class ConcurrentQueue ++pr_blockIndexSlotsUsed; } - if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward(element)))) { + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { // The constructor may throw. We want the element not to appear in the queue in // that case (without corrupting the queue): MOODYCAMEL_TRY { @@ -1869,7 +1923,7 @@ class ConcurrentQueue blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release); pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); - if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward(element)))) { + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } @@ -1983,7 +2037,7 @@ class ConcurrentQueue } template - bool enqueue_bulk(It itemFirst, size_t count) + bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count) { // First, we need to make sure we have enough room to enqueue all of the elements; // this means pre-allocating blocks and putting them in the block index (but only if @@ -2022,7 +2076,14 @@ class ConcurrentQueue assert(!details::circular_less_than(currentTailIndex, head)); bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) { - if (allocMode == CannotAlloc || full || !new_block_index(originalBlockIndexSlotsUsed)) { + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { + // Failed to allocate, undo changes (but keep injected blocks) + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? 
firstAllocatedBlock : startBlock; + return false; + } + else if (full || !new_block_index(originalBlockIndexSlotsUsed)) { // Failed to allocate, undo changes (but keep injected blocks) pr_blockIndexFront = originalBlockIndexFront; pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; @@ -2078,7 +2139,7 @@ class ConcurrentQueue block = block->next; } - if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))) { + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); } } @@ -2093,11 +2154,11 @@ class ConcurrentQueue this->tailBlock = firstAllocatedBlock; } while (true) { - auto stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); if (details::circular_less_than(newTailIndex, stopIndex)) { stopIndex = newTailIndex; } - if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))) { + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { while (currentTailIndex != stopIndex) { new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); } @@ -2112,7 +2173,7 @@ class ConcurrentQueue // may only define a (noexcept) move constructor, and so calls to the // cctor will not compile, even if they are in an if branch that will never // be executed - new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); ++currentTailIndex; ++itemFirst; } @@ -2159,8 +2220,9 @@ class ConcurrentQueue this->tailBlock = this->tailBlock->next; } - if (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst))) && firstAllocatedBlock != nullptr) { - blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + if (firstAllocatedBlock != nullptr) + blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); } this->tailIndex.store(newTailIndex, std::memory_order_release); @@ -2177,7 +2239,7 @@ class ConcurrentQueue desiredCount = desiredCount < max ? 
desiredCount : max; std::atomic_thread_fence(std::memory_order_acquire); - auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed);; + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); tail = this->tailIndex.load(std::memory_order_acquire); auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); @@ -2204,7 +2266,7 @@ class ConcurrentQueue auto index = firstIndex; do { auto firstIndexInBlock = index; - auto endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; auto block = localBlockIndex->entries[indexIndex].block; if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { @@ -2438,8 +2500,8 @@ class ConcurrentQueue newBlock->owner = this; #endif newBlock->ConcurrentQueue::Block::template reset_empty(); - - if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward(element)))) { + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { // May throw, try to insert now before we publish the fact that we have this new block MOODYCAMEL_TRY { new ((*newBlock)[currentTailIndex]) T(std::forward(element)); @@ -2457,7 +2519,7 @@ class ConcurrentQueue this->tailBlock = newBlock; - if (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new ((T*)nullptr) T(std::forward(element)))) { + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } @@ -2541,6 +2603,10 @@ class ConcurrentQueue return false; } +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4706) // assignment within conditional expression +#endif template bool enqueue_bulk(It itemFirst, size_t count) { @@ -2576,6 +2642,7 @@ class ConcurrentQueue auto head = this->headIndex.load(std::memory_order_relaxed); assert(!details::circular_less_than(currentTailIndex, head)); bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + if (full || !(indexInserted = insert_block_index_entry(idxEntry, currentTailIndex)) || (newBlock = this->parent->ConcurrentQueue::template requisition_block()) == nullptr) { // Index allocation or block allocation failed; revert any other allocations // and index insertions done so far for this operation @@ -2626,11 +2693,11 @@ class ConcurrentQueue this->tailBlock = firstAllocatedBlock; } while (true) { - auto stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); if (details::circular_less_than(newTailIndex, stopIndex)) { stopIndex = newTailIndex; } - if (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))) { + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { while (currentTailIndex != stopIndex) { new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); } @@ -2638,7 
+2705,7 @@ class ConcurrentQueue else { MOODYCAMEL_TRY { while (currentTailIndex != stopIndex) { - new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if<(bool)!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new ((T*)nullptr) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); ++currentTailIndex; ++itemFirst; } @@ -2690,6 +2757,9 @@ class ConcurrentQueue this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } +#ifdef _MSC_VER +#pragma warning(pop) +#endif template size_t dequeue_bulk(It& itemFirst, size_t max) @@ -2721,7 +2791,7 @@ class ConcurrentQueue auto indexIndex = get_block_index_index_for_index(index, localBlockIndex); do { auto blockStartIndex = index; - auto endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; auto entry = localBlockIndex->index[indexIndex]; @@ -2819,7 +2889,7 @@ class ConcurrentQueue if (localBlockIndex == nullptr) { return false; // this can happen if new_block_index failed in the constructor } - auto newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); + size_t newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); idxEntry = localBlockIndex->index[newTail]; if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE || idxEntry->value.load(std::memory_order_relaxed) == nullptr) { @@ -2830,7 +2900,10 @@ class ConcurrentQueue } // No room in the old block index, try to allocate another one! 
- if (allocMode == CannotAlloc || !new_block_index()) { + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { + return false; + } + else if (!new_block_index()) { return false; } localBlockIndex = blockIndex.load(std::memory_order_relaxed); @@ -3010,11 +3083,12 @@ class ConcurrentQueue return block; } - if (canAlloc == CanAlloc) { + MOODYCAMEL_CONSTEXPR_IF (canAlloc == CanAlloc) { return create(); } - - return nullptr; + else { + return nullptr; + } } @@ -3243,50 +3317,56 @@ class ConcurrentQueue inline void populate_initial_implicit_producer_hash() { - if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return; - - implicitProducerHashCount.store(0, std::memory_order_relaxed); - auto hash = &initialImplicitProducerHash; - hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; - hash->entries = &initialImplicitProducerHashEntries[0]; - for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) { - initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { + return; + } + else { + implicitProducerHashCount.store(0, std::memory_order_relaxed); + auto hash = &initialImplicitProducerHash; + hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; + hash->entries = &initialImplicitProducerHashEntries[0]; + for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) { + initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); + } + hash->prev = nullptr; + implicitProducerHash.store(hash, std::memory_order_relaxed); } - hash->prev = nullptr; - implicitProducerHash.store(hash, std::memory_order_relaxed); } void swap_implicit_producer_hashes(ConcurrentQueue& other) { - if (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return; - - // Swap (assumes our implicit producer hash is initialized) - initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries); - initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0]; - other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0]; - - details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount); - - details::swap_relaxed(implicitProducerHash, other.implicitProducerHash); - if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) { - implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed); + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { + return; } else { - ImplicitProducerHash* hash; - for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) { - continue; + // Swap (assumes our implicit producer hash is initialized) + initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries); + initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0]; + other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0]; + + details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount); + + details::swap_relaxed(implicitProducerHash, other.implicitProducerHash); + if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) { + implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed); } - hash->prev = &initialImplicitProducerHash; - } - if (other.implicitProducerHash.load(std::memory_order_relaxed) == 
&initialImplicitProducerHash) { - other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed); - } - else { - ImplicitProducerHash* hash; - for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) { - continue; + else { + ImplicitProducerHash* hash; + for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) { + continue; + } + hash->prev = &initialImplicitProducerHash; + } + if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) { + other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed); + } + else { + ImplicitProducerHash* hash; + for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) { + continue; + } + hash->prev = &other.initialImplicitProducerHash; } - hash->prev = &other.initialImplicitProducerHash; } } @@ -3311,6 +3391,7 @@ class ConcurrentQueue auto hashedId = details::hash_thread_id(id); auto mainHash = implicitProducerHash.load(std::memory_order_acquire); + assert(mainHash != nullptr); // silence clang-tidy and MSVC warnings (hash cannot be null) for (auto hash = mainHash; hash != nullptr; hash = hash->prev) { // Look for the id in this hash auto index = hashedId; @@ -3378,7 +3459,7 @@ class ConcurrentQueue } auto newHash = new (raw) ImplicitProducerHash; - newHash->capacity = newCapacity; + newHash->capacity = static_cast(newCapacity); newHash->entries = reinterpret_cast(details::align_for(raw + sizeof(ImplicitProducerHash))); for (size_t i = 0; i != newCapacity; ++i) { new (newHash->entries + i) ImplicitProducerKVP; @@ -3488,55 +3569,76 @@ class ConcurrentQueue ////////////////////////////////// // Utility functions ////////////////////////////////// - + + template + static inline void* aligned_malloc(size_t size) + { + MOODYCAMEL_CONSTEXPR_IF (std::alignment_of::value <= std::alignment_of::value) + return (Traits::malloc)(size); + else { + size_t alignment = std::alignment_of::value; + void* raw = (Traits::malloc)(size + alignment - 1 + sizeof(void*)); + if (!raw) + return nullptr; + char* ptr = details::align_for(reinterpret_cast(raw) + sizeof(void*)); + *(reinterpret_cast(ptr) - 1) = raw; + return ptr; + } + } + + template + static inline void aligned_free(void* ptr) + { + MOODYCAMEL_CONSTEXPR_IF (std::alignment_of::value <= std::alignment_of::value) + return (Traits::free)(ptr); + else + (Traits::free)(ptr ? *(reinterpret_cast(ptr) - 1) : nullptr); + } + template static inline U* create_array(size_t count) { assert(count > 0); - auto p = static_cast((Traits::malloc)(sizeof(U) * count)); - if (p == nullptr) { + U* p = static_cast(aligned_malloc(sizeof(U) * count)); + if (p == nullptr) return nullptr; - } - - for (size_t i = 0; i != count; ++i) { + + for (size_t i = 0; i != count; ++i) new (p + i) U(); - } return p; } - + template static inline void destroy_array(U* p, size_t count) { if (p != nullptr) { assert(count > 0); - for (size_t i = count; i != 0; ) { + for (size_t i = count; i != 0; ) (p + --i)->~U(); - } - (Traits::free)(p); } + aligned_free(p); } - + template static inline U* create() { - auto p = (Traits::malloc)(sizeof(U)); + void* p = aligned_malloc(sizeof(U)); return p != nullptr ? 
new (p) U : nullptr; } - + template static inline U* create(A1&& a1) { - auto p = (Traits::malloc)(sizeof(U)); + void* p = aligned_malloc(sizeof(U)); return p != nullptr ? new (p) U(std::forward(a1)) : nullptr; } - + template static inline void destroy(U* p) { - if (p != nullptr) { + if (p != nullptr) p->~U(); - } - (Traits::free)(p); + aligned_free(p); } private: @@ -3596,7 +3698,7 @@ ConsumerToken::ConsumerToken(ConcurrentQueue& queue) : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) { initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release); - lastKnownGlobalOffset = -1; + lastKnownGlobalOffset = static_cast(-1); } template @@ -3604,7 +3706,7 @@ ConsumerToken::ConsumerToken(BlockingConcurrentQueue& queue) : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) { initialOffset = reinterpret_cast*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release); - lastKnownGlobalOffset = -1; + lastKnownGlobalOffset = static_cast(-1); } template @@ -3631,6 +3733,10 @@ inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, ty } +#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) +#pragma warning(pop) +#endif + #if defined(__GNUC__) #pragma GCC diagnostic pop #endif diff --git a/third_party/concurrentqueue/lightweightsemaphore.h b/third_party/concurrentqueue/lightweightsemaphore.h new file mode 100644 index 0000000..b0f24e1 --- /dev/null +++ b/third_party/concurrentqueue/lightweightsemaphore.h @@ -0,0 +1,411 @@ +// Provides an efficient implementation of a semaphore (LightweightSemaphore). +// This is an extension of Jeff Preshing's sempahore implementation (licensed +// under the terms of its separate zlib license) that has been adapted and +// extended by Cameron Desrochers. + +#pragma once + +#include // For std::size_t +#include +#include // For std::make_signed + +#if defined(_WIN32) +// Avoid including windows.h in a header; we only need a handful of +// items, so we'll redeclare them here (this is relatively safe since +// the API generally has to remain stable between Windows versions). +// I know this is an ugly hack but it still beats polluting the global +// namespace with thousands of generic names or adding a .cpp for nothing. +extern "C" { + struct _SECURITY_ATTRIBUTES; + __declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName); + __declspec(dllimport) int __stdcall CloseHandle(void* hObject); + __declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds); + __declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount); +} +#elif defined(__MACH__) +#include +#elif defined(__unix__) +#include +#endif + +namespace moodycamel +{ +namespace details +{ + +// Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's +// portable + lightweight semaphore implementations, originally from +// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h +// LICENSE: +// Copyright (c) 2015 Jeff Preshing +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. 
+// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgement in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. +#if defined(_WIN32) +class Semaphore +{ +private: + void* m_hSema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + +public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + const long maxLong = 0x7fffffff; + m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr); + assert(m_hSema); + } + + ~Semaphore() + { + CloseHandle(m_hSema); + } + + bool wait() + { + const unsigned long infinite = 0xffffffff; + return WaitForSingleObject(m_hSema, infinite) == 0; + } + + bool try_wait() + { + return WaitForSingleObject(m_hSema, 0) == 0; + } + + bool timed_wait(std::uint64_t usecs) + { + return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) == 0; + } + + void signal(int count = 1) + { + while (!ReleaseSemaphore(m_hSema, count, nullptr)); + } +}; +#elif defined(__MACH__) +//--------------------------------------------------------- +// Semaphore (Apple iOS and OSX) +// Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html +//--------------------------------------------------------- +class Semaphore +{ +private: + semaphore_t m_sema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + +public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + kern_return_t rc = semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount); + assert(rc == KERN_SUCCESS); + (void)rc; + } + + ~Semaphore() + { + semaphore_destroy(mach_task_self(), m_sema); + } + + bool wait() + { + return semaphore_wait(m_sema) == KERN_SUCCESS; + } + + bool try_wait() + { + return timed_wait(0); + } + + bool timed_wait(std::uint64_t timeout_usecs) + { + mach_timespec_t ts; + ts.tv_sec = static_cast(timeout_usecs / 1000000); + ts.tv_nsec = static_cast((timeout_usecs % 1000000) * 1000); + + // added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html + kern_return_t rc = semaphore_timedwait(m_sema, ts); + return rc == KERN_SUCCESS; + } + + void signal() + { + while (semaphore_signal(m_sema) != KERN_SUCCESS); + } + + void signal(int count) + { + while (count-- > 0) + { + while (semaphore_signal(m_sema) != KERN_SUCCESS); + } + } +}; +#elif defined(__unix__) +//--------------------------------------------------------- +// Semaphore (POSIX, Linux) +//--------------------------------------------------------- +class Semaphore +{ +private: + sem_t m_sema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + +public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + int rc = 
sem_init(&m_sema, 0, static_cast(initialCount)); + assert(rc == 0); + (void)rc; + } + + ~Semaphore() + { + sem_destroy(&m_sema); + } + + bool wait() + { + // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error + int rc; + do { + rc = sem_wait(&m_sema); + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + bool try_wait() + { + int rc; + do { + rc = sem_trywait(&m_sema); + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + bool timed_wait(std::uint64_t usecs) + { + struct timespec ts; + const int usecs_in_1_sec = 1000000; + const int nsecs_in_1_sec = 1000000000; + clock_gettime(CLOCK_REALTIME, &ts); + ts.tv_sec += (time_t)(usecs / usecs_in_1_sec); + ts.tv_nsec += (long)(usecs % usecs_in_1_sec) * 1000; + // sem_timedwait bombs if you have more than 1e9 in tv_nsec + // so we have to clean things up before passing it in + if (ts.tv_nsec >= nsecs_in_1_sec) { + ts.tv_nsec -= nsecs_in_1_sec; + ++ts.tv_sec; + } + + int rc; + do { + rc = sem_timedwait(&m_sema, &ts); + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + void signal() + { + while (sem_post(&m_sema) == -1); + } + + void signal(int count) + { + while (count-- > 0) + { + while (sem_post(&m_sema) == -1); + } + } +}; +#else +#error Unsupported platform! (No semaphore wrapper available) +#endif + +} // end namespace details + + +//--------------------------------------------------------- +// LightweightSemaphore +//--------------------------------------------------------- +class LightweightSemaphore +{ +public: + typedef std::make_signed::type ssize_t; + +private: + std::atomic m_count; + details::Semaphore m_sema; + int m_maxSpins; + + bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1) + { + ssize_t oldCount; + int spin = m_maxSpins; + while (--spin >= 0) + { + oldCount = m_count.load(std::memory_order_relaxed); + if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) + return true; + std::atomic_signal_fence(std::memory_order_acquire); // Prevent the compiler from collapsing the loop. + } + oldCount = m_count.fetch_sub(1, std::memory_order_acquire); + if (oldCount > 0) + return true; + if (timeout_usecs < 0) + { + if (m_sema.wait()) + return true; + } + if (timeout_usecs > 0 && m_sema.timed_wait((std::uint64_t)timeout_usecs)) + return true; + // At this point, we've timed out waiting for the semaphore, but the + // count is still decremented indicating we may still be waiting on + // it. So we have to re-adjust the count, but only if the semaphore + // wasn't signaled enough times for us too since then. If it was, we + // need to release the semaphore too. + while (true) + { + oldCount = m_count.load(std::memory_order_acquire); + if (oldCount >= 0 && m_sema.try_wait()) + return true; + if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed)) + return false; + } + } + + ssize_t waitManyWithPartialSpinning(ssize_t max, std::int64_t timeout_usecs = -1) + { + assert(max > 0); + ssize_t oldCount; + int spin = m_maxSpins; + while (--spin >= 0) + { + oldCount = m_count.load(std::memory_order_relaxed); + if (oldCount > 0) + { + ssize_t newCount = oldCount > max ? 
oldCount - max : 0; + if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) + return oldCount - newCount; + } + std::atomic_signal_fence(std::memory_order_acquire); + } + oldCount = m_count.fetch_sub(1, std::memory_order_acquire); + if (oldCount <= 0) + { + if ((timeout_usecs == 0) || (timeout_usecs < 0 && !m_sema.wait()) || (timeout_usecs > 0 && !m_sema.timed_wait((std::uint64_t)timeout_usecs))) + { + while (true) + { + oldCount = m_count.load(std::memory_order_acquire); + if (oldCount >= 0 && m_sema.try_wait()) + break; + if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed)) + return 0; + } + } + } + if (max > 1) + return 1 + tryWaitMany(max - 1); + return 1; + } + +public: + LightweightSemaphore(ssize_t initialCount = 0, int maxSpins = 10000) : m_count(initialCount), m_maxSpins(maxSpins) + { + assert(initialCount >= 0); + assert(maxSpins >= 0); + } + + bool tryWait() + { + ssize_t oldCount = m_count.load(std::memory_order_relaxed); + while (oldCount > 0) + { + if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) + return true; + } + return false; + } + + bool wait() + { + return tryWait() || waitWithPartialSpinning(); + } + + bool wait(std::int64_t timeout_usecs) + { + return tryWait() || waitWithPartialSpinning(timeout_usecs); + } + + // Acquires between 0 and (greedily) max, inclusive + ssize_t tryWaitMany(ssize_t max) + { + assert(max >= 0); + ssize_t oldCount = m_count.load(std::memory_order_relaxed); + while (oldCount > 0) + { + ssize_t newCount = oldCount > max ? oldCount - max : 0; + if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) + return oldCount - newCount; + } + return 0; + } + + // Acquires at least one, and (greedily) at most max + ssize_t waitMany(ssize_t max, std::int64_t timeout_usecs) + { + assert(max >= 0); + ssize_t result = tryWaitMany(max); + if (result == 0 && max > 0) + result = waitManyWithPartialSpinning(max, timeout_usecs); + return result; + } + + ssize_t waitMany(ssize_t max) + { + ssize_t result = waitMany(max, -1); + assert(result > 0); + return result; + } + + void signal(ssize_t count = 1) + { + assert(count >= 0); + ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release); + ssize_t toRelease = -oldCount < count ? -oldCount : count; + if (toRelease > 0) + { + m_sema.signal((int)toRelease); + } + } + + std::size_t availableApprox() const + { + ssize_t count = m_count.load(std::memory_order_relaxed); + return count > 0 ? 
static_cast(count) : 0; + } +}; + +} // end namespace moodycamel diff --git a/vector/mmap_raw_vector.cc b/vector/mmap_raw_vector.cc index 127f3f3..8579474 100644 --- a/vector/mmap_raw_vector.cc +++ b/vector/mmap_raw_vector.cc @@ -27,88 +27,84 @@ MmapRawVector::MmapRawVector(VectorMetaInfo *meta_info, const string &root_path, const StoreParams &store_params, const char *docids_bitmap) : RawVector(meta_info, root_path, docids_bitmap, store_params) { + allow_use_zpf = false; vector_byte_size_ = meta_info_->DataSize() * meta_info->Dimension(); - nsegment_ = 0; - segment_size_ = store_params.segment_size; + storage_mgr_ = nullptr; } -MmapRawVector::~MmapRawVector() { - for (int i = 0; i < file_mappers_.size(); i++) { - CHECK_DELETE(file_mappers_[i]); - } -} - -string MmapRawVector::NextSegmentFilePath() { - char buf[7]; - snprintf(buf, 7, "%06d", nsegment_); - string vec_dir = root_path_ + "/" + meta_info_->Name(); - string file_path = vec_dir + "/vector-" + buf; - return file_path; -} +MmapRawVector::~MmapRawVector() { CHECK_DELETE(storage_mgr_); } int MmapRawVector::InitStore() { std::string vec_dir = root_path_ + "/" + meta_info_->Name(); - if (utils::make_dir(vec_dir.c_str())) { - LOG(ERROR) << "mkdir error, path=" << vec_dir; - return IO_ERR; + uint32_t var = 0; + --var; + uint32_t max_seg_size = var / vector_byte_size_; + if (max_seg_size < store_params_.segment_size) { + store_params_.segment_size = max_seg_size; + LOG(INFO) << "Because the vector length is too long, segment_size becomes " + << max_seg_size; } - file_mappers_.resize(kMaxSegments, nullptr); - int ret = Extend(); - if (ret) return ret; - - LOG(INFO) << "Init success! vector byte size=" << vector_byte_size_ - << ", segment size=" << segment_size_; - return 0; -} - -int MmapRawVector::Extend() { - VectorFileMapper *file_mapper = new VectorFileMapper( - NextSegmentFilePath(), segment_size_, vector_byte_size_); - int ret = file_mapper->Init(); + StorageManagerOptions options; + options.segment_size = store_params_.segment_size; + options.fixed_value_bytes = vector_byte_size_; + storage_mgr_ = new StorageManager(vec_dir, BlockType::VectorBlockType, options); +#ifdef WITH_ZFP + if(!store_params_.compress.IsEmpty()) { + if (meta_info_->DataType() != VectorValueType::FLOAT) { + LOG(ERROR) << "data type is not float, compress is unsupported"; + return PARAM_ERR; + } + int res = storage_mgr_->UseCompress(CompressType::Zfp, meta_info_->Dimension()); + if(res == 0) { + LOG(INFO) << "Storage_manager use zfp compress vector"; + } else { + LOG(INFO) << "ZFP initialization failed, not use zfp"; + } + }else { + LOG(INFO) << "store_params_.compress.IsEmpty() is true, not use zfp"; + } +#endif + int ret = storage_mgr_->Init(store_params_.cache_size); if (ret) { - LOG(ERROR) << "extend file mapper error, ret=" << ret; + LOG(ERROR) << "init gamma db error, ret=" << ret; return ret; } - file_mappers_[nsegment_++] = file_mapper; - return 0; -} -int MmapRawVector::AddToStore(uint8_t *v, int len) { - if (file_mappers_[nsegment_ - 1]->IsFull() && Extend()) { - LOG(ERROR) << "extend error"; - return INTERNAL_ERR; - } - return file_mappers_[nsegment_ - 1]->Add(v, len); + LOG(INFO) << "init mmap raw vector success! 
vector byte size=" + << vector_byte_size_ << ", path=" << vec_dir; + return 0; } int MmapRawVector::GetVectorHeader(int start, int n, ScopeVectors &vecs, std::vector &lens) { - if (start + n > meta_info_->Size()) return PARAM_ERR; - while (n) { - int offset = start % segment_size_; - vecs.Add(file_mappers_[start / segment_size_]->GetVector(offset), false); - int len = segment_size_ - offset; - if (len > n) len = n; - lens.push_back(len); - start += len; - n -= len; - } - return 0; + int ret = storage_mgr_->GetHeaders(start, n, vecs.ptr_, lens); + vecs.deletable_.resize(vecs.ptr_.size(), true); + return ret; } +int MmapRawVector::AddToStore(uint8_t *v, int len) { return storage_mgr_->Add(v, len); } + int MmapRawVector::UpdateToStore(int vid, uint8_t *v, int len) { - if (vid >= (long)meta_info_->Size() || vid < 0 || len != vector_byte_size_) { - return PARAM_ERR; - } - return file_mappers_[vid / segment_size_]->Update(vid % segment_size_, v, len); -}; + return storage_mgr_->Update(vid, v, len); +} + +int MmapRawVector::AlterCacheSize(uint32_t cache_size) { + if(storage_mgr_ == nullptr) return -1; + storage_mgr_->AlterCacheSize(cache_size, 0); + return 0; +} + +int MmapRawVector::GetCacheSize(uint32_t &cache_size) { + if(storage_mgr_ == nullptr) return -1; + uint32_t str_cache_size = 0; + storage_mgr_->GetCacheSize(cache_size, str_cache_size); + return 0; +} int MmapRawVector::GetVector(long vid, const uint8_t *&vec, bool &deletable) const { - if (vid >= meta_info_->Size() || vid < 0) return -1; - vec = file_mappers_[vid / segment_size_]->GetVector(vid % segment_size_); - deletable = false; - return 0; + deletable = true; + return storage_mgr_->Get(vid, vec); } } // namespace tig_gamma diff --git a/vector/mmap_raw_vector.h b/vector/mmap_raw_vector.h index cce555a..dab6e29 100644 --- a/vector/mmap_raw_vector.h +++ b/vector/mmap_raw_vector.h @@ -12,8 +12,7 @@ #include #include "raw_vector.h" -#include "vector_buffer_queue.h" -#include "vector_file_mapper.h" +#include "storage_manager.h" namespace tig_gamma { @@ -30,6 +29,10 @@ class MmapRawVector : public RawVector { std::vector &lens) override; int UpdateToStore(int vid, uint8_t *v, int len) override; + int AlterCacheSize(uint32_t cache_size) override; + + int GetCacheSize(uint32_t &cache_size) override; + protected: int GetVector(long vid, const uint8_t *&vec, bool &deletable) const override; @@ -39,9 +42,7 @@ class MmapRawVector : public RawVector { private: friend MmapRawVectorIO; - std::vector file_mappers_; - int nsegment_; - int segment_size_; + StorageManager *storage_mgr_; }; } // namespace tig_gamma diff --git a/vector/raw_vector.cc b/vector/raw_vector.cc index 05f6adb..acf50f4 100644 --- a/vector/raw_vector.cc +++ b/vector/raw_vector.cc @@ -176,11 +176,18 @@ RawVector::RawVector(VectorMetaInfo *meta_info, const string &root_path, vio_ = nullptr; str_mem_ptr_ = nullptr; vid_mgr_ = nullptr; +#ifdef WITH_ZFP + zfp_compressor_ = nullptr; +#endif + allow_use_zpf = true; } RawVector::~RawVector() { CHECK_DELETE_ARRAY(str_mem_ptr_); CHECK_DELETE(vid_mgr_); +#ifdef WITH_ZFP + CHECK_DELETE(zfp_compressor_); +#endif } int RawVector::Init(bool has_source, bool multi_vids) { @@ -206,15 +213,16 @@ int RawVector::Init(bool has_source, bool multi_vids) { vector_byte_size_ = meta_info_->Dimension() * data_size_; #ifdef WITH_ZFP - if (!store_params_.compress.IsEmpty()) { + if (!store_params_.compress.IsEmpty() && allow_use_zpf) { if (meta_info_->DataType() != VectorValueType::FLOAT) { LOG(ERROR) << "data type is not float, compress is unsupported"; 
return PARAM_ERR; } + zfp_compressor_ = new ZFPCompressor; int ret = - zfp_compressor_.Init(meta_info_->Dimension(), store_params_.compress); + zfp_compressor_->Init(meta_info_->Dimension(), store_params_.compress); if (ret) return ret; - vector_byte_size_ = zfp_compressor_.ZfpSize(); + vector_byte_size_ = zfp_compressor_->ZfpSize(); } #endif @@ -325,9 +333,9 @@ int RawVector::Update(int docid, struct Field &field) { int RawVector::Compress(uint8_t *v, ScopeVector &svec) { #ifdef WITH_ZFP - if (!store_params_.compress.IsEmpty()) { + if (zfp_compressor_) { uint8_t *cmprs_v = nullptr; - if (zfp_compressor_.Compress((float *)v, cmprs_v)) { + if (zfp_compressor_->Compress((float *)v, cmprs_v)) { return INTERNAL_ERR; } svec.Set(cmprs_v, true); @@ -342,9 +350,9 @@ int RawVector::Compress(uint8_t *v, ScopeVector &svec) { int RawVector::Decompress(uint8_t *cmprs_v, int n, uint8_t *&vec, bool &deletable) const { #ifdef WITH_ZFP - if (!store_params_.compress.IsEmpty()) { + if (zfp_compressor_) { float *v = nullptr; - if (zfp_compressor_.Decompress(cmprs_v, n, v)) { + if (zfp_compressor_->Decompress(cmprs_v, n, v)) { return INTERNAL_ERR; } vec = (uint8_t *)v; @@ -400,7 +408,7 @@ int StoreParams::Parse(utils::JsonParser &jp) { int StoreParams::MergeRight(StoreParams &other) { cache_size = other.cache_size; segment_size = other.segment_size; - compress.MergeRight(other.compress); + // compress.MergeRight(other.compress); return 0; } diff --git a/vector/raw_vector.h b/vector/raw_vector.h index 716c89f..7c3458a 100644 --- a/vector/raw_vector.h +++ b/vector/raw_vector.h @@ -28,7 +28,6 @@ static const int kInitSize = 1000 * 1000; class RawVectorIO; struct StoreParams : DumpConfig { - std::string store_type; long cache_size; // bytes int segment_size; utils::JsonParser compress; @@ -40,7 +39,6 @@ struct StoreParams : DumpConfig { StoreParams(const StoreParams &other) { name = other.name; - store_type = other.store_type; cache_size = other.cache_size; segment_size = other.segment_size; compress = other.compress; @@ -53,7 +51,6 @@ struct StoreParams : DumpConfig { std::string ToJsonStr() { std::stringstream ss; ss << "{"; - ss << "\"store_type\":" << store_type << ","; ss << "\"cache_size\":" << cache_size << ","; ss << "\"segment_size\":" << segment_size << ","; ss << "\"compress\":" << compress.ToStr(); @@ -62,7 +59,6 @@ struct StoreParams : DumpConfig { } int ToJson(utils::JsonParser &jp) { - jp.PutString("store_type", store_type); jp.PutDouble("cache_size", cache_size); jp.PutInt("segment_size", segment_size); jp.PutObject("compress", compress); @@ -161,6 +157,10 @@ class RawVector : public VectorReader { virtual int UpdateToStore(int vid, uint8_t *v, int len) = 0; + virtual int GetCacheSize(uint32_t &cache_size) { return -1; }; + + virtual int AlterCacheSize(uint32_t cache_size) { return -1; } + RawVectorIO *GetIO() { return vio_; } void SetIO(RawVectorIO *vio) { vio_ = vio; } @@ -200,8 +200,9 @@ class RawVector : public VectorReader { bool has_source_; std::string desc_; // description of this raw vector StoreParams store_params_; + bool allow_use_zpf; #ifdef WITH_ZFP - ZFPCompressor zfp_compressor_; + ZFPCompressor *zfp_compressor_; #endif const char *docids_bitmap_; VIDMgr *vid_mgr_; diff --git a/vector/raw_vector_factory.h b/vector/raw_vector_factory.h index 538a74c..14265bd 100644 --- a/vector/raw_vector_factory.h +++ b/vector/raw_vector_factory.h @@ -42,19 +42,13 @@ class RawVectorFactory { case VectorStorageType::MemoryOnly: raw_vector = new MemoryRawVector(meta_info, root_path, store_params, 
docids_bitmap); - store_params.store_type = "rocksdb"; #ifdef WITH_ROCKSDB vio = new MemoryRawVectorIO((MemoryRawVector *)raw_vector); #endif break; case VectorStorageType::Mmap: - if (!store_params.compress.IsEmpty()) { - LOG(ERROR) << "mmap unsupport compress"; - return nullptr; - } raw_vector = new MmapRawVector(meta_info, root_path, store_params, docids_bitmap); - store_params.store_type = "file"; vio = new MmapRawVectorIO((MmapRawVector *)raw_vector); break; #ifdef WITH_ROCKSDB diff --git a/vector/rocksdb_raw_vector.cc b/vector/rocksdb_raw_vector.cc index 7061048..4b575f3 100644 --- a/vector/rocksdb_raw_vector.cc +++ b/vector/rocksdb_raw_vector.cc @@ -16,7 +16,6 @@ #include "rocksdb/table.h" #include "utils.h" -using namespace std; using namespace rocksdb; namespace tig_gamma { @@ -40,7 +39,6 @@ int RocksDBRawVector::InitStore() { block_cache_size_ = (size_t)store_params_.cache_size * 1024 * 1024; std::shared_ptr cache = NewLRUCache(block_cache_size_); - // BlockBasedTableOptions table_options_; table_options_.block_cache = cache; Options options; options.table_factory.reset(NewBlockBasedTableFactory(table_options_)); @@ -50,7 +48,7 @@ int RocksDBRawVector::InitStore() { // create the DB if it's not already present options.create_if_missing = true; - string db_path = this->root_path_ + "/" + meta_info_->Name(); + std::string db_path = this->root_path_ + "/" + meta_info_->Name(); if (!utils::isFolderExist(db_path.c_str())) { mkdir(db_path.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); } @@ -72,7 +70,7 @@ int RocksDBRawVector::GetVector(long vid, const uint8_t *&vec, if ((size_t)vid >= meta_info_->Size() || vid < 0) { return 1; } - string key, value; + std::string key, value; ToRowKey((int)vid, key); Status s = db_->Get(ReadOptions(), Slice(key), &value); if (!s.ok()) { @@ -90,12 +88,10 @@ int RocksDBRawVector::GetVector(long vid, const uint8_t *&vec, int RocksDBRawVector::Gets(const std::vector &vids, ScopeVectors &vecs) const { - std::vector keys_data; size_t k = vids.size(); - keys_data.resize(k); + std::vector keys_data(k); std::vector keys; keys.reserve(k); - std::vector values; size_t j = 0; for (size_t i = 0; i < k; i++) { @@ -107,7 +103,8 @@ int RocksDBRawVector::Gets(const std::vector &vids, ++j; // LOG(INFO) << "i=" << i << "key=" << keys[i].ToString(); } - values.resize(j); + + std::vector values(j); std::vector statuses = db_->MultiGet(ReadOptions(), keys, &values); assert(statuses.size() == j); @@ -161,7 +158,7 @@ int RocksDBRawVector::UpdateToStore(int vid, uint8_t *v, int len) { return INTERNAL_ERR; } - string key; + std::string key; ToRowKey(vid, key); Status s = db_->Put(WriteOptions(), Slice(key), Slice((const char *)svec.Get(), this->vector_byte_size_)); @@ -179,7 +176,7 @@ int RocksDBRawVector::GetVectorHeader(int start, int n, ScopeVectors &vecs, } rocksdb::Iterator *it = db_->NewIterator(rocksdb::ReadOptions()); - string start_key, end_key; + std::string start_key, end_key; ToRowKey(start, start_key); ToRowKey(start + n, end_key); it->Seek(Slice(start_key)); @@ -195,7 +192,7 @@ int RocksDBRawVector::GetVectorHeader(int start, int n, ScopeVectors &vecs, } Slice value = it->value(); - string vstr = value.ToString(); + std::string vstr = value.ToString(); if (Decompress(vstr, dst)) { LOG(ERROR) << "rocksdb get, decompress error, vid=" << start + c; delete it; @@ -220,7 +217,7 @@ int RocksDBRawVector::GetVectorHeader(int start, int n, ScopeVectors &vecs, return 0; } -void RocksDBRawVector::ToRowKey(int vid, string &key) const { +void RocksDBRawVector::ToRowKey(int 
diff --git a/vector/vector_file_mapper.cc b/vector/vector_file_mapper.cc
deleted file mode 100644
index 0dea7ec..0000000
--- a/vector/vector_file_mapper.cc
+++ /dev/null
@@ -1,101 +0,0 @@
-/**
- * Copyright 2019 The Gamma Authors.
- *
- * This source code is licensed under the Apache License, Version 2.0 license
- * found in the LICENSE file in the root directory of this source tree.
- */
-
-#include "vector_file_mapper.h"
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include "error_code.h"
-#include "log.h"
-#include "utils.h"
-#include "error_code.h"
-
-namespace tig_gamma {
-
-VectorFileMapper::VectorFileMapper(const std::string &file_path,
-                                   int max_vec_size, int vec_byte_size)
-    : file_path_(file_path),
-      max_vec_size_(max_vec_size),
-      vec_byte_size_(vec_byte_size) {
-  mapped_byte_size_ = (size_t)max_vec_size * vec_byte_size_;
-  vectors_ = nullptr;
-  curr_idx_ = 0;
-}
-
-VectorFileMapper::~VectorFileMapper() {
-  if (vectors_ != nullptr) {
-    int ret = munmap(vectors_, mapped_byte_size_);
-    if (ret != 0) {
-      LOG(ERROR) << "munmap error: " << strerror(errno) << ", ret=" << ret;
-    }
-    vectors_ = nullptr;
-  }
-}
-
-int VectorFileMapper::Init() {
-  int fd = open(file_path_.c_str(), O_RDWR | O_CREAT, 0666);
-  if (-1 == fd) {
-    LOG(ERROR) << "open vector file error, path=" << file_path_;
-    return IO_ERR;
-  }
-
-  if (ftruncate(fd, mapped_byte_size_)) {
-    close(fd);
-    LOG(ERROR) << "truncate file error:" << strerror(errno);
-    return IO_ERR;
-  }
-
-  vectors_ = (uint8_t *)mmap(NULL, mapped_byte_size_, PROT_READ | PROT_WRITE,
-                             MAP_SHARED, fd, 0);
-  close(fd);
-  if (vectors_ == MAP_FAILED) {
-    LOG(ERROR) << "mmap error:" << strerror(errno);
-    return INTERNAL_ERR;
-  }
-
-  int ret = madvise(vectors_, mapped_byte_size_, MADV_RANDOM);
-  if (ret != 0) {
-    LOG(ERROR) << "madvise error: " << strerror(errno) << ", ret=" << ret;
-    return INTERNAL_ERR;
-  }
-  LOG(INFO) << "map success! max byte size=" << mapped_byte_size_
-            << ", file path=" << file_path_;
-  return 0;
-}
-
-int VectorFileMapper::Add(uint8_t *vec, int len) {
-  memcpy(vectors_ + (size_t)curr_idx_ * vec_byte_size_, vec, len);
-  ++curr_idx_;
-  return 0;
-}
-
-const uint8_t *VectorFileMapper::GetVector(int id) {
-  return vectors_ + ((long)id) * vec_byte_size_;
-}
-
-const uint8_t *VectorFileMapper::GetVectors() { return vectors_; }
-
-int VectorFileMapper::Update(int vid, uint8_t *vec, int len) {
-  assert(vec_byte_size_ == len);
-  memcpy(vectors_ + (size_t)vid * vec_byte_size_, vec, vec_byte_size_);
-  return 0;
-}
-int VectorFileMapper::Sync() {
-  if (msync(vectors_, mapped_byte_size_, MS_SYNC)) {
-    LOG(ERROR) << "msync error: " << strerror(errno);
-    return IO_ERR;
-  }
-  return 0;
-}
-
-}  // namespace tig_gamma
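The VectorFileMapper removed above was a thin wrapper around the classic grow-then-map pattern: ftruncate the backing file to a fixed size, mmap it read/write, hint random access, and memcpy vectors in at id * vec_byte_size. A condensed, self-contained sketch of that pattern for reference (error handling trimmed; the file path is chosen only for the example):

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    #include <cstring>

    int main() {
      const size_t bytes = 4096;  // stands in for mapped_byte_size_
      int fd = open("/tmp/vec_mmap_demo", O_RDWR | O_CREAT, 0666);
      if (fd < 0 || ftruncate(fd, bytes) != 0) return 1;
      void *p = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
      close(fd);
      if (p == MAP_FAILED) return 1;
      madvise(p, bytes, MADV_RANDOM);  // vectors are fetched by random id
      std::memcpy(p, "hello", 5);      // Add()/Update() were memcpys at an offset
      msync(p, bytes, MS_SYNC);        // Sync() flushed dirty pages to disk
      munmap(p, bytes);
      return 0;
    }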
diff --git a/vector/vector_file_mapper.h b/vector/vector_file_mapper.h
deleted file mode 100644
index 9c2d2cf..0000000
--- a/vector/vector_file_mapper.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/**
- * Copyright 2019 The Gamma Authors.
- *
- * This source code is licensed under the Apache License, Version 2.0 license
- * found in the LICENSE file in the root directory of this source tree.
- */
-
-#pragma once
-
-#include 
-
-#include 
-
-namespace tig_gamma {
-
-class VectorFileMapper {
- public:
-  VectorFileMapper(const std::string &file_path, int max_vec_size,
-                   int vec_byte_size);
-
-  ~VectorFileMapper();
-
-  int Init();
-
-  int Add(uint8_t *vec, int len);
-
-  const uint8_t *GetVector(int id);
-
-  const uint8_t *GetVectors();
-
-  bool IsFull() { return curr_idx_ == max_vec_size_; }
-
-  int Sync();
-
-  void SetCurrIdx(int curr_idx) { curr_idx_ = curr_idx; }
-
-  int Update(int vid, uint8_t *vec, int len);
-
- private:
-  uint8_t *vectors_;
-  std::string file_path_;
-  size_t mapped_byte_size_;
-
-  int max_vec_size_;
-  int vec_byte_size_;
-  int curr_idx_;
-};
-
-}  // namespace tig_gamma
diff --git a/vector/vector_manager.cc b/vector/vector_manager.cc
index 29ef21c..3a5d6e0 100644
--- a/vector/vector_manager.cc
+++ b/vector/vector_manager.cc
@@ -92,6 +92,8 @@ int VectorManager::CreateVectorTable(TableInfo &table,
         LOG(WARNING) << "NO support for store type " << store_type_str;
         return -1;
       }
+    } else {
+      store_type_str = "Mmap";
     }
 
     std::string &store_param = vector_info.store_param;
@@ -131,6 +133,8 @@ int VectorManager::CreateVectorTable(TableInfo &table,
       LOG(ERROR) << "create raw vector error";
       return -1;
     }
+    LOG(INFO) << "create raw vector success, vec_name[" << vec_name
+              << "] store_type[" << store_type_str << "]";
     bool has_source = vector_info.has_source;
     bool multi_vids = vec_dups[vec_name] > 1 ? true : false;
     int ret = vec->Init(has_source, multi_vids);
@@ -216,7 +220,7 @@ int VectorManager::Update(int docid, std::vector<Field> &fields) {
   for (string &retrieval_type : retrieval_types_) {
     auto it = vector_indexes_.find(IndexName(name, retrieval_type));
     if (it != vector_indexes_.end()) {
-      it->second->updated_vids_.enqueue(vid);
+      it->second->updated_vids_.push(vid);
     }
   }
   if (raw_vector->GetIO()) {
@@ -322,7 +326,7 @@ int VectorManager::AddRTVecsToIndex() {
     }
     std::vector<int> vids;
     int vid;
-    while (retrieval_model->updated_vids_.try_dequeue(vid)) {
+    while (retrieval_model->updated_vids_.try_pop(vid)) {
      if (bitmap::test(raw_vec->Bitmap(), raw_vec->VidMgr()->VID2DocID(vid)))
        continue;
      vids.push_back(vid);
@@ -748,4 +752,35 @@ int VectorManager::MinIndexedNum() {
   }
   return min;
 }
+
+int VectorManager::AlterCacheSize(struct CacheInfo &cache_info) {
+  auto ite = raw_vectors_.find(cache_info.field_name);
+  if (ite != raw_vectors_.end()) {
+    RawVector *raw_vec = ite->second;
+    uint32_t cache_size = (uint32_t)cache_info.cache_size;
+    int res = raw_vec->AlterCacheSize(cache_size);
+    if (res == 0) {
+      LOG(INFO) << "vector field[" << cache_info.field_name
+                << "] AlterCacheSize success!";
+    } else {
+      LOG(INFO) << "vector field[" << cache_info.field_name
+                << "] AlterCacheSize failure!";
+    }
+  } else {
+    LOG(INFO) << "field_name[" << cache_info.field_name << "] error.";
+  }
+  return 0;
+}
+
+int VectorManager::GetAllCacheSize(Config &conf) {
+  auto ite = raw_vectors_.begin();
+  for (ite; ite != raw_vectors_.end(); ++ite) {
+    RawVector *raw_vec = ite->second;
+    uint32_t cache_size = 0;
+    if (0 != raw_vec->GetCacheSize(cache_size)) continue;
+    conf.AddCacheInfo(ite->first, (int)cache_size);
+  }
+  return 0;
+}
+
 }  // namespace tig_gamma
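With this patch, VectorManager::Update pushes changed vector ids into updated_vids_ with push(), and AddRTVecsToIndex drains them with try_pop() before re-indexing; the exact concurrent queue type behind updated_vids_ is not shown in these hunks. The sketch below reproduces that drain pattern with a mutex-guarded stand-in queue (UpdQueue is an illustrative name) so it stays self-contained:

    #include <iostream>
    #include <mutex>
    #include <queue>
    #include <vector>

    // Minimal thread-safe queue exposing the same push/try_pop surface
    // used by updated_vids_ in the hunks above.
    template <typename T>
    class UpdQueue {
     public:
      void push(const T &v) {
        std::lock_guard<std::mutex> lk(m_);
        q_.push(v);
      }
      bool try_pop(T &v) {
        std::lock_guard<std::mutex> lk(m_);
        if (q_.empty()) return false;
        v = q_.front();
        q_.pop();
        return true;
      }

     private:
      std::mutex m_;
      std::queue<T> q_;
    };

    int main() {
      UpdQueue<int> updated_vids;
      const int to_update[] = {11, 12, 13};
      for (int vid : to_update) updated_vids.push(vid);  // producer side (Update)

      std::vector<int> vids;
      int vid;
      while (updated_vids.try_pop(vid)) vids.push_back(vid);  // consumer side (AddRTVecsToIndex)
      std::cout << "vids to reindex: " << vids.size() << "\n";
      return 0;
    }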
"api_data/gamma_config.h" #include "gamma_common_data.h" #include "log.h" #include "raw_vector.h" @@ -68,6 +69,10 @@ class VectorManager { int MinIndexedNum(); + int AlterCacheSize(struct CacheInfo &cache_info); + + int GetAllCacheSize(Config &conf); + private: void Close(); // release all resource inline std::string IndexName(const std::string &field_name,