Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

filtered streaming fix #604

Draft
wants to merge 9 commits into
base: jegao/LabelHotFix
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions include/abstract_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ class AbstractIndex
float *distances);

// insert points with labels; labels should be present for a filtered index
template <typename data_type, typename tag_type, typename label_type>
int insert_point(const data_type *point, const tag_type tag, const std::vector<label_type> &labels);
template <typename data_type, typename tag_type>
int insert_point(const data_type *point, const tag_type tag, const std::vector<std::string> &labels);

// insert point for unfiltered index build. do not use with filtered index
template <typename data_type, typename tag_type> int insert_point(const data_type *point, const tag_type tag);
Expand Down Expand Up @@ -116,7 +116,7 @@ class AbstractIndex
virtual std::pair<uint32_t, uint32_t> _search_with_filters(const DataType &query, const std::string &filter_label,
const size_t K, const uint32_t L, std::any &indices,
float *distances) = 0;
virtual int _insert_point(const DataType &data_point, const TagType tag, Labelvector &labels) = 0;
virtual int _insert_point(const DataType &data_point, const TagType tag, const std::vector<std::string> &labels) = 0;
virtual int _insert_point(const DataType &data_point, const TagType tag) = 0;
virtual int _lazy_delete(const TagType &tag) = 0;
virtual void _lazy_delete(TagVector &tags, TagVector &failed_tags) = 0;
Expand Down
23 changes: 16 additions & 7 deletions include/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas
DISKANN_DLLEXPORT void load(AlignedFileReader &reader, uint32_t num_threads, uint32_t search_l);
#else
// Reads the number of frozen points from graph's metadata file section.
DISKANN_DLLEXPORT static size_t get_graph_num_frozen_points(const std::string &graph_file);
// DISKANN_DLLEXPORT static size_t get_graph_num_frozen_points(const std::string &graph_file);

DISKANN_DLLEXPORT void load(const char *index_file, uint32_t num_threads, uint32_t search_l);
#endif
Expand Down Expand Up @@ -291,7 +291,7 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas

// repositions frozen points to the end of _data - if they have been moved
// during deletion
DISKANN_DLLEXPORT void reposition_frozen_point_to_end();
// DISKANN_DLLEXPORT void reposition_frozen_point_to_end();
DISKANN_DLLEXPORT void reposition_points(uint32_t old_location_start, uint32_t new_location_start,
uint32_t num_locations);

Expand Down Expand Up @@ -328,7 +328,7 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas
float *distances) override;

virtual int _insert_point(const DataType &data_point, const TagType tag) override;
virtual int _insert_point(const DataType &data_point, const TagType tag, Labelvector &labels) override;
virtual int _insert_point(const DataType &data_point, const TagType tag, const std::vector<std::string> &labels) override;

virtual int _lazy_delete(const TagType &tag) override;

Expand Down Expand Up @@ -358,7 +358,7 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas

// generates 1 frozen point that will never be deleted from the graph
// This is not visible to the user
void generate_frozen_point();
// void generate_frozen_point();

// determines the navigating node of the graph by calculating the medoid of the data
uint32_t calculate_entry_point();
Expand All @@ -384,6 +384,13 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas
InMemQueryScratch<T> *scratch, bool use_filter = false,
uint32_t filteredLindex = 0);

void search_for_point_and_prune(int location, uint32_t Lindex, std::vector<uint32_t>& pruned_list,
const std::vector<LabelT>& labels,
InMemQueryScratch<T>* scratch,
uint32_t filteredLindex);

void prune_search_result(int location, std::vector<uint32_t>& pruned_list, InMemQueryScratch<T>* scratch);

void prune_neighbors(const uint32_t location, std::vector<Neighbor> &pool, std::vector<uint32_t> &pruned_list,
InMemQueryScratch<T> *scratch);

Expand Down Expand Up @@ -413,6 +420,8 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas
size_t release_location(int location);
size_t release_locations(const tsl::robin_set<uint32_t> &locations);

bool is_frozen_point(uint32_t location) const;

// Resize the index when no slots are left for insertion.
// Acquire exclusive _update_lock and _tag_lock before calling.
void resize(size_t new_max_points);
Expand All @@ -423,7 +432,7 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas
// graph, mode = _consolidated_order in case of lazy deletion and
// _compacted_order in case of eager deletion
DISKANN_DLLEXPORT void compact_data();
DISKANN_DLLEXPORT void compact_frozen_point();
// DISKANN_DLLEXPORT void compact_frozen_point();

// Remove deleted nodes from adjacency list of node loc
// Replace removed neighbors with second order neighbors.
Expand Down Expand Up @@ -476,8 +485,8 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas
// externally and won't be returned by search. At least 1 frozen point is
// needed for a dynamic index. The frozen points have consecutive locations.
// See also _start below.
size_t _num_frozen_pts = 0;
size_t _frozen_pts_used = 0;
// size_t _num_frozen_pts = 0;
// size_t _frozen_pts_used = 0;
size_t _node_size;
size_t _data_len;
size_t _neighbor_len;
Expand Down
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ else()
linux_aligned_file_reader.cpp math_utils.cpp natural_number_map.cpp
in_mem_data_store.cpp in_mem_graph_store.cpp
natural_number_set.cpp memory_mapper.cpp partition.cpp pq.cpp
pq_flash_index.cpp scratch.cpp logger.cpp utils.cpp filter_utils.cpp index_factory.cpp abstract_index.cpp pq_l2_distance.cpp pq_data_store.cpp)
pq_flash_index.cpp scratch.cpp logger.cpp utils.cpp filter_utils.cpp index_factory.cpp abstract_index.cpp pq_l2_distance.cpp pq_data_store.cpp neighbor_list.cpp in_mem_static_graph_store.cpp)
if (RESTAPI)
list(APPEND CPP_SOURCES restapi/search_wrapper.cpp restapi/server.cpp)
endif()
Expand Down
111 changes: 38 additions & 73 deletions src/abstract_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,12 @@ int AbstractIndex::insert_point(const data_type *point, const tag_type tag)
return this->_insert_point(any_point, any_tag);
}

template <typename data_type, typename tag_type, typename label_type>
int AbstractIndex::insert_point(const data_type *point, const tag_type tag, const std::vector<label_type> &labels)
template <typename data_type, typename tag_type>
int AbstractIndex::insert_point(const data_type *point, const tag_type tag, const std::vector<std::string>& labels)
{
auto any_point = std::any(point);
auto any_tag = std::any(tag);
auto any_labels = Labelvector(labels);
return this->_insert_point(any_point, any_tag, any_labels);
return this->_insert_point(any_point, any_tag, labels);
}

template <typename tag_type> int AbstractIndex::lazy_delete(const tag_type &tag)
Expand Down Expand Up @@ -259,75 +258,41 @@ template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, tag_uint128>(c
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, tag_uint128>(const uint8_t* point, const tag_uint128 tag);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, tag_uint128>(const int8_t* point, const tag_uint128 tag);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, int32_t, uint16_t>(
const float *point, const int32_t tag, const std::vector<uint16_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, int32_t, uint16_t>(
const uint8_t *point, const int32_t tag, const std::vector<uint16_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, int32_t, uint16_t>(
const int8_t *point, const int32_t tag, const std::vector<uint16_t> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, uint32_t, uint16_t>(
const float *point, const uint32_t tag, const std::vector<uint16_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, uint32_t, uint16_t>(
const uint8_t *point, const uint32_t tag, const std::vector<uint16_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, uint32_t, uint16_t>(
const int8_t *point, const uint32_t tag, const std::vector<uint16_t> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, int64_t, uint16_t>(
const float *point, const int64_t tag, const std::vector<uint16_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, int64_t, uint16_t>(
const uint8_t *point, const int64_t tag, const std::vector<uint16_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, int64_t, uint16_t>(
const int8_t *point, const int64_t tag, const std::vector<uint16_t> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, uint64_t, uint16_t>(
const float *point, const uint64_t tag, const std::vector<uint16_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, uint64_t, uint16_t>(
const uint8_t *point, const uint64_t tag, const std::vector<uint16_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, uint64_t, uint16_t>(
const int8_t *point, const uint64_t tag, const std::vector<uint16_t> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, tag_uint128, uint16_t>(
const float* point, const tag_uint128 tag, const std::vector<uint16_t>& labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, tag_uint128, uint16_t>(
const uint8_t* point, const tag_uint128 tag, const std::vector<uint16_t>& labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, tag_uint128, uint16_t>(
const int8_t* point, const tag_uint128 tag, const std::vector<uint16_t>& labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, int32_t, uint32_t>(
const float *point, const int32_t tag, const std::vector<uint32_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, int32_t, uint32_t>(
const uint8_t *point, const int32_t tag, const std::vector<uint32_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, int32_t, uint32_t>(
const int8_t *point, const int32_t tag, const std::vector<uint32_t> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, uint32_t, uint32_t>(
const float *point, const uint32_t tag, const std::vector<uint32_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, uint32_t, uint32_t>(
const uint8_t *point, const uint32_t tag, const std::vector<uint32_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, uint32_t, uint32_t>(
const int8_t *point, const uint32_t tag, const std::vector<uint32_t> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, int64_t, uint32_t>(
const float *point, const int64_t tag, const std::vector<uint32_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, int64_t, uint32_t>(
const uint8_t *point, const int64_t tag, const std::vector<uint32_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, int64_t, uint32_t>(
const int8_t *point, const int64_t tag, const std::vector<uint32_t> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, uint64_t, uint32_t>(
const float *point, const uint64_t tag, const std::vector<uint32_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, uint64_t, uint32_t>(
const uint8_t *point, const uint64_t tag, const std::vector<uint32_t> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, uint64_t, uint32_t>(
const int8_t *point, const uint64_t tag, const std::vector<uint32_t> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, tag_uint128, uint32_t>(
const float* point, const tag_uint128 tag, const std::vector<uint32_t>& labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, tag_uint128, uint32_t>(
const uint8_t* point, const tag_uint128 tag, const std::vector<uint32_t>& labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, tag_uint128, uint32_t>(
const int8_t* point, const tag_uint128 tag, const std::vector<uint32_t>& labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, int32_t>(
const float *point, const int32_t tag, const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, int32_t>(
const uint8_t *point, const int32_t tag, const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, int32_t>(
const int8_t *point, const int32_t tag, const std::vector<std::string> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, uint32_t>(
const float *point, const uint32_t tag, const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, uint32_t>(
const uint8_t *point, const uint32_t tag, const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, uint32_t>(
const int8_t *point, const uint32_t tag, const std::vector<std::string> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, int64_t>(
const float *point, const int64_t tag, const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, int64_t>(
const uint8_t *point, const int64_t tag, const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, int64_t>(
const int8_t *point, const int64_t tag, const std::vector<std::string> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, uint64_t>(
const float *point, const uint64_t tag, const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, uint64_t>(
const uint8_t *point, const uint64_t tag, const std::vector<std::string> &labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, uint64_t>(
const int8_t *point, const uint64_t tag, const std::vector<std::string> &labels);

template DISKANN_DLLEXPORT int AbstractIndex::insert_point<float, tag_uint128>(
const float* point, const tag_uint128 tag, const std::vector<std::string>& labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<uint8_t, tag_uint128>(
const uint8_t* point, const tag_uint128 tag, const std::vector<std::string>& labels);
template DISKANN_DLLEXPORT int AbstractIndex::insert_point<int8_t, tag_uint128>(
const int8_t* point, const tag_uint128 tag, const std::vector<std::string>& labels);


template DISKANN_DLLEXPORT int AbstractIndex::lazy_delete<int32_t>(const int32_t &tag);
template DISKANN_DLLEXPORT int AbstractIndex::lazy_delete<uint32_t>(const uint32_t &tag);
Expand Down
2 changes: 1 addition & 1 deletion src/dll/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
add_library(${PROJECT_NAME} SHARED dllmain.cpp ../abstract_data_store.cpp ../partition.cpp ../pq.cpp ../pq_flash_index.cpp ../logger.cpp ../utils.cpp
../windows_aligned_file_reader.cpp ../distance.cpp ../pq_l2_distance.cpp ../memory_mapper.cpp ../index.cpp
../in_mem_data_store.cpp ../pq_data_store.cpp ../in_mem_graph_store.cpp ../math_utils.cpp ../disk_utils.cpp ../filter_utils.cpp
../ann_exception.cpp ../natural_number_set.cpp ../natural_number_map.cpp ../scratch.cpp ../index_factory.cpp ../abstract_index.cpp)
../ann_exception.cpp ../natural_number_set.cpp ../natural_number_map.cpp ../scratch.cpp ../index_factory.cpp ../abstract_index.cpp ../neighbor_list.cpp ../in_mem_static_graph_store.cpp)

set(TARGET_DIR "$<$<CONFIG:Debug>:${CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG}>$<$<CONFIG:Release>:${CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE}>")

Expand Down
Loading
Loading