Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check fields instead of indexes when filling matching elements. #32206

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions searchlib/src/vespa/searchlib/query/streaming/queryterm.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ class QueryTerm : public QueryTermUCS4, public QueryNode
FieldInfo & getFieldInfo(size_t fid) { return _fieldInfo[fid]; }
size_t getFieldInfoSize() const { return _fieldInfo.size(); }
QueryNodeResultBase & getQueryItem() { return *_result; }
const QueryNodeResultBase & getQueryItem() const noexcept { return *_result; }
const HitList & getHitList() const { return _hitList; }
void visitMembers(vespalib::ObjectVisitor &visitor) const override;
void setIndex(std::string index_) override { _index = std::move(index_); }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <vespa/searchlib/query/tree/stackdumpcreator.h>
#include <vespa/searchvisitor/hitcollector.h>
#include <vespa/searchvisitor/matching_elements_filler.h>
#include <vespa/searchvisitor/querytermdata.h>
#include <vespa/vdslib/container/searchresult.h>
#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/vsm/searcher/fieldsearcher.h>
Expand Down Expand Up @@ -44,7 +45,12 @@ using search::fef::MatchData;
using search::query::StackDumpCreator;
using search::query::Weight;
using search::streaming::Query;
using search::streaming::QueryConnector;
using search::streaming::QueryNode;
using search::streaming::QueryNodeResultFactory;
using search::streaming::QueryTerm;
using streaming::QueryTermData;
using streaming::QueryTermDataFactory;
using streaming::HitCollector;
using streaming::MatchingElementsFiller;
using vdslib::SearchResult;
Expand Down Expand Up @@ -86,8 +92,8 @@ struct BoundTerm {

Query make_query(std::unique_ptr<search::query::Node> root) {
std::string stack_dump = StackDumpCreator::create(*root);
QueryNodeResultFactory empty;
Query query(empty, stack_dump);
QueryTermDataFactory factory(nullptr);
Query query(factory, stack_dump);
return query;
}

Expand Down Expand Up @@ -128,9 +134,12 @@ struct MyDocType {
const ArrayDataType _elem_array_type;
const MapDataType _elem_map_type;
const MapDataType _str_int_map_type;
const ArrayDataType _string_array_type;
const Field _elem_array_field;
const Field _elem_map_field;
const Field _str_int_map_field;
const Field _apples_field;
const Field _oranges_field;
DocumentType _document_type;

MyDocType();
Expand All @@ -139,6 +148,7 @@ struct MyDocType {
std::unique_ptr<ArrayFieldValue> make_elem_array(std::vector<std::pair<std::string, int>> values) const;
std::unique_ptr<MapFieldValue> make_elem_map(std::map<std::string, std::pair<std::string, int>> values) const;
std::unique_ptr<MapFieldValue> make_str_int_map(std::map<std::string, int> values) const;
std::unique_ptr<ArrayFieldValue> make_str_array(std::vector<std::string> values) const;
FieldPath make_field_path(std::string path) const;
std::unique_ptr<document::Document> make_test_doc() const;
};
Expand All @@ -150,14 +160,19 @@ MyDocType::MyDocType()
_elem_array_type(_elem_type),
_elem_map_type(*DataType::STRING, _elem_type),
_str_int_map_type(*DataType::STRING, *DataType::INT),
_string_array_type(*DataType::STRING),
_elem_array_field("elem_array", 3, _elem_array_type),
_elem_map_field("elem_map", 4, _elem_map_type),
_str_int_map_field("str_int_map", _str_int_map_type),
_apples_field("apples", _string_array_type),
_oranges_field("oranges", _string_array_type),
_document_type("test")
{
_document_type.addField(_elem_array_field);
_document_type.addField(_elem_map_field);
_document_type.addField(_str_int_map_field);
_document_type.addField(_apples_field);
_document_type.addField(_oranges_field);
}

MyDocType::~MyDocType() = default;
Expand Down Expand Up @@ -201,6 +216,16 @@ MyDocType::make_str_int_map(std::map<std::string, int> values) const
return ret;
}

std::unique_ptr<ArrayFieldValue>
MyDocType::make_str_array(std::vector<std::string> values) const
{
auto ret = std::make_unique<ArrayFieldValue>(_string_array_type);
for (auto& v : values) {
ret->add(StringFieldValue(v));
}
return ret;
}

FieldPath
MyDocType::make_field_path(std::string path) const
{
Expand All @@ -217,6 +242,8 @@ MyDocType::make_test_doc() const
// the elements in maps are ordered on the key
doc->setValue("elem_map", *make_elem_map({{"@foo", {"foo", 10}}, {"@bar", {"bar", 20}},{"@baz", {"baz", 30}},{"@foo@", {"foo", 40}},{"@zap", {"zap", 20}}, {"@zap@", {"zap", 20}}}));
doc->setValue("str_int_map", *make_str_int_map({{"@foo", 10}, {"@bar", 20}, {"@baz", 30}, {"@foo@", 40}, {"@zap", 20}, {"@zap@", 20}}));
doc->setValue("apples", make_str_array({"one", "two", "three"}));
doc->setValue("oranges", *make_str_array({"three", "two", "one"}));
return doc;
}

Expand All @@ -229,9 +256,29 @@ vsm::SharedFieldPathMap make_field_path_map(const MyDocType& doc_type) {
ret->emplace_back(doc_type.make_field_path("elem_map.value.weight"));
ret->emplace_back(doc_type.make_field_path("str_int_map.key"));
ret->emplace_back(doc_type.make_field_path("str_int_map.value"));
ret->emplace_back(doc_type.make_field_path("apples"));
ret->emplace_back(doc_type.make_field_path("oranges"));
return ret;
}

void
setup_index_env(streaming::IndexEnvironment& index_env)
{
using DataType = search::fef::FieldInfo::DataType;
// Populate mapping from field id to field name, cf. IndexEnvPrototype::detectFields
index_env.addField("elem_array.name", true, DataType::STRING);
index_env.addField("elem_array.weight", true, DataType::INT32);
index_env.addField("elem_map.key", true, DataType::STRING);
index_env.addField("elem_map.value.name", true, DataType::STRING);
index_env.addField("elem_map.value.weight", true, DataType::INT32);
index_env.addField("str_int_map.key", true, DataType::STRING);
index_env.addField("str_int_map.value", true, DataType::INT32);
index_env.addField("apples", true, DataType::STRING);
index_env.addField("oranges", true, DataType::STRING);
// Allocate field ids for virtual fields elem_array, elem_map, elem_map.value and str_int_map
index_env.add_virtual_fields();
}

vsm::FieldIdTSearcherMap make_field_searcher_map() {
vsm::FieldIdTSearcherMap ret;
ret.emplace_back(std::make_unique<UTF8StrChrFieldSearcher>(0));
Expand All @@ -241,6 +288,8 @@ vsm::FieldIdTSearcherMap make_field_searcher_map() {
ret.emplace_back(std::make_unique<IntFieldSearcher>(4));
ret.emplace_back(std::make_unique<UTF8StrChrFieldSearcher>(5));
ret.emplace_back(std::make_unique<IntFieldSearcher>(6));
ret.emplace_back(std::make_unique<UTF8StrChrFieldSearcher>(7));
ret.emplace_back(std::make_unique<UTF8StrChrFieldSearcher>(8));
return ret;
}

Expand All @@ -254,6 +303,9 @@ vsm::DocumentTypeIndexFieldMapT make_index_to_field_ids() {
index_map["elem_map.value.weight"] = FieldIdTList{4};
index_map["str_int_map.key"] = FieldIdTList{5};
index_map["str_int_map.value"] = FieldIdTList{6};
index_map["apples"] = FieldIdTList{7};
index_map["oranges"] = FieldIdTList{8};
index_map["fruit"] = FieldIdTList{7, 8};
return ret;
}

Expand All @@ -266,6 +318,8 @@ MatchingElementsFields make_matching_elements_fields() {
fields.add_mapping("elem_map", "elem_map.value.weight");
fields.add_mapping("str_int_map", "str_int_map.key");
fields.add_mapping("str_int_map", "str_int_map.value");
fields.add_field("apples");
fields.add_field("oranges");
return fields;
}

Expand All @@ -284,7 +338,10 @@ class MatchingElementsFillerTest : public ::testing::Test {
std::unique_ptr<MatchingElements> _matching_elements;
public:
MatchingElementsFillerTest();
~MatchingElementsFillerTest();
~MatchingElementsFillerTest() override;
template<typename T>
static T* as(QueryNode& query_node) { return dynamic_cast<T*>(&query_node); }
void populate_term_data(QueryNode& query_node);
void fill_matching_elements(Query &&query);
void assert_elements(uint32_t doc_lid, const std::string& field, const ElementVector& exp_elements);
void assert_same_element(const std::string& field, const std::string& term1, const std::string& term2, const ElementVector& exp_elements);
Expand All @@ -305,6 +362,7 @@ MatchingElementsFillerTest::MatchingElementsFillerTest()
_matching_elements()
{
_env.field_paths = make_field_path_map(_doc_type);
setup_index_env(_env.index_env);
_search_result.addHit(1, "id::test::1", 0.0, nullptr, 0);
auto sdoc = std::make_shared<StorageDocument>(_doc_type.make_test_doc(), _env.field_paths, _env.field_paths->size());
EXPECT_TRUE(sdoc->valid());
Expand All @@ -314,14 +372,35 @@ MatchingElementsFillerTest::MatchingElementsFillerTest()

MatchingElementsFillerTest::~MatchingElementsFillerTest() = default;

void
MatchingElementsFillerTest::populate_term_data(QueryNode& query_node)
{
auto& index_map = _index_to_field_ids["test"];
if (auto query_term = as<QueryTerm>(query_node)) {
auto& qtd = dynamic_cast<QueryTermData&>(query_term->getQueryItem());

auto itr = index_map.find(query_node.getIndex());
if (itr != index_map.end()) {
for (auto field_id : itr->second) {
qtd.getTermData().addField(field_id);
}
}
} else if (auto intermediate = as<QueryConnector>(query_node)) {
for (size_t i = 0; i < intermediate->size(); ++i) {
populate_term_data(*(*intermediate)[i]);
}
}
}

void
MatchingElementsFillerTest::fill_matching_elements(Query &&query)
{
_matching_elements_filler.reset();
_matching_elements.reset();
_query = std::move(query);
populate_term_data(_query.getRoot());
_env.prepare(_field_searcher_map, _index_to_field_ids, _query);
_matching_elements_filler = std::make_unique<MatchingElementsFiller>(_field_searcher_map, _query, _hit_collector, _search_result);
_matching_elements_filler = std::make_unique<MatchingElementsFiller>(_field_searcher_map, _env.index_env, _query, _hit_collector, _search_result);
_matching_elements = _matching_elements_filler->fill_matching_elements(_matching_elems_fields);
}

Expand Down Expand Up @@ -407,4 +486,14 @@ TEST_F(MatchingElementsFillerTest, union_of_matching_elements)
assert_elements(1, "elem_array", { 0, 1, 3, 4, 5 });
}

TEST_F(MatchingElementsFillerTest, separate_collection_for_each_field)
{
MyQueryBuilder builder;
// fieldset fruit { fields: apples, oranges }
builder.add_term("fruit:one", 0);
fill_matching_elements(make_query(builder.build()));
assert_elements(1, "apples", { 0 });
assert_elements(1, "oranges", { 2 });
}

GTEST_MAIN_RUN_ALL_TESTS()
Loading