Skip to content

Commit

Permalink
Check fields instead of indexes when filling matching elements.
Browse files Browse the repository at this point in the history
  • Loading branch information
toregge committed Aug 21, 2024
1 parent 34bb805 commit 9bc9228
Show file tree
Hide file tree
Showing 6 changed files with 172 additions and 34 deletions.
1 change: 1 addition & 0 deletions searchlib/src/vespa/searchlib/query/streaming/queryterm.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ class QueryTerm : public QueryTermUCS4, public QueryNode
FieldInfo & getFieldInfo(size_t fid) { return _fieldInfo[fid]; }
size_t getFieldInfoSize() const { return _fieldInfo.size(); }
QueryNodeResultBase & getQueryItem() { return *_result; }
const QueryNodeResultBase & getQueryItem() const noexcept { return *_result; }
const HitList & getHitList() const { return _hitList; }
void visitMembers(vespalib::ObjectVisitor &visitor) const override;
void setIndex(std::string index_) override { _index = std::move(index_); }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <vespa/searchlib/query/tree/stackdumpcreator.h>
#include <vespa/searchvisitor/hitcollector.h>
#include <vespa/searchvisitor/matching_elements_filler.h>
#include <vespa/searchvisitor/querytermdata.h>
#include <vespa/vdslib/container/searchresult.h>
#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/vsm/searcher/fieldsearcher.h>
Expand Down Expand Up @@ -44,7 +45,12 @@ using search::fef::MatchData;
using search::query::StackDumpCreator;
using search::query::Weight;
using search::streaming::Query;
using search::streaming::QueryConnector;
using search::streaming::QueryNode;
using search::streaming::QueryNodeResultFactory;
using search::streaming::QueryTerm;
using streaming::QueryTermData;
using streaming::QueryTermDataFactory;
using streaming::HitCollector;
using streaming::MatchingElementsFiller;
using vdslib::SearchResult;
Expand Down Expand Up @@ -86,8 +92,8 @@ struct BoundTerm {

Query make_query(std::unique_ptr<search::query::Node> root) {
std::string stack_dump = StackDumpCreator::create(*root);
QueryNodeResultFactory empty;
Query query(empty, stack_dump);
QueryTermDataFactory factory(nullptr);
Query query(factory, stack_dump);
return query;
}

Expand Down Expand Up @@ -128,9 +134,12 @@ struct MyDocType {
const ArrayDataType _elem_array_type;
const MapDataType _elem_map_type;
const MapDataType _str_int_map_type;
const ArrayDataType _string_array_type;
const Field _elem_array_field;
const Field _elem_map_field;
const Field _str_int_map_field;
const Field _apples_field;
const Field _oranges_field;
DocumentType _document_type;

MyDocType();
Expand All @@ -139,6 +148,7 @@ struct MyDocType {
std::unique_ptr<ArrayFieldValue> make_elem_array(std::vector<std::pair<std::string, int>> values) const;
std::unique_ptr<MapFieldValue> make_elem_map(std::map<std::string, std::pair<std::string, int>> values) const;
std::unique_ptr<MapFieldValue> make_str_int_map(std::map<std::string, int> values) const;
std::unique_ptr<ArrayFieldValue> make_str_array(std::vector<std::string> values) const;
FieldPath make_field_path(std::string path) const;
std::unique_ptr<document::Document> make_test_doc() const;
};
Expand All @@ -150,14 +160,19 @@ MyDocType::MyDocType()
_elem_array_type(_elem_type),
_elem_map_type(*DataType::STRING, _elem_type),
_str_int_map_type(*DataType::STRING, *DataType::INT),
_string_array_type(*DataType::STRING),
_elem_array_field("elem_array", 3, _elem_array_type),
_elem_map_field("elem_map", 4, _elem_map_type),
_str_int_map_field("str_int_map", _str_int_map_type),
_apples_field("apples", _string_array_type),
_oranges_field("oranges", _string_array_type),
_document_type("test")
{
_document_type.addField(_elem_array_field);
_document_type.addField(_elem_map_field);
_document_type.addField(_str_int_map_field);
_document_type.addField(_apples_field);
_document_type.addField(_oranges_field);
}

MyDocType::~MyDocType() = default;
Expand Down Expand Up @@ -201,6 +216,16 @@ MyDocType::make_str_int_map(std::map<std::string, int> values) const
return ret;
}

std::unique_ptr<ArrayFieldValue>
MyDocType::make_str_array(std::vector<std::string> values) const
{
auto ret = std::make_unique<ArrayFieldValue>(_string_array_type);
for (auto& v : values) {
ret->add(StringFieldValue(v));
}
return ret;
}

FieldPath
MyDocType::make_field_path(std::string path) const
{
Expand All @@ -217,6 +242,8 @@ MyDocType::make_test_doc() const
// the elements in maps are ordered on the key
doc->setValue("elem_map", *make_elem_map({{"@foo", {"foo", 10}}, {"@bar", {"bar", 20}},{"@baz", {"baz", 30}},{"@foo@", {"foo", 40}},{"@zap", {"zap", 20}}, {"@zap@", {"zap", 20}}}));
doc->setValue("str_int_map", *make_str_int_map({{"@foo", 10}, {"@bar", 20}, {"@baz", 30}, {"@foo@", 40}, {"@zap", 20}, {"@zap@", 20}}));
doc->setValue("apples", make_str_array({"one", "two", "three"}));
doc->setValue("oranges", *make_str_array({"three", "two", "one"}));
return doc;
}

Expand All @@ -229,9 +256,29 @@ vsm::SharedFieldPathMap make_field_path_map(const MyDocType& doc_type) {
ret->emplace_back(doc_type.make_field_path("elem_map.value.weight"));
ret->emplace_back(doc_type.make_field_path("str_int_map.key"));
ret->emplace_back(doc_type.make_field_path("str_int_map.value"));
ret->emplace_back(doc_type.make_field_path("apples"));
ret->emplace_back(doc_type.make_field_path("oranges"));
return ret;
}

void
setup_index_env(streaming::IndexEnvironment& index_env)
{
using DataType = search::fef::FieldInfo::DataType;
// Populate mapping from field id to field name, cf. IndexEnvPrototype::detectFields
index_env.addField("elem_array.name", true, DataType::STRING);
index_env.addField("elem_array.weight", true, DataType::INT32);
index_env.addField("elem_map.key", true, DataType::STRING);
index_env.addField("elem_map.value.name", true, DataType::STRING);
index_env.addField("elem_map.value.weight", true, DataType::INT32);
index_env.addField("str_int_map.key", true, DataType::STRING);
index_env.addField("str_int_map.value", true, DataType::INT32);
index_env.addField("apples", true, DataType::STRING);
index_env.addField("oranges", true, DataType::STRING);
// Allocate field ids for virtual fields elem_array, elem_map, elem_map.value and str_int_map
index_env.add_virtual_fields();
}

vsm::FieldIdTSearcherMap make_field_searcher_map() {
vsm::FieldIdTSearcherMap ret;
ret.emplace_back(std::make_unique<UTF8StrChrFieldSearcher>(0));
Expand All @@ -241,6 +288,8 @@ vsm::FieldIdTSearcherMap make_field_searcher_map() {
ret.emplace_back(std::make_unique<IntFieldSearcher>(4));
ret.emplace_back(std::make_unique<UTF8StrChrFieldSearcher>(5));
ret.emplace_back(std::make_unique<IntFieldSearcher>(6));
ret.emplace_back(std::make_unique<UTF8StrChrFieldSearcher>(7));
ret.emplace_back(std::make_unique<UTF8StrChrFieldSearcher>(8));
return ret;
}

Expand All @@ -254,6 +303,9 @@ vsm::DocumentTypeIndexFieldMapT make_index_to_field_ids() {
index_map["elem_map.value.weight"] = FieldIdTList{4};
index_map["str_int_map.key"] = FieldIdTList{5};
index_map["str_int_map.value"] = FieldIdTList{6};
index_map["apples"] = FieldIdTList{7};
index_map["oranges"] = FieldIdTList{8};
index_map["fruit"] = FieldIdTList{7, 8};
return ret;
}

Expand All @@ -266,6 +318,8 @@ MatchingElementsFields make_matching_elements_fields() {
fields.add_mapping("elem_map", "elem_map.value.weight");
fields.add_mapping("str_int_map", "str_int_map.key");
fields.add_mapping("str_int_map", "str_int_map.value");
fields.add_field("apples");
fields.add_field("oranges");
return fields;
}

Expand All @@ -284,7 +338,10 @@ class MatchingElementsFillerTest : public ::testing::Test {
std::unique_ptr<MatchingElements> _matching_elements;
public:
MatchingElementsFillerTest();
~MatchingElementsFillerTest();
~MatchingElementsFillerTest() override;
template<typename T>
static T* as(QueryNode& query_node) { return dynamic_cast<T*>(&query_node); }
void populate_term_data(QueryNode& query_node);
void fill_matching_elements(Query &&query);
void assert_elements(uint32_t doc_lid, const std::string& field, const ElementVector& exp_elements);
void assert_same_element(const std::string& field, const std::string& term1, const std::string& term2, const ElementVector& exp_elements);
Expand All @@ -305,6 +362,7 @@ MatchingElementsFillerTest::MatchingElementsFillerTest()
_matching_elements()
{
_env.field_paths = make_field_path_map(_doc_type);
setup_index_env(_env.index_env);
_search_result.addHit(1, "id::test::1", 0.0, nullptr, 0);
auto sdoc = std::make_shared<StorageDocument>(_doc_type.make_test_doc(), _env.field_paths, _env.field_paths->size());
EXPECT_TRUE(sdoc->valid());
Expand All @@ -314,14 +372,35 @@ MatchingElementsFillerTest::MatchingElementsFillerTest()

MatchingElementsFillerTest::~MatchingElementsFillerTest() = default;

void
MatchingElementsFillerTest::populate_term_data(QueryNode& query_node)
{
auto& index_map = _index_to_field_ids["test"];
if (auto query_term = as<QueryTerm>(query_node)) {
auto& qtd = dynamic_cast<QueryTermData&>(query_term->getQueryItem());

auto itr = index_map.find(query_node.getIndex());
if (itr != index_map.end()) {
for (auto field_id : itr->second) {
qtd.getTermData().addField(field_id);
}
}
} else if (auto intermediate = as<QueryConnector>(query_node)) {
for (size_t i = 0; i < intermediate->size(); ++i) {
populate_term_data(*(*intermediate)[i]);
}
}
}

void
MatchingElementsFillerTest::fill_matching_elements(Query &&query)
{
_matching_elements_filler.reset();
_matching_elements.reset();
_query = std::move(query);
populate_term_data(_query.getRoot());
_env.prepare(_field_searcher_map, _index_to_field_ids, _query);
_matching_elements_filler = std::make_unique<MatchingElementsFiller>(_field_searcher_map, _query, _hit_collector, _search_result);
_matching_elements_filler = std::make_unique<MatchingElementsFiller>(_field_searcher_map, _env.index_env, _query, _hit_collector, _search_result);
_matching_elements = _matching_elements_filler->fill_matching_elements(_matching_elems_fields);
}

Expand Down Expand Up @@ -407,4 +486,14 @@ TEST_F(MatchingElementsFillerTest, union_of_matching_elements)
assert_elements(1, "elem_array", { 0, 1, 3, 4, 5 });
}

TEST_F(MatchingElementsFillerTest, separate_collection_for_each_field)
{
MyQueryBuilder builder;
// fieldset fruit { fields: apples, oranges }
builder.add_term("fruit:one", 0);
fill_matching_elements(make_query(builder.build()));
assert_elements(1, "apples", { 0 });
assert_elements(1, "oranges", { 2 });
}

GTEST_MAIN_RUN_ALL_TESTS()
Loading

0 comments on commit 9bc9228

Please sign in to comment.