Skip to content

Commit

Permalink
Merge pull request #29776 from vespa-engine/revert-29775-balder/only-…
Browse files Browse the repository at this point in the history
…rewrite-numeric-terms-for-text-fields

Revert "Balder/only rewrite numeric terms for text fields"
  • Loading branch information
baldersheim authored Jan 2, 2024
2 parents 2304529 + 241b524 commit 71b2c35
Show file tree
Hide file tree
Showing 8 changed files with 32 additions and 69 deletions.
33 changes: 15 additions & 18 deletions searchlib/src/tests/query/streaming_query_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,10 +287,7 @@ TEST(StreamingQueryTest, test_query_language)
class AllowRewrite : public QueryNodeResultFactory
{
public:
explicit AllowRewrite(vespalib::stringref index) noexcept : _allowedIndex(index) {}
bool getRewriteFloatTerms(vespalib::stringref index) const noexcept override { return index == _allowedIndex; }
private:
vespalib::string _allowedIndex;
virtual bool getRewriteFloatTerms() const override { return true; }
};

const char TERM_UNIQ = static_cast<char>(ParseItem::ITEM_TERM) | static_cast<char>(ParseItem::IF_UNIQUEID);
Expand All @@ -300,12 +297,12 @@ TEST(StreamingQueryTest, e_is_not_rewritten_even_if_allowed)
const char term[6] = {TERM_UNIQ, 3, 1, 'c', 1, 'e'};
vespalib::stringref stackDump(term, sizeof(term));
EXPECT_EQ(6u, stackDump.size());
AllowRewrite allowRewrite("c");
AllowRewrite allowRewrite;
const Query q(allowRewrite, stackDump);
EXPECT_TRUE(q.valid());
const QueryNode & root = q.getRoot();
EXPECT_TRUE(dynamic_cast<const QueryTerm *>(&root) != nullptr);
const auto & qt = static_cast<const QueryTerm &>(root);
const QueryTerm & qt = static_cast<const QueryTerm &>(root);
EXPECT_EQ("c", qt.index());
EXPECT_EQ(vespalib::stringref("e"), qt.getTerm());
EXPECT_EQ(3u, qt.uniqueId());
Expand All @@ -316,12 +313,12 @@ TEST(StreamingQueryTest, onedot0e_is_not_rewritten_by_default)
const char term[9] = {TERM_UNIQ, 3, 1, 'c', 4, '1', '.', '0', 'e'};
vespalib::stringref stackDump(term, sizeof(term));
EXPECT_EQ(9u, stackDump.size());
AllowRewrite empty("nix");
QueryNodeResultFactory empty;
const Query q(empty, stackDump);
EXPECT_TRUE(q.valid());
const QueryNode & root = q.getRoot();
EXPECT_TRUE(dynamic_cast<const QueryTerm *>(&root) != nullptr);
const auto & qt = static_cast<const QueryTerm &>(root);
const QueryTerm & qt = static_cast<const QueryTerm &>(root);
EXPECT_EQ("c", qt.index());
EXPECT_EQ(vespalib::stringref("1.0e"), qt.getTerm());
EXPECT_EQ(3u, qt.uniqueId());
Expand All @@ -332,34 +329,34 @@ TEST(StreamingQueryTest, onedot0e_is_rewritten_if_allowed_too)
const char term[9] = {TERM_UNIQ, 3, 1, 'c', 4, '1', '.', '0', 'e'};
vespalib::stringref stackDump(term, sizeof(term));
EXPECT_EQ(9u, stackDump.size());
AllowRewrite empty("c");
AllowRewrite empty;
const Query q(empty, stackDump);
EXPECT_TRUE(q.valid());
const QueryNode & root = q.getRoot();
EXPECT_TRUE(dynamic_cast<const EquivQueryNode *>(&root) != nullptr);
const auto & equiv = static_cast<const EquivQueryNode &>(root);
const EquivQueryNode & equiv = static_cast<const EquivQueryNode &>(root);
EXPECT_EQ(2u, equiv.size());
EXPECT_TRUE(dynamic_cast<const QueryTerm *>(equiv[0].get()) != nullptr);
{
const auto & qt = static_cast<const QueryTerm &>(*equiv[0]);
const QueryTerm & qt = static_cast<const QueryTerm &>(*equiv[0]);
EXPECT_EQ("c", qt.index());
EXPECT_EQ(vespalib::stringref("1.0e"), qt.getTerm());
EXPECT_EQ(3u, qt.uniqueId());
}
EXPECT_TRUE(dynamic_cast<const PhraseQueryNode *>(equiv[1].get()) != nullptr);
{
const auto & phrase = static_cast<const PhraseQueryNode &>(*equiv[1]);
const PhraseQueryNode & phrase = static_cast<const PhraseQueryNode &>(*equiv[1]);
EXPECT_EQ(2u, phrase.size());
EXPECT_TRUE(dynamic_cast<const QueryTerm *>(phrase[0].get()) != nullptr);
{
const auto & qt = static_cast<const QueryTerm &>(*phrase[0]);
const QueryTerm & qt = static_cast<const QueryTerm &>(*phrase[0]);
EXPECT_EQ("c", qt.index());
EXPECT_EQ(vespalib::stringref("1"), qt.getTerm());
EXPECT_EQ(0u, qt.uniqueId());
}
EXPECT_TRUE(dynamic_cast<const QueryTerm *>(phrase[1].get()) != nullptr);
{
const auto & qt = static_cast<const QueryTerm &>(*phrase[1]);
const QueryTerm & qt = static_cast<const QueryTerm &>(*phrase[1]);
EXPECT_EQ("c", qt.index());
EXPECT_EQ(vespalib::stringref("0e"), qt.getTerm());
EXPECT_EQ(0u, qt.uniqueId());
Expand Down Expand Up @@ -463,7 +460,7 @@ TEST(StreamingQueryTest, test_phrase_evaluate)
terms[1]->add(1, 5, 0, 1);
terms[2]->add(0, 5, 0, 1);
HitList hits;
auto * p = static_cast<PhraseQueryNode *>(phrases[0]);
PhraseQueryNode * p = static_cast<PhraseQueryNode *>(phrases[0]);
p->evaluateHits(hits);
ASSERT_EQ(3u, hits.size());
EXPECT_EQ(hits[0].wordpos(), 2u);
Expand Down Expand Up @@ -753,7 +750,7 @@ TEST(StreamingQueryTest, require_that_incorrectly_specified_diversity_can_be_par

TEST(StreamingQueryTest, require_that_we_do_not_break_the_stack_on_bad_query)
{
QueryTermSimple term(R"(<form><iframe+&#09;&#10;&#11;+src=\"javascript&#58;alert(1)\"&#11;&#10;&#09;;>)", TermType::WORD);
QueryTermSimple term("<form><iframe+&#09;&#10;&#11;+src=\\\"javascript&#58;alert(1)\\\"&#11;&#10;&#09;;>", TermType::WORD);
EXPECT_FALSE(term.isValid());
}

Expand All @@ -762,7 +759,7 @@ TEST(StreamingQueryTest, a_unhandled_sameElement_stack)
const char * stack = "\022\002\026xyz_abcdefghij_xyzxyzxQ\001\vxxxxxx_name\034xxxxxx_xxxx_xxxxxxx_xxxxxxxxE\002\005delta\b<0.00393";
vespalib::stringref stackDump(stack);
EXPECT_EQ(85u, stackDump.size());
AllowRewrite empty("");
AllowRewrite empty;
const Query q(empty, stackDump);
EXPECT_TRUE(q.valid());
const QueryNode & root = q.getRoot();
Expand Down Expand Up @@ -796,7 +793,7 @@ TEST(StreamingQueryTest, test_same_element_evaluate)
vespalib::string stackDump = StackDumpCreator::create(*node);
QueryNodeResultFactory empty;
Query q(empty, stackDump);
auto * sameElem = dynamic_cast<SameElementQueryNode *>(&q.getRoot());
SameElementQueryNode * sameElem = dynamic_cast<SameElementQueryNode *>(&q.getRoot());
EXPECT_TRUE(sameElem != nullptr);
EXPECT_EQ("field", sameElem->getIndex());
EXPECT_EQ(3u, sameElem->size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
qt->setFuzzyMaxEditDistance(queryRep.getFuzzyMaxEditDistance());
qt->setFuzzyPrefixLength(queryRep.getFuzzyPrefixLength());
}
if (possibleFloat(*qt, ssTerm) && factory.getRewriteFloatTerms(ssIndex) && allowRewrite) {
if (possibleFloat(*qt, ssTerm) && factory.getRewriteFloatTerms() && allowRewrite) {
auto phrase = std::make_unique<PhraseQueryNode>();
auto dotPos = ssTerm.find('.');
phrase->addChild(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(0, dotPos), ssIndex, TermType::WORD));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include <vespa/vespalib/stllike/string.h>
#include <memory>

namespace search::streaming {
Expand All @@ -21,7 +20,7 @@ class QueryNodeResultBase
class QueryNodeResultFactory {
public:
virtual ~QueryNodeResultFactory() = default;
virtual bool getRewriteFloatTerms(vespalib::stringref index) const noexcept { (void) index; return false; }
virtual bool getRewriteFloatTerms() const { return false; }
virtual std::unique_ptr<QueryNodeResultBase> create() const { return {}; }
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class RankProcessorTest : public testing::Test

RankProcessorTest::RankProcessorTest()
: testing::Test(),
_factory(nullptr),
_factory(),
_query(),
_query_wrapper()
{
Expand Down
15 changes: 2 additions & 13 deletions streamingvisitors/src/vespa/searchvisitor/querytermdata.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,15 @@ class QueryTermData : public search::streaming::QueryNodeResultBase
search::fef::SimpleTermData _termData;
public:
QueryTermData * clone() const override { return new QueryTermData(); }
search::fef::SimpleTermData &getTermData() noexcept { return _termData; }
};

class SearchMethodInfo {
public:
virtual ~SearchMethodInfo() = default;
virtual bool is_text_matching(vespalib::stringref index) const noexcept = 0;
search::fef::SimpleTermData &getTermData() { return _termData; }
};

class QueryTermDataFactory final : public search::streaming::QueryNodeResultFactory {
public:
QueryTermDataFactory(const SearchMethodInfo * searchMethodInfo) noexcept : _searchMethodInfo(searchMethodInfo) {}
std::unique_ptr<search::streaming::QueryNodeResultBase> create() const override {
return std::make_unique<QueryTermData>();
}
bool getRewriteFloatTerms(vespalib::stringref index ) const noexcept override {
return _searchMethodInfo && _searchMethodInfo->is_text_matching(index);
}
private:
const SearchMethodInfo * _searchMethodInfo;
bool getRewriteFloatTerms() const override { return true; }
};


Expand Down
36 changes: 11 additions & 25 deletions streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,16 +238,14 @@ SearchVisitor::SummaryGenerator::fillSummary(AttributeVector::DocId lid, const H
return {};
}

void
SearchVisitor::HitsResultPreparator::execute(vespalib::Identifiable & obj)
void SearchVisitor::HitsResultPreparator::execute(vespalib::Identifiable & obj)
{
auto & hitsAggr(static_cast<HitsAggregationResult &>(obj));
hitsAggr.setSummaryGenerator(_summaryGenerator);
_numHitsAggregators++;
}

bool
SearchVisitor::HitsResultPreparator::check(const vespalib::Identifiable & obj) const
bool SearchVisitor::HitsResultPreparator::check(const vespalib::Identifiable & obj) const
{
return obj.getClass().inherits(HitsAggregationResult::classId);
}
Expand All @@ -261,8 +259,7 @@ SearchVisitor::GroupingEntry::GroupingEntry(Grouping * grouping) :

SearchVisitor::GroupingEntry::~GroupingEntry() = default;

void
SearchVisitor::GroupingEntry::aggregate(const document::Document & doc, search::HitRank rank)
void SearchVisitor::GroupingEntry::aggregate(const document::Document & doc, search::HitRank rank)
{
if (_count < _limit) {
_grouping->aggregate(doc, rank);
Expand Down Expand Up @@ -313,15 +310,7 @@ SearchVisitor::SearchVisitor(StorageComponent& component,
LOG(debug, "Created SearchVisitor");
}

bool
SearchVisitor::is_text_matching(vespalib::stringref index) const noexcept {
vsm::FieldIdT fId = _fieldSearchSpecMap.nameIdMap().fieldNo(index);
auto found = _fieldSearchSpecMap.specMap().find(fId);
return (found != _fieldSearchSpecMap.specMap().end()) && found->second.uses_string_search_method();
}

void
SearchVisitor::init(const Parameters & params)
void SearchVisitor::init(const Parameters & params)
{
VISITOR_TRACE(6, "About to lazily init VSM adapter");
_attrMan.add(_documentIdAttributeBacking);
Expand Down Expand Up @@ -408,12 +397,7 @@ SearchVisitor::init(const Parameters & params)
if ( params.lookup("query", queryBlob) ) {
LOG(spam, "Received query blob of %zu bytes", queryBlob.size());
VISITOR_TRACE(9, vespalib::make_string("Setting up for query blob of %zu bytes", queryBlob.size()));

// Create mapping from field name to field id, from field id to search spec,
// and from index name to list of field ids
_fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config());

QueryTermDataFactory addOnFactory(this);
QueryTermDataFactory addOnFactory;
_query = Query(addOnFactory, vespalib::stringref(queryBlob.data(), queryBlob.size()));
_searchBuffer->reserve(0x10000);

Expand All @@ -430,6 +414,7 @@ SearchVisitor::init(const Parameters & params)
StringFieldIdTMap fieldsInQuery;
setupFieldSearchers(additionalFields, fieldsInQuery);


setupScratchDocument(fieldsInQuery);

_syntheticFieldsController.setup(_fieldSearchSpecMap.nameIdMap(), fieldsInQuery);
Expand Down Expand Up @@ -769,6 +754,9 @@ void
SearchVisitor::setupFieldSearchers(const std::vector<vespalib::string> & additionalFields,
StringFieldIdTMap & fieldsInQuery)
{
// Create mapping from field name to field id, from field id to search spec,
// and from index name to list of field ids
_fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config());
// Add extra elements to mapping from field name to field id
_fieldSearchSpecMap.buildFromConfig(additionalFields);

Expand Down Expand Up @@ -1157,8 +1145,7 @@ SearchVisitor::fillSortBuffer()
return pos;
}

void
SearchVisitor::completedBucket(const document::BucketId&, HitCounter&)
void SearchVisitor::completedBucket(const document::BucketId&, HitCounter&)
{
LOG(debug, "Completed bucket");
}
Expand All @@ -1170,8 +1157,7 @@ SearchVisitor::generate_query_result(HitCounter& counter)
return std::move(_queryResult);
}

void
SearchVisitor::completedVisitingInternal(HitCounter& hitCounter)
void SearchVisitor::completedVisitingInternal(HitCounter& hitCounter)
{
if (!_init_called) {
init(_params);
Expand Down
5 changes: 1 addition & 4 deletions streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
#include "rankmanager.h"
#include "rankprocessor.h"
#include "searchenvironment.h"
#include "querytermdata.h"
#include <vespa/vsm/common/docsum.h>
#include <vespa/vsm/common/documenttypemapping.h>
#include <vespa/vsm/common/storagedocument.h>
Expand Down Expand Up @@ -43,8 +42,7 @@ class SearchEnvironmentSnapshot;
* @brief Visitor that applies a search query to visitor data and
* converts them to a QueryResultCommand.
**/
class SearchVisitor : public storage::Visitor,
public SearchMethodInfo {
class SearchVisitor : public storage::Visitor {
public:
SearchVisitor(storage::StorageComponent&, storage::VisitorEnvironment& vEnv,
const vdslib::Parameters & params);
Expand Down Expand Up @@ -490,7 +488,6 @@ class SearchVisitor : public storage::Visitor,
vsm::StringFieldIdTMapT _fieldsUnion;

void setupAttributeVector(const vsm::FieldPath &fieldPath);
bool is_text_matching(vespalib::stringref index) const noexcept override;
};

class SearchVisitorFactory : public storage::VisitorFactory {
Expand Down
5 changes: 0 additions & 5 deletions streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,6 @@ class FieldSearchSpec
bool valid() const { return static_cast<bool>(_searcher); }
size_t maxLength() const { return _maxLength; }
bool uses_nearest_neighbor_search_method() const noexcept { return _searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::NEAREST_NEIGHBOR; }
bool uses_string_search_method() const noexcept {
return (_searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::UTF8) ||
(_searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::AUTOUTF8) ||
(_searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::SSE2UTF8);
}
const vespalib::string& get_arg1() const noexcept { return _arg1; }

/**
Expand Down

0 comments on commit 71b2c35

Please sign in to comment.