From 5fe6c0a1250187000d25c012c0068a4f2e8805c6 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Wed, 4 Dec 2024 14:11:36 +0100 Subject: [PATCH] Verify that attribute vector is sortable before using it for sorting. --- .../documentmetastore/documentmetastore.cpp | 6 +++++ .../documentmetastore/documentmetastore.h | 1 + .../imported_attribute_vector_test.cpp | 2 ++ .../searchcommon/attribute/iattributevector.h | 2 ++ .../vespa/searchlib/attribute/attrvector.h | 2 ++ .../vespa/searchlib/attribute/attrvector.hpp | 14 +++++++++++ .../src/vespa/searchlib/attribute/floatbase.h | 1 + .../vespa/searchlib/attribute/floatbase.hpp | 7 ++++++ .../imported_attribute_vector_read_guard.cpp | 4 +++ .../imported_attribute_vector_read_guard.h | 1 + .../vespa/searchlib/attribute/integerbase.h | 1 + .../vespa/searchlib/attribute/integerbase.hpp | 7 ++++++ .../attribute/multinumericattribute.h | 1 + .../attribute/multinumericattribute.hpp | 7 ++++++ .../attribute/multinumericenumattribute.h | 1 + .../attribute/multinumericenumattribute.hpp | 7 ++++++ .../attribute/not_implemented_attribute.cpp | 6 +++++ .../attribute/not_implemented_attribute.h | 1 + .../searchlib/attribute/raw_attribute.cpp | 6 +++++ .../vespa/searchlib/attribute/raw_attribute.h | 1 + .../vespa/searchlib/attribute/stringbase.cpp | 6 +++++ .../vespa/searchlib/attribute/stringbase.h | 1 + .../vespa/searchlib/common/sortresults.cpp | 4 +++ .../src/vespa/searchvisitor/searchvisitor.cpp | 25 +++++++++++-------- 24 files changed, 104 insertions(+), 10 deletions(-) diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp index 02ff25f36f1b..3fd8d639f3ef 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.cpp @@ -1096,6 +1096,12 @@ DocumentMetaStore::foreach(const search::IGidToLidMapperVisitor &visitor) const { visitor.visit(getRawMetaData(key.get_lid()).getGid(), key.get_lid()); }); } +bool +DocumentMetaStore::is_sortable() const noexcept +{ + return true; +} + long DocumentMetaStore::onSerializeForAscendingSort(DocId lid, void * serTo, long available, const search::common::BlobConverter *) const { if ( ! validLid(lid)) return 0; diff --git a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h index 410cd8d1550b..4374d3794ff2 100644 --- a/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h +++ b/searchcore/src/vespa/searchcore/proton/documentmetastore/documentmetastore.h @@ -271,6 +271,7 @@ class DocumentMetaStore final : public DocumentMetaStoreAttribute, uint32_t getVersion() const override; void setTrackDocumentSizes(bool trackDocumentSizes) { _trackDocumentSizes = trackDocumentSizes; } void foreach(const search::IGidToLidMapperVisitor &visitor) const override; + bool is_sortable() const noexcept override; long onSerializeForAscendingSort(DocId, void *, long, const search::common::BlobConverter *) const override; long onSerializeForDescendingSort(DocId, void *, long, const search::common::BlobConverter *) const override; }; diff --git a/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp b/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp index 27850004dc1b..4c58b6936be7 100644 --- a/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp +++ b/searchlib/src/tests/attribute/imported_attribute_vector/imported_attribute_vector_test.cpp @@ -530,6 +530,8 @@ struct MockAttributeVector : NotImplementedAttribute { _bc = bc; } + bool is_sortable() const noexcept override { return true; } + long onSerializeForAscendingSort( DocId doc_id, void* ser_to, long available, const common::BlobConverter* bc) const override { diff --git a/searchlib/src/vespa/searchcommon/attribute/iattributevector.h b/searchlib/src/vespa/searchcommon/attribute/iattributevector.h index 540948aabf8c..f5b4f517011f 100644 --- a/searchlib/src/vespa/searchcommon/attribute/iattributevector.h +++ b/searchlib/src/vespa/searchcommon/attribute/iattributevector.h @@ -434,6 +434,8 @@ class IAttributeVector */ virtual bool has_uncased_matching() const noexcept { return true; } + virtual bool is_sortable() const noexcept = 0; + /** * Will serialize the values for the documentid in ascending order. The serialized form can be used by memcmp and * sortorder will be preserved. diff --git a/searchlib/src/vespa/searchlib/attribute/attrvector.h b/searchlib/src/vespa/searchlib/attribute/attrvector.h index c2e769595ab4..4d35fd99f4a3 100644 --- a/searchlib/src/vespa/searchlib/attribute/attrvector.h +++ b/searchlib/src/vespa/searchlib/attribute/attrvector.h @@ -125,6 +125,7 @@ class NumericDirectAttrVector : public search::NumericDirectAttribute uint32_t get(DocId doc, WeightedEnum * v, uint32_t sz) const override { return getAllEnumHelper(doc, v, sz); } uint32_t get(DocId doc, WeightedInt * v, uint32_t sz) const override { return getAllHelper(doc, v, sz); } uint32_t get(DocId doc, WeightedFloat * v, uint32_t sz) const override { return getAllHelper(doc, v, sz); } + bool is_sortable() const noexcept override; template long on_serialize_for_sort(DocId doc, void* serTo, long available) const; long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const override; @@ -224,6 +225,7 @@ class StringDirectAttrVector : public search::StringDirectAttribute } return available; } + bool is_sortable() const noexcept override; long on_serialize_for_sort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc, bool asc) const; long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const override; long onSerializeForDescendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const override; diff --git a/searchlib/src/vespa/searchlib/attribute/attrvector.hpp b/searchlib/src/vespa/searchlib/attribute/attrvector.hpp index be1ab8a3507a..099b1c4c1f8e 100644 --- a/searchlib/src/vespa/searchlib/attribute/attrvector.hpp +++ b/searchlib/src/vespa/searchlib/attribute/attrvector.hpp @@ -91,6 +91,13 @@ NumericDirectAttrVector(const std::string & baseFileName) } } +template +bool +NumericDirectAttrVector::is_sortable() const noexcept +{ + return true; +} + template template long @@ -160,6 +167,13 @@ StringDirectAttrVector::on_serialize_for_sort(DocId doc, void* serTo, long av return writer.write(); } +template +bool +StringDirectAttrVector::is_sortable() const noexcept +{ + return true; +} + template long StringDirectAttrVector::onSerializeForAscendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const diff --git a/searchlib/src/vespa/searchlib/attribute/floatbase.h b/searchlib/src/vespa/searchlib/attribute/floatbase.h index 5214f45493ac..29d57e2579c3 100644 --- a/searchlib/src/vespa/searchlib/attribute/floatbase.h +++ b/searchlib/src/vespa/searchlib/attribute/floatbase.h @@ -71,6 +71,7 @@ class FloatingPointAttributeTemplate : public FloatingPointAttribute virtual void load_enum_store(LoadedVector&) {} virtual void fillValues(LoadedVector &) {} virtual void load_posting_lists(LoadedVector&) {} + bool is_sortable() const noexcept override; long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; diff --git a/searchlib/src/vespa/searchlib/attribute/floatbase.hpp b/searchlib/src/vespa/searchlib/attribute/floatbase.hpp index cd67fe773efe..43e364215b5e 100644 --- a/searchlib/src/vespa/searchlib/attribute/floatbase.hpp +++ b/searchlib/src/vespa/searchlib/attribute/floatbase.hpp @@ -48,6 +48,13 @@ FloatingPointAttributeTemplate::findFoldedEnums(const char *value) const return result; } +template +bool +FloatingPointAttributeTemplate::is_sortable() const noexcept +{ + return true; +} + template long FloatingPointAttributeTemplate::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter *) const { diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.cpp b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.cpp index 1f91867cb695..e239822f8b68 100644 --- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.cpp +++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.cpp @@ -281,6 +281,10 @@ bool ImportedAttributeVectorReadGuard::isUndefined(DocId doc) const { return _target_attribute.isUndefined(getTargetLid(doc)); } +bool ImportedAttributeVectorReadGuard::is_sortable() const noexcept { + return _target_attribute.is_sortable(); +} + long ImportedAttributeVectorReadGuard::onSerializeForAscendingSort(DocId doc, void *serTo, long available, diff --git a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.h b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.h index a65dd31ec2b9..c9a9cd850fee 100644 --- a/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.h +++ b/searchlib/src/vespa/searchlib/attribute/imported_attribute_vector_read_guard.h @@ -110,6 +110,7 @@ class ImportedAttributeVectorReadGuard : public IAttributeVector, return target_lid < _target_docid_limit ? target_lid : 0u; } + bool is_sortable() const noexcept override; long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; long onSerializeForDescendingSort(DocId doc, void * serTo, long available, diff --git a/searchlib/src/vespa/searchlib/attribute/integerbase.h b/searchlib/src/vespa/searchlib/attribute/integerbase.h index 3ab5fa6265be..f398bd6e95c2 100644 --- a/searchlib/src/vespa/searchlib/attribute/integerbase.h +++ b/searchlib/src/vespa/searchlib/attribute/integerbase.h @@ -68,6 +68,7 @@ class IntegerAttributeTemplate : public IntegerAttribute virtual void load_enum_store(LoadedVector&) {} virtual void fillValues(LoadedVector &) {} virtual void load_posting_lists(LoadedVector&) {} + bool is_sortable() const noexcept override; long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; diff --git a/searchlib/src/vespa/searchlib/attribute/integerbase.hpp b/searchlib/src/vespa/searchlib/attribute/integerbase.hpp index 0085c2c03136..7d7f2fd2fb28 100644 --- a/searchlib/src/vespa/searchlib/attribute/integerbase.hpp +++ b/searchlib/src/vespa/searchlib/attribute/integerbase.hpp @@ -61,6 +61,13 @@ IntegerAttributeTemplate::findFoldedEnums(const char *value) const return result; } +template +bool +IntegerAttributeTemplate::is_sortable() const noexcept +{ + return true; +} + template long IntegerAttributeTemplate::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter *) const { diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h index 0161d04c0098..2ab6f6f80732 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.h @@ -54,6 +54,7 @@ class MultiValueNumericAttribute : public MultiValueAttribute { return array.size(); } + bool is_sortable() const noexcept override; template long on_serialize_for_sort(DocId doc, void* serTo, long available) const; long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter* bc) const override; diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp index 30d556cedc13..f643f8405e43 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericattribute.hpp @@ -186,6 +186,13 @@ MultiValueNumericAttribute::onInitSave(std::string_view fileName) (std::move(guard), this->createAttributeHeader(fileName), this->_mvMapping); } +template +bool +MultiValueNumericAttribute::is_sortable() const noexcept +{ + return true; +} + template template long diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h index c9367cb2527b..ed2de35ce604 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.h @@ -40,6 +40,7 @@ class MultiValueNumericEnumAttribute : public MultiValueEnumAttribute { using WeightedInt = typename B::BaseClass::WeightedInt; using largeint_t = typename B::BaseClass::largeint_t; + bool is_sortable() const noexcept override; template long on_serialize_for_sort(DocId doc, void* serTo, long available) const; long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter* bc) const override; diff --git a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp index 773a1723f512..ea47ccb8288c 100644 --- a/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multinumericenumattribute.hpp @@ -148,6 +148,13 @@ MultiValueNumericEnumAttribute::getSearch(QueryTermSimple::UP qTerm, return std::make_unique>(std::move(qTerm), *this, this->_mvMapping.make_read_view(doc_id_limit), this->_enumStore); } +template +bool +MultiValueNumericEnumAttribute::is_sortable() const noexcept +{ + return true; +} + template template long diff --git a/searchlib/src/vespa/searchlib/attribute/not_implemented_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/not_implemented_attribute.cpp index b4549c1c0d05..803ee203e356 100644 --- a/searchlib/src/vespa/searchlib/attribute/not_implemented_attribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/not_implemented_attribute.cpp @@ -113,6 +113,12 @@ NotImplementedAttribute::findFoldedEnums(const char *) const { notImplemented(); } +bool +NotImplementedAttribute::is_sortable() const noexcept +{ + return false; +} + long NotImplementedAttribute::onSerializeForAscendingSort(DocId, void *, long, const common::BlobConverter *) const { notImplemented(); diff --git a/searchlib/src/vespa/searchlib/attribute/not_implemented_attribute.h b/searchlib/src/vespa/searchlib/attribute/not_implemented_attribute.h index df7c436a964d..43631e6fb1f0 100644 --- a/searchlib/src/vespa/searchlib/attribute/not_implemented_attribute.h +++ b/searchlib/src/vespa/searchlib/attribute/not_implemented_attribute.h @@ -29,6 +29,7 @@ struct NotImplementedAttribute : AttributeVector { bool findEnum(const char *, EnumHandle &) const override; std::vector findFoldedEnums(const char *value) const override; + bool is_sortable() const noexcept override; long onSerializeForAscendingSort(DocId, void *, long, const common::BlobConverter *) const override; long onSerializeForDescendingSort(DocId, void *, long, const common::BlobConverter *) const override; uint32_t clearDoc(DocId) override; diff --git a/searchlib/src/vespa/searchlib/attribute/raw_attribute.cpp b/searchlib/src/vespa/searchlib/attribute/raw_attribute.cpp index 76a71675379d..ba070b3eefe4 100644 --- a/searchlib/src/vespa/searchlib/attribute/raw_attribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/raw_attribute.cpp @@ -62,6 +62,12 @@ long serialize_for_sort(std::span raw, void* serTo, long available) } +bool +RawAttribute::is_sortable() const noexcept +{ + return true; +} + long RawAttribute::onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const { diff --git a/searchlib/src/vespa/searchlib/attribute/raw_attribute.h b/searchlib/src/vespa/searchlib/attribute/raw_attribute.h index 649cd3c879a6..4f759f0e8846 100644 --- a/searchlib/src/vespa/searchlib/attribute/raw_attribute.h +++ b/searchlib/src/vespa/searchlib/attribute/raw_attribute.h @@ -15,6 +15,7 @@ class RawAttribute : public NotImplementedAttribute RawAttribute(const std::string& name, const Config& config); ~RawAttribute() override; + bool is_sortable() const noexcept override; long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const override; long onSerializeForDescendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const override; }; diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp index 5d04c75d13de..79ceaf05e68f 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp @@ -103,6 +103,12 @@ StringAttribute::get(DocId doc, largeint_t * v, uint32_t sz) const return n; } +bool +StringAttribute::is_sortable() const noexcept +{ + return true; +} + long StringAttribute::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const { diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h index aaadaba30298..a62f1235c306 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.h +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h @@ -73,6 +73,7 @@ class StringAttribute : public AttributeVector bool get_match_is_cased() const noexcept; bool has_uncased_matching() const noexcept override; + bool is_sortable() const noexcept override; long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; private: diff --git a/searchlib/src/vespa/searchlib/common/sortresults.cpp b/searchlib/src/vespa/searchlib/common/sortresults.cpp index a0b547da5ca3..d0c0b78678ad 100644 --- a/searchlib/src/vespa/searchlib/common/sortresults.cpp +++ b/searchlib/src/vespa/searchlib/common/sortresults.cpp @@ -182,6 +182,10 @@ FastS_SortSpec::Add(IAttributeContext & vecMan, const SortInfo & sInfo) Issue::report("sort spec: Attribute vector '%s' is not valid. Skipped in sorting", sInfo._field.c_str()); return false; } + if (!vector->is_sortable()) { + Issue::report("sort spec: Attribute vector '%s' is not sortable. Skipped in sorting", sInfo._field.c_str()); + return false; + } } LOG(spam, "SortSpec: adding vector (%s)'%s'", diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp index 291586931878..f71fcf02a8ce 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp @@ -1001,18 +1001,23 @@ SearchVisitor::setupAttributeVectorsForSorting(const search::common::SortSpec & if ( fid != StringFieldIdTMap::npos ) { AttributeGuard::UP attr(_attrMan.getAttribute(sInfo._field)); if (attr->valid()) { - size_t index(_attributeFields.size()); - for(size_t j(0); j < index; j++) { - if ((_attributeFields[j]._field == fid) && notContained(_sortList, j)) { - index = j; - _attributeFields[index]._ascending = sInfo._ascending; - _attributeFields[index]._converter = sInfo._converter.get(); + if (attr->get()->is_sortable()) { + size_t index(_attributeFields.size()); + for (size_t j(0); j < index; j++) { + if ((_attributeFields[j]._field == fid) && notContained(_sortList, j)) { + index = j; + _attributeFields[index]._ascending = sInfo._ascending; + _attributeFields[index]._converter = sInfo._converter.get(); + } } + if (index == _attributeFields.size()) { + _attributeFields.emplace_back(fid, std::move(attr), sInfo._ascending, + sInfo._converter.get()); + } + _sortList.push_back(index); + } else { + LOG(warning, "Attribute '%s' is not sortable", sInfo._field.c_str()); } - if (index == _attributeFields.size()) { - _attributeFields.emplace_back(fid, std::move(attr), sInfo._ascending, sInfo._converter.get()); - } - _sortList.push_back(index); } else { LOG(warning, "Attribute '%s' is not valid", sInfo._field.c_str()); }