Skip to content

Commit

Permalink
Merge pull request #32990 from vespa-engine/toregge/verify-that-attri…
Browse files Browse the repository at this point in the history
…bute-vector-is-sortable-before-using-it-for-sorting

Verify that attribute vector is sortable before using it for sorting.
  • Loading branch information
geirst authored Dec 4, 2024
2 parents 12081e3 + 5fe6c0a commit 900a614
Show file tree
Hide file tree
Showing 24 changed files with 104 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1096,6 +1096,12 @@ DocumentMetaStore::foreach(const search::IGidToLidMapperVisitor &visitor) const
{ visitor.visit(getRawMetaData(key.get_lid()).getGid(), key.get_lid()); });
}

bool
DocumentMetaStore::is_sortable() const noexcept
{
return true;
}

long
DocumentMetaStore::onSerializeForAscendingSort(DocId lid, void * serTo, long available, const search::common::BlobConverter *) const {
if ( ! validLid(lid)) return 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@ class DocumentMetaStore final : public DocumentMetaStoreAttribute,
uint32_t getVersion() const override;
void setTrackDocumentSizes(bool trackDocumentSizes) { _trackDocumentSizes = trackDocumentSizes; }
void foreach(const search::IGidToLidMapperVisitor &visitor) const override;
bool is_sortable() const noexcept override;
long onSerializeForAscendingSort(DocId, void *, long, const search::common::BlobConverter *) const override;
long onSerializeForDescendingSort(DocId, void *, long, const search::common::BlobConverter *) const override;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,8 @@ struct MockAttributeVector : NotImplementedAttribute {
_bc = bc;
}

bool is_sortable() const noexcept override { return true; }

long onSerializeForAscendingSort(
DocId doc_id, void* ser_to,
long available, const common::BlobConverter* bc) const override {
Expand Down
2 changes: 2 additions & 0 deletions searchlib/src/vespa/searchcommon/attribute/iattributevector.h
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,8 @@ class IAttributeVector
*/
virtual bool has_uncased_matching() const noexcept { return true; }

virtual bool is_sortable() const noexcept = 0;

/**
* Will serialize the values for the documentid in ascending order. The serialized form can be used by memcmp and
* sortorder will be preserved.
Expand Down
2 changes: 2 additions & 0 deletions searchlib/src/vespa/searchlib/attribute/attrvector.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ class NumericDirectAttrVector : public search::NumericDirectAttribute<B>
uint32_t get(DocId doc, WeightedEnum * v, uint32_t sz) const override { return getAllEnumHelper(doc, v, sz); }
uint32_t get(DocId doc, WeightedInt * v, uint32_t sz) const override { return getAllHelper<WeightedInt, largeint_t>(doc, v, sz); }
uint32_t get(DocId doc, WeightedFloat * v, uint32_t sz) const override { return getAllHelper<WeightedFloat, double>(doc, v, sz); }
bool is_sortable() const noexcept override;
template <bool asc>
long on_serialize_for_sort(DocId doc, void* serTo, long available) const;
long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const override;
Expand Down Expand Up @@ -224,6 +225,7 @@ class StringDirectAttrVector : public search::StringDirectAttribute
}
return available;
}
bool is_sortable() const noexcept override;
long on_serialize_for_sort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc, bool asc) const;
long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const override;
long onSerializeForDescendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const override;
Expand Down
14 changes: 14 additions & 0 deletions searchlib/src/vespa/searchlib/attribute/attrvector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,13 @@ NumericDirectAttrVector(const std::string & baseFileName)
}
}

template <typename F, typename B>
bool
NumericDirectAttrVector<F, B>::is_sortable() const noexcept
{
return true;
}

template <typename F, typename B>
template <bool asc>
long
Expand Down Expand Up @@ -160,6 +167,13 @@ StringDirectAttrVector<F>::on_serialize_for_sort(DocId doc, void* serTo, long av
return writer.write();
}

template <typename F>
bool
StringDirectAttrVector<F>::is_sortable() const noexcept
{
return true;
}

template <typename F>
long
StringDirectAttrVector<F>::onSerializeForAscendingSort(DocId doc, void* serTo, long available, const search::common::BlobConverter* bc) const
Expand Down
1 change: 1 addition & 0 deletions searchlib/src/vespa/searchlib/attribute/floatbase.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ class FloatingPointAttributeTemplate : public FloatingPointAttribute
virtual void load_enum_store(LoadedVector&) {}
virtual void fillValues(LoadedVector &) {}
virtual void load_posting_lists(LoadedVector&) {}
bool is_sortable() const noexcept override;
long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;

Expand Down
7 changes: 7 additions & 0 deletions searchlib/src/vespa/searchlib/attribute/floatbase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ FloatingPointAttributeTemplate<T>::findFoldedEnums(const char *value) const
return result;
}

template<typename T>
bool
FloatingPointAttributeTemplate<T>::is_sortable() const noexcept
{
return true;
}

template<typename T>
long
FloatingPointAttributeTemplate<T>::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter *) const {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,10 @@ bool ImportedAttributeVectorReadGuard::isUndefined(DocId doc) const {
return _target_attribute.isUndefined(getTargetLid(doc));
}

bool ImportedAttributeVectorReadGuard::is_sortable() const noexcept {
return _target_attribute.is_sortable();
}

long ImportedAttributeVectorReadGuard::onSerializeForAscendingSort(DocId doc,
void *serTo,
long available,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ class ImportedAttributeVectorReadGuard : public IAttributeVector,
return target_lid < _target_docid_limit ? target_lid : 0u;
}

bool is_sortable() const noexcept override;
long onSerializeForAscendingSort(DocId doc, void * serTo, long available,
const common::BlobConverter * bc) const override;
long onSerializeForDescendingSort(DocId doc, void * serTo, long available,
Expand Down
1 change: 1 addition & 0 deletions searchlib/src/vespa/searchlib/attribute/integerbase.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ class IntegerAttributeTemplate : public IntegerAttribute
virtual void load_enum_store(LoadedVector&) {}
virtual void fillValues(LoadedVector &) {}
virtual void load_posting_lists(LoadedVector&) {}
bool is_sortable() const noexcept override;
long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;

Expand Down
7 changes: 7 additions & 0 deletions searchlib/src/vespa/searchlib/attribute/integerbase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,13 @@ IntegerAttributeTemplate<T>::findFoldedEnums(const char *value) const
return result;
}

template<typename T>
bool
IntegerAttributeTemplate<T>::is_sortable() const noexcept
{
return true;
}

template<typename T>
long
IntegerAttributeTemplate<T>::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter *) const {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class MultiValueNumericAttribute : public MultiValueAttribute<B, M> {
return array.size();
}

bool is_sortable() const noexcept override;
template <bool asc>
long on_serialize_for_sort(DocId doc, void* serTo, long available) const;
long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter* bc) const override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,13 @@ MultiValueNumericAttribute<B, M>::onInitSave(std::string_view fileName)
(std::move(guard), this->createAttributeHeader(fileName), this->_mvMapping);
}

template <typename B, typename M>
bool
MultiValueNumericAttribute<B, M>::is_sortable() const noexcept
{
return true;
}

template <typename B, typename M>
template <bool asc>
long
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class MultiValueNumericEnumAttribute : public MultiValueEnumAttribute<B, M> {
using WeightedInt = typename B::BaseClass::WeightedInt;
using largeint_t = typename B::BaseClass::largeint_t;

bool is_sortable() const noexcept override;
template <bool asc>
long on_serialize_for_sort(DocId doc, void* serTo, long available) const;
long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter* bc) const override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,13 @@ MultiValueNumericEnumAttribute<B, M>::getSearch(QueryTermSimple::UP qTerm,
return std::make_unique<attribute::MultiNumericEnumSearchContext<T, M>>(std::move(qTerm), *this, this->_mvMapping.make_read_view(doc_id_limit), this->_enumStore);
}

template <typename B, typename M>
bool
MultiValueNumericEnumAttribute<B, M>::is_sortable() const noexcept
{
return true;
}

template <typename B, typename M>
template <bool asc>
long
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,12 @@ NotImplementedAttribute::findFoldedEnums(const char *) const {
notImplemented();
}

bool
NotImplementedAttribute::is_sortable() const noexcept
{
return false;
}

long
NotImplementedAttribute::onSerializeForAscendingSort(DocId, void *, long, const common::BlobConverter *) const {
notImplemented();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ struct NotImplementedAttribute : AttributeVector {
bool findEnum(const char *, EnumHandle &) const override;
std::vector<EnumHandle> findFoldedEnums(const char *value) const override;

bool is_sortable() const noexcept override;
long onSerializeForAscendingSort(DocId, void *, long, const common::BlobConverter *) const override;
long onSerializeForDescendingSort(DocId, void *, long, const common::BlobConverter *) const override;
uint32_t clearDoc(DocId) override;
Expand Down
6 changes: 6 additions & 0 deletions searchlib/src/vespa/searchlib/attribute/raw_attribute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ long serialize_for_sort(std::span<const char> raw, void* serTo, long available)

}

bool
RawAttribute::is_sortable() const noexcept
{
return true;
}

long
RawAttribute::onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const
{
Expand Down
1 change: 1 addition & 0 deletions searchlib/src/vespa/searchlib/attribute/raw_attribute.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class RawAttribute : public NotImplementedAttribute
RawAttribute(const std::string& name, const Config& config);
~RawAttribute() override;

bool is_sortable() const noexcept override;
long onSerializeForAscendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const override;
long onSerializeForDescendingSort(DocId doc, void* serTo, long available, const common::BlobConverter*) const override;
};
Expand Down
6 changes: 6 additions & 0 deletions searchlib/src/vespa/searchlib/attribute/stringbase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ StringAttribute::get(DocId doc, largeint_t * v, uint32_t sz) const
return n;
}

bool
StringAttribute::is_sortable() const noexcept
{
return true;
}

long
StringAttribute::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const
{
Expand Down
1 change: 1 addition & 0 deletions searchlib/src/vespa/searchlib/attribute/stringbase.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class StringAttribute : public AttributeVector

bool get_match_is_cased() const noexcept;
bool has_uncased_matching() const noexcept override;
bool is_sortable() const noexcept override;
long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
private:
Expand Down
4 changes: 4 additions & 0 deletions searchlib/src/vespa/searchlib/common/sortresults.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,10 @@ FastS_SortSpec::Add(IAttributeContext & vecMan, const SortInfo & sInfo)
Issue::report("sort spec: Attribute vector '%s' is not valid. Skipped in sorting", sInfo._field.c_str());
return false;
}
if (!vector->is_sortable()) {
Issue::report("sort spec: Attribute vector '%s' is not sortable. Skipped in sorting", sInfo._field.c_str());
return false;
}
}

LOG(spam, "SortSpec: adding vector (%s)'%s'",
Expand Down
25 changes: 15 additions & 10 deletions streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1001,18 +1001,23 @@ SearchVisitor::setupAttributeVectorsForSorting(const search::common::SortSpec &
if ( fid != StringFieldIdTMap::npos ) {
AttributeGuard::UP attr(_attrMan.getAttribute(sInfo._field));
if (attr->valid()) {
size_t index(_attributeFields.size());
for(size_t j(0); j < index; j++) {
if ((_attributeFields[j]._field == fid) && notContained(_sortList, j)) {
index = j;
_attributeFields[index]._ascending = sInfo._ascending;
_attributeFields[index]._converter = sInfo._converter.get();
if (attr->get()->is_sortable()) {
size_t index(_attributeFields.size());
for (size_t j(0); j < index; j++) {
if ((_attributeFields[j]._field == fid) && notContained(_sortList, j)) {
index = j;
_attributeFields[index]._ascending = sInfo._ascending;
_attributeFields[index]._converter = sInfo._converter.get();
}
}
if (index == _attributeFields.size()) {
_attributeFields.emplace_back(fid, std::move(attr), sInfo._ascending,
sInfo._converter.get());
}
_sortList.push_back(index);
} else {
LOG(warning, "Attribute '%s' is not sortable", sInfo._field.c_str());
}
if (index == _attributeFields.size()) {
_attributeFields.emplace_back(fid, std::move(attr), sInfo._ascending, sInfo._converter.get());
}
_sortList.push_back(index);
} else {
LOG(warning, "Attribute '%s' is not valid", sInfo._field.c_str());
}
Expand Down

0 comments on commit 900a614

Please sign in to comment.