Skip to content

Commit

Permalink
Merge pull request #32473 from vespa-engine/toregge/extend-closest-fe…
Browse files Browse the repository at this point in the history
…ature-to-handle-mixed-tensors-with-multiple-mapped-dimensions

Extend closest feature to handle mixed tensors with multiple mapped d…
  • Loading branch information
geirst authored Sep 26, 2024
2 parents cb7ae6e + a4bb56f commit dda7caa
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 39 deletions.
81 changes: 54 additions & 27 deletions searchlib/src/tests/features/closest/closest_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,28 @@ const std::string field_and_label_feature_name("closest(bar,nns)");
const std::string field_feature_name("closest(bar)");

const std::string dense_tensor_type("tensor(x[2])");
const std::string mixed_tensor_type("tensor(a{},x[2])");
const std::vector<std::string> mixed_tensor_types{"error",
"tensor(a{},x[2])",
"tensor(a{},b{},x[2])"};
const std::string sparse_tensor_type("tensor(a{})");

TensorSpec no_subspace(sparse_tensor_type);
TensorSpec subspace_a = TensorSpec::from_expr("tensor(a{}):{{a:\"a\"}:1}");
TensorSpec subspace_b = TensorSpec::from_expr("tensor(a{}):{{a:\"b\"}:1}");

TensorSpec doc_tensor = TensorSpec::from_expr("tensor(a{},x[2]):{{a:\"a\",x:0}:3,{a:\"a\",x:1}:10,{a:\"b\",x:0}:5,{a:\"b\",x:1}:10}");
const std::vector<TensorSpec> no_subspaces{TensorSpec("error"),
TensorSpec(sparse_tensor_type),
TensorSpec("tensor(a{},b{})")};
const std::vector<TensorSpec> subspace_as{TensorSpec("error"),
TensorSpec::from_expr("tensor(a{}):{{a:\"a\"}:1}"),
TensorSpec::from_expr("tensor(a{},b{}):{{a:\"a\",b:\"K\"}:1}")};
const std::vector<TensorSpec> subspace_bs{TensorSpec("error"),
TensorSpec::from_expr("tensor(a{}):{{a:\"b\"}:1}"),
TensorSpec::from_expr("tensor(a{},b{}):{{a:\"b\",b:\"L\"}:1}")};

const std::vector<TensorSpec> doc_tensors{TensorSpec("error"),
TensorSpec::from_expr("tensor(a{},x[2]):"
"{{a:\"a\",x:0}:3,{a:\"a\",x:1}:10,"
"{a:\"b\",x:0}:5,{a:\"b\",x:1}:10}"),
TensorSpec::from_expr("tensor(a{},b{},x[2]):"
"{{a:\"a\",b:\"K\",x:0}:3,{a:\"a\",b:\"K\",x:1}:10,"
"{a:\"b\",b:\"L\",x:0}:5,{a:\"b\",b:\"L\",x:1}:10}")};

using RankFixture = DistanceClosenessFixture;

Expand All @@ -48,9 +62,11 @@ struct TestParam
{
std::string _name;
bool _direct_tensor;
TestParam(std::string name, bool direct_tensor)
uint32_t _mapped_dimensions;
TestParam(std::string name, bool direct_tensor, uint32_t mapped_dimensions)
: _name(std::move(name)),
_direct_tensor(direct_tensor)
_direct_tensor(direct_tensor),
_mapped_dimensions(mapped_dimensions)
{
}
~TestParam();
Expand Down Expand Up @@ -109,11 +125,12 @@ ClosestTest::~ClosestTest() = default;
void
ClosestTest::assert_closest(const Labels& labels, const std::string& feature_name, const std::string& query_tensor, const TensorSpec& exp_spec)
{
RankFixture f(mixed_tensor_type, direct_tensor(), 0, 1, labels, feature_name,
uint32_t mapped_dimensions = GetParam()._mapped_dimensions;
RankFixture f(mixed_tensor_types[mapped_dimensions], direct_tensor(), 0, 1, labels, feature_name,
dense_tensor_type + ":" + query_tensor);
ASSERT_FALSE(f.failed());
SCOPED_TRACE(query_tensor);
f.set_attribute_tensor(9, doc_tensor);
f.set_attribute_tensor(9, doc_tensors[mapped_dimensions]);
EXPECT_EQ(exp_spec, get_spec(f, 9));
}

Expand All @@ -126,83 +143,93 @@ ClosestTest::assert_closest(const Labels& labels, const std::string& feature_nam

INSTANTIATE_TEST_SUITE_P(ClosestMultiTest,
ClosestTest,
testing::Values(TestParam("Serialized", false),
TestParam("Direct", true)),
testing::Values(TestParam("Serialized_1_mapped_dim", false, 1),
TestParam("Direct_1_mapped_dim", true, 1),
TestParam("Serialized_2_mapped_dims", false, 2),
TestParam("Direct_2_mapped_dims", true, 2)),
testing::PrintToStringParamName());

TEST(ClosestTest, require_that_blueprint_can_be_created_from_factory)
TEST_F(ClosestTest, require_that_blueprint_can_be_created_from_factory)
{
BlueprintFactoryFixture f;
auto bp = f.factory.createBlueprint("closest");
EXPECT_TRUE(bp);
EXPECT_TRUE(dynamic_cast<ClosestBlueprint*>(bp.get()) != 0);
}

TEST(ClosestTest, require_that_no_features_are_dumped)
TEST_F(ClosestTest, require_that_no_features_are_dumped)
{
ClosestBlueprint f1;
IndexEnvironmentFixture f2;
FeatureDumpFixture f3;
f1.visitDumpFeatures(f2.indexEnv, f3);
}

TEST(ClosestTest, require_that_setup_fails_for_unknown_field)
TEST_P(ClosestTest, require_that_setup_fails_for_unknown_field)
{
assert_setup("random_field", false, mixed_tensor_type, std::nullopt);
uint32_t mapped_dimensions = this->GetParam()._mapped_dimensions;
assert_setup("random_field", false, mixed_tensor_types[mapped_dimensions], std::nullopt);
}

TEST(ClosestTest, require_that_setup_fails_if_field_type_is_not_attribute)
TEST_F(ClosestTest, require_that_setup_fails_if_field_type_is_not_attribute)
{
assert_setup("ibar", false, sparse_tensor_type, std::nullopt);
assert_setup("ibar", false, mixed_tensor_types[1], std::nullopt);
}

TEST(ClosestTest, require_that_setup_fails_if_field_data_type_is_not_tensor)
TEST_F(ClosestTest, require_that_setup_fails_if_field_data_type_is_not_tensor)
{
assert_setup("foo", false, sparse_tensor_type, std::nullopt);
assert_setup("foo", false, mixed_tensor_types[1], std::nullopt);
}

TEST(ClosestTest, require_that_setup_can_be_done_on_random_label)
TEST_P(ClosestTest, require_that_setup_can_be_done_on_random_label)
{
assert_setup("bar", true, mixed_tensor_type, "random_label");
uint32_t mapped_dimensions = this->GetParam()._mapped_dimensions;
assert_setup("bar", true, mixed_tensor_types[mapped_dimensions], "random_label");
}

TEST(ClosestTest, require_that_setup_fails_if_tensor_type_is_missing)
TEST_F(ClosestTest, require_that_setup_fails_if_tensor_type_is_missing)
{
assert_setup("bar", false, std::nullopt, std::nullopt);
}

TEST(ClosestTest, require_that_setup_fails_if_tensor_type_is_dense)
TEST_F(ClosestTest, require_that_setup_fails_if_tensor_type_is_dense)
{
assert_setup("bar", false, dense_tensor_type, std::nullopt);
}

TEST(ClosestTest, require_that_setup_fails_if_tensor_type_is_sparse)
TEST_F(ClosestTest, require_that_setup_fails_if_tensor_type_is_sparse)
{
assert_setup("bar", false, sparse_tensor_type, std::nullopt);
}

TEST_P(ClosestTest, require_that_no_label_gives_empty_result)
{
NoLabel f;
uint32_t mapped_dimensions = GetParam()._mapped_dimensions;
auto& no_subspace = no_subspaces[mapped_dimensions];
assert_closest(f, field_and_label_feature_name, {no_subspace, no_subspace});
}

TEST_P(ClosestTest, require_that_unrelated_label_gives_empty_result)
{
SingleLabel f("unrelated", 1);
uint32_t mapped_dimensions = GetParam()._mapped_dimensions;
auto& no_subspace = no_subspaces[mapped_dimensions];
assert_closest(f, field_and_label_feature_name, {no_subspace, no_subspace});
}

TEST_P(ClosestTest, closest_using_field_setup)
{
NoLabel f;
assert_closest(f, field_feature_name, {subspace_b, subspace_a});
uint32_t mapped_dimensions = GetParam()._mapped_dimensions;
assert_closest(f, field_feature_name, {subspace_bs[mapped_dimensions], subspace_as[mapped_dimensions]});
}

TEST_P(ClosestTest, closest_using_field_and_label_setup)
{
SingleLabel f("nns", 1);
assert_closest(f, field_and_label_feature_name, {subspace_b, subspace_a});
uint32_t mapped_dimensions = GetParam()._mapped_dimensions;
assert_closest(f, field_and_label_feature_name, {subspace_bs[mapped_dimensions], subspace_as[mapped_dimensions]});
}

GTEST_MAIN_RUN_ALL_TESTS()
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class DistanceCalculatorTest : public testing::Test {
auto qt = make_tensor(query_tensor);
DistanceCalculator::make_with_validation(*attr, *qt);
}
void verify_mixed_tensors(const std::string& qt_1, const std::string& qt_2);
};

constexpr double max_distance = std::numeric_limits<double>::max();
Expand All @@ -80,13 +81,9 @@ TEST_F(DistanceCalculatorTest, calculation_over_dense_tensor_attribute)
EXPECT_EQ(OptSubspace(), calc_closest_subspace(2, qt));
}

TEST_F(DistanceCalculatorTest, calculation_over_mixed_tensor_attribute)
void
DistanceCalculatorTest::verify_mixed_tensors(const std::string& qt_1, const std::string& qt_2)
{
build_attribute("tensor(x{},y[2])",
{"{{x:\"a\",y:0}:3,{x:\"a\",y:1}:10,{x:\"b\",y:0}:5,{x:\"b\",y:1}:10}",
"{}", ""});
std::string qt_1 = "tensor(y[2]):[9,10]";
std::string qt_2 = "tensor(y[2]):[1,10]";
EXPECT_DOUBLE_EQ(16, calc_distance(1, qt_1));
EXPECT_DOUBLE_EQ(4, calc_distance(1, qt_2));
EXPECT_EQ(OptSubspace(1), calc_closest_subspace(1, qt_1));
Expand All @@ -102,14 +99,32 @@ TEST_F(DistanceCalculatorTest, calculation_over_mixed_tensor_attribute)
EXPECT_DOUBLE_EQ(0.0, calc_rawscore(3, qt_1));
}

TEST_F(DistanceCalculatorTest, calculation_over_mixed_tensor_attribute)
{
build_attribute("tensor(x{},y[2])",
{"{{x:\"a\",y:0}:3,{x:\"a\",y:1}:10,{x:\"b\",y:0}:5,{x:\"b\",y:1}:10}",
"{}", ""});
std::string qt_1 = "tensor(y[2]):[9,10]";
std::string qt_2 = "tensor(y[2]):[1,10]";
verify_mixed_tensors(qt_1, qt_2);
}

TEST_F(DistanceCalculatorTest, calculation_over_mixed_tensor_attribute_with_multiple_mapped_dimensions)
{
build_attribute("tensor(x{},y{},z[2])",
{"{{x:\"a\",y:\"K\",z:0}:3,{x:\"a\",y:\"K\",z:1}:10,"
"{x:\"b\",y:\"L\",z:0}:5,{x:\"b\",y:\"L\",z:1}:10}",
"{}", ""});
std::string qt_1 = "tensor(z[2]):[9,10]";
std::string qt_2 = "tensor(z[2]):[1,10]";
verify_mixed_tensors(qt_1, qt_2);
}

TEST_F(DistanceCalculatorTest, make_calculator_for_unsupported_types_throws)
{
build_attribute("tensor(x{},y{})", {});
EXPECT_THROW(make_calc_throws("tensor(y[2]):[9,10]"), vespalib::IllegalArgumentException);

build_attribute("tensor(x{},y{},z[2])", {});
EXPECT_THROW(make_calc_throws("tensor(z[2]):[9,10]"), vespalib::IllegalArgumentException);

build_attribute("tensor(x{},y[2])", {});
EXPECT_THROW(make_calc_throws("tensor(y{}):{{y:\"a\"}:9,{y:\"b\"}:10}"), vespalib::IllegalArgumentException);
EXPECT_THROW(make_calc_throws("tensor(y[3]):[9,10]"), vespalib::IllegalArgumentException);
Expand Down
4 changes: 2 additions & 2 deletions searchlib/src/vespa/searchlib/features/closest_feature.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ ClosestSerializedExecutor::execute(uint32_t docId)
ClosestDirectExecutor::ClosestDirectExecutor(DistanceCalculatorBundle&& bundle, Value& empty_output, TypedCells identity, const ITensorAttribute& attr)
: ClosestExecutor(std::move(bundle), empty_output, identity, attr),
_subspace_type(attr.getTensorType()),
_labels(1),
_labels(attr.getTensorType().count_mapped_dimensions()),
_label_ptrs(_labels.size())
{
for (size_t i = 0; i < _labels.size(); ++i) {
Expand Down Expand Up @@ -238,7 +238,7 @@ ClosestBlueprint::setup(const fef::IIndexEnvironment & env, const fef::Parameter
return false;
}
_field_tensor_type = ValueType::from_spec(attr_type_spec);
if (_field_tensor_type.is_error() || _field_tensor_type.is_double() || _field_tensor_type.count_mapped_dimensions() != 1 || _field_tensor_type.count_indexed_dimensions() != 1) {
if (_field_tensor_type.is_error() || _field_tensor_type.is_double() || _field_tensor_type.count_mapped_dimensions() < 1 || _field_tensor_type.count_indexed_dimensions() != 1) {
LOG(error, "%s: Field %s has invalid type: '%s'", getName().c_str(), _field_name.c_str(), attr_type_spec.c_str());
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ supported_tensor_type(const vespalib::eval::ValueType& type)
if (type.is_dense() && type.dimensions().size() == 1) {
return true;
}
if (type.is_mixed() && type.dimensions().size() == 2) {
if (type.is_mixed() && type.count_indexed_dimensions() == 1 && type.count_mapped_dimensions() >= 1) {
return true;
}
return false;
Expand Down

0 comments on commit dda7caa

Please sign in to comment.