Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RNET-1131: optimize many OR'd terms on UUID/ObjectId queries #7582

Merged
merged 6 commits into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
* Add `SyncClientConfig::security_access_group` which allows specifying the access group to use for the sync metadata Realm's encryption key. Setting this is required when sharing the metadata Realm between apps on Apple platforms ([#7552](https://github.com/realm/realm-core/pull/7552)).
* When connecting to multiple server apps, a unique encryption key is used for each of the metadata Realms rather than sharing one between them ([#7552](https://github.com/realm/realm-core/pull/7552)).
* Introduce the new `SyncUser` interface which can be implemented by SDKs to use sync without the core App Services implementation (or just for greater control over user behavior in tests). ([PR #7300](https://github.com/realm/realm-core/pull/7300)).
* Improve performance of "chained OR equality" queries for UUID/ObjectId types and RQL parsed "IN" queries on string/int/uuid/objectid types. ([.Net #3566](https://github.com/realm/realm-dotnet/issues/3566), since the introduction of these types)
* Introducing `Query::in()` which allows SDKs to take advantage of improved performance when building equality conditions against many constants. ([#7582](https://github.com/realm/realm-core/pull/7582))

### Fixed
* <How do the end-user experience this issue? what was the impact?> ([#????](https://github.com/realm/realm-core/issues/????), since v?.?.?)
Expand Down
13 changes: 13 additions & 0 deletions src/realm/parser/driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,19 @@ Query EqualityNode::visit(ParserDriver* drv)
}
}

if (op == CompareType::IN || op == CompareType::EQUAL) {
if (auto mixed_list = dynamic_cast<ConstantMixedList*>(right.get());
mixed_list && mixed_list->size() &&
mixed_list->get_comparison_type().value_or(ExpressionComparisonType::Any) ==
ExpressionComparisonType::Any) {
if (auto lhs = dynamic_cast<ObjPropertyBase*>(left.get());
lhs && lhs->column_key() && !lhs->column_key().is_collection() && !lhs->links_exist() &&
lhs->column_key().get_type() != col_type_Mixed) {
return drv->m_base_table->where().in(lhs->column_key(), mixed_list->begin(), mixed_list->end());
}
}
}

if (left_type == type_Link && left_type == right_type && right->has_constant_evaluation()) {
if (auto link_column = dynamic_cast<const Columns<Link>*>(left.get())) {
if (link_column->link_map().get_nb_hops() == 1 &&
Expand Down
108 changes: 98 additions & 10 deletions src/realm/query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,12 @@ struct MakeConditionNode {
return std::unique_ptr<ParentNode>{new Node(null{}, col_key)};
}

// Creates a condition node for `col_key` from a Mixed constant by unwrapping
// the constant as the node's own condition value type.
static std::unique_ptr<ParentNode> make(ColKey col_key, Mixed value)
{
    using ConditionValue = typename Node::TConditionValue;
    std::unique_ptr<ParentNode> node{new Node(value.get<ConditionValue>(), col_key)};
    return node;
}

// overload for optional types
template <class T = typename Node::TConditionValue>
static typename std::enable_if<!std::is_same<typename util::RemoveOptional<T>::type, T>::value,
std::unique_ptr<ParentNode>>::type
Expand Down Expand Up @@ -326,6 +332,35 @@ struct MakeConditionNode<StringNode<Cond>> {
}
};

// Factory specialization that builds TimestampNode<Cond> condition nodes from
// the different kinds of constant a caller may supply (Timestamp, Mixed, null).
// Any other argument type is rejected with a type mismatch error.
template <class Cond>
struct MakeConditionNode<TimestampNode<Cond>> {
// Builds the node directly from a Timestamp constant.
static std::unique_ptr<ParentNode> make(ColKey col_key, Timestamp value)
{
return std::unique_ptr<ParentNode>{new TimestampNode<Cond>(std::move(value), col_key)};
}

// Builds the node from a Mixed constant by extracting its Timestamp payload.
// This overload only takes part in overload resolution when Cond is one of
// Equal, NotEqual, Greater, Less, GreaterEqual or LessEqual. The leading
// parameter pack exists only to turn the function into a template so that the
// enable_if can apply; the static_assert rejects explicit template arguments.
// NOTE(review): for any other Cond a Mixed argument presumably resolves to the
// catch-all overload below and throws — confirm.
// NOTE(review): value.get<Timestamp>() presumably requires the Mixed to hold a
// Timestamp; callers should check the type first — confirm.
template <typename... SubstitutionEnabler, typename U = Cond>
static std::enable_if_t<is_any_v<U, Equal, NotEqual, Greater, Less, GreaterEqual, LessEqual>,
std::unique_ptr<ParentNode>>
make(ColKey col_key, Mixed value)
{
static_assert(sizeof...(SubstitutionEnabler) == 0, "Do not specify template arguments");
return std::unique_ptr<ParentNode>{new TimestampNode<Cond>(value.get<Timestamp>(), col_key)};
}

// Builds the node for a comparison against null.
static std::unique_ptr<ParentNode> make(ColKey col_key, null)
{
return std::unique_ptr<ParentNode>{new TimestampNode<Cond>(null{}, col_key)};
}

// Catch-all for every other argument type: no node is built, a type mismatch
// error is raised instead.
template <class T>
REALM_FORCEINLINE static std::unique_ptr<ParentNode> make(ColKey, T&&)
{
throw_type_mismatch_error();
}
};

template <class Cond>
struct MakeConditionNode<MixedNode<Cond>> {
template <class T>
Expand Down Expand Up @@ -851,6 +886,66 @@ Query& Query::like(ColKey column_key, Mixed value, bool case_sensitive)
add_condition<LikeIns>(column_key, value);
return *this;
}
/// Adds a condition matching objects whose value in `column_key` equals any of
/// the constants in the range [begin, end).
///
/// UUID, ObjectId, String and Int columns get a dedicated node constructed
/// from the whole range. Every other column type falls back to a chain of
/// OR'd equality conditions, one per usable constant. For non-Mixed columns a
/// constant is usable when it has the column's type, or when it is null and
/// the column is nullable; other constants are skipped.
///
/// If the range is empty, or no constant in it is usable for the column, the
/// added condition matches nothing.
///
/// @param column_key  Column to compare against; must not be a collection
///                    column (asserted).
/// @param begin       Pointer to the first constant.
/// @param end         Pointer one past the last constant.
/// @return            `*this`, to allow chaining.
Query& Query::in(ColKey column_key, const Mixed* begin, const Mixed* end)
{
    REALM_ASSERT(!column_key.is_collection());
    ColumnType col_type = column_key.get_type();
    std::unique_ptr<ParentNode> node;
    try {
        if (begin == end) {
            node = std::make_unique<ExpressionNode>(std::make_unique<FalseExpression>());
        }
        else if (col_type == col_type_UUID) {
            node = std::make_unique<UUIDNode<Equal>>(column_key, begin, end);
        }
        else if (col_type == col_type_ObjectId) {
            node = std::make_unique<ObjectIdNode<Equal>>(column_key, begin, end);
        }
        else if (col_type == col_type_String) {
            node = std::make_unique<StringNode<Equal>>(column_key, begin, end);
        }
        else if (col_type == col_type_Int) {
            if (column_key.is_nullable()) {
                node = std::make_unique<IntegerNode<ArrayIntNull, Equal>>(column_key, begin, end);
            }
            else {
                node = std::make_unique<IntegerNode<ArrayInteger, Equal>>(column_key, begin, end);
            }
        }
        else {
            // general path for nodes that don't have this optimization yet
            Query cond = this->m_table->where();
            // Tracks whether at least one equality condition made it into `cond`.
            bool has_condition = false;
            if (col_type == col_type_Mixed) {
                for (const Mixed* it = begin; it != end; ++it) {
                    cond.add_node(make_condition_node<Equal>(*m_table, column_key, *it));
                    cond.Or();
                    has_condition = true;
                }
            }
            else {
                for (const Mixed* it = begin; it != end; ++it) {
                    if (it->is_type(DataType(col_type))) {
                        cond.add_node(make_condition_node<Equal>(*m_table, column_key, *it));
                        cond.Or();
                        has_condition = true;
                    }
                    else if (it->is_null() && column_key.is_nullable()) {
                        cond.add_node(make_condition_node<Equal>(*m_table, column_key, realm::null()));
                        cond.Or();
                        has_condition = true;
                    }
                }
            }
            if (has_condition) {
                this->and_query(cond);
                return *this;
            }
            // Every constant was filtered out, so `cond` is empty. And-ing an
            // empty sub-query would presumably add no restriction at all
            // (TODO confirm against and_query), whereas "IN" against no usable
            // constant must match nothing - the same outcome as the
            // InvalidArgument fallback below.
            node = std::make_unique<ExpressionNode>(std::make_unique<FalseExpression>());
        }
    }
    catch (const InvalidArgument&) {
        // if none of the arguments matched the right type we'd end up with an
        // empty condition node which won't evaluate correctly. The right behaviour
        // is to match nothing, so make a false condition
        node = std::make_unique<ExpressionNode>(std::make_unique<FalseExpression>());
    }
    add_node(std::move(node));
    return *this;
}

// ------------- size
Query& Query::size_equal(ColKey column_key, int64_t value)
Expand Down Expand Up @@ -1017,9 +1112,7 @@ void Query::aggregate(QueryStateBase& st, ColKey column_key) const
auto pn = root_node();
auto best = find_best_node(pn);
auto node = pn->m_children[best];
if (node->has_search_index()) {
auto keys = node->index_based_keys();
REALM_ASSERT(keys);
if (auto keys = node->index_based_keys()) {
// The node having the search index can be removed from the query as we know that
// all the objects will match this condition
pn->m_children[best] = pn->m_children.back();
Expand Down Expand Up @@ -1344,10 +1437,7 @@ void Query::do_find_all(QueryStateBase& st) const
auto pn = root_node();
auto best = find_best_node(pn);
auto node = pn->m_children[best];
if (node->has_search_index()) {
auto keys = node->index_based_keys();
REALM_ASSERT(keys);

if (auto keys = node->index_based_keys()) {
// The node having the search index can be removed from the query as we know that
// all the objects will match this condition
pn->m_children[best] = pn->m_children.back();
Expand Down Expand Up @@ -1463,9 +1553,7 @@ size_t Query::do_count(size_t limit) const
auto pn = root_node();
auto best = find_best_node(pn);
auto node = pn->m_children[best];
if (node->has_search_index()) {
auto keys = node->index_based_keys();
REALM_ASSERT(keys);
if (auto keys = node->index_based_keys()) {
if (pn->m_children.size() > 1) {
// The node having the search index can be removed from the query as we know that
// all the objects will match this condition
Expand Down
1 change: 1 addition & 0 deletions src/realm/query.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ class Query final {
Query& ends_with(ColKey column_key, Mixed value, bool case_sensitive = true);
Query& contains(ColKey column_key, Mixed value, bool case_sensitive = true);
Query& like(ColKey column_key, Mixed value, bool case_sensitive = true);
Query& in(ColKey column_key, const Mixed* begin, const Mixed* end);

// Conditions: size
Query& size_equal(ColKey column_key, int64_t value);
Expand Down
22 changes: 22 additions & 0 deletions src/realm/query_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,28 @@ size_t IndexEvaluator::do_search_index(const Cluster* cluster, size_t start, siz
return not_found;
}

// Builds an equality node that matches any of the constants in [begin, end).
// A null constant adds a default-constructed needle; a string constant is
// copied into storage owned by this node and added as a needle; constants of
// any other type are ignored. Throws InvalidArgument when no needle results.
StringNode<Equal>::StringNode(ColKey col, const Mixed* begin, const Mixed* end)
    : StringNodeEqualBase(StringData(), col)
{
    // Drop the index evaluator: even if the column has a search index, using
    // it would be slower in this scenario.
    m_index_evaluator.reset();

    for (const Mixed* cursor = begin; cursor != end; ++cursor) {
        const Mixed& candidate = *cursor;
        if (candidate.is_null()) {
            m_needles.emplace();
            continue;
        }

        const StringData* text = candidate.get_if<StringData>();
        if (!text) {
            // neither null nor a string: not a usable needle
            continue;
        }

        // Keep a private copy of the characters so the needle refers to memory
        // held by this node rather than to the caller's buffer.
        const auto length = text->size();
        m_needle_storage.push_back(std::make_unique<char[]>(length));
        char* owned = m_needle_storage.back().get();
        std::copy(text->data(), text->data() + length, owned);
        m_needles.insert(StringData(owned, length));
    }

    if (m_needles.empty()) {
        throw InvalidArgument("No string arguments in query");
    }
}

void StringNode<Equal>::_search_index_init()
{
REALM_ASSERT(bool(m_index_evaluator));
Expand Down
Loading
Loading