Skip to content

Commit

Permalink
fix: handle repeated sort keys and sort as int
Browse files Browse the repository at this point in the history
  • Loading branch information
oh2024 committed Jun 5, 2024
1 parent ff0062f commit b9cb9ab
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 16 deletions.
21 changes: 21 additions & 0 deletions hybridse/src/codegen/udf_ir_builder_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1199,6 +1199,27 @@ TEST_F(UdfIRBuilderTest, CustUdfs) {
false);
}

// The order-by values must be compared as integers: "11" ranks above "6" numerically, although a
// plain string comparison would put "11" first in ascending order.
TEST_F(UdfIRBuilderTest, JsonArraySortAsInt) {
    const openmldb::base::StringRef input = R"([{"a": "6", "b": "2"}, {"a": "11", "b": "9"}])";

    // Sorts `input` by key "a", emits key "b", and compares against `expected`.
    // NOTE(review): 10 and `desc` are presumably the top-n limit and the descending flag of
    // json_array_sort -- confirm against the UDF signature.
    const auto expect_sorted = [&input](const char* expected, bool desc) {
        CheckUdf<StringRef, StringRef, StringRef, StringRef, int32_t, bool>("json_array_sort", expected, input, "a",
                                                                            "b", 10, desc);
    };

    expect_sorted("9,2", true);
    expect_sorted("2,9", false);
}

// Two elements share the order-by value 6; neither may be dropped, and the tie is expected to be
// resolved by the emitted value, following the requested sort direction.
TEST_F(UdfIRBuilderTest, JsonArraySortRepeatedKey) {
    const openmldb::base::StringRef input =
        R"([{"a": "6", "b": "a"}, {"a": "11", "b": "aaa"}, {"a": "6", "b": "aa"}])";

    // Sorts `input` by key "a", emits key "b", and compares against `expected`.
    const auto expect_sorted = [&input](const char* expected, bool desc) {
        CheckUdf<StringRef, StringRef, StringRef, StringRef, int32_t, bool>("json_array_sort", expected, input, "a",
                                                                            "b", 10, desc);
    };

    expect_sorted("aaa,aa,a", true);
    expect_sorted("a,aa,aaa", false);
}

// An order-by value that is not an integer ("a" in the first element) is expected to produce an
// empty result rather than a partially sorted one.
TEST_F(UdfIRBuilderTest, JsonArraySortInvalidKey) {
    const openmldb::base::StringRef input = R"([{"a": "a", "b": "2"}, {"a": "11", "b": "9"}])";
    const char* const expected = "";

    CheckUdf<StringRef, StringRef, StringRef, StringRef, int32_t, bool>("json_array_sort", expected, input, "a", "b",
                                                                        10, true);
}
}
} // namespace codegen

Expand Down
36 changes: 20 additions & 16 deletions hybridse/src/udf/default_defs/feature_zero_def.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
*/

#include <algorithm>
#include <charconv>
#include <iostream>
#include <queue>
#include <string>
#include <tuple>
Expand All @@ -25,15 +27,14 @@
#include "boost/algorithm/string.hpp"
#include "boost/algorithm/string/join.hpp"
#include "boost/algorithm/string/regex.hpp"

#include "codec/list_iterator_codec.h"
#include "codec/type_codec.h"
#include "simdjson.h"
#include "udf/containers.h"
#include "udf/default_udf_library.h"
#include "udf/udf.h"
#include "udf/udf_registry.h"
#include "vm/jit_runtime.h"
#include "simdjson.h"

using openmldb::base::Date;
using hybridse::codec::ListRef;
Expand Down Expand Up @@ -615,7 +616,7 @@ void json_array_sort(::openmldb::base::StringRef *json_array,

std::string_view order_ref(order->data_, order->size_);
std::string_view column_ref(column->data_, column->size_);
std::map<std::string, std::string> container;
std::vector<std::pair<int, std::string>> container;

for (auto ele : arr) {
simdjson::ondemand::object obj;
Expand All @@ -635,27 +636,30 @@ void json_array_sort(::openmldb::base::StringRef *json_array,
continue;
}

container.emplace(order_val, column_val);
int order_int;
auto [ptr_order, ec_order] = std::from_chars(order_val.data(), order_val.data() + order_val.size(), order_int);
if (ec_order != std::errc()) {
return;
}
container.emplace_back(order_int, column_val);
}

std::sort(container.begin(), container.end(), [desc](const auto& a, const auto& b) {
if (a.first == b.first) {
return desc ? a.second > b.second : a.second < b.second;
}
return desc ? a.first > b.first : a.first < b.first;
});

std::stringstream ss;
uint32_t topn = static_cast<uint32_t>(n);
auto sz = container.size();

for (uint32_t i = 0; i < topn && i < sz; ++i) {
if (desc) {
auto it = std::next(container.crbegin(), i);
ss << it->second;
if (std::next(it, 1) != container.crend() && i + 1 < topn) {
ss << ",";
}
} else {
auto it = std::next(container.cbegin(), i);
ss << it->second;
if (std::next(it, 1) != container.cend() && i + 1 < topn) {
ss << ",";
ss << container[i].second;
if (i + 1 < topn && i + 1 < sz) {
ss << ",";
}
}
}

auto str = ss.str();
Expand Down

0 comments on commit b9cb9ab

Please sign in to comment.