Skip to content

Commit

Permalink
minor fix
Browse files Browse the repository at this point in the history
  • Loading branch information
zhli1142015 committed Jan 6, 2025
1 parent 4f63a5b commit 48024c2
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 15 deletions.
3 changes: 1 addition & 2 deletions velox/docs/functions/spark/json.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,7 @@ JSON Functions
REAL, DOUBLE, VARCHAR, ARRAY, MAP and ROW. When casting to ARRAY or MAP,
the element type of the array or the value type of the map must be one of
these supported types, and for maps, the key type must be VARCHAR. Casting
to ROW supports only JSON objects, where the keys must exactly match the ROW
field names (case sensitivity).
to ROW supports only JSON objects, where the keys are matched to the ROW
field names case-insensitively.
The current implementation has the following limitations.

* Does not support user provided options.
Expand Down
33 changes: 30 additions & 3 deletions velox/functions/sparksql/specialforms/FromJson.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,13 +216,16 @@ struct ExtractJsonTypeImpl {
if (type == simdjson::ondemand::json_type::object) {
SIMDJSON_ASSIGN_OR_RAISE(auto object, value.get_object());

folly::F14FastMap<std::string, int32_t> fieldIndices;
bool allFieldsAreAscii = true;
const auto size = rowType.size();
for (auto i = 0; i < size; ++i) {
std::string key = rowType.nameOf(i);
fieldIndices[key] = i;
const auto& name = rowType.nameOf(i);
allFieldsAreAscii &=
functions::stringCore::isAscii(name.data(), name.size());
}

auto fieldIndices = makeFieldIndicesMap(rowType, allFieldsAreAscii);

std::string key;
for (auto fieldResult : object) {
if (fieldResult.error() != ::simdjson::SUCCESS) {
Expand All @@ -232,6 +235,11 @@ struct ExtractJsonTypeImpl {
if (!field.value().is_null()) {
SIMDJSON_ASSIGN_OR_RAISE(key, field.unescaped_key(true));

if (allFieldsAreAscii) {
folly::toLowerAscii(key);
} else {
boost::algorithm::to_lower(key);
}
auto it = fieldIndices.find(key);
if (it != fieldIndices.end()) {
const auto index = it->second;
Expand Down Expand Up @@ -340,6 +348,25 @@ struct ExtractJsonTypeImpl {
writer.castTo<To>() = x;
return simdjson::SUCCESS;
}

/// Builds a lookup table from the lower-cased name of each field in
/// 'rowType' to that field's position in the row.
///
/// @param rowType Row type whose field names are indexed.
/// @param allFieldsAreAscii Selects the lower-casing routine: when true,
/// names are lowered with folly::toLowerAscii; otherwise with
/// boost::algorithm::to_lower.
/// @return Map keyed by lower-cased field name. If several field names lower
/// to the same key, the entry holds the index of the last such field.
static folly::F14FastMap<std::string, int32_t> makeFieldIndicesMap(
    const RowType& rowType,
    bool allFieldsAreAscii) {
  folly::F14FastMap<std::string, int32_t> indexByLoweredName;
  const auto numFields = rowType.size();
  for (auto fieldIdx = 0; fieldIdx < numFields; ++fieldIdx) {
    // Work on a copy so the name stored in 'rowType' is left untouched.
    std::string loweredName = rowType.nameOf(fieldIdx);
    if (!allFieldsAreAscii) {
      boost::algorithm::to_lower(loweredName);
    } else {
      folly::toLowerAscii(loweredName);
    }

    // operator[] assignment: a later field with the same lowered name
    // overwrites an earlier one.
    indexByLoweredName[loweredName] = fieldIdx;
  }

  return indexByLoweredName;
}
};

/// @brief Parses a JSON string into the specified data type. Supports ROW,
Expand Down
12 changes: 2 additions & 10 deletions velox/functions/sparksql/tests/FromJsonTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ class FromJsonTest : public SparkFunctionBaseTest {
TEST_F(FromJsonTest, basicStruct) {
auto expected = makeFlatVector<int64_t>({1, 2, 3});
auto input = makeFlatVector<std::string>(
{R"({"a": 1})", R"({"a": 2})", R"({"a": 3})"});
testFromJson(input, makeRowVector({"a"}, {expected}));
{R"({"Id": 1})", R"({"Id": 2})", R"({"Id": 3})"});
testFromJson(input, makeRowVector({"Id"}, {expected}));
}

TEST_F(FromJsonTest, basicArray) {
Expand Down Expand Up @@ -191,14 +191,6 @@ TEST_F(FromJsonTest, nestedComplexType) {
testFromJson(input, arrayVector);
}

TEST_F(FromJsonTest, keyCaseSensitive) {
auto expected1 = makeNullableFlatVector<int64_t>({1, 2, 4});
auto expected2 = makeNullableFlatVector<int64_t>({3, 4, 5});
auto input = makeFlatVector<std::string>(
{R"({"a": 1, "A": 3})", R"({"a": 2, "A": 4})", R"({"a": 4, "A": 5})"});
testFromJson(input, makeRowVector({"a", "A"}, {expected1, expected2}));
}

TEST_F(FromJsonTest, nullOnFailure) {
auto expected = makeNullableFlatVector<int64_t>({1, std::nullopt, 3});
auto input =
Expand Down

0 comments on commit 48024c2

Please sign in to comment.