From 48024c2c304948f024ba3c1a5ee7751c773aadd5 Mon Sep 17 00:00:00 2001 From: zhli1142015 Date: Mon, 6 Jan 2025 23:16:06 +0800 Subject: [PATCH] minor fix --- velox/docs/functions/spark/json.rst | 3 +- .../sparksql/specialforms/FromJson.cpp | 33 +++++++++++++++++-- .../functions/sparksql/tests/FromJsonTest.cpp | 12 ++----- 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/velox/docs/functions/spark/json.rst b/velox/docs/functions/spark/json.rst index e1d02caae71d..1f6d29cf9374 100644 --- a/velox/docs/functions/spark/json.rst +++ b/velox/docs/functions/spark/json.rst @@ -53,8 +53,7 @@ JSON Functions REAL, DOUBLE, VARCHAR, ARRAY, MAP and ROW. When casting to ARRAY or MAP, the element type of the array or the value type of the map must be one of these supported types, and for maps, the key type must be VARCHAR. Casting - to ROW supports only JSON objects, where the keys must exactly match the ROW - field names (case sensitivity). + to ROW supports only JSON objects; keys match field names case-insensitively. The current implementation has the following limitations. * Does not support user provided options. 
diff --git a/velox/functions/sparksql/specialforms/FromJson.cpp b/velox/functions/sparksql/specialforms/FromJson.cpp index c07c3d530045..6ff1e2e0ed36 100644 --- a/velox/functions/sparksql/specialforms/FromJson.cpp +++ b/velox/functions/sparksql/specialforms/FromJson.cpp @@ -216,13 +216,16 @@ struct ExtractJsonTypeImpl { if (type == simdjson::ondemand::json_type::object) { SIMDJSON_ASSIGN_OR_RAISE(auto object, value.get_object()); - folly::F14FastMap fieldIndices; + bool allFieldsAreAscii = true; const auto size = rowType.size(); for (auto i = 0; i < size; ++i) { - std::string key = rowType.nameOf(i); - fieldIndices[key] = i; + const auto& name = rowType.nameOf(i); + allFieldsAreAscii &= + functions::stringCore::isAscii(name.data(), name.size()); } + auto fieldIndices = makeFieldIndicesMap(rowType, allFieldsAreAscii); + std::string key; for (auto fieldResult : object) { if (fieldResult.error() != ::simdjson::SUCCESS) { @@ -232,6 +235,11 @@ struct ExtractJsonTypeImpl { if (!field.value().is_null()) { SIMDJSON_ASSIGN_OR_RAISE(key, field.unescaped_key(true)); + if (allFieldsAreAscii) { + folly::toLowerAscii(key); + } else { + boost::algorithm::to_lower(key); + } auto it = fieldIndices.find(key); if (it != fieldIndices.end()) { const auto index = it->second; @@ -340,6 +348,25 @@ struct ExtractJsonTypeImpl { writer.castTo() = x; return simdjson::SUCCESS; } + + static folly::F14FastMap makeFieldIndicesMap( + const RowType& rowType, + bool allFieldsAreAscii) { + folly::F14FastMap fieldIndices; + const auto size = rowType.size(); + for (auto i = 0; i < size; ++i) { + std::string key = rowType.nameOf(i); + if (allFieldsAreAscii) { + folly::toLowerAscii(key); + } else { + boost::algorithm::to_lower(key); + } + + fieldIndices[key] = i; + } + + return fieldIndices; + } }; /// @brief Parses a JSON string into the specified data type. 
Supports ROW, diff --git a/velox/functions/sparksql/tests/FromJsonTest.cpp b/velox/functions/sparksql/tests/FromJsonTest.cpp index 4c4e62ed97f1..84dbc55f7a26 100644 --- a/velox/functions/sparksql/tests/FromJsonTest.cpp +++ b/velox/functions/sparksql/tests/FromJsonTest.cpp @@ -45,8 +45,8 @@ class FromJsonTest : public SparkFunctionBaseTest { TEST_F(FromJsonTest, basicStruct) { auto expected = makeFlatVector({1, 2, 3}); auto input = makeFlatVector( - {R"({"a": 1})", R"({"a": 2})", R"({"a": 3})"}); - testFromJson(input, makeRowVector({"a"}, {expected})); + {R"({"Id": 1})", R"({"Id": 2})", R"({"Id": 3})"}); + testFromJson(input, makeRowVector({"Id"}, {expected})); } TEST_F(FromJsonTest, basicArray) { @@ -191,14 +191,6 @@ TEST_F(FromJsonTest, nestedComplexType) { testFromJson(input, arrayVector); } -TEST_F(FromJsonTest, keyCaseSensitive) { - auto expected1 = makeNullableFlatVector({1, 2, 4}); - auto expected2 = makeNullableFlatVector({3, 4, 5}); - auto input = makeFlatVector( - {R"({"a": 1, "A": 3})", R"({"a": 2, "A": 4})", R"({"a": 4, "A": 5})"}); - testFromJson(input, makeRowVector({"a", "A"}, {expected1, expected2})); -} - TEST_F(FromJsonTest, nullOnFailure) { auto expected = makeNullableFlatVector({1, std::nullopt, 3}); auto input =