Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support other integer types for SubstringUTF8 & RightUTF8 functions (#9507) #9513

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
411 changes: 247 additions & 164 deletions dbms/src/Functions/FunctionsString.cpp

Large diffs are not rendered by default.

58 changes: 21 additions & 37 deletions dbms/src/Functions/tests/gtest_string_left.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,61 +66,55 @@ class StringLeftTest : public DB::tests::FunctionTest
for (bool is_length_const : is_consts)
inner_test(is_str_const, is_length_const);
}

// Expects the function named by func_name to throw Exception when its length
// argument is a column of type Nullable<Integer>. Integer must be neither Int64
// nor UInt64 (enforced at compile time). All four const/non-const combinations
// of the string and length columns are exercised, each with one row holding an
// empty string and a length of 0.
template <typename Integer>
void testInvalidLengthType()
{
    static_assert(!std::is_same_v<Integer, Int64> && !std::is_same_v<Integer, UInt64>);

    // The columns are built inside ASSERT_THROW so that any exception raised
    // while constructing them is also observed by the assertion.
    auto expect_exception = [&](bool const_str, bool const_len) {
        ASSERT_THROW(
            executeFunction(
                func_name,
                const_str ? createConstColumn<Nullable<String>>(1, "") : createColumn<Nullable<String>>({""}),
                const_len ? createConstColumn<Nullable<Integer>>(1, 0) : createColumn<Nullable<Integer>>({0})),
            Exception);
    };

    // Same visiting order as before: const first, then non-const, string flag outermost.
    const bool constness[] = {true, false};
    for (const bool const_str : constness)
    {
        for (const bool const_len : constness)
            expect_exception(const_str, const_len);
    }
}
};

// Invokes the fixture's testBoundary helper once for each signed and unsigned
// integer width (8, 16, 32 and 64 bits).
// NOTE(review): testBoundary is defined outside this view; the template
// argument is presumably the type of the length column — confirm.
TEST_F(StringLeftTest, testBoundary)
try
{
testBoundary<Int8>();
testBoundary<Int16>();
testBoundary<Int32>();
testBoundary<Int64>();
testBoundary<UInt8>();
testBoundary<UInt16>();
testBoundary<UInt32>();
testBoundary<UInt64>();
}
// CATCH is a project macro defined elsewhere; presumably it supplies the
// handler for the function-try-block opened above — confirm.
CATCH

TEST_F(StringLeftTest, testMoreCases)
try
{
#define CALL(A, B, C) \
test<Int8>(A, B, C); \
test<Int16>(A, B, C); \
test<Int32>(A, B, C); \
test<Int64>(A, B, C); \
test<UInt8>(A, B, C); \
test<UInt16>(A, B, C); \
test<UInt32>(A, B, C); \
test<UInt64>(A, B, C);

// test big string
// big_string.size() > length
String big_string;
// unit_string length = 22
String unit_string = "big string is 我!!!!!!!";
for (size_t i = 0; i < 1000; ++i)
big_string += unit_string;
test<Int64>(big_string, 22, unit_string);
test<UInt64>(big_string, 22, unit_string);
CALL(big_string, 22, unit_string);

// test origin_str.size() == length
String origin_str = "我的 size = 12";
test<Int64>(origin_str, 12, origin_str);
test<UInt64>(origin_str, 12, origin_str);
CALL(origin_str, 12, origin_str);

// test origin_str.size() < length
test<Int64>(origin_str, 22, origin_str);
test<UInt64>(origin_str, 22, origin_str);
CALL(origin_str, 22, origin_str);

// Mixed language
String english_str = "This is English";
String mixed_language_str = english_str + ",这是中文,C'est français,これが日本の";
test<Int64>(mixed_language_str, english_str.size(), english_str);
test<UInt64>(mixed_language_str, english_str.size(), english_str);
CALL(mixed_language_str, english_str.size(), english_str);

// column size != 1
// case 1
Expand All @@ -144,18 +138,8 @@ try
func_name,
createConstColumn<Nullable<String>>(8, second_case_string),
createColumn<Nullable<Int64>>({0, 1, 0, 1, 0, 0, 1, 1})));
}
CATCH

TEST_F(StringLeftTest, testInvalidLengthType)
try
{
testInvalidLengthType<Int8>();
testInvalidLengthType<Int16>();
testInvalidLengthType<Int32>();
testInvalidLengthType<UInt8>();
testInvalidLengthType<UInt16>();
testInvalidLengthType<UInt32>();
#undef CALL
}
CATCH

Expand Down
58 changes: 21 additions & 37 deletions dbms/src/Functions/tests/gtest_strings_right.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,61 +65,55 @@ class StringRightTest : public DB::tests::FunctionTest
for (bool is_length_const : is_consts)
inner_test(is_str_const, is_length_const);
}

// Expects the function named by func_name to throw Exception when its length
// argument is a column of type Nullable<Integer>. Integer must be neither Int64
// nor UInt64 (enforced at compile time). All four const/non-const combinations
// of the string and length columns are exercised, each with one row holding an
// empty string and a length of 0.
template <typename Integer>
void testInvalidLengthType()
{
    static_assert(!std::is_same_v<Integer, Int64> && !std::is_same_v<Integer, UInt64>);

    // The columns are built inside ASSERT_THROW so that any exception raised
    // while constructing them is also observed by the assertion.
    auto expect_exception = [&](bool const_str, bool const_len) {
        ASSERT_THROW(
            executeFunction(
                func_name,
                const_str ? createConstColumn<Nullable<String>>(1, "") : createColumn<Nullable<String>>({""}),
                const_len ? createConstColumn<Nullable<Integer>>(1, 0) : createColumn<Nullable<Integer>>({0})),
            Exception);
    };

    // Same visiting order as before: const first, then non-const, string flag outermost.
    const bool constness[] = {true, false};
    for (const bool const_str : constness)
    {
        for (const bool const_len : constness)
            expect_exception(const_str, const_len);
    }
}
};

// Invokes the fixture's testBoundary helper once for each signed and unsigned
// integer width (8, 16, 32 and 64 bits).
// NOTE(review): testBoundary is defined outside this view; the template
// argument is presumably the type of the length column — confirm.
TEST_F(StringRightTest, testBoundary)
try
{
testBoundary<Int8>();
testBoundary<Int16>();
testBoundary<Int32>();
testBoundary<Int64>();
testBoundary<UInt8>();
testBoundary<UInt16>();
testBoundary<UInt32>();
testBoundary<UInt64>();
}
// CATCH is a project macro defined elsewhere; presumably it supplies the
// handler for the function-try-block opened above — confirm.
CATCH

TEST_F(StringRightTest, testMoreCases)
try
{
#define CALL(A, B, C) \
test<Int8>(A, B, C); \
test<Int16>(A, B, C); \
test<Int32>(A, B, C); \
test<Int64>(A, B, C); \
test<UInt8>(A, B, C); \
test<UInt16>(A, B, C); \
test<UInt32>(A, B, C); \
test<UInt64>(A, B, C);

// test big string
// big_string.size() > length
String big_string;
// unit_string length = 22
String unit_string = "big string is 我!!!!!!!";
for (size_t i = 0; i < 1000; ++i)
big_string += unit_string;
test<Int64>(big_string, 22, unit_string);
test<UInt64>(big_string, 22, unit_string);
CALL(big_string, 22, unit_string);

// test origin_str.size() == length
String origin_str = "我的 size = 12";
test<Int64>(origin_str, 12, origin_str);
test<UInt64>(origin_str, 12, origin_str);
CALL(origin_str, 12, origin_str);

// test origin_str.size() < length
test<Int64>(origin_str, 22, origin_str);
test<UInt64>(origin_str, 22, origin_str);
CALL(origin_str, 22, origin_str);

// Mixed language
String english_str = "This is English";
String mixed_language_str = "这是中文,C'est français,これが日本の," + english_str;
test<Int64>(mixed_language_str, english_str.size(), english_str);
test<UInt64>(mixed_language_str, english_str.size(), english_str);
CALL(mixed_language_str, english_str.size(), english_str);

// column size != 1
// case 1
Expand All @@ -143,18 +137,8 @@ try
func_name,
createConstColumn<Nullable<String>>(8, second_case_string),
createColumn<Nullable<Int64>>({0, 1, 0, 1, 0, 0, 1, 1})));
}
CATCH

TEST_F(StringRightTest, testInvalidLengthType)
try
{
testInvalidLengthType<Int8>();
testInvalidLengthType<Int16>();
testInvalidLengthType<Int32>();
testInvalidLengthType<UInt8>();
testInvalidLengthType<UInt16>();
testInvalidLengthType<UInt32>();
#undef CALL
}
CATCH

Expand Down
163 changes: 153 additions & 10 deletions dbms/src/Functions/tests/gtest_substring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,160 @@ class SubString : public DB::tests::FunctionTest
{
};

/// substringUTF8 check with per-row (non-constant) arguments: a Nullable<String>
/// input column, a position column of type T1 and a length column of type T2.
/// The data contains negative positions and lengths as well as `{}` (NULL)
/// entries in each of the three argument columns.
///
/// Expectations encoded in the rows below:
///  - a negative position selects relative to the end of the string
///    ("www.pingcap.com", -5, 4 -> "p.co");
///  - a negative length, a position of 0, or a negative position beyond the
///    string start gives an empty string;
///  - a NULL string, position or length gives a NULL result.
template <typename T1, typename T2>
class TestNullableSigned
{
public:
    static void run(SubString & sub_string)
    {
        const auto source = createColumn<Nullable<String>>(
            {"www.pingcap.com",
             "ww.pingcap.com",
             "w.pingcap.com",
             ".pingcap.com",
             "pingcap.com",
             "pingcap.com",
             "pingcap.com",
             "pingcap.com",
             {},
             "pingcap",
             "pingcap"});
        const auto positions = createColumn<T1>({-5, 1, 3, -3, 8, 2, -100, 0, 2, {}, -3});
        const auto lengths = createColumn<T2>({4, 4, 7, 4, 5, -5, 2, 3, 6, 4, {}});

        const auto expected
            = createColumn<Nullable<String>>({"p.co", "ww.p", "pingcap", "com", ".com", "", "", "", {}, {}, {}});

        ASSERT_COLUMN_EQ(expected, sub_string.executeFunction("substringUTF8", source, positions, lengths));
    }
};

/// substringUTF8 check with per-row (non-constant) arguments: a Nullable<String>
/// input column, a position column of type T1 and a length column of type T2.
/// Position and length values include negatives but no NULLs; only the string
/// column carries a NULL entry (its result row is expected to be NULL).
///
/// Expectations encoded in the rows below:
///  - a negative position selects relative to the end of the string
///    ("www.pingcap.com", -5, 4 -> "p.co");
///  - a negative length, a position of 0, or a negative position beyond the
///    string start gives an empty string.
template <typename T1, typename T2>
class TestSigned
{
public:
    static void run(SubString & sub_string)
    {
        const auto source = createColumn<Nullable<String>>(
            {"www.pingcap.com",
             "ww.pingcap.com",
             "w.pingcap.com",
             ".pingcap.com",
             "pingcap.com",
             "pingcap.com",
             "pingcap.com",
             "pingcap.com",
             {}});
        const auto positions = createColumn<T1>({-5, 1, 3, -3, 8, 2, -100, 0, 2});
        const auto lengths = createColumn<T2>({4, 4, 7, 4, 5, -5, 2, 3, 6});

        const auto expected = createColumn<Nullable<String>>({"p.co", "ww.p", "pingcap", "com", ".com", "", "", "", {}});

        ASSERT_COLUMN_EQ(expected, sub_string.executeFunction("substringUTF8", source, positions, lengths));
    }
};

/// substringUTF8 check with per-row (non-constant) arguments: a Nullable<String>
/// input column, a position column of type T1 and a length column of type T2.
/// All position/length values are non-negative, so the same data can be used
/// when either type is unsigned; `{}` (NULL) entries appear in each of the
/// three argument columns.
///
/// Expectations encoded in the rows below:
///  - a length of 0 or a position of 0 gives an empty string;
///  - a NULL string, position or length gives a NULL result.
template <typename T1, typename T2>
class TestNullableUnsigned
{
public:
    static void run(SubString & sub_string)
    {
        const auto source = createColumn<Nullable<String>>(
            {"www.pingcap.com",
             "ww.pingcap.com",
             "w.pingcap.com",
             ".pingcap.com",
             "pingcap.com",
             "pingcap.com",
             "pingcap.com",
             {},
             "pingcap",
             "pingcap"});
        const auto positions = createColumn<T1>({11, 1, 3, 10, 8, 2, 0, 9, {}, 7});
        const auto lengths = createColumn<T2>({4, 4, 7, 4, 5, 0, 3, 6, 1, {}});

        const auto expected
            = createColumn<Nullable<String>>({"p.co", "ww.p", "pingcap", "com", ".com", "", "", {}, {}, {}});

        ASSERT_COLUMN_EQ(expected, sub_string.executeFunction("substringUTF8", source, positions, lengths));
    }
};

/// substringUTF8 check with per-row (non-constant) arguments: a Nullable<String>
/// input column, a position column of type T1 and a length column of type T2.
/// All position/length values are non-negative and non-NULL; only the string
/// column carries a NULL entry (its result row is expected to be NULL).
///
/// Expectations encoded in the rows below:
///  - a length of 0 or a position of 0 gives an empty string.
template <typename T1, typename T2>
class TestUnsigned
{
public:
    static void run(SubString & sub_string)
    {
        const auto source = createColumn<Nullable<String>>(
            {"www.pingcap.com",
             "ww.pingcap.com",
             "w.pingcap.com",
             ".pingcap.com",
             "pingcap.com",
             "pingcap.com",
             "pingcap.com",
             {}});
        const auto positions = createColumn<T1>({11, 1, 3, 10, 8, 2, 0, 2});
        const auto lengths = createColumn<T2>({4, 4, 7, 4, 5, 0, 3, 1});

        const auto expected = createColumn<Nullable<String>>({"p.co", "ww.p", "pingcap", "com", ".com", "", "", {}});

        ASSERT_COLUMN_EQ(expected, sub_string.executeFunction("substringUTF8", source, positions, lengths));
    }
};

/// substringUTF8 check where the position argument is a constant column of
/// type T1 (4 rows, value 1) while the string and the length (type T2) vary
/// per row. Each row is expected to yield the first `length` characters of its
/// string.
template <typename T1, typename T2>
class TestConstPos
{
public:
    static void run(SubString & sub_string)
    {
        const auto source
            = createColumn<Nullable<String>>({"www.pingcap.com", "ww.pingcap.com", "w.pingcap.com", ".pingcap.com"});
        const auto const_position = createConstColumn<T1>(4, 1);
        const auto lengths = createColumn<T2>({1, 2, 3, 4});

        const auto expected = createColumn<Nullable<String>>({"w", "ww", "w.p", ".pin"});

        ASSERT_COLUMN_EQ(expected, sub_string.executeFunction("substringUTF8", source, const_position, lengths));
    }
};

/// substringUTF8 check where the length argument is a constant column of type
/// T2 (4 rows, value 4) while the string and the position (type T1) vary per
/// row. Each row is expected to yield the 4 characters starting at its 1-based
/// position.
///
/// The constant length column is built with T2. It was previously built with
/// T1, which left T2 unused, so every (T1, T2) instantiation of this class ran
/// the identical same-type check and mixed position/length type pairs were
/// never exercised for the constant-length path.
template <typename T1, typename T2>
class TestConstLength
{
public:
    static void run(SubString & sub_string)
    {
        ASSERT_COLUMN_EQ(
            createColumn<Nullable<String>>({"www.", "w.pi", "ping", "ngca"}),
            sub_string.executeFunction(
                "substringUTF8",
                createColumn<Nullable<String>>({"www.pingcap.com", "ww.pingcap.com", "w.pingcap.com", ".pingcap.com"}),
                createColumn<T1>({1, 2, 3, 4}),
                // Length column: type T2, mirroring TestConstPos (const T1 position, T2 length).
                createConstColumn<T2>(4, 4)));
    }
};

TEST_F(SubString, subStringUTF8Test)
try
{
TestTypePair<TestNullableIntTypes, TestNullableIntTypes, TestNullableSigned, SubString>::run(*this);
TestTypePair<TestAllIntTypes, TestAllIntTypes, TestSigned, SubString>::run(*this);

TestTypePair<TestNullableIntTypes, TestNullableUIntTypes, TestNullableUnsigned, SubString>::run(*this);
TestTypePair<TestNullableUIntTypes, TestNullableIntTypes, TestNullableUnsigned, SubString>::run(*this);
TestTypePair<TestNullableUIntTypes, TestNullableUIntTypes, TestNullableUnsigned, SubString>::run(*this);

TestTypePair<TestAllIntTypes, TestAllUIntTypes, TestUnsigned, SubString>::run(*this);
TestTypePair<TestAllUIntTypes, TestAllIntTypes, TestUnsigned, SubString>::run(*this);
TestTypePair<TestAllUIntTypes, TestAllUIntTypes, TestUnsigned, SubString>::run(*this);

TestTypePair<TestAllIntTypes, TestAllIntTypes, TestConstPos, SubString>::run(*this);
TestTypePair<TestAllUIntTypes, TestAllUIntTypes, TestConstPos, SubString>::run(*this);

TestTypePair<TestAllIntTypes, TestAllIntTypes, TestConstLength, SubString>::run(*this);
TestTypePair<TestAllUIntTypes, TestAllUIntTypes, TestConstLength, SubString>::run(*this);

// column, const, const
ASSERT_COLUMN_EQ(
createColumn<Nullable<String>>({"www.", "ww.p", "w.pi", ".pin"}),
Expand All @@ -38,6 +189,7 @@ try
createColumn<Nullable<String>>({"www.pingcap.com", "ww.pingcap.com", "w.pingcap.com", ".pingcap.com"}),
createConstColumn<Nullable<Int64>>(4, 1),
createConstColumn<Nullable<Int64>>(4, 4)));

// const, const, const
ASSERT_COLUMN_EQ(
createConstColumn<String>(1, "www."),
Expand All @@ -46,17 +198,8 @@ try
createConstColumn<Nullable<String>>(1, "www.pingcap.com"),
createConstColumn<Nullable<Int64>>(1, 1),
createConstColumn<Nullable<Int64>>(1, 4)));
// Test Null
ASSERT_COLUMN_EQ(
createColumn<Nullable<String>>({{}, "www."}),
executeFunction(
"substringUTF8",
createColumn<Nullable<String>>(
{{}, "www.pingcap.com"}),
createConstColumn<Nullable<Int64>>(2, 1),
createConstColumn<Nullable<Int64>>(2, 4)));
}
CATCH

} // namespace tests
} // namespace DB
} // namespace DB
Loading