From 942861090e4feb3b3bc36a5f5705794bdc5537ea Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Thu, 14 Nov 2024 23:23:16 +0800 Subject: [PATCH 01/12] fix when first argument of replace function is column const Signed-off-by: guo-shaoge --- dbms/src/Functions/FunctionsStringReplace.h | 11 +++++--- tests/fullstack-test/expr/replace.test | 29 +++++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 tests/fullstack-test/expr/replace.test diff --git a/dbms/src/Functions/FunctionsStringReplace.h b/dbms/src/Functions/FunctionsStringReplace.h index 604c2479bb0..1b02a4520ee 100644 --- a/dbms/src/Functions/FunctionsStringReplace.h +++ b/dbms/src/Functions/FunctionsStringReplace.h @@ -103,9 +103,9 @@ class FunctionStringReplace : public IFunction void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override { - const ColumnPtr & column_src = block.getByPosition(arguments[0]).column; - const ColumnPtr & column_needle = block.getByPosition(arguments[1]).column; - const ColumnPtr & column_replacement = block.getByPosition(arguments[2]).column; + ColumnPtr column_src = block.getByPosition(arguments[0]).column; + ColumnPtr column_needle = block.getByPosition(arguments[1]).column; + ColumnPtr column_replacement = block.getByPosition(arguments[2]).column; const ColumnPtr column_pos = arguments.size() > 3 ? block.getByPosition(arguments[3]).column : nullptr; const ColumnPtr column_occ = arguments.size() > 4 ? block.getByPosition(arguments[4]).column : nullptr; const ColumnPtr column_match_type = arguments.size() > 5 ? block.getByPosition(arguments[5]).column : nullptr; @@ -124,6 +124,11 @@ class FunctionStringReplace : public IFunction bool needle_const = column_needle->isColumnConst(); bool replacement_const = column_replacement->isColumnConst(); + + if (const auto * column_src_const = checkAndGetColumn(column_src.get())) + { + column_src = column_src_const->convertToFullColumn(); + } if (needle_const && replacement_const) { diff --git a/tests/fullstack-test/expr/replace.test b/tests/fullstack-test/expr/replace.test new file mode 100644 index 00000000000..df185607696 --- /dev/null +++ b/tests/fullstack-test/expr/replace.test @@ -0,0 +1,29 @@ +# Copyright 2023 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# test regexp and regexp_like +mysql> drop table if exists test.t +mysql> create table test.t(c1 varchar(100), c2 varchar(100), c3 varchar(100)) +mysql> insert into test.t values('hello world', 'hello', '???') +mysql> alter table test.t set tiflash replica 1 +func> wait_table test t +mysql> set tidb_isolation_read_engines = 'tiflash'; set tidb_enforce_mpp=1; select replace(c1, c2, c3) from test.t +replace(c1, c2, c3) +??? world + +mysql> set tidb_isolation_read_engines = 'tiflash'; set tidb_enforce_mpp=1; select replace('hello world', c2, c3) from test.t +replace('hello world', c2, c3) +??? world + +mysql> drop table if exists test.t From 9f61b0ead36af701b00c48c15fb8177b88ef98b4 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Thu, 14 Nov 2024 23:24:54 +0800 Subject: [PATCH 02/12] fix Signed-off-by: guo-shaoge --- tests/fullstack-test/expr/replace.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fullstack-test/expr/replace.test b/tests/fullstack-test/expr/replace.test index df185607696..bf20366e6a7 100644 --- a/tests/fullstack-test/expr/replace.test +++ b/tests/fullstack-test/expr/replace.test @@ -1,4 +1,4 @@ -# Copyright 2023 PingCAP, Inc. +# Copyright 2024 PingCAP, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From ba80e358faffcd4c247133fc03e9cb67bfb586c2 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Fri, 15 Nov 2024 10:40:58 +0800 Subject: [PATCH 03/12] fmt Signed-off-by: guo-shaoge --- dbms/src/Functions/FunctionsStringReplace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/FunctionsStringReplace.h b/dbms/src/Functions/FunctionsStringReplace.h index 1b02a4520ee..6d62c6b02bd 100644 --- a/dbms/src/Functions/FunctionsStringReplace.h +++ b/dbms/src/Functions/FunctionsStringReplace.h @@ -124,7 +124,7 @@ class FunctionStringReplace : public IFunction bool needle_const = column_needle->isColumnConst(); bool replacement_const = column_replacement->isColumnConst(); - + if (const auto * column_src_const = checkAndGetColumn(column_src.get())) { column_src = column_src_const->convertToFullColumn(); From 0f7faf809b8a35f89e714a56b3bf45027d77dcad Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Fri, 15 Nov 2024 10:51:31 +0800 Subject: [PATCH 04/12] fix Signed-off-by: guo-shaoge --- tests/fullstack-test/expr/replace.test | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/fullstack-test/expr/replace.test b/tests/fullstack-test/expr/replace.test index bf20366e6a7..db0cc946508 100644 --- a/tests/fullstack-test/expr/replace.test +++ b/tests/fullstack-test/expr/replace.test @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -# test regexp and regexp_like mysql> drop table if exists test.t mysql> create table test.t(c1 varchar(100), c2 varchar(100), c3 varchar(100)) mysql> insert into test.t values('hello world', 'hello', '???') From 4b56d6d0e3e2956b7625f5f02d248de8162ccada Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Fri, 15 Nov 2024 17:13:07 +0800 Subject: [PATCH 05/12] refine Signed-off-by: guo-shaoge --- dbms/src/Functions/FunctionsStringReplace.h | 216 +++++++++---------- dbms/src/Functions/FunctionsStringSearch.cpp | 27 --- 2 files changed, 102 insertions(+), 141 deletions(-) diff --git a/dbms/src/Functions/FunctionsStringReplace.h b/dbms/src/Functions/FunctionsStringReplace.h index 6d62c6b02bd..659624ebfb1 100644 --- a/dbms/src/Functions/FunctionsStringReplace.h +++ b/dbms/src/Functions/FunctionsStringReplace.h @@ -22,6 +22,7 @@ #include #include #include +#include #include namespace DB @@ -41,29 +42,11 @@ class FunctionStringReplace : public IFunction String getName() const override { return name; } - size_t getNumberOfArguments() const override { return 0; } + size_t getNumberOfArguments() const override { return 3; } - bool isVariadic() const override { return true; } + bool isVariadic() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override - { - if constexpr (Impl::support_non_const_needle && Impl::support_non_const_replacement) - { - return {3, 4, 5}; - } - else if constexpr (Impl::support_non_const_needle) - { - return {2, 3, 4, 5}; - } - else if constexpr (Impl::support_non_const_replacement) - { - return {1, 3, 4, 5}; - } - else - { - return {1, 2, 3, 4, 5}; - } - } + void setCollator(const TiDB::TiDBCollatorPtr & collator_) override { collator = collator_; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override @@ -83,21 +66,6 @@ class FunctionStringReplace : public IFunction "Illegal type " + arguments[2]->getName() + " of third argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - if (arguments.size() > 3 && !arguments[3]->isInteger()) - throw Exception( - "Illegal type " + arguments[2]->getName() + " of forth argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - if (arguments.size() > 4 && !arguments[4]->isInteger()) - throw Exception( - "Illegal type " + arguments[2]->getName() + " of fifth argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - if (arguments.size() > 5 && !arguments[5]->isStringOrFixedString()) - throw Exception( - "Illegal type " + arguments[2]->getName() + " of sixth argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - return std::make_shared(); } @@ -106,66 +74,37 @@ class FunctionStringReplace : public IFunction ColumnPtr column_src = block.getByPosition(arguments[0]).column; ColumnPtr column_needle = block.getByPosition(arguments[1]).column; ColumnPtr column_replacement = block.getByPosition(arguments[2]).column; - const ColumnPtr column_pos = arguments.size() > 3 ? block.getByPosition(arguments[3]).column : nullptr; - const ColumnPtr column_occ = arguments.size() > 4 ? block.getByPosition(arguments[4]).column : nullptr; - const ColumnPtr column_match_type = arguments.size() > 5 ? block.getByPosition(arguments[5]).column : nullptr; - - if ((column_pos != nullptr && !column_pos->isColumnConst()) - || (column_occ != nullptr && !column_occ->isColumnConst()) - || (column_match_type != nullptr && !column_match_type->isColumnConst())) - throw Exception("4th, 5th, 6th arguments of function " + getName() + " must be constants."); - Int64 pos = column_pos == nullptr ? 1 : typeid_cast(column_pos.get())->getInt(0); - Int64 occ = column_occ == nullptr ? 0 : typeid_cast(column_occ.get())->getInt(0); - String match_type = column_match_type == nullptr - ? "" - : typeid_cast(column_match_type.get())->getValue(); ColumnWithTypeAndName & column_result = block.getByPosition(result); bool needle_const = column_needle->isColumnConst(); bool replacement_const = column_replacement->isColumnConst(); - if (const auto * column_src_const = checkAndGetColumn(column_src.get())) + if (column_src->isColumnConst()) { - column_src = column_src_const->convertToFullColumn(); + executeImplConstHaystack( + column_src, + column_needle, + column_replacement, + needle_const, + replacement_const, + column_result); } - - if (needle_const && replacement_const) + else if (needle_const && replacement_const) { - executeImpl(column_src, column_needle, column_replacement, pos, occ, match_type, column_result); + executeImpl(column_src, column_needle, column_replacement, column_result); } else if (needle_const) { - executeImplNonConstReplacement( - column_src, - column_needle, - column_replacement, - pos, - occ, - match_type, - column_result); + executeImplNonConstReplacement(column_src, column_needle, column_replacement, column_result); } else if (replacement_const) { - executeImplNonConstNeedle( - column_src, - column_needle, - column_replacement, - pos, - occ, - match_type, - column_result); + executeImplNonConstNeedle(column_src, column_needle, column_replacement, column_result); } else { - executeImplNonConstNeedleReplacement( - column_src, - column_needle, - column_replacement, - pos, - occ, - match_type, - column_result); + executeImplNonConstNeedleReplacement(column_src, column_needle, column_replacement, column_result); } } @@ -174,9 +113,6 @@ class FunctionStringReplace : public IFunction const ColumnPtr & column_src, const ColumnPtr & column_needle, const ColumnPtr & column_replacement, - Int64 pos, - Int64 occ, - const String & match_type, ColumnWithTypeAndName & column_result) const { const auto * c1_const = typeid_cast(column_needle.get()); @@ -192,9 +128,6 @@ class FunctionStringReplace : public IFunction col->getOffsets(), needle, replacement, - pos, - occ, - match_type, collator, col_res->getChars(), col_res->getOffsets()); @@ -208,9 +141,6 @@ class FunctionStringReplace : public IFunction col->getN(), needle, replacement, - pos, - occ, - match_type, collator, col_res->getChars(), col_res->getOffsets()); @@ -222,13 +152,95 @@ class FunctionStringReplace : public IFunction ErrorCodes::ILLEGAL_COLUMN); } + template + void replace( + const HaystackSource & src_h, + const NeedleSource & src_n, + const ReplacementSource & src_r, + ColumnString::MutablePtr res_col) + { + while (!src_h.isEnd()) + { + const auto slice_h = src_h.getWhole(); + const auto slice_n = src_n.getWhole(); + const auto slice_r = src_r.getWhole(); + + const String str_h(slice_h.data, slice_h.size); + const String str_n(slice_n.data, slice_n.size); + const String str_r(slice_r.data, slice_r.size); + String res; + Impl::constant(str_h, str_n, str_r, res); + res_col->insertData(res.data(), res.size()); + } + } + + void executeImplConstHaystack( + const ColumnPtr & column_src, + const ColumnPtr & column_needle, + const ColumnPtr & column_replacement, + bool needle_const, + bool replacement_const, + ColumnWithTypeAndName & column_result) const + { + auto res_col = ColumnString::create(); + res_col->reserve(column_src->size()); + + RUNTIME_CHECK_MSG( + !needle_const || !replacement_const, + "should got here when all argments of replace is constant"); + + const auto * column_src_const = checkAndGetColumnConst(column_src.get()); + RUNTIME_CHECK(column_src_const); + + using GatherUtils::ConstSource; + using GatherUtils::StringSource; + if (!needle_const && !replacement_const) + { + const auto * column_needle_string = checkAndGetColumn(column_needle.get()); + const auto * column_replacement_string = checkAndGetColumn(column_replacement.get()); + RUNTIME_CHECK(column_needle_string); + RUNTIME_CHECK(column_replacement_string); + + replace( + ConstSource(*column_src_const), + StringSource(*column_needle_string), + StringSource(*column_replacement_string), + res_col); + } + else if (needle_const && !replacement_const) + { + const auto * column_needle_const = checkAndGetColumnConst(column_needle.get()); + const auto * column_replacement_string = checkAndGetColumn(column_replacement.get()); + RUNTIME_CHECK(column_needle_const); + RUNTIME_CHECK(column_replacement_string); + + replace( + ConstSource(*column_src_const), + ConstSource(*column_needle_const), + StringSource(*column_replacement_string), + res_col); + } + else if (!needle_const && replacement_const) + { + const auto * column_needle_string = checkAndGetColumn(column_needle.get()); + const auto * column_replacement_const = checkAndGetColumnConst(column_replacement.get()); + RUNTIME_CHECK(column_needle_string); + RUNTIME_CHECK(column_replacement_const); + + replace( + ConstSource(*column_src_const), + StringSource(*column_needle_string), + ConstSource(*column_replacement_const), + res_col); + } + + column_result.column = std::move(res_col); + } + void executeImplNonConstNeedle( const ColumnPtr & column_src, const ColumnPtr & column_needle, const ColumnPtr & column_replacement, - Int64 pos [[maybe_unused]], - Int64 occ [[maybe_unused]], - const String & match_type, ColumnWithTypeAndName & column_result) const { if constexpr (Impl::support_non_const_needle) @@ -246,9 +258,6 @@ class FunctionStringReplace : public IFunction col_needle->getChars(), col_needle->getOffsets(), replacement, - pos, - occ, - match_type, collator, col_res->getChars(), col_res->getOffsets()); @@ -263,9 +272,6 @@ class FunctionStringReplace : public IFunction col_needle->getChars(), col_needle->getOffsets(), replacement, - pos, - occ, - match_type, collator, col_res->getChars(), col_res->getOffsets()); @@ -286,9 +292,6 @@ class FunctionStringReplace : public IFunction const ColumnPtr & column_src, const ColumnPtr & column_needle, const ColumnPtr & column_replacement, - Int64 pos [[maybe_unused]], - Int64 occ [[maybe_unused]], - const String & match_type, ColumnWithTypeAndName & column_result) const { if constexpr (Impl::support_non_const_replacement) @@ -306,9 +309,6 @@ class FunctionStringReplace : public IFunction needle, col_replacement->getChars(), col_replacement->getOffsets(), - pos, - occ, - match_type, collator, col_res->getChars(), col_res->getOffsets()); @@ -323,9 +323,6 @@ class FunctionStringReplace : public IFunction needle, col_replacement->getChars(), col_replacement->getOffsets(), - pos, - occ, - match_type, collator, col_res->getChars(), col_res->getOffsets()); @@ -346,9 +343,6 @@ class FunctionStringReplace : public IFunction const ColumnPtr & column_src, const ColumnPtr & column_needle, const ColumnPtr & column_replacement, - Int64 pos [[maybe_unused]], - Int64 occ [[maybe_unused]], - const String & match_type, ColumnWithTypeAndName & column_result) const { if constexpr (Impl::support_non_const_needle && Impl::support_non_const_replacement) @@ -366,9 +360,6 @@ class FunctionStringReplace : public IFunction col_needle->getOffsets(), col_replacement->getChars(), col_replacement->getOffsets(), - pos, - occ, - match_type, collator, col_res->getChars(), col_res->getOffsets()); @@ -384,9 +375,6 @@ class FunctionStringReplace : public IFunction col_needle->getOffsets(), col_replacement->getChars(), col_replacement->getOffsets(), - pos, - occ, - match_type, collator, col_res->getChars(), col_res->getOffsets()); diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp index 5b5318bcc30..3c4541bc00c 100644 --- a/dbms/src/Functions/FunctionsStringSearch.cpp +++ b/dbms/src/Functions/FunctionsStringSearch.cpp @@ -816,9 +816,6 @@ struct ReplaceStringImpl const ColumnString::Offsets & offsets, const std::string & needle, const std::string & replacement, - const Int64 & /* pos */, - const Int64 & /* occ */, - const std::string & /* match_type */, TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) @@ -904,9 +901,6 @@ struct ReplaceStringImpl const ColumnString::Chars_t & needle_chars, const ColumnString::Offsets & needle_offsets, const std::string & replacement, - const Int64 & /* pos */, - const Int64 & /* occ */, - const std::string & /* match_type */, TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) @@ -978,9 +972,6 @@ struct ReplaceStringImpl const std::string & needle, const ColumnString::Chars_t & replacement_chars, const ColumnString::Offsets & replacement_offsets, - const Int64 & /* pos */, - const Int64 & /* occ */, - const std::string & /* match_type */, TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) @@ -1070,9 +1061,6 @@ struct ReplaceStringImpl const ColumnString::Offsets & needle_offsets, const ColumnString::Chars_t & replacement_chars, const ColumnString::Offsets & replacement_offsets, - const Int64 & /* pos */, - const Int64 & /* occ */, - const std::string & /* match_type */, TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) @@ -1146,9 +1134,6 @@ struct ReplaceStringImpl size_t n, const std::string & needle, const std::string & replacement, - const Int64 & /* pos */, - const Int64 & /* occ */, - const std::string & /* match_type */, TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) @@ -1244,9 +1229,6 @@ struct ReplaceStringImpl const ColumnString::Chars_t & needle_chars, const ColumnString::Offsets & needle_offsets, const std::string & replacement, - const Int64 & /* pos */, - const Int64 & /* occ */, - const std::string & /* match_type */, TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) @@ -1319,9 +1301,6 @@ struct ReplaceStringImpl const std::string & needle, const ColumnString::Chars_t & replacement_chars, const ColumnString::Offsets & replacement_offsets, - const Int64 & /* pos */, - const Int64 & /* occ */, - const std::string & /* match_type */, TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) @@ -1421,9 +1400,6 @@ struct ReplaceStringImpl const ColumnString::Offsets & needle_offsets, const ColumnString::Chars_t & replacement_chars, const ColumnString::Offsets & replacement_offsets, - const Int64 & /* pos */, - const Int64 & /* occ */, - const std::string & /* match_type */, TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) @@ -1498,9 +1474,6 @@ struct ReplaceStringImpl const std::string & data, const std::string & needle, const std::string & replacement, - const Int64 & /* pos */, - const Int64 & /* occ */, - const std::string & /* match_type */, TiDB::TiDBCollatorPtr /* collator */, std::string & res_data) { From 6a1a480c7f6ce4dc71b01f1787072cd2283443f7 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Fri, 15 Nov 2024 17:19:27 +0800 Subject: [PATCH 06/12] fix Signed-off-by: guo-shaoge --- dbms/src/Functions/FunctionsStringReplace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/FunctionsStringReplace.h b/dbms/src/Functions/FunctionsStringReplace.h index 659624ebfb1..4bd642807a9 100644 --- a/dbms/src/Functions/FunctionsStringReplace.h +++ b/dbms/src/Functions/FunctionsStringReplace.h @@ -157,7 +157,7 @@ class FunctionStringReplace : public IFunction const HaystackSource & src_h, const NeedleSource & src_n, const ReplacementSource & src_r, - ColumnString::MutablePtr res_col) + ColumnString::MutablePtr res_col) const { while (!src_h.isEnd()) { From f02a23071af3f2e702bda1b852407eb53f2f7edc Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Fri, 15 Nov 2024 17:21:35 +0800 Subject: [PATCH 07/12] fix Signed-off-by: guo-shaoge --- tests/fullstack-test/expr/replace.test | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/fullstack-test/expr/replace.test b/tests/fullstack-test/expr/replace.test index db0cc946508..1f5c7f8a9c8 100644 --- a/tests/fullstack-test/expr/replace.test +++ b/tests/fullstack-test/expr/replace.test @@ -25,4 +25,16 @@ mysql> set tidb_isolation_read_engines = 'tiflash'; set tidb_enforce_mpp=1; sele replace('hello world', c2, c3) ??? world +mysql> set tidb_isolation_read_engines = 'tiflash'; set tidb_enforce_mpp=1; select replace('hello world', 'hello', '???') from test.t +replace('hello world', 'hello', '???') +??? world + +mysql> set tidb_isolation_read_engines = 'tiflash'; set tidb_enforce_mpp=1; select replace('hello world', c2, '???') from test.t +replace('hello world', c2, '???') +??? world + +mysql> set tidb_isolation_read_engines = 'tiflash'; set tidb_enforce_mpp=1; select replace('hello world', 'hello', c3) from test.t +replace('hello world', 'hello', c3) +??? world + mysql> drop table if exists test.t From 571e2dccdadbf24b92cd5f0948cebd11d6acb24d Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Fri, 15 Nov 2024 17:35:06 +0800 Subject: [PATCH 08/12] fix Signed-off-by: guo-shaoge --- dbms/src/Functions/FunctionsStringReplace.h | 32 +++++---------------- dbms/src/Functions/GatherUtils/Algorithms.h | 23 +++++++++++++++ 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/dbms/src/Functions/FunctionsStringReplace.h b/dbms/src/Functions/FunctionsStringReplace.h index 4bd642807a9..b7f425cc223 100644 --- a/dbms/src/Functions/FunctionsStringReplace.h +++ b/dbms/src/Functions/FunctionsStringReplace.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -152,28 +153,6 @@ class FunctionStringReplace : public IFunction ErrorCodes::ILLEGAL_COLUMN); } - template - void replace( - const HaystackSource & src_h, - const NeedleSource & src_n, - const ReplacementSource & src_r, - ColumnString::MutablePtr res_col) const - { - while (!src_h.isEnd()) - { - const auto slice_h = src_h.getWhole(); - const auto slice_n = src_n.getWhole(); - const auto slice_r = src_r.getWhole(); - - const String str_h(slice_h.data, slice_h.size); - const String str_n(slice_n.data, slice_n.size); - const String str_r(slice_r.data, slice_r.size); - String res; - Impl::constant(str_h, str_n, str_r, res); - res_col->insertData(res.data(), res.size()); - } - } - void executeImplConstHaystack( const ColumnPtr & column_src, const ColumnPtr & column_needle, @@ -201,10 +180,11 @@ class FunctionStringReplace : public IFunction RUNTIME_CHECK(column_needle_string); RUNTIME_CHECK(column_replacement_string); - replace( + GatherUtils::replace( ConstSource(*column_src_const), StringSource(*column_needle_string), StringSource(*column_replacement_string), + collator, res_col); } else if (needle_const && !replacement_const) @@ -214,10 +194,11 @@ class FunctionStringReplace : public IFunction RUNTIME_CHECK(column_needle_const); RUNTIME_CHECK(column_replacement_string); - replace( + GatherUtils::replace( ConstSource(*column_src_const), ConstSource(*column_needle_const), StringSource(*column_replacement_string), + collator, res_col); } else if (!needle_const && replacement_const) @@ -227,10 +208,11 @@ class FunctionStringReplace : public IFunction RUNTIME_CHECK(column_needle_string); RUNTIME_CHECK(column_replacement_const); - replace( + GatherUtils::replace( ConstSource(*column_src_const), StringSource(*column_needle_string), ConstSource(*column_replacement_const), + collator, res_col); } diff --git a/dbms/src/Functions/GatherUtils/Algorithms.h b/dbms/src/Functions/GatherUtils/Algorithms.h index f08dbf8fead..b630eb57841 100644 --- a/dbms/src/Functions/GatherUtils/Algorithms.h +++ b/dbms/src/Functions/GatherUtils/Algorithms.h @@ -813,4 +813,27 @@ void resizeConstantSize(ArraySource && array_source, ValueSource && value_source } } +template +void replace( + const HaystackSource & src_h, + const NeedleSource & src_n, + const ReplacementSource & src_r, + const TiDB::TiDBCollatorPtr & collator, + ColumnString::MutablePtr & res_col) +{ + while (!src_h.isEnd()) + { + const auto slice_h = src_h.getWhole(); + const auto slice_n = src_n.getWhole(); + const auto slice_r = src_r.getWhole(); + + const String str_h(reinterpret_cast(slice_h.data), slice_h.size); + const String str_n(reinterpret_cast(slice_n.data), slice_n.size); + const String str_r(reinterpret_cast(slice_r.data), slice_r.size); + String res; + Impl::constant(str_h, str_n, str_r, collator, res); + res_col->insertData(res.data(), res.size()); + } +} + } // namespace DB::GatherUtils From a2b914e26a8381b5097d21c50bc7e2eff10a5043 Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Fri, 15 Nov 2024 17:57:17 +0800 Subject: [PATCH 09/12] refine Signed-off-by: guo-shaoge --- dbms/src/Functions/FunctionsStringReplace.h | 17 +---------------- dbms/src/Functions/FunctionsStringSearch.cpp | 9 --------- dbms/src/Functions/GatherUtils/Algorithms.h | 3 +-- 3 files changed, 2 insertions(+), 27 deletions(-) diff --git a/dbms/src/Functions/FunctionsStringReplace.h b/dbms/src/Functions/FunctionsStringReplace.h index b7f425cc223..5583239c027 100644 --- a/dbms/src/Functions/FunctionsStringReplace.h +++ b/dbms/src/Functions/FunctionsStringReplace.h @@ -48,8 +48,6 @@ class FunctionStringReplace : public IFunction bool isVariadic() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } - void setCollator(const TiDB::TiDBCollatorPtr & collator_) override { collator = collator_; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!arguments[0]->isStringOrFixedString()) @@ -129,7 +127,6 @@ class FunctionStringReplace : public IFunction col->getOffsets(), needle, replacement, - collator, col_res->getChars(), col_res->getOffsets()); column_result.column = std::move(col_res); @@ -142,7 +139,6 @@ class FunctionStringReplace : public IFunction col->getN(), needle, replacement, - collator, col_res->getChars(), col_res->getOffsets()); column_result.column = std::move(col_res); @@ -166,7 +162,7 @@ class FunctionStringReplace : public IFunction RUNTIME_CHECK_MSG( !needle_const || !replacement_const, - "should got here when all argments of replace is constant"); + "should not got here when all argments of replace are constant"); const auto * column_src_const = checkAndGetColumnConst(column_src.get()); RUNTIME_CHECK(column_src_const); @@ -184,7 +180,6 @@ class FunctionStringReplace : public IFunction ConstSource(*column_src_const), StringSource(*column_needle_string), StringSource(*column_replacement_string), - collator, res_col); } else if (needle_const && !replacement_const) @@ -198,7 +193,6 @@ class FunctionStringReplace : public IFunction ConstSource(*column_src_const), ConstSource(*column_needle_const), StringSource(*column_replacement_string), - collator, res_col); } else if (!needle_const && replacement_const) @@ -212,7 +206,6 @@ class FunctionStringReplace : public IFunction ConstSource(*column_src_const), StringSource(*column_needle_string), ConstSource(*column_replacement_const), - collator, res_col); } @@ -240,7 +233,6 @@ class FunctionStringReplace : public IFunction col_needle->getChars(), col_needle->getOffsets(), replacement, - collator, col_res->getChars(), col_res->getOffsets()); column_result.column = std::move(col_res); @@ -254,7 +246,6 @@ class FunctionStringReplace : public IFunction col_needle->getChars(), col_needle->getOffsets(), replacement, - collator, col_res->getChars(), col_res->getOffsets()); column_result.column = std::move(col_res); @@ -291,7 +282,6 @@ class FunctionStringReplace : public IFunction needle, col_replacement->getChars(), col_replacement->getOffsets(), - collator, col_res->getChars(), col_res->getOffsets()); column_result.column = std::move(col_res); @@ -305,7 +295,6 @@ class FunctionStringReplace : public IFunction needle, col_replacement->getChars(), col_replacement->getOffsets(), - collator, col_res->getChars(), col_res->getOffsets()); column_result.column = std::move(col_res); @@ -342,7 +331,6 @@ class FunctionStringReplace : public IFunction col_needle->getOffsets(), col_replacement->getChars(), col_replacement->getOffsets(), - collator, col_res->getChars(), col_res->getOffsets()); column_result.column = std::move(col_res); @@ -357,7 +345,6 @@ class FunctionStringReplace : public IFunction col_needle->getOffsets(), col_replacement->getChars(), col_replacement->getOffsets(), - collator, col_res->getChars(), col_res->getOffsets()); column_result.column = std::move(col_res); @@ -374,7 +361,5 @@ class FunctionStringReplace : public IFunction ErrorCodes::ILLEGAL_COLUMN); } } - - TiDB::TiDBCollatorPtr collator{}; }; } // namespace DB diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp index 3c4541bc00c..bae0e9103cb 100644 --- a/dbms/src/Functions/FunctionsStringSearch.cpp +++ b/dbms/src/Functions/FunctionsStringSearch.cpp @@ -816,7 +816,6 @@ struct ReplaceStringImpl const ColumnString::Offsets & offsets, const std::string & needle, const std::string & replacement, - TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -901,7 +900,6 @@ struct ReplaceStringImpl const ColumnString::Chars_t & needle_chars, const ColumnString::Offsets & needle_offsets, const std::string & replacement, - TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -972,7 +970,6 @@ struct ReplaceStringImpl const std::string & needle, const ColumnString::Chars_t & replacement_chars, const ColumnString::Offsets & replacement_offsets, - TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1061,7 +1058,6 @@ struct ReplaceStringImpl const ColumnString::Offsets & needle_offsets, const ColumnString::Chars_t & replacement_chars, const ColumnString::Offsets & replacement_offsets, - TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1134,7 +1130,6 @@ struct ReplaceStringImpl size_t n, const std::string & needle, const std::string & replacement, - TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1229,7 +1224,6 @@ struct ReplaceStringImpl const ColumnString::Chars_t & needle_chars, const ColumnString::Offsets & needle_offsets, const std::string & replacement, - TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1301,7 +1295,6 @@ struct ReplaceStringImpl const std::string & needle, const ColumnString::Chars_t & replacement_chars, const ColumnString::Offsets & replacement_offsets, - TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1400,7 +1393,6 @@ struct ReplaceStringImpl const ColumnString::Offsets & needle_offsets, const ColumnString::Chars_t & replacement_chars, const ColumnString::Offsets & replacement_offsets, - TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1474,7 +1466,6 @@ struct ReplaceStringImpl const std::string & data, const std::string & needle, const std::string & replacement, - TiDB::TiDBCollatorPtr /* collator */, std::string & res_data) { if (needle.empty()) diff --git a/dbms/src/Functions/GatherUtils/Algorithms.h b/dbms/src/Functions/GatherUtils/Algorithms.h index b630eb57841..05fa30fb4e7 100644 --- a/dbms/src/Functions/GatherUtils/Algorithms.h +++ b/dbms/src/Functions/GatherUtils/Algorithms.h @@ -818,7 +818,6 @@ void replace( const HaystackSource & src_h, const NeedleSource & src_n, const ReplacementSource & src_r, - const TiDB::TiDBCollatorPtr & collator, ColumnString::MutablePtr & res_col) { while (!src_h.isEnd()) @@ -831,7 +830,7 @@ void replace( const String str_n(reinterpret_cast(slice_n.data), slice_n.size); const String str_r(reinterpret_cast(slice_r.data), slice_r.size); String res; - Impl::constant(str_h, str_n, str_r, collator, res); + Impl::constant(str_h, str_n, str_r, res); res_col->insertData(res.data(), res.size()); } } From 284e46ba74e18fac42c2db392c8ecb47c4fa23bf Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Fri, 15 Nov 2024 17:58:59 +0800 Subject: [PATCH 10/12] refine Signed-off-by: guo-shaoge --- dbms/src/Functions/GatherUtils/Algorithms.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/src/Functions/GatherUtils/Algorithms.h b/dbms/src/Functions/GatherUtils/Algorithms.h index 05fa30fb4e7..3c2dc6af3ba 100644 --- a/dbms/src/Functions/GatherUtils/Algorithms.h +++ b/dbms/src/Functions/GatherUtils/Algorithms.h @@ -832,6 +832,10 @@ void replace( String res; Impl::constant(str_h, str_n, str_r, res); res_col->insertData(res.data(), res.size()); + + src_h.next(); + src_n.next(); + src_r.next(); } } From 7a2f8698da7b0e44ebf0a063071ae8a201be3f3d Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Fri, 15 Nov 2024 18:02:28 +0800 Subject: [PATCH 11/12] fix Signed-off-by: guo-shaoge --- dbms/src/Functions/GatherUtils/Algorithms.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Functions/GatherUtils/Algorithms.h b/dbms/src/Functions/GatherUtils/Algorithms.h index 3c2dc6af3ba..baa6e6c27db 100644 --- a/dbms/src/Functions/GatherUtils/Algorithms.h +++ b/dbms/src/Functions/GatherUtils/Algorithms.h @@ -815,9 +815,9 @@ void resizeConstantSize(ArraySource && array_source, ValueSource && value_source template void replace( - const HaystackSource & src_h, - const NeedleSource & src_n, - const ReplacementSource & src_r, + HaystackSource && src_h, + NeedleSource && src_n, + ReplacementSource && src_r, ColumnString::MutablePtr & res_col) { while (!src_h.isEnd()) From eb65b346316cc3bcced661bf5bafc5e7bfe9f86b Mon Sep 17 00:00:00 2001 From: guo-shaoge Date: Mon, 18 Nov 2024 12:02:33 +0800 Subject: [PATCH 12/12] unit test Signed-off-by: guo-shaoge --- .../Functions/tests/gtest_strings_replace.cpp | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/dbms/src/Functions/tests/gtest_strings_replace.cpp b/dbms/src/Functions/tests/gtest_strings_replace.cpp index 4615d634e5f..ad14cbc7be7 100644 --- a/dbms/src/Functions/tests/gtest_strings_replace.cpp +++ b/dbms/src/Functions/tests/gtest_strings_replace.cpp @@ -70,6 +70,13 @@ try toVec({"", "w", "ww", " www ", "w w w"}), executeFunction("replaceAll", toVec({"", "w", "ww", " www ", "w w w"}), toConst(""), toConst(" "))); + ASSERT_COLUMN_EQ( + createConstColumn(1, {" bc"}), + executeFunction("replaceAll", toConst("abc"), toConst("a"), toConst(" "))); + ASSERT_COLUMN_EQ( + createConstColumn(1, {""}), + executeFunction("replaceAll", toConst(""), toConst(""), toConst(" "))); + /// non-const needle and const replacement ASSERT_COLUMN_EQ( toVec({"hello", " e llo", "hello ", " ", "hello world"}), @@ -87,6 +94,14 @@ try toVec({" ", "w", "w", "www", " w"}), toConst("ww"))); + ASSERT_COLUMN_EQ( + toVec({" bc", "a c", "ab "}), + executeFunction( + "replaceAll", + createConstColumn(3, "abc"), + toVec({"a", "b", "c"}), + createConstColumn(3, " "))); + /// const needle and non-const replacement ASSERT_COLUMN_EQ( toVec({"hello", "xxxhxexllo", "helloxxxxxxxx", " ", "hello,,world"}), @@ -96,6 +111,14 @@ try toConst(" "), toVec({"", "x", "xx", " ", ","}))); + ASSERT_COLUMN_EQ( + toVec({"123", "456", "789"}), + executeFunction( + "replaceAll", + createConstColumn(3, "abc"), + createConstColumn(3, "abc"), + toVec({"123", "456", "789"}))); + /// non-const needle and non-const replacement ASSERT_COLUMN_EQ( toVec({"hello", " x e llo", "hello ", " ", "hello, world"}), @@ -104,6 +127,14 @@ try toVec({" hello ", " h e llo", "hello ", " ", "hello, world"}), toVec({" ", "h", "", "h", ","}), toVec({"", "x", "xx", " ", ","}))); + + ASSERT_COLUMN_EQ( + toVec({"1bc", "a2c", "ab3"}), + executeFunction( + "replaceAll", + createConstColumn(3, "abc"), + toVec({"a", "b", "c"}), + toVec({"1", "2", "3"}))); } CATCH @@ -127,6 +158,13 @@ try toConst("你"), toConst("您"))); + ASSERT_COLUMN_EQ( + createConstColumn(1, {"你你世界"}), + executeFunction("replaceAll", toConst("你好世界"), toConst("好"), toConst("你"))); + ASSERT_COLUMN_EQ( + createConstColumn(1, {" "}), + executeFunction("replaceAll", toConst("你好世界"), toConst("你好世界"), toConst(" "))); + /// non-const needle and const replacement ASSERT_COLUMN_EQ( toVec({" 你好 ", "你好", " ", "你 好 ", "你不好"}), @@ -144,6 +182,14 @@ try toVec({" ", " 你", "你好", " 你", "你好"}), toConst("x"))); + ASSERT_COLUMN_EQ( + toVec({" 好世界", "你好 界", "你 世界"}), + executeFunction( + "replaceAll", + createConstColumn(3, "你好世界"), + toVec({"你", "世", "好"}), + createConstColumn(3, " "))); + /// const needle and non-const replacement ASSERT_COLUMN_EQ( toVec({" 好 ", " 你 好", "你好好 你好好", " 你 好 ", "你好不好"}), @@ -153,6 +199,14 @@ try toConst("你"), toVec({"", " 你", "你好", " 你", "你好"}))); + ASSERT_COLUMN_EQ( + toVec({"你一二世界", "你天天世界", "你向上世界"}), + executeFunction( + "replaceAll", + createConstColumn(3, "你好世界"), + createConstColumn(3, "好"), + toVec({"一二", "天天", "向上"}))); + /// non-const needle and non-const replacement ASSERT_COLUMN_EQ( toVec({" 你好 ", " 你 你 你你 你好", "好 好", " 你好 ", "你不好"}), @@ -161,6 +215,14 @@ try toVec({" 你好 ", " 你 好", "你好 你好", "你 好 ", "你不好"}), toVec({"", " ", "你好", "你 ", "你好"}), toVec({" ", " 你", "好", " 你", "你好"}))); + + ASSERT_COLUMN_EQ( + toVec({"你好世好", "你好好界", "你学世界", "习好世界"}), + executeFunction( + "replaceAll", + createConstColumn(3, "你好世界"), + toVec({"界", "世", "好", "你"}), + toVec({"好", "好", "学", "习"}))); } CATCH