Skip to content

Commit

Permalink
Improve performance of reverse function (#14025)
Browse files Browse the repository at this point in the history
* Improve performance of 'reverse' function

Signed-off-by: Tai Le Manh <[email protected]>

* Apply suggested change

* Fix typo

---------

Signed-off-by: Tai Le Manh <[email protected]>
  • Loading branch information
tlm365 authored Jan 9, 2025
1 parent 80c828b commit f9d3133
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 9 deletions.
5 changes: 5 additions & 0 deletions datafusion/functions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,11 @@ harness = false
name = "strpos"
required-features = ["unicode_expressions"]

[[bench]]
harness = false
name = "reverse"
required-features = ["unicode_expressions"]

[[bench]]
harness = false
name = "trunc"
Expand Down
90 changes: 90 additions & 0 deletions datafusion/functions/benches/reverse.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

extern crate criterion;

use arrow::array::OffsetSizeTrait;
use arrow::util::bench_util::{
create_string_array_with_len, create_string_view_array_with_len,
};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_expr::ColumnarValue;
use datafusion_functions::unicode;
use std::sync::Arc;

/// Builds the single-element argument list passed to the `reverse` UDF in the benchmarks.
///
/// * `size` - number of elements in the generated string array.
/// * `str_len` - length requested for each generated string.
/// * `force_view_types` - when `true` the column is produced by
///   `create_string_view_array_with_len`; otherwise by
///   `create_string_array_with_len::<O>`, with `O` selecting the offset width.
///
/// Both generators receive `0.1` as their second argument (the null density in
/// arrow's `bench_util` helpers).
fn create_args<O: OffsetSizeTrait>(
    size: usize,
    str_len: usize,
    force_view_types: bool,
) -> Vec<ColumnarValue> {
    // Each branch wraps its concrete array type; the `Arc` coerces to the
    // dynamic array reference expected by `ColumnarValue::Array`.
    let column = if force_view_types {
        ColumnarValue::Array(Arc::new(create_string_view_array_with_len(
            size, 0.1, str_len, false,
        )))
    } else {
        ColumnarValue::Array(Arc::new(create_string_array_with_len::<O>(
            size, 0.1, str_len,
        )))
    };

    vec![column]
}

/// Benchmarks the `reverse` UDF over generated string columns.
///
/// For every batch size in `[1024, 4096]` the following cases run, in order:
///
/// | benchmark id prefix   | `str_len` | view array |
/// |-----------------------|-----------|------------|
/// | `reverse_string_view` | 8         | yes        |
/// | `reverse_string_view` | 32        | yes        |
/// | `reverse_string`      | 32        | no         |
///
/// Each benchmark id has the form `"<prefix> [size=<size>, str_len=<str_len>]"`.
fn criterion_benchmark(c: &mut Criterion) {
    let reverse = unicode::reverse();

    // (benchmark id prefix, string length, use view arrays)
    let cases = [
        ("reverse_string_view", 8, true),
        ("reverse_string_view", 32, true),
        ("reverse_string", 32, false),
    ];

    for size in [1024, 4096] {
        for (label, str_len, force_view_types) in cases {
            let args = create_args::<i32>(size, str_len, force_view_types);
            let bench_id = format!("{} [size={}, str_len={}]", label, size, str_len);

            c.bench_function(bench_id.as_str(), |b| {
                b.iter(|| {
                    // TODO use invoke_with_args
                    // The second argument is the number of rows in the batch,
                    // which is `size` (the array length), not the string length.
                    black_box(reverse.invoke_batch(&args, size))
                })
            });
        }
    }
}

// Register `criterion_benchmark` under the `benches` group and let Criterion
// provide the entry point (this bench target is declared with `harness = false`).
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
25 changes: 16 additions & 9 deletions datafusion/functions/src/unicode/reverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ use std::sync::Arc;

use crate::utils::{make_scalar_function, utf8_to_str_type};
use arrow::array::{
Array, ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray,
OffsetSizeTrait,
Array, ArrayRef, AsArray, GenericStringBuilder, OffsetSizeTrait, StringArrayType,
};
use arrow::datatypes::DataType;
use datafusion_common::{exec_err, Result};
Expand Down Expand Up @@ -105,8 +104,7 @@ impl ScalarUDFImpl for ReverseFunc {
}
}

/// Reverses the order of the characters in the string.
/// reverse('abcde') = 'edcba'
/// Reverses the order of the characters in the string `reverse('abcde') = 'edcba'`.
/// The implementation uses UTF-8 code points as characters
pub fn reverse<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
if args[0].data_type() == &Utf8View {
Expand All @@ -116,14 +114,23 @@ pub fn reverse<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
}
}

fn reverse_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor<Item = &'a str>>(
fn reverse_impl<'a, T: OffsetSizeTrait, V: StringArrayType<'a>>(
string_array: V,
) -> Result<ArrayRef> {
let result = ArrayIter::new(string_array)
.map(|string| string.map(|string: &str| string.chars().rev().collect::<String>()))
.collect::<GenericStringArray<T>>();
let mut builder = GenericStringBuilder::<T>::with_capacity(string_array.len(), 1024);

let mut reversed = String::new();
for string in string_array.iter() {
if let Some(s) = string {
reversed.extend(s.chars().rev());
builder.append_value(&reversed);
reversed.clear();
} else {
builder.append_null();
}
}

Ok(Arc::new(result) as ArrayRef)
Ok(Arc::new(builder.finish()) as ArrayRef)
}

#[cfg(test)]
Expand Down

0 comments on commit f9d3133

Please sign in to comment.