use arrow::array::{StringArray, StringViewArray};
use arrow::datatypes::{DataType, Field};
use criterion::{Criterion, criterion_group, criterion_main};
use datafusion_common::ScalarValue;
use datafusion_common::config::ConfigOptions;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
use rand::distr::Alphanumeric;
use rand::prelude::StdRng;
use rand::{Rng, SeedableRng};
use std::hint::black_box;
use std::sync::Arc;
fn gen_string_array(
n_rows: usize,
str_len: usize,
is_string_view: bool,
) -> ColumnarValue {
let mut rng = StdRng::seed_from_u64(42);
let strings: Vec<Option<String>> = (0..n_rows)
.map(|_| {
let s: String = (&mut rng)
.sample_iter(&Alphanumeric)
.take(str_len)
.map(char::from)
.collect();
Some(s)
})
.collect();
if is_string_view {
ColumnarValue::Array(Arc::new(StringViewArray::from(strings)))
} else {
ColumnarValue::Array(Arc::new(StringArray::from(strings)))
}
}
fn gen_scalar_suffix(suffix_str: &str, is_string_view: bool) -> ColumnarValue {
if is_string_view {
ColumnarValue::Scalar(ScalarValue::Utf8View(Some(suffix_str.to_string())))
} else {
ColumnarValue::Scalar(ScalarValue::Utf8(Some(suffix_str.to_string())))
}
}
fn gen_array_suffix(
suffix_str: &str,
n_rows: usize,
is_string_view: bool,
) -> ColumnarValue {
let strings: Vec<Option<String>> =
(0..n_rows).map(|_| Some(suffix_str.to_string())).collect();
if is_string_view {
ColumnarValue::Array(Arc::new(StringViewArray::from(strings)))
} else {
ColumnarValue::Array(Arc::new(StringArray::from(strings)))
}
}
fn criterion_benchmark(c: &mut Criterion) {
let ends_with = datafusion_functions::string::ends_with();
let n_rows = 8192;
let str_len = 128;
let suffix_str = "xyz";
let str_array = gen_string_array(n_rows, str_len, false);
let scalar_suffix = gen_scalar_suffix(suffix_str, false);
let arg_fields = vec![
Field::new("a", DataType::Utf8, true).into(),
Field::new("b", DataType::Utf8, true).into(),
];
let return_field = Field::new("f", DataType::Boolean, true).into();
let config_options = Arc::new(ConfigOptions::default());
c.bench_function("ends_with_StringArray_scalar_suffix", |b| {
b.iter(|| {
black_box(ends_with.invoke_with_args(ScalarFunctionArgs {
args: vec![str_array.clone(), scalar_suffix.clone()],
arg_fields: arg_fields.clone(),
number_rows: n_rows,
return_field: Arc::clone(&return_field),
config_options: Arc::clone(&config_options),
}))
})
});
let array_suffix = gen_array_suffix(suffix_str, n_rows, false);
c.bench_function("ends_with_StringArray_array_suffix", |b| {
b.iter(|| {
black_box(ends_with.invoke_with_args(ScalarFunctionArgs {
args: vec![str_array.clone(), array_suffix.clone()],
arg_fields: arg_fields.clone(),
number_rows: n_rows,
return_field: Arc::clone(&return_field),
config_options: Arc::clone(&config_options),
}))
})
});
let str_view_array = gen_string_array(n_rows, str_len, true);
let scalar_suffix_view = gen_scalar_suffix(suffix_str, true);
let arg_fields_view = vec![
Field::new("a", DataType::Utf8View, true).into(),
Field::new("b", DataType::Utf8View, true).into(),
];
c.bench_function("ends_with_StringViewArray_scalar_suffix", |b| {
b.iter(|| {
black_box(ends_with.invoke_with_args(ScalarFunctionArgs {
args: vec![str_view_array.clone(), scalar_suffix_view.clone()],
arg_fields: arg_fields_view.clone(),
number_rows: n_rows,
return_field: Arc::clone(&return_field),
config_options: Arc::clone(&config_options),
}))
})
});
let array_suffix_view = gen_array_suffix(suffix_str, n_rows, true);
c.bench_function("ends_with_StringViewArray_array_suffix", |b| {
b.iter(|| {
black_box(ends_with.invoke_with_args(ScalarFunctionArgs {
args: vec![str_view_array.clone(), array_suffix_view.clone()],
arg_fields: arg_fields_view.clone(),
number_rows: n_rows,
return_field: Arc::clone(&return_field),
config_options: Arc::clone(&config_options),
}))
})
});
for str_len in [8, 32, 128, 512] {
let str_array = gen_string_array(n_rows, str_len, true);
let scalar_suffix = gen_scalar_suffix(suffix_str, true);
let arg_fields = vec![
Field::new("a", DataType::Utf8View, true).into(),
Field::new("b", DataType::Utf8View, true).into(),
];
c.bench_function(
&format!("ends_with_StringViewArray_scalar_strlen_{str_len}"),
|b| {
b.iter(|| {
black_box(ends_with.invoke_with_args(ScalarFunctionArgs {
args: vec![str_array.clone(), scalar_suffix.clone()],
arg_fields: arg_fields.clone(),
number_rows: n_rows,
return_field: Arc::clone(&return_field),
config_options: Arc::clone(&config_options),
}))
})
},
);
}
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);