use std::hint::black_box;
use std::sync::Arc;
use arrow::array::{Array, ArrayRef, TimestampSecondArray};
use arrow::datatypes::Field;
use criterion::{Criterion, criterion_group, criterion_main};
use datafusion_common::ScalarValue;
use datafusion_common::config::ConfigOptions;
use datafusion_expr::{ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs};
use datafusion_functions::datetime::date_trunc;
use rand::Rng;
use rand::rngs::ThreadRng;
/// Builds the benchmark input: a `TimestampSecondArray` holding 1000 random
/// values, each drawn from the half-open range `0..1_000_000` using `rng`.
fn timestamps(rng: &mut ThreadRng) -> TimestampSecondArray {
    let values: Vec<i64> = (0..1000)
        .map(|_| rng.random_range(0..1_000_000))
        .collect();
    TimestampSecondArray::from(values)
}
/// Registers the `date_trunc_minute_1000` benchmark on `c`.
///
/// The benchmark invokes the `date_trunc` UDF with a scalar `"minute"`
/// precision and the array produced by [`timestamps`]. The arguments, argument
/// fields, return field and config options are all prepared once, before the
/// measured closure; each measured iteration clones the argument vectors and
/// performs a single `invoke_with_args` call.
fn criterion_benchmark(c: &mut Criterion) {
    c.bench_function("date_trunc_minute_1000", |b| {
        // Input column of random timestamps.
        let mut thread_rng = rand::rng();
        let input: ArrayRef = Arc::new(timestamps(&mut thread_rng));
        let number_rows = input.len();

        // Arguments are passed as (precision, timestamps).
        let args = vec![
            ColumnarValue::Scalar(ScalarValue::Utf8(Some("minute".to_string()))),
            ColumnarValue::Array(input),
        ];

        // One nullable field per argument, named `arg_0`, `arg_1`, ...
        let arg_fields = args
            .iter()
            .enumerate()
            .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into())
            .collect::<Vec<_>>();

        let udf = date_trunc();

        // No scalar argument values are supplied when resolving the return field.
        let scalar_arguments = vec![None; arg_fields.len()];
        let return_field_args = ReturnFieldArgs {
            arg_fields: &arg_fields,
            scalar_arguments: &scalar_arguments,
        };
        let return_field = udf.return_field_from_args(return_field_args).unwrap();

        let config_options = Arc::new(ConfigOptions::default());

        b.iter(|| {
            // `ScalarFunctionArgs` owns its vectors, so fresh clones are needed
            // on every iteration.
            let call_args = ScalarFunctionArgs {
                args: args.clone(),
                arg_fields: arg_fields.clone(),
                number_rows,
                return_field: Arc::clone(&return_field),
                config_options: Arc::clone(&config_options),
            };
            let output = udf
                .invoke_with_args(call_args)
                .expect("date_trunc should work on valid values");
            // Keep the result observable so the call cannot be optimized away.
            black_box(output)
        })
    });
}
// Collect `criterion_benchmark` into a Criterion group named `benches` and
// hand that group to `criterion_main!` as the benchmark entry point.
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);