#[macro_use]
extern crate criterion;
use criterion::{BenchmarkId, Criterion};
use datafusion_expr::lit;
use datafusion_functions_nested::expr_fn::{
array_has, array_has_all, array_has_any, make_array,
};
use std::hint::black_box;
fn criterion_benchmark(c: &mut Criterion) {
let array_sizes = vec![1, 10, 100, 1000, 10000];
for &size in &array_sizes {
bench_array_has(c, size);
bench_array_has_all(c, size);
bench_array_has_any(c, size);
}
bench_array_has_strings(c);
bench_array_has_all_strings(c);
bench_array_has_any_strings(c);
bench_array_has_edge_cases(c);
}
fn bench_array_has(c: &mut Criterion, array_size: usize) {
let mut group = c.benchmark_group("array_has_i64");
group.bench_with_input(
BenchmarkId::new("found_at_start", array_size),
&array_size,
|b, &size| {
let array = (0..size).map(|i| lit(i as i64)).collect::<Vec<_>>();
let list_array = make_array(array);
let needle = lit(0_i64);
b.iter(|| black_box(array_has(list_array.clone(), needle.clone())))
},
);
group.bench_with_input(
BenchmarkId::new("found_at_end", array_size),
&array_size,
|b, &size| {
let array = (0..size).map(|i| lit(i as i64)).collect::<Vec<_>>();
let list_array = make_array(array);
let needle = lit((size - 1) as i64);
b.iter(|| black_box(array_has(list_array.clone(), needle.clone())))
},
);
group.bench_with_input(
BenchmarkId::new("not_found", array_size),
&array_size,
|b, &size| {
let array = (0..size).map(|i| lit(i as i64)).collect::<Vec<_>>();
let list_array = make_array(array);
let needle = lit(-1_i64);
b.iter(|| black_box(array_has(list_array.clone(), needle.clone())))
},
);
group.finish();
}
fn bench_array_has_all(c: &mut Criterion, array_size: usize) {
let mut group = c.benchmark_group("array_has_all");
group.bench_with_input(
BenchmarkId::new("all_found_small_needle", array_size),
&array_size,
|b, &size| {
let array = (0..size).map(|i| lit(i as i64)).collect::<Vec<_>>();
let list_array = make_array(array);
let needle_array = make_array(vec![lit(0_i64), lit(1_i64), lit(2_i64)]);
b.iter(|| black_box(array_has_all(list_array.clone(), needle_array.clone())))
},
);
group.bench_with_input(
BenchmarkId::new("all_found_medium_needle", array_size),
&array_size,
|b, &size| {
let array = (0..size).map(|i| lit(i as i64)).collect::<Vec<_>>();
let list_array = make_array(array);
let needle_size = (size / 10).max(1);
let needle = (0..needle_size).map(|i| lit(i as i64)).collect::<Vec<_>>();
let needle_array = make_array(needle);
b.iter(|| black_box(array_has_all(list_array.clone(), needle_array.clone())))
},
);
group.bench_with_input(
BenchmarkId::new("early_exit", array_size),
&array_size,
|b, &size| {
let array = (0..size).map(|i| lit(i as i64)).collect::<Vec<_>>();
let list_array = make_array(array);
let needle_array = make_array(vec![lit(0_i64), lit(-1_i64)]);
b.iter(|| black_box(array_has_all(list_array.clone(), needle_array.clone())))
},
);
group.finish();
}
fn bench_array_has_any(c: &mut Criterion, array_size: usize) {
let mut group = c.benchmark_group("array_has_any");
group.bench_with_input(
BenchmarkId::new("first_match", array_size),
&array_size,
|b, &size| {
let array = (0..size).map(|i| lit(i as i64)).collect::<Vec<_>>();
let list_array = make_array(array);
let needle_array = make_array(vec![lit(0_i64), lit(-1_i64), lit(-2_i64)]);
b.iter(|| black_box(array_has_any(list_array.clone(), needle_array.clone())))
},
);
group.bench_with_input(
BenchmarkId::new("last_match", array_size),
&array_size,
|b, &size| {
let array = (0..size).map(|i| lit(i as i64)).collect::<Vec<_>>();
let list_array = make_array(array);
let needle_array = make_array(vec![lit(-1_i64), lit(-2_i64), lit(0_i64)]);
b.iter(|| black_box(array_has_any(list_array.clone(), needle_array.clone())))
},
);
group.bench_with_input(
BenchmarkId::new("no_match", array_size),
&array_size,
|b, &size| {
let array = (0..size).map(|i| lit(i as i64)).collect::<Vec<_>>();
let list_array = make_array(array);
let needle_array = make_array(vec![lit(-1_i64), lit(-2_i64), lit(-3_i64)]);
b.iter(|| black_box(array_has_any(list_array.clone(), needle_array.clone())))
},
);
group.finish();
}
fn bench_array_has_strings(c: &mut Criterion) {
let mut group = c.benchmark_group("array_has_strings");
let sizes = vec![10, 100, 1000];
for &size in &sizes {
group.bench_with_input(BenchmarkId::new("found", size), &size, |b, &size| {
let array = (0..size)
.map(|i| lit(format!("TICKER{i:04}")))
.collect::<Vec<_>>();
let list_array = make_array(array);
let needle = lit("TICKER0005");
b.iter(|| black_box(array_has(list_array.clone(), needle.clone())))
});
group.bench_with_input(BenchmarkId::new("not_found", size), &size, |b, &size| {
let array = (0..size)
.map(|i| lit(format!("TICKER{i:04}")))
.collect::<Vec<_>>();
let list_array = make_array(array);
let needle = lit("NOTFOUND");
b.iter(|| black_box(array_has(list_array.clone(), needle.clone())))
});
}
group.finish();
}
fn bench_array_has_all_strings(c: &mut Criterion) {
let mut group = c.benchmark_group("array_has_all_strings");
let portfolio_size = 100;
let check_sizes = vec![1, 3, 5, 10];
for &check_size in &check_sizes {
group.bench_with_input(
BenchmarkId::new("all_found", check_size),
&check_size,
|b, &check_size| {
let portfolio = (0..portfolio_size)
.map(|i| lit(format!("TICKER{i:04}")))
.collect::<Vec<_>>();
let list_array = make_array(portfolio);
let checking = (0..check_size)
.map(|i| lit(format!("TICKER{i:04}")))
.collect::<Vec<_>>();
let needle_array = make_array(checking);
b.iter(|| {
black_box(array_has_all(list_array.clone(), needle_array.clone()))
})
},
);
group.bench_with_input(
BenchmarkId::new("some_missing", check_size),
&check_size,
|b, &check_size| {
let portfolio = (0..portfolio_size)
.map(|i| lit(format!("TICKER{i:04}")))
.collect::<Vec<_>>();
let list_array = make_array(portfolio);
let mut checking = (0..check_size - 1)
.map(|i| lit(format!("TICKER{i:04}")))
.collect::<Vec<_>>();
checking.push(lit("NOTFOUND".to_string()));
let needle_array = make_array(checking);
b.iter(|| {
black_box(array_has_all(list_array.clone(), needle_array.clone()))
})
},
);
}
group.finish();
}
fn bench_array_has_any_strings(c: &mut Criterion) {
let mut group = c.benchmark_group("array_has_any_strings");
let portfolio_size = 100;
let check_sizes = vec![1, 3, 5, 10];
for &check_size in &check_sizes {
group.bench_with_input(
BenchmarkId::new("first_matches", check_size),
&check_size,
|b, &check_size| {
let portfolio = (0..portfolio_size)
.map(|i| lit(format!("TICKER{i:04}")))
.collect::<Vec<_>>();
let list_array = make_array(portfolio);
let mut checking = vec![lit("TICKER0000".to_string())];
checking.extend((1..check_size).map(|_| lit("NOTFOUND".to_string())));
let needle_array = make_array(checking);
b.iter(|| {
black_box(array_has_any(list_array.clone(), needle_array.clone()))
})
},
);
group.bench_with_input(
BenchmarkId::new("none_match", check_size),
&check_size,
|b, &check_size| {
let portfolio = (0..portfolio_size)
.map(|i| lit(format!("TICKER{i:04}")))
.collect::<Vec<_>>();
let list_array = make_array(portfolio);
let checking = (0..check_size)
.map(|i| lit(format!("NOTFOUND{i}")))
.collect::<Vec<_>>();
let needle_array = make_array(checking);
b.iter(|| {
black_box(array_has_any(list_array.clone(), needle_array.clone()))
})
},
);
}
group.finish();
}
fn bench_array_has_edge_cases(c: &mut Criterion) {
let mut group = c.benchmark_group("array_has_edge_cases");
group.bench_function("empty_array", |b| {
let list_array = make_array(vec![]);
let needle = lit(1_i64);
b.iter(|| black_box(array_has(list_array.clone(), needle.clone())))
});
group.bench_function("single_element_found", |b| {
let list_array = make_array(vec![lit(1_i64)]);
let needle = lit(1_i64);
b.iter(|| black_box(array_has(list_array.clone(), needle.clone())))
});
group.bench_function("single_element_not_found", |b| {
let list_array = make_array(vec![lit(1_i64)]);
let needle = lit(2_i64);
b.iter(|| black_box(array_has(list_array.clone(), needle.clone())))
});
group.bench_function("array_with_duplicates", |b| {
let array = vec![lit(1_i64); 1000];
let list_array = make_array(array);
let needle = lit(1_i64);
b.iter(|| black_box(array_has(list_array.clone(), needle.clone())))
});
group.bench_function("array_has_all_empty_needle", |b| {
let array = (0..1000).map(|i| lit(i as i64)).collect::<Vec<_>>();
let list_array = make_array(array);
let needle_array = make_array(vec![]);
b.iter(|| black_box(array_has_all(list_array.clone(), needle_array.clone())))
});
group.bench_function("array_has_any_empty_needle", |b| {
let array = (0..1000).map(|i| lit(i as i64)).collect::<Vec<_>>();
let list_array = make_array(array);
let needle_array = make_array(vec![]);
b.iter(|| black_box(array_has_any(list_array.clone(), needle_array.clone())))
});
group.finish();
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);