use arrow::array::{Int32Builder, StringBuilder, StringDictionaryBuilder};
use criterion::{criterion_group, criterion_main, Criterion};
use rand::{thread_rng, Rng};
/// Produces `total_size` strings sampled (with repetition) from a freshly
/// generated pool of `dict_size` random keys.
///
/// Each pool entry is assembled from `key_len` random `char` values, so
/// `key_len` counts characters rather than bytes. The pool entries are not
/// de-duplicated.
///
/// NOTE(review): when `dict_size` is 0 and `total_size` is non-zero the index
/// is drawn from the empty range `0..0`; presumably `gen_range` rejects that,
/// so callers should pass a non-zero `dict_size`.
fn build_strings(dict_size: usize, total_size: usize, key_len: usize) -> Vec<String> {
    let mut rng = thread_rng();

    // Step 1: generate the pool of candidate keys.
    let mut pool: Vec<String> = Vec::with_capacity(dict_size);
    for _ in 0..dict_size {
        let mut key = String::new();
        for _ in 0..key_len {
            key.push(rng.gen::<char>());
        }
        pool.push(key);
    }

    // Step 2: draw `total_size` entries from the pool at random.
    let mut sampled: Vec<String> = Vec::with_capacity(total_size);
    for _ in 0..total_size {
        let pick = rng.gen_range(0..dict_size);
        sampled.push(pool[pick].clone());
    }
    sampled
}
/// Benchmarks appending strings to a `StringDictionaryBuilder`.
///
/// Every configuration is registered in the `string_dictionary_builder`
/// group and is described by three numbers:
///
/// * `dict_size`  - number of random keys the input is sampled from,
/// * `total_size` - number of strings appended per iteration,
/// * `key_len`    - number of `char`s in each key.
///
/// One timed iteration creates the key and value builders, appends every
/// input string, and finishes the dictionary array.
fn criterion_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("string_dictionary_builder");

    let mut do_bench = |dict_size: usize, total_size: usize, key_len: usize| {
        // Generate the input once per configuration, outside the closure that
        // Criterion invokes, so every sample appends the same strings and the
        // random data is not rebuilt on each invocation.
        let strings = build_strings(dict_size, total_size, key_len);

        group.bench_function(
            format!(
                "(dict_size:{}, len:{}, key_len: {})",
                dict_size, total_size, key_len
            ),
            |b| {
                b.iter(|| {
                    let keys = Int32Builder::new(strings.len());
                    // Capacity hint only; computed from the character count.
                    let values = StringBuilder::new((key_len + 1) * dict_size);
                    let mut builder = StringDictionaryBuilder::new(keys, values);
                    for val in &strings {
                        builder.append(val).unwrap();
                    }
                    // Hand the finished array back to Criterion instead of
                    // discarding it, so the work cannot be optimized away.
                    builder.finish()
                })
            },
        );
    };

    do_bench(20, 1000, 5);
    do_bench(100, 1000, 5);
    do_bench(100, 1000, 10);
    do_bench(100, 10000, 10);
    do_bench(100, 10000, 100);

    group.finish();
}
// Collect `criterion_benchmark` into a benchmark group named `benches`.
criterion_group!(benches, criterion_benchmark);
// Generate the benchmark binary's entry point, which runs the `benches` group.
criterion_main!(benches);