use criterion::{BenchmarkId, Criterion, Throughput, black_box, criterion_group, criterion_main};
use std::collections::HashMap;
use zipora::{
CacheAlignedVec, FastVec, get_numa_stats, get_optimal_numa_node,
init_numa_pools, set_current_numa_node,
};
fn bench_vector_operations(c: &mut Criterion) {
let mut group = c.benchmark_group("vector_operations");
for size in [1000, 10000, 100000].iter() {
group.throughput(Throughput::Elements(*size as u64));
group.bench_with_input(BenchmarkId::new("std_vec_push", size), size, |b, &size| {
b.iter(|| {
let mut vec = Vec::new();
for i in 0..size {
vec.push(black_box(i));
}
black_box(vec)
})
});
group.bench_with_input(BenchmarkId::new("fast_vec_push", size), size, |b, &size| {
b.iter(|| {
let mut vec = FastVec::new();
for i in 0..size {
vec.push(black_box(i)).unwrap();
}
black_box(vec)
})
});
group.bench_with_input(
BenchmarkId::new("cache_aligned_vec_push", size),
size,
|b, &size| {
b.iter(|| {
let mut vec = CacheAlignedVec::new();
for i in 0..size {
vec.push(black_box(i)).unwrap();
}
black_box(vec)
})
},
);
group.bench_with_input(
BenchmarkId::new("cache_aligned_vec_with_capacity", size),
size,
|b, &size| {
b.iter(|| {
let mut vec = CacheAlignedVec::with_capacity(size).unwrap();
for i in 0..size {
vec.push(black_box(i)).unwrap();
}
black_box(vec)
})
},
);
}
group.finish();
}
fn bench_random_access(c: &mut Criterion) {
let mut group = c.benchmark_group("random_access");
const SIZE: usize = 100000;
const ACCESSES: usize = 10000;
let indices: Vec<usize> = (0..ACCESSES).map(|i| (i * 7919) % SIZE).collect();
group.throughput(Throughput::Elements(ACCESSES as u64));
group.bench_function("std_vec_random_access", |b| {
let vec: Vec<i32> = (0..SIZE as i32).collect();
b.iter(|| {
let mut sum = 0i32;
for &idx in &indices {
sum = sum.wrapping_add(black_box(vec[idx]));
}
black_box(sum)
})
});
group.bench_function("fast_vec_random_access", |b| {
let mut vec = FastVec::new();
for i in 0..SIZE as i32 {
vec.push(i).unwrap();
}
b.iter(|| {
let mut sum = 0i32;
for &idx in &indices {
sum = sum.wrapping_add(black_box(*vec.get(idx).unwrap()));
}
black_box(sum)
})
});
group.bench_function("cache_aligned_vec_random_access", |b| {
let mut vec = CacheAlignedVec::with_capacity(SIZE).unwrap();
for i in 0..SIZE as i32 {
vec.push(i).unwrap();
}
b.iter(|| {
let mut sum = 0i32;
for &idx in &indices {
sum = sum.wrapping_add(black_box(*vec.get(idx).unwrap()));
}
black_box(sum)
})
});
group.finish();
}
fn bench_sequential_access(c: &mut Criterion) {
let mut group = c.benchmark_group("sequential_access");
for size in [10000, 100000, 1000000].iter() {
group.throughput(Throughput::Elements(*size as u64));
group.bench_with_input(
BenchmarkId::new("std_vec_sequential", size),
size,
|b, &size| {
let vec: Vec<i32> = (0..size as i32).collect();
b.iter(|| {
let mut sum = 0i32;
for val in &vec {
sum = sum.wrapping_add(black_box(*val));
}
black_box(sum)
})
},
);
group.bench_with_input(
BenchmarkId::new("cache_aligned_vec_sequential", size),
size,
|b, &size| {
let mut vec = CacheAlignedVec::with_capacity(size).unwrap();
for i in 0..size as i32 {
vec.push(i).unwrap();
}
b.iter(|| {
let mut sum = 0i32;
for i in 0..vec.len() {
sum = sum.wrapping_add(black_box(*vec.get(i).unwrap()));
}
black_box(sum)
})
},
);
group.bench_with_input(
BenchmarkId::new("cache_aligned_vec_slice", size),
size,
|b, &size| {
let mut vec = CacheAlignedVec::with_capacity(size).unwrap();
for i in 0..size as i32 {
vec.push(i).unwrap();
}
b.iter(|| {
let mut sum = 0i32;
for val in vec.as_slice() {
sum = sum.wrapping_add(black_box(*val));
}
black_box(sum)
})
},
);
}
group.finish();
}
fn bench_numa_operations(c: &mut Criterion) {
let mut group = c.benchmark_group("numa_operations");
let _ = init_numa_pools();
let numa_stats = get_numa_stats();
if numa_stats.node_count > 1 {
group.bench_function("numa_aware_allocation", |b| {
b.iter(|| {
let node = get_optimal_numa_node();
let vec = CacheAlignedVec::<u64>::with_numa_node(node);
black_box(vec)
})
});
group.bench_function("numa_cross_node_allocation", |b| {
b.iter(|| {
let node = black_box(0); let vec = CacheAlignedVec::<u64>::with_numa_node(node);
black_box(vec)
})
});
}
group.finish();
}
fn bench_allocation_patterns(c: &mut Criterion) {
let mut group = c.benchmark_group("allocation_patterns");
const ITERATIONS: usize = 1000;
group.throughput(Throughput::Elements(ITERATIONS as u64));
group.bench_function("std_vec_alloc_dealloc", |b| {
b.iter(|| {
for i in 0..ITERATIONS {
let mut vec = Vec::with_capacity(black_box(i + 100));
for j in 0..100 {
vec.push(black_box(j));
}
black_box(vec);
}
})
});
group.bench_function("cache_aligned_alloc_dealloc", |b| {
b.iter(|| {
for i in 0..ITERATIONS {
let mut vec = CacheAlignedVec::with_capacity(black_box(i + 100)).unwrap();
for j in 0..100 {
vec.push(black_box(j)).unwrap();
}
black_box(vec);
}
})
});
group.finish();
}
fn bench_cache_line_utilization(c: &mut Criterion) {
let mut group = c.benchmark_group("cache_line_utilization");
const SIZE: usize = 10000;
#[repr(C)]
#[derive(Clone, Copy)]
struct CacheLineFriendly {
data: [u8; 64],
}
#[repr(C)]
#[derive(Clone, Copy)]
struct CacheLineSpanning {
data: [u8; 100],
}
group.throughput(Throughput::Elements(SIZE as u64));
group.bench_function("cache_friendly_struct", |b| {
let mut vec = CacheAlignedVec::with_capacity(SIZE).unwrap();
for _ in 0..SIZE {
vec.push(CacheLineFriendly { data: [42; 64] }).unwrap();
}
b.iter(|| {
let mut sum = 0u8;
for item in vec.as_slice() {
sum = sum.wrapping_add(black_box(item.data[0]));
}
black_box(sum)
})
});
group.bench_function("cache_spanning_struct", |b| {
let mut vec = CacheAlignedVec::with_capacity(SIZE).unwrap();
for _ in 0..SIZE {
vec.push(CacheLineSpanning { data: [42; 100] }).unwrap();
}
b.iter(|| {
let mut sum = 0u8;
for item in vec.as_slice() {
sum = sum.wrapping_add(black_box(item.data[0]));
}
black_box(sum)
})
});
group.finish();
}
fn bench_numa_stats(c: &mut Criterion) {
let mut group = c.benchmark_group("numa_stats");
let _ = init_numa_pools();
let _vec1 = CacheAlignedVec::<u64>::with_numa_node(0);
let _vec2 = CacheAlignedVec::<u32>::with_numa_node(get_optimal_numa_node());
group.bench_function("get_numa_stats", |b| {
b.iter(|| {
let stats = get_numa_stats();
black_box(stats)
})
});
group.finish();
}
fn bench_numa_configurations(c: &mut Criterion) {
let mut group = c.benchmark_group("numa_configurations");
let numa_stats = get_numa_stats();
if numa_stats.node_count > 1 {
const SIZE: usize = 10000;
group.bench_function("local_numa_access", |b| {
let current_node = get_optimal_numa_node();
let _ = set_current_numa_node(current_node);
b.iter(|| {
let mut vec = CacheAlignedVec::with_numa_node(current_node);
for i in 0..SIZE {
vec.push(black_box(i)).unwrap();
}
let mut sum = 0usize;
for i in 0..vec.len() {
sum = sum.wrapping_add(black_box(*vec.get(i).unwrap()));
}
black_box(sum)
})
});
group.bench_function("remote_numa_access", |b| {
let current_node = get_optimal_numa_node();
let remote_node = (current_node + 1) % numa_stats.node_count;
b.iter(|| {
let mut vec = CacheAlignedVec::with_numa_node(remote_node);
for i in 0..SIZE {
vec.push(black_box(i)).unwrap();
}
let mut sum = 0usize;
for i in 0..vec.len() {
sum = sum.wrapping_add(black_box(*vec.get(i).unwrap()));
}
black_box(sum)
})
});
}
group.finish();
}
criterion_group!(
benches,
bench_vector_operations,
bench_random_access,
bench_sequential_access,
bench_numa_operations,
bench_allocation_patterns,
bench_cache_line_utilization,
bench_numa_stats,
bench_numa_configurations
);
criterion_main!(benches);