#[cfg(feature = "simd")]
use clock_hash::simd::{
dispatch::{clock_mix_avx2, is_avx2_available, is_avx512_available, process_block_simd},
scalar::scalar_clock_mix,
};
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
use std::hint::black_box;
/// Benchmarks repeated in-place calls to `scalar_clock_mix` against
/// `clock_mix_avx2` for several iteration counts.
///
/// For each count a `scalar_*` benchmark is always registered; the matching
/// `avx2_*` benchmark is registered only when `is_avx2_available()` returns
/// `true`. Both variants start from the same 16-word state so their timings
/// are directly comparable.
#[cfg(feature = "simd")]
fn bench_clock_mix_simd_vs_scalar(c: &mut Criterion) {
    let mut group = c.benchmark_group("clock_mix_simd_vs_scalar");
    let iterations = [1000, 10000, 100000];

    for &iter in &iterations {
        group.bench_with_input(
            BenchmarkId::from_parameter(format!("scalar_{}_iterations", iter)),
            &iter,
            |b, &iter| {
                b.iter(|| {
                    let mut data = [0x123456789ABCDEF0u64; 16];
                    for _ in 0..iter {
                        scalar_clock_mix(black_box(&mut data));
                    }
                    // Observe the final state so the last mixing rounds cannot be
                    // treated as dead stores (matches the other benchmarks here).
                    black_box(data);
                });
            },
        );

        if is_avx2_available() {
            group.bench_with_input(
                BenchmarkId::from_parameter(format!("avx2_{}_iterations", iter)),
                &iter,
                |b, &iter| {
                    b.iter(|| {
                        let mut data = [0x123456789ABCDEF0u64; 16];
                        for _ in 0..iter {
                            clock_mix_avx2(black_box(&mut data));
                        }
                        // Same reasoning as the scalar variant: keep the result live.
                        black_box(data);
                    });
                },
            );
        }
    }

    group.finish();
}
/// Benchmarks a single mixing call per iteration on three fixed inputs
/// (`zeros`, `ones`, `pattern`) for each available implementation.
///
/// Every measured iteration copies the input into a local array, mixes it in
/// place and passes the result to `black_box`. The `*_avx2` and `*_avx512`
/// benchmarks are registered only when the corresponding
/// `is_*_available()` check returns `true`.
#[cfg(feature = "simd")]
fn bench_simd_implementations_comparison(c: &mut Criterion) {
    let mut group = c.benchmark_group("simd_implementations_comparison");

    let test_data = [
        ("zeros", [0u64; 16]),
        ("ones", [u64::MAX; 16]),
        ("pattern", {
            let mut data = [0u64; 16];
            for (i, word) in data.iter_mut().enumerate() {
                *word = (i as u64).wrapping_mul(0x9E3779B97F4A7C15);
            }
            data
        }),
    ];

    // `data` is only read (each iteration works on its own copy), so the
    // binding does not need to be `mut`.
    for (name, data) in test_data {
        group.bench_with_input(
            BenchmarkId::from_parameter(format!("{}_scalar", name)),
            &data,
            |b, data| {
                b.iter(|| {
                    let mut local_data = *data;
                    scalar_clock_mix(black_box(&mut local_data));
                    black_box(local_data);
                });
            },
        );

        if is_avx2_available() {
            group.bench_with_input(
                BenchmarkId::from_parameter(format!("{}_avx2", name)),
                &data,
                |b, data| {
                    b.iter(|| {
                        let mut local_data = *data;
                        clock_mix_avx2(black_box(&mut local_data));
                        black_box(local_data);
                    });
                },
            );
        }

        if is_avx512_available() {
            // NOTE(review): this benchmark is labelled `avx512` but still calls
            // `clock_mix_avx2`; no AVX-512 mixing entry point is imported in
            // this file. Swap in the AVX-512 kernel if/when one is exposed,
            // otherwise these numbers simply duplicate the `*_avx2` results.
            group.bench_with_input(
                BenchmarkId::from_parameter(format!("{}_avx512", name)),
                &data,
                |b, data| {
                    b.iter(|| {
                        let mut local_data = *data;
                        clock_mix_avx2(black_box(&mut local_data));
                        black_box(local_data);
                    });
                },
            );
        }
    }

    group.finish();
}
/// Benchmarks the two pieces of the dispatch path for several call counts:
/// back-to-back `is_avx2_available()` / `is_avx512_available()` queries, and
/// repeated `process_block_simd` calls on one fixed 128-byte block starting
/// from `clock_hash::constants::IV`.
#[cfg(feature = "simd")]
fn bench_simd_dispatch_overhead(c: &mut Criterion) {
    // The input block never changes, so it is built once up front.
    let test_block = [0x42u8; 128];
    let mut group = c.benchmark_group("simd_dispatch_overhead");

    for &calls in &[10000, 100000] {
        let detection_id = BenchmarkId::from_parameter(format!("feature_detection_{calls}_calls"));
        group.bench_with_input(detection_id, &calls, |bencher, &calls| {
            bencher.iter(|| {
                let mut has_avx2 = false;
                let mut has_avx512 = false;
                for _ in 0..calls {
                    has_avx2 = is_avx2_available();
                    has_avx512 = is_avx512_available();
                }
                // Keep the last answers live so the queries are not elided.
                black_box((has_avx2, has_avx512));
            });
        });

        let dispatch_id = BenchmarkId::from_parameter(format!("block_dispatch_{calls}_blocks"));
        group.bench_with_input(dispatch_id, &calls, |bencher, &calls| {
            bencher.iter(|| {
                let mut state = clock_hash::constants::IV;
                for _ in 0..calls {
                    process_block_simd(black_box(&test_block), black_box(&mut state));
                }
                black_box(state);
            });
        });
    }

    group.finish();
}
/// Benchmarks `process_block_simd` over inputs of 1 to 10000 blocks.
///
/// Each 128-byte block is filled with `(block_index * 128 + byte_index) % 256`
/// outside the measured region; the measured closure feeds every block into a
/// state initialised from `clock_hash::constants::IV`.
#[cfg(feature = "simd")]
fn bench_simd_scaling(c: &mut Criterion) {
    let mut group = c.benchmark_group("simd_scaling");

    for &block_count in &[1, 10, 100, 1000, 10000] {
        let input: Vec<[u8; 128]> = (0..block_count)
            .map(|block_idx| {
                let block: [u8; 128] =
                    std::array::from_fn(|byte_idx| ((block_idx * 128 + byte_idx) % 256) as u8);
                block
            })
            .collect();

        let id = BenchmarkId::from_parameter(format!("{block_count}_blocks"));
        group.bench_with_input(id, &input, |bencher, input| {
            bencher.iter(|| {
                let mut state = clock_hash::constants::IV;
                input
                    .iter()
                    .for_each(|block| process_block_simd(black_box(block), black_box(&mut state)));
                black_box(state);
            });
        });
    }

    group.finish();
}
/// Benchmarks one mixing call on 16-word inputs generated from five index
/// patterns (`sequential`, `random`, `sparse`, `dense`, `alternating`).
///
/// Input generation and the `is_avx2_available()` check both happen inside
/// the measured closure; the call goes to `clock_mix_avx2` when the check
/// succeeds and to `scalar_clock_mix` otherwise.
#[cfg(feature = "simd")]
fn bench_simd_memory_patterns(c: &mut Criterion) {
    // Word generators, each mapping a lane index to its value.
    fn sequential_pattern(i: usize) -> u64 {
        i as u64
    }
    fn random_pattern(i: usize) -> u64 {
        (i as u64).wrapping_mul(0x9E3779B97F4A7C15).rotate_left(7)
    }
    fn sparse_pattern(i: usize) -> u64 {
        if i % 3 == 0 { i as u64 } else { 0 }
    }
    fn dense_pattern(i: usize) -> u64 {
        u64::MAX ^ (i as u64)
    }
    fn alternating_pattern(i: usize) -> u64 {
        if i % 2 == 0 { 0 } else { u64::MAX }
    }

    let patterns: [(&str, fn(usize) -> u64); 5] = [
        ("sequential", sequential_pattern),
        ("random", random_pattern),
        ("sparse", sparse_pattern),
        ("dense", dense_pattern),
        ("alternating", alternating_pattern),
    ];

    let mut group = c.benchmark_group("simd_memory_patterns");
    for (label, generator) in &patterns {
        group.bench_with_input(
            BenchmarkId::from_parameter(*label),
            generator,
            |bencher, generator| {
                bencher.iter(|| {
                    let mut words: [u64; 16] = std::array::from_fn(|lane| generator(lane));
                    if is_avx2_available() {
                        clock_mix_avx2(black_box(&mut words));
                    } else {
                        scalar_clock_mix(black_box(&mut words));
                    }
                    black_box(words);
                });
            },
        );
    }
    group.finish();
}
/// Benchmarks `process_block_simd` over a 1 MiB buffer of `0x42` bytes,
/// consumed in 128-byte blocks, starting from `clock_hash::constants::IV`.
///
/// Throughput is declared in bytes and derived from the buffer length, so
/// Criterion reports bytes/second and the figure cannot drift from the
/// actual input size.
#[cfg(feature = "simd")]
fn bench_simd_throughput(c: &mut Criterion) {
    const BLOCK_LEN: usize = 128;

    let data = vec![0x42u8; 1024 * 1024];

    let mut group = c.benchmark_group("simd_throughput");
    // `usize` -> `u64` is a lossless widening for a 1 MiB length.
    group.throughput(criterion::Throughput::Bytes(data.len() as u64));

    group.bench_function("1MB_bulk_hash_simd", |b| {
        b.iter(|| {
            let mut state = clock_hash::constants::IV;
            // `chunks_exact` yields only full blocks; a trailing partial block
            // (none for this buffer size) is skipped, as in the original loop.
            for chunk in data.chunks_exact(BLOCK_LEN) {
                let block: &[u8; BLOCK_LEN] = chunk
                    .try_into()
                    .expect("chunks_exact yields slices of exactly BLOCK_LEN bytes");
                process_block_simd(black_box(block), black_box(&mut state));
            }
            black_box(state);
        });
    });

    group.finish();
}
/// Registers two small benchmarks in the `simd_initialization` group:
///
/// * `first_simd_call` — one mixing call on a fresh 16-word array, using
///   `clock_mix_avx2` when `is_avx2_available()` is true and
///   `scalar_clock_mix` otherwise.
/// * `cpu_feature_detection` — one `is_avx2_available()` call followed by
///   one `is_avx512_available()` call.
#[cfg(feature = "simd")]
fn bench_simd_initialization(c: &mut Criterion) {
    let mut group = c.benchmark_group("simd_initialization");

    group.bench_function("first_simd_call", |bencher| {
        bencher.iter(|| {
            let mut words = [0xDEADBEEFDEADBEEFu64; 16];
            if is_avx2_available() {
                clock_mix_avx2(black_box(&mut words));
            } else {
                scalar_clock_mix(black_box(&mut words));
            }
            black_box(words);
        });
    });

    group.bench_function("cpu_feature_detection", |bencher| {
        bencher.iter(|| {
            // Tuple fields evaluate left to right: AVX2 query first, then AVX-512.
            black_box((is_avx2_available(), is_avx512_available()));
        });
    });

    group.finish();
}
/// Compares always calling `scalar_clock_mix` against choosing the
/// implementation per call via `is_avx2_available()`, for several
/// iteration counts on the same starting state.
#[cfg(feature = "simd")]
fn bench_simd_fallback(c: &mut Criterion) {
    let mut group = c.benchmark_group("simd_fallback");

    for &rounds in &[1000, 10000] {
        let scalar_id = BenchmarkId::from_parameter(format!("forced_scalar_{rounds}_iterations"));
        group.bench_with_input(scalar_id, &rounds, |bencher, &rounds| {
            bencher.iter(|| {
                let mut words = [0xAAAAAAAAAAAAAAAAu64; 16];
                (0..rounds).for_each(|_| scalar_clock_mix(black_box(&mut words)));
                black_box(words);
            });
        });

        let auto_id = BenchmarkId::from_parameter(format!("auto_dispatch_{rounds}_iterations"));
        group.bench_with_input(auto_id, &rounds, |bencher, &rounds| {
            bencher.iter(|| {
                let mut words = [0xAAAAAAAAAAAAAAAAu64; 16];
                for _ in 0..rounds {
                    // The availability check is deliberately repeated on every
                    // round: it is part of what this benchmark measures.
                    if is_avx2_available() {
                        clock_mix_avx2(black_box(&mut words));
                    } else {
                        scalar_clock_mix(black_box(&mut words));
                    }
                }
                black_box(words);
            });
        });
    }

    group.finish();
}
// Collects every SIMD benchmark function defined in this file into the
// `benches` group. Only compiled when the `simd` feature is enabled, since
// all listed functions are gated on that feature.
#[cfg(feature = "simd")]
criterion_group!(
benches,
bench_clock_mix_simd_vs_scalar,
bench_simd_implementations_comparison,
bench_simd_dispatch_overhead,
bench_simd_scaling,
bench_simd_memory_patterns,
bench_simd_throughput,
bench_simd_initialization,
bench_simd_fallback,
);
// Generates the benchmark binary's entry point for the `benches` group.
// A separate `main` is provided below for builds without the `simd` feature.
#[cfg(feature = "simd")]
criterion_main!(benches);
/// Entry point used when the `simd` feature is disabled: prints a hint on
/// how to run the benchmarks to stderr and exits with status 1.
#[cfg(not(feature = "simd"))]
fn main() {
    let messages = [
        "SIMD benchmarks require the 'simd' feature to be enabled.",
        "Run with: cargo bench --bench simd_bench --features simd",
    ];
    for message in messages {
        eprintln!("{message}");
    }
    std::process::exit(1);
}