use criterion::{
black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
};
use std::alloc::{alloc, dealloc, Layout};
use zipora::memory::simd_ops::{SimdMemOps, SimdTier};
/// Builds a deterministic pseudo-pattern of `size` bytes.
///
/// Byte `i` is `(i * 17 + 13) % 256`, so the output is reproducible across
/// runs and is not a trivially constant buffer.
fn generate_test_data(size: usize) -> Vec<u8> {
    let mut bytes = Vec::with_capacity(size);
    for index in 0..size {
        bytes.push(((index * 17 + 13) % 256) as u8);
    }
    bytes
}
/// Allocates a zero-initialised byte buffer of `size` bytes aligned to
/// `alignment`.
///
/// Returns the raw pointer together with the `Layout` used for the
/// allocation; the caller owns the memory and must release it with
/// `dealloc(ptr, layout)` using exactly that layout.
///
/// # Panics
///
/// Panics if `size` is zero (allocating a zero-sized layout through the
/// global allocator is undefined behaviour), if `size`/`alignment` do not
/// form a valid layout, or if the allocator returns a null pointer.
fn generate_aligned_buffer(size: usize, alignment: usize) -> (*mut u8, Layout) {
    assert!(size > 0, "Aligned buffer size must be non-zero");
    let layout = Layout::from_size_align(size, alignment)
        .expect("Failed to create layout");
    // SAFETY: `layout` has a non-zero size (asserted above).
    let ptr = unsafe { alloc(layout) };
    assert!(!ptr.is_null(), "Failed to allocate aligned memory");
    // Zero the buffer so callers may soundly view it as `[u8]` without
    // having to initialise every byte themselves.
    // SAFETY: `ptr` is non-null and valid for writes of `size` bytes, which
    // is exactly the size of the layout it was allocated with.
    unsafe { ptr.write_bytes(0, size) };
    (ptr, layout)
}
/// Registers the "SIMD Memory Copy" benchmark group.
///
/// For every buffer size from 8 B up to 1 MB three copy routines are timed:
/// `SimdMemOps::copy_nonoverlapping`, `slice::copy_from_slice` and
/// `std::ptr::copy_nonoverlapping`. Throughput is reported in bytes.
fn bench_memory_copy(c: &mut Criterion) {
    const KIB: usize = 1024;

    let mut group = c.benchmark_group("SIMD Memory Copy");
    // (benchmark parameter label, buffer length in bytes)
    let cases = [
        ("tiny_8B", 8),
        ("small_16B", 16),
        ("small_32B", 32),
        ("small_64B", 64),
        ("medium_128B", 128),
        ("medium_256B", 256),
        ("medium_512B", 512),
        ("medium_1KB", KIB),
        ("medium_2KB", 2 * KIB),
        ("medium_4KB", 4 * KIB),
        ("large_8KB", 8 * KIB),
        ("large_16KB", 16 * KIB),
        ("large_32KB", 32 * KIB),
        ("large_64KB", 64 * KIB),
        ("large_128KB", 128 * KIB),
        ("large_256KB", 256 * KIB),
        ("large_512KB", 512 * KIB),
        ("large_1MB", 1024 * KIB),
    ];
    let ops = SimdMemOps::new();

    for &(label, len) in cases.iter() {
        group.throughput(Throughput::Bytes(len as u64));

        // Library implementation under test.
        group.bench_function(BenchmarkId::new("SIMD", label), |b| {
            let source = generate_test_data(len);
            let mut target = vec![0u8; len];
            b.iter(|| {
                ops.copy_nonoverlapping(black_box(&source), black_box(&mut target))
                    .expect("SIMD copy failed");
            });
        });

        // Safe standard-library baseline.
        group.bench_function(BenchmarkId::new("std_copy_from_slice", label), |b| {
            let source = generate_test_data(len);
            let mut target = vec![0u8; len];
            b.iter(|| {
                black_box(&mut target).copy_from_slice(black_box(&source));
            });
        });

        // Raw-pointer standard-library baseline.
        group.bench_function(BenchmarkId::new("ptr_copy_nonoverlapping", label), |b| {
            let source = generate_test_data(len);
            let mut target = vec![0u8; len];
            b.iter(|| {
                // SAFETY: `source` and `target` are distinct vectors, each
                // holding exactly `len` bytes.
                unsafe {
                    std::ptr::copy_nonoverlapping(
                        black_box(source.as_ptr()),
                        black_box(target.as_mut_ptr()),
                        len,
                    );
                }
            });
        });
    }

    group.finish();
}
/// Registers the "SIMD Aligned Memory Copy" benchmark group.
///
/// Each size is rounded up to a multiple of the 64-byte cache line and then
/// copied twice: once through `SimdMemOps::copy_aligned` on buffers allocated
/// with cache-line alignment, and once through
/// `SimdMemOps::copy_nonoverlapping` on ordinary `Vec<u8>` buffers for
/// comparison. Throughput is reported in bytes of the rounded size.
fn bench_aligned_memory_copy(c: &mut Criterion) {
    let mut group = c.benchmark_group("SIMD Aligned Memory Copy");
    let test_sizes = vec![
        ("aligned_64B", 64),
        ("aligned_128B", 128),
        ("aligned_256B", 256),
        ("aligned_512B", 512),
        ("aligned_1KB", 1024),
        ("aligned_4KB", 4096),
        ("aligned_16KB", 16384),
        ("aligned_64KB", 65536),
    ];
    let simd_ops = SimdMemOps::new();
    const CACHE_LINE_SIZE: usize = 64;
    for (name, size) in test_sizes {
        // Round the length up to a whole number of cache lines.
        let aligned_size = (size + CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE * CACHE_LINE_SIZE;
        group.throughput(Throughput::Bytes(aligned_size as u64));
        group.bench_function(BenchmarkId::new("SIMD_aligned", name), |b| {
            let (src_ptr, src_layout) = generate_aligned_buffer(aligned_size, CACHE_LINE_SIZE);
            let (dst_ptr, dst_layout) = generate_aligned_buffer(aligned_size, CACHE_LINE_SIZE);
            // SAFETY: both pointers are non-null (checked by the allocation
            // helper) and valid for `aligned_size` bytes.
            unsafe {
                for i in 0..aligned_size {
                    *src_ptr.add(i) = ((i * 17 + 13) % 256) as u8;
                }
                // Initialise the destination explicitly: forming a
                // `&mut [u8]` over uninitialised memory violates the safety
                // contract of `slice::from_raw_parts_mut`.
                dst_ptr.write_bytes(0, aligned_size);
            }
            b.iter(|| {
                // SAFETY: both buffers are live, fully initialised, hold
                // `aligned_size` bytes each and do not overlap; no other
                // reference to them exists while these slices are in use.
                unsafe {
                    let src_slice = std::slice::from_raw_parts(src_ptr, aligned_size);
                    let dst_slice = std::slice::from_raw_parts_mut(dst_ptr, aligned_size);
                    simd_ops.copy_aligned(black_box(src_slice), black_box(dst_slice))
                        .expect("Aligned copy failed");
                }
            });
            // SAFETY: each pointer is released exactly once with the layout
            // it was allocated with, and is not used afterwards.
            unsafe {
                dealloc(src_ptr, src_layout);
                dealloc(dst_ptr, dst_layout);
            }
        });
        // Same length, but on ordinary heap vectors without the explicit
        // cache-line alignment request.
        group.bench_function(BenchmarkId::new("SIMD_unaligned", name), |b| {
            let src = generate_test_data(aligned_size);
            let mut dst = vec![0u8; aligned_size];
            b.iter(|| {
                simd_ops.copy_nonoverlapping(black_box(&src), black_box(&mut dst))
                    .expect("SIMD copy failed");
            });
        });
    }
    group.finish();
}
/// Registers the "SIMD Memory Compare" benchmark group.
///
/// Per size, four variants are timed: `SimdMemOps::compare` on identical
/// buffers, `SimdMemOps::compare` on buffers that differ at index `size / 4`,
/// `Vec` equality, and an element-wise iterator comparison. Throughput is
/// reported in bytes.
fn bench_memory_compare(c: &mut Criterion) {
    let mut group = c.benchmark_group("SIMD Memory Compare");
    // (benchmark parameter label, buffer length in bytes)
    let cases = [
        ("small_32B", 32),
        ("small_64B", 64),
        ("medium_256B", 256),
        ("medium_1KB", 1024),
        ("medium_4KB", 4096),
        ("large_16KB", 16384),
        ("large_64KB", 65536),
    ];
    let ops = SimdMemOps::new();

    for &(label, len) in cases.iter() {
        group.throughput(Throughput::Bytes(len as u64));

        // Identical inputs: the whole buffer has to be examined.
        group.bench_function(BenchmarkId::new("SIMD_equal", label), |b| {
            let left = generate_test_data(len);
            let right = left.clone();
            b.iter(|| {
                black_box(ops.compare(black_box(&left), black_box(&right)));
            });
        });

        // Inputs that diverge a quarter of the way in.
        group.bench_function(BenchmarkId::new("SIMD_diff_25pct", label), |b| {
            let left = generate_test_data(len);
            let mut right = left.clone();
            if len > 4 {
                let slot = &mut right[len / 4];
                *slot = slot.wrapping_add(1);
            }
            b.iter(|| {
                black_box(ops.compare(black_box(&left), black_box(&right)));
            });
        });

        // Baseline: `==` on the two vectors.
        group.bench_function(BenchmarkId::new("std_cmp", label), |b| {
            let left = generate_test_data(len);
            let right = left.clone();
            b.iter(|| {
                black_box(black_box(&left) == black_box(&right));
            });
        });

        // Baseline: byte-by-byte comparison through zipped iterators.
        group.bench_function(BenchmarkId::new("iter_cmp", label), |b| {
            let left = generate_test_data(len);
            let right = left.clone();
            b.iter(|| {
                let lhs = black_box(&left).iter();
                let rhs = black_box(&right).iter();
                black_box(lhs.zip(rhs).all(|(x, y)| x == y));
            });
        });
    }

    group.finish();
}
/// Registers the "SIMD Memory Search" benchmark group.
///
/// Per size, `SimdMemOps::find_byte` is timed with the needle at index 0, at
/// index `size / 2`, and absent; `Iterator::position` with the needle at
/// `size / 2` serves as the baseline. Throughput is reported in bytes.
///
/// The pattern from `generate_test_data` (`(i * 17 + 13) % 256`) visits every
/// byte value within its first 256 elements: 0xBB occurs at index 206 and
/// 0xFF at index 210. Left as-is, the "middle" search would stop at 206 for
/// larger buffers and the "none" search would find a match at 210. The
/// haystacks are therefore scrubbed of the needle before it is placed.
fn bench_memory_search(c: &mut Criterion) {
    /// Test pattern of `size` bytes in which every natural occurrence of
    /// `needle` has been replaced by a different byte value.
    fn haystack_without(size: usize, needle: u8) -> Vec<u8> {
        let substitute = needle.wrapping_add(1);
        generate_test_data(size)
            .into_iter()
            .map(|byte| if byte == needle { substitute } else { byte })
            .collect()
    }

    let mut group = c.benchmark_group("SIMD Memory Search");
    let test_sizes = [
        ("small_64B", 64),
        ("medium_256B", 256),
        ("medium_1KB", 1024),
        ("medium_4KB", 4096),
        ("large_16KB", 16384),
        ("large_64KB", 65536),
        ("large_256KB", 262144),
    ];
    let simd_ops = SimdMemOps::new();
    for &(name, size) in test_sizes.iter() {
        group.throughput(Throughput::Bytes(size as u64));

        // Best case: the needle is the very first byte.
        group.bench_function(BenchmarkId::new("SIMD_find_first", name), |b| {
            let mut data = generate_test_data(size);
            let needle = 0xAA;
            data[0] = needle;
            b.iter(|| {
                black_box(simd_ops.find_byte(black_box(&data), needle));
            });
        });

        // The only occurrence of the needle is exactly at the midpoint.
        group.bench_function(BenchmarkId::new("SIMD_find_middle", name), |b| {
            let needle: u8 = 0xBB;
            let mut data = haystack_without(size, needle);
            data[size / 2] = needle;
            b.iter(|| {
                black_box(simd_ops.find_byte(black_box(&data), needle));
            });
        });

        // Worst case: the needle is absent, so the whole buffer is scanned.
        group.bench_function(BenchmarkId::new("SIMD_find_none", name), |b| {
            let needle: u8 = 0xFF;
            let data = haystack_without(size, needle);
            b.iter(|| {
                black_box(simd_ops.find_byte(black_box(&data), needle));
            });
        });

        // Baseline with the same input shape as "SIMD_find_middle".
        group.bench_function(BenchmarkId::new("std_position", name), |b| {
            let needle: u8 = 0xBB;
            let mut data = haystack_without(size, needle);
            data[size / 2] = needle;
            b.iter(|| {
                black_box(black_box(&data).iter().position(|&x| x == needle));
            });
        });
    }
    group.finish();
}
/// Registers the "SIMD Memory Fill" benchmark group.
///
/// Per size, filling a buffer with the byte 0x42 is timed through
/// `SimdMemOps::fill`, `slice::fill` and `std::ptr::write_bytes`. Throughput
/// is reported in bytes.
fn bench_memory_fill(c: &mut Criterion) {
    let mut group = c.benchmark_group("SIMD Memory Fill");
    // (benchmark parameter label, buffer length in bytes)
    let cases = [
        ("small_64B", 64),
        ("medium_256B", 256),
        ("medium_1KB", 1024),
        ("medium_4KB", 4096),
        ("large_16KB", 16384),
        ("large_64KB", 65536),
        ("large_256KB", 262144),
    ];
    let ops = SimdMemOps::new();

    for &(label, len) in cases.iter() {
        group.throughput(Throughput::Bytes(len as u64));

        // Library implementation under test.
        group.bench_function(BenchmarkId::new("SIMD", label), |b| {
            let mut scratch = vec![0u8; len];
            let fill_byte = 0x42;
            b.iter(|| {
                ops.fill(black_box(&mut scratch), fill_byte);
            });
        });

        // Safe standard-library baseline.
        group.bench_function(BenchmarkId::new("std_fill", label), |b| {
            let mut scratch = vec![0u8; len];
            let fill_byte = 0x42;
            b.iter(|| {
                black_box(&mut scratch).fill(fill_byte);
            });
        });

        // Raw-pointer standard-library baseline.
        group.bench_function(BenchmarkId::new("ptr_write_bytes", label), |b| {
            let mut scratch = vec![0u8; len];
            let fill_byte = 0x42;
            b.iter(|| {
                // SAFETY: `scratch` owns exactly `len` writable bytes.
                unsafe {
                    std::ptr::write_bytes(black_box(scratch.as_mut_ptr()), fill_byte, len);
                }
            });
        });
    }

    group.finish();
}
/// Registers the "SIMD Tier Comparison" benchmark group.
///
/// Prints the tier and CPU features reported by `SimdMemOps`, then times
/// copy, compare and search at several sizes, tagging each benchmark name
/// with the detected tier. Throughput is reported in bytes.
///
/// The search case is meant to scan the full buffer. The raw pattern from
/// `generate_test_data` (`(i * 17 + 13) % 256`) contains 0xFF at index 210,
/// so every natural occurrence of the needle is replaced before searching;
/// otherwise buffers of 256 bytes and more would match early while
/// throughput is still reported over the full size.
fn bench_simd_tiers(c: &mut Criterion) {
    /// Test pattern of `size` bytes in which every natural occurrence of
    /// `needle` has been replaced by a different byte value.
    fn haystack_without(size: usize, needle: u8) -> Vec<u8> {
        let substitute = needle.wrapping_add(1);
        generate_test_data(size)
            .into_iter()
            .map(|byte| if byte == needle { substitute } else { byte })
            .collect()
    }

    let mut group = c.benchmark_group("SIMD Tier Comparison");
    let simd_ops = SimdMemOps::new();
    let detected_tier = simd_ops.tier();
    println!("Detected SIMD tier: {:?}", detected_tier);
    println!("CPU Features: {:?}", simd_ops.cpu_features());
    let test_cases = [
        ("small_32B", 32),
        ("medium_256B", 256),
        ("medium_1KB", 1024),
        ("large_16KB", 16384),
        ("large_64KB", 65536),
    ];
    for &(name, size) in test_cases.iter() {
        group.throughput(Throughput::Bytes(size as u64));
        group.bench_function(
            BenchmarkId::new(format!("Copy_{:?}", detected_tier), name),
            |b| {
                let src = generate_test_data(size);
                let mut dst = vec![0u8; size];
                b.iter(|| {
                    simd_ops.copy_nonoverlapping(black_box(&src), black_box(&mut dst))
                        .expect("SIMD copy failed");
                });
            },
        );
        group.bench_function(
            BenchmarkId::new(format!("Compare_{:?}", detected_tier), name),
            |b| {
                // Identical buffers, so the comparison covers every byte.
                let data1 = generate_test_data(size);
                let data2 = data1.clone();
                b.iter(|| {
                    black_box(simd_ops.compare(black_box(&data1), black_box(&data2)));
                });
            },
        );
        group.bench_function(
            BenchmarkId::new(format!("Search_{:?}", detected_tier), name),
            |b| {
                // Needle is guaranteed absent, forcing a full scan.
                let needle: u8 = 0xFF;
                let data = haystack_without(size, needle);
                b.iter(|| {
                    black_box(simd_ops.find_byte(black_box(&data), needle));
                });
            },
        );
    }
    group.finish();
}
/// Registers the "SIMD Mixed Workload" benchmark group.
///
/// Two composite routines are timed:
/// * `process_buffer_chain` copies a 4096-byte buffer, searches it, fills a
///   second buffer and compares the two.
/// * `conditional_memory_ops` copies a 1024-byte buffer only when it differs
///   from the destination, then zeroes the destination from the first 0x42
///   byte onwards.
fn bench_mixed_workload(c: &mut Criterion) {
    let mut group = c.benchmark_group("SIMD Mixed Workload");
    let ops = SimdMemOps::new();

    group.bench_function("process_buffer_chain", |b| {
        let len = 4096;
        let input = generate_test_data(len);
        let mut staging = vec![0u8; len];
        let mut filler = vec![0u8; len];
        let pattern = 0x00;
        b.iter(|| {
            ops.copy_nonoverlapping(&input, &mut staging).unwrap();
            let hit = ops.find_byte(&staging, pattern);
            ops.fill(&mut filler, 0xFF);
            let outcome = ops.compare(&staging, &filler);
            black_box((hit, outcome));
        });
    });

    group.bench_function("conditional_memory_ops", |b| {
        let len = 1024;
        let input = generate_test_data(len);
        let mut output = vec![0u8; len];
        b.iter(|| {
            // Refresh the destination only when it no longer matches.
            let differs = ops.compare(&input, &output) != 0;
            if differs {
                ops.copy_nonoverlapping(&input, &mut output).unwrap();
            }
            // Zero the tail starting at the first 0x42 byte, if any.
            match ops.find_byte(&output, 0x42) {
                Some(start) if start < len => {
                    ops.fill(&mut output[start..], 0x00);
                }
                _ => {}
            }
            black_box(&output);
        });
    });

    group.finish();
}
/// Registers the "Performance Target Analysis" benchmark group.
///
/// For three size classes (small, medium, large) every size is copied once
/// with `SimdMemOps::copy_nonoverlapping` and once with
/// `slice::copy_from_slice`, so the two can be compared side by side.
/// The detected CPU features and SIMD tier are printed before the run and a
/// summary of the performance targets afterwards.
fn bench_performance_targets(c: &mut Criterion) {
    let mut group = c.benchmark_group("Performance Target Analysis");
    group.sample_size(100);
    group.measurement_time(std::time::Duration::from_secs(10));
    let ops = SimdMemOps::new();
    println!("\n=== SIMD Memory Operations Performance Target Analysis ===");
    println!("CPU Features: {:?}", ops.cpu_features());
    println!("Selected SIMD Tier: {:?}", ops.tier());

    let small: &[usize] = &[8, 16, 32, 64];
    let medium: &[usize] = &[128, 256, 512, 1024, 2048, 4096];
    let large: &[usize] = &[8192, 16384, 32768, 65536, 131072];
    // (class name used in the benchmark id, sizes in bytes,
    //  unit suffix for the parameter label, divisor converting bytes to that unit)
    let classes = [
        ("Small", small, "B", 1),
        ("Medium", medium, "B", 1),
        ("Large", large, "KB", 1024),
    ];

    for &(class, sizes, unit, divisor) in classes.iter() {
        for &size in sizes {
            group.throughput(Throughput::Bytes(size as u64));
            let param = format!("{}{}", size / divisor, unit);

            // Library implementation under test.
            group.bench_function(
                BenchmarkId::new(format!("Target_{}_SIMD", class), &param),
                |b| {
                    let source = generate_test_data(size);
                    let mut target = vec![0u8; size];
                    b.iter(|| {
                        ops.copy_nonoverlapping(black_box(&source), black_box(&mut target))
                            .expect("SIMD copy failed");
                    });
                },
            );

            // Standard-library baseline.
            group.bench_function(
                BenchmarkId::new(format!("Target_{}_Std", class), &param),
                |b| {
                    let source = generate_test_data(size);
                    let mut target = vec![0u8; size];
                    b.iter(|| {
                        black_box(&mut target).copy_from_slice(black_box(&source));
                    });
                },
            );
        }
    }

    group.finish();
    println!("\n=== Target Performance Summary ===");
    println!("Small copies (≤64B): Target 2-3x faster than memcpy");
    println!("Medium copies (64-4096B): Target 1.5-2x faster with prefetching");
    println!("Large copies (>4KB): Target match or exceed system memcpy");
    println!("Run 'cargo bench --bench simd_memory_bench' to see detailed results");
    println!("HTML report available at target/criterion/report/index.html");
}
// Per-operation benchmarks (copy, aligned copy, compare, search, fill),
// registered without an explicit Criterion configuration.
criterion_group!(
memory_ops,
bench_memory_copy,
bench_aligned_memory_copy,
bench_memory_compare,
bench_memory_search,
bench_memory_fill,
);
// Benchmarks tagged with the detected SIMD tier, plus the composite
// mixed-workload routines.
criterion_group!(
tier_analysis,
bench_simd_tiers,
bench_mixed_workload,
);
// SIMD-vs-std target comparison, run with an explicit configuration of
// 100 samples and a 10-second measurement time.
criterion_group!(
name = performance_targets;
config = Criterion::default()
.sample_size(100)
.measurement_time(std::time::Duration::from_secs(10));
targets = bench_performance_targets
);
// Benchmark binary entry point: runs the three groups in the order listed.
criterion_main!(memory_ops, tier_analysis, performance_targets);