use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use numrs2::parallel::parallel_algorithms::{ParallelArrayOps, ParallelConfig};
use std::hint::black_box;
/// Input lengths (element counts) iterated by the benchmarks that sweep over `SIZES`.
const SIZES: &[usize] = &[10_000, 100_000, 1_000_000, 10_000_000];
/// Thread counts swept by the benchmarks; each value is passed to `create_config`.
const THREAD_COUNTS: &[usize] = &[1, 2, 4, 8];
/// Builds the [`ParallelConfig`] used by every benchmark in this file.
///
/// Only the `num_threads` field depends on the argument; all other fields are
/// set to the same fixed values on every call, so runs with different thread
/// counts share otherwise identical settings.
fn create_config(num_threads: usize) -> ParallelConfig {
    let thread_limit = Some(num_threads);
    ParallelConfig {
        chunk_size: 256,
        block_size: 64,
        parallel_threshold: 1000,
        numa_aware: false,
        num_threads: thread_limit,
    }
}
/// Measures `parallel_map` on inputs of 100_000, 1_000_000 and 10_000_000
/// `f64` elements for every entry of `THREAD_COUNTS`.
///
/// Benchmarks are registered in the `parallel_map_scaling` group under the id
/// `threads/<threads>t_<size>`, with throughput expressed in elements.
fn bench_parallel_map_scaling(c: &mut Criterion) {
    let mut group = c.benchmark_group("parallel_map_scaling");

    for &len in &[100_000, 1_000_000, 10_000_000] {
        group.throughput(Throughput::Elements(len as u64));
        let input: Vec<f64> = (0..len).map(|i| i as f64).collect();

        for &num_threads in THREAD_COUNTS {
            let ops = ParallelArrayOps::new(create_config(num_threads))
                .expect("Failed to create parallel ops");
            // The destination buffer is allocated once and overwritten by every iteration.
            let mut output = vec![0.0f64; len];
            let id = BenchmarkId::new("threads", format!("{num_threads}t_{len}"));

            group.bench_with_input(id, &num_threads, |bencher, _| {
                bencher.iter(|| {
                    ops.parallel_map(&input, &mut output, |x| x.sqrt().sin() + x.cos())
                        .expect("parallel_map should succeed");
                    black_box(&output);
                });
            });
        }
    }

    group.finish();
}
/// Benchmarks `parallel_reduce` for every size in `SIZES` and every thread
/// count in `THREAD_COUNTS`.
///
/// Two reductions are registered in the `parallel_reduce` group:
/// * `sum/<threads>t_<size>` — addition with initial value `0.0`, for all sizes.
/// * `product/<threads>t_<size>` — multiplication with initial value `1.0`,
///   only for sizes up to 100_000.
fn bench_parallel_reduce(c: &mut Criterion) {
    let mut group = c.benchmark_group("parallel_reduce");

    for &size in SIZES {
        group.throughput(Throughput::Elements(size as u64));
        let data: Vec<f64> = (0..size).map(|i| i as f64).collect();

        for &num_threads in THREAD_COUNTS {
            let ops = ParallelArrayOps::new(create_config(num_threads))
                .expect("Failed to create parallel ops");
            group.bench_with_input(
                BenchmarkId::new("sum", format!("{}t_{}", num_threads, size)),
                &num_threads,
                |bencher, _| {
                    bencher.iter(|| {
                        let result = ops
                            .parallel_reduce(&data, 0.0, |a, b| a + b)
                            .expect("parallel_reduce should succeed");
                        black_box(result);
                    });
                },
            );
        }

        if size <= 100_000 {
            // The product input depends only on `size`, so build it once here
            // instead of re-allocating it for every thread count.
            // NOTE(review): the factors are 1.0 + i * 1e-5; at 100_000 elements
            // their full product is far beyond f64::MAX, so the reduction
            // presumably saturates to infinity — confirm that is acceptable
            // for what this benchmark is meant to measure.
            let small_data: Vec<f64> = (0..size).map(|i| 1.0 + (i as f64) * 0.00001).collect();

            for &num_threads in THREAD_COUNTS {
                let ops = ParallelArrayOps::new(create_config(num_threads))
                    .expect("Failed to create parallel ops");
                group.bench_with_input(
                    BenchmarkId::new("product", format!("{}t_{}", num_threads, size)),
                    &num_threads,
                    |bencher, _| {
                        bencher.iter(|| {
                            let result = ops
                                .parallel_reduce(&small_data, 1.0, |a, b| a * b)
                                .expect("parallel_reduce should succeed");
                            black_box(result);
                        });
                    },
                );
            }
        }
    }

    group.finish();
}
/// Benchmarks `parallel_filter` on `0..size` (`i32`) for sizes 100_000 and
/// 1_000_000, three predicate cut-offs, and every entry of `THREAD_COUNTS`.
///
/// The predicate keeps `x` when `x % 100` is below the cut-off (10, 50 or 90).
/// Ids in the `parallel_filter` group are `<label>/<threads>t_<size>`, where
/// `<label>` is `10pct`, `50pct` or `90pct`.
fn bench_parallel_filter(c: &mut Criterion) {
    let mut group = c.benchmark_group("parallel_filter");

    for &len in &[100_000, 1_000_000] {
        group.throughput(Throughput::Elements(len as u64));
        let data: Vec<i32> = (0..len).collect();

        // (benchmark label, exclusive upper bound applied to `x % 100`)
        let selectivities = [("10pct", 10), ("50pct", 50), ("90pct", 90)];

        for &(label, cutoff) in &selectivities {
            for &num_threads in THREAD_COUNTS {
                let ops = ParallelArrayOps::new(create_config(num_threads))
                    .expect("Failed to create parallel ops");
                let id = BenchmarkId::new(label, format!("{num_threads}t_{len}"));

                group.bench_with_input(id, &num_threads, |bencher, _| {
                    bencher.iter(|| {
                        let kept = ops
                            .parallel_filter(&data, |&x| (x % 100) < cutoff)
                            .expect("parallel_filter should succeed");
                        black_box(kept);
                    });
                });
            }
        }
    }

    group.finish();
}
fn bench_parallel_sort(c: &mut Criterion) {
let mut group = c.benchmark_group("parallel_sort");
group.sample_size(10);
for size in [10_000, 100_000, 1_000_000].iter() {
group.throughput(Throughput::Elements(*size as u64));
let patterns = [
(
"random",
(0..*size).map(|i| i * 7919 % *size).collect::<Vec<_>>(),
),
("sorted", (0..*size).collect::<Vec<_>>()),
("reverse", (0..*size).rev().collect::<Vec<_>>()),
];
for (pattern_name, pattern_data) in patterns.iter() {
for &num_threads in THREAD_COUNTS.iter() {
let config = create_config(num_threads);
let ops = ParallelArrayOps::new(config).expect("Failed to create parallel ops");
group.bench_with_input(
BenchmarkId::new(*pattern_name, format!("{}t_{}", num_threads, size)),
&num_threads,
|bencher, _| {
bencher.iter(|| {
let mut data = pattern_data.clone();
ops.parallel_sort(&mut data)
.expect("parallel_sort should succeed");
black_box(data);
});
},
);
}
}
}
group.finish();
}
/// Benchmarks `parallel_map_reduce` (map: `sqrt`, reduce: `+`, initial value
/// `0.0`) for every size in `SIZES` and every entry of `THREAD_COUNTS`.
///
/// Ids in the `parallel_map_reduce` group are `sqrt_sum/<threads>t_<size>`.
fn bench_parallel_map_reduce(c: &mut Criterion) {
    let mut group = c.benchmark_group("parallel_map_reduce");

    for &len in SIZES {
        group.throughput(Throughput::Elements(len as u64));
        let values: Vec<f64> = (0..len).map(|i| i as f64).collect();

        for &num_threads in THREAD_COUNTS {
            let ops = ParallelArrayOps::new(create_config(num_threads))
                .expect("Failed to create parallel ops");
            let id = BenchmarkId::new("sqrt_sum", format!("{num_threads}t_{len}"));

            group.bench_with_input(id, &num_threads, |bencher, _| {
                bencher.iter(|| {
                    let total = ops
                        .parallel_map_reduce(&values, |x| x.sqrt(), |a, b| a + b, 0.0)
                        .expect("parallel_map_reduce should succeed");
                    black_box(total);
                });
            });
        }
    }

    group.finish();
}
/// Benchmarks `parallel_prefix_sum` on `f64` inputs of 10_000, 100_000 and
/// 1_000_000 elements for every entry of `THREAD_COUNTS`.
///
/// Ids in the `parallel_prefix_sum` group are `prefix_sum/<threads>t_<size>`.
fn bench_parallel_prefix_sum(c: &mut Criterion) {
    let mut group = c.benchmark_group("parallel_prefix_sum");

    for &len in &[10_000, 100_000, 1_000_000] {
        group.throughput(Throughput::Elements(len as u64));
        let values: Vec<f64> = (0..len).map(|i| i as f64).collect();

        for &num_threads in THREAD_COUNTS {
            let ops = ParallelArrayOps::new(create_config(num_threads))
                .expect("Failed to create parallel ops");
            // Output buffer is reused across iterations of the same benchmark.
            let mut sums = vec![0.0f64; len];
            let id = BenchmarkId::new("prefix_sum", format!("{num_threads}t_{len}"));

            group.bench_with_input(id, &num_threads, |bencher, _| {
                bencher.iter(|| {
                    ops.parallel_prefix_sum(&values, &mut sums)
                        .expect("parallel_prefix_sum should succeed");
                    black_box(&sums);
                });
            });
        }
    }

    group.finish();
}
/// Benchmarks `parallel_map` (`sqrt` then `sin`) on a fixed 10_000_000-element
/// `f64` input while the thread count changes.
///
/// In the `strong_scaling` group, the one-thread run is registered as
/// `baseline_1thread`; runs with 2, 4 and 8 threads are registered as
/// `threads/<threads>`.
fn bench_strong_scaling(c: &mut Criterion) {
    let mut group = c.benchmark_group("strong_scaling");

    let len = 10_000_000;
    group.throughput(Throughput::Elements(len as u64));
    let input: Vec<f64> = (0..len).map(|i| i as f64).collect();

    // Single-threaded reference measurement.
    let baseline_ops =
        ParallelArrayOps::new(create_config(1)).expect("Failed to create parallel ops");
    let mut baseline_output = vec![0.0f64; len];
    group.bench_function("baseline_1thread", |bencher| {
        bencher.iter(|| {
            baseline_ops
                .parallel_map(&input, &mut baseline_output, |x| x.sqrt().sin())
                .expect("parallel_map should succeed");
            black_box(&baseline_output);
        });
    });

    // Same workload with more threads.
    for num_threads in [2, 4, 8] {
        let ops = ParallelArrayOps::new(create_config(num_threads))
            .expect("Failed to create parallel ops");
        let mut output = vec![0.0f64; len];
        let id = BenchmarkId::new("threads", num_threads);

        group.bench_with_input(id, &num_threads, |bencher, _| {
            bencher.iter(|| {
                ops.parallel_map(&input, &mut output, |x| x.sqrt().sin())
                    .expect("parallel_map should succeed");
                black_box(&output);
            });
        });
    }

    group.finish();
}
/// Benchmarks `parallel_map` (`sqrt` then `sin`) with the input length grown
/// in proportion to the thread count: 1_000_000 elements per thread for each
/// entry of `THREAD_COUNTS`.
///
/// Ids in the `weak_scaling` group are
/// `size_per_thread/<threads>t_<total elements>elem`.
fn bench_weak_scaling(c: &mut Criterion) {
    let mut group = c.benchmark_group("weak_scaling");
    let per_thread = 1_000_000;

    for &num_threads in THREAD_COUNTS {
        let total = per_thread * num_threads;
        // Throughput is set per run because the element count changes with the thread count.
        group.throughput(Throughput::Elements(total as u64));

        let input: Vec<f64> = (0..total).map(|i| i as f64).collect();
        let mut output = vec![0.0f64; total];
        let ops = ParallelArrayOps::new(create_config(num_threads))
            .expect("Failed to create parallel ops");
        let id = BenchmarkId::new("size_per_thread", format!("{num_threads}t_{total}elem"));

        group.bench_with_input(id, &num_threads, |bencher, _| {
            bencher.iter(|| {
                ops.parallel_map(&input, &mut output, |x| x.sqrt().sin())
                    .expect("parallel_map should succeed");
                black_box(&output);
            });
        });
    }

    group.finish();
}
/// Benchmarks `parallel_binary_op` with element-wise addition and
/// multiplication on two `f64` inputs of 100_000, 1_000_000 and 10_000_000
/// elements, for every entry of `THREAD_COUNTS`.
///
/// Ids in the `parallel_binary_op` group are `add/<threads>t_<size>` and
/// `mul/<threads>t_<size>`.
fn bench_parallel_binary_op(c: &mut Criterion) {
    let mut group = c.benchmark_group("parallel_binary_op");

    for &len in &[100_000, 1_000_000, 10_000_000] {
        group.throughput(Throughput::Elements(len as u64));
        let lhs: Vec<f64> = (0..len).map(|i| i as f64).collect();
        let rhs: Vec<f64> = (0..len).map(|i| (i as f64) * 2.0).collect();

        for &num_threads in THREAD_COUNTS {
            let ops = ParallelArrayOps::new(create_config(num_threads))
                .expect("Failed to create parallel ops");
            // One output buffer serves both the `add` and the `mul` benchmark.
            let mut out = vec![0.0f64; len];

            let add_id = BenchmarkId::new("add", format!("{num_threads}t_{len}"));
            group.bench_with_input(add_id, &num_threads, |bencher, _| {
                bencher.iter(|| {
                    ops.parallel_binary_op(&lhs, &rhs, &mut out, |x, y| x + y)
                        .expect("parallel_binary_op should succeed");
                    black_box(&out);
                });
            });

            let mul_id = BenchmarkId::new("mul", format!("{num_threads}t_{len}"));
            group.bench_with_input(mul_id, &num_threads, |bencher, _| {
                bencher.iter(|| {
                    ops.parallel_binary_op(&lhs, &rhs, &mut out, |x, y| x * y)
                        .expect("parallel_binary_op should succeed");
                    black_box(&out);
                });
            });
        }
    }

    group.finish();
}
/// Benchmarks `parallel_map` on 100_000 `f64` elements where the amount of
/// work per element varies: element `x` runs `(x % 100) + 1` rounds of
/// `sqrt() + 0.1`.
///
/// Ids in the `irregular_workload` group are `variable_work/<threads>t`, one
/// per entry of `THREAD_COUNTS`.
fn bench_irregular_workload(c: &mut Criterion) {
    let mut group = c.benchmark_group("irregular_workload");

    let len = 100_000;
    group.throughput(Throughput::Elements(len as u64));
    let input: Vec<f64> = (0..len).map(|i| i as f64).collect();

    for &num_threads in THREAD_COUNTS {
        let ops = ParallelArrayOps::new(create_config(num_threads))
            .expect("Failed to create parallel ops");
        let mut output = vec![0.0f64; len];
        let id = BenchmarkId::new("variable_work", format!("{num_threads}t"));

        group.bench_with_input(id, &num_threads, |bencher, _| {
            bencher.iter(|| {
                ops.parallel_map(&input, &mut output, |x| {
                    // Between 1 and 100 rounds, depending on the element's value.
                    let rounds = ((x % 100.0) as usize) + 1;
                    let mut acc = x;
                    for _ in 0..rounds {
                        acc = acc.sqrt() + 0.1;
                    }
                    acc
                })
                .expect("parallel_map should succeed");
                black_box(&output);
            });
        });
    }

    group.finish();
}
// Collects every benchmark function defined above into the `benches` group,
// then hands that group to `criterion_main!`, which provides the binary's
// entry point.
criterion_group!(
benches,
bench_parallel_map_scaling,
bench_parallel_reduce,
bench_parallel_filter,
bench_parallel_sort,
bench_parallel_map_reduce,
bench_parallel_prefix_sum,
bench_strong_scaling,
bench_weak_scaling,
bench_parallel_binary_op,
bench_irregular_workload,
);
criterion_main!(benches);