#![allow(clippy::disallowed_methods, clippy::float_cmp)]
#![cfg(feature = "gpu")]
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use std::hint::black_box;
use trueno::backends::gpu::GpuBackend;
/// Fills a `width * height` buffer with a deterministic ramp of small floats.
///
/// Values cycle through `0.000, 0.001, …, 0.999` so every benchmark run
/// operates on identical, reproducible input data.
fn generate_matrix_data(width: usize, height: usize) -> Vec<f32> {
    let len = width * height;
    let mut data = Vec::with_capacity(len);
    for i in 0..len {
        data.push(((i % 1000) as f32) * 0.001);
    }
    data
}
/// Benchmarks the GPU tiled 2D sum reduction against a scalar CPU baseline.
///
/// Skips (with a message on stderr) when no GPU backend is available, so the
/// benchmark suite still runs cleanly on CPU-only machines.
fn bench_tiled_sum(c: &mut Criterion) {
    if !GpuBackend::is_available() {
        eprintln!("GPU not available, skipping tiled reduction benchmarks");
        return;
    }
    let mut group = c.benchmark_group("tiled_sum_reduction");
    // Reducing tens of millions of elements is slow; fewer samples keep total runtime sane.
    group.sample_size(20);
    // (width, height, label) — labels name the approximate total element count.
    // A fixed array (not `vec!`) avoids a pointless heap allocation (clippy::useless_vec),
    // and iterating by value works because the tuples are `Copy`.
    let sizes: [(usize, usize, &str); 3] =
        [(1000, 1000, "1M"), (3162, 3163, "10M"), (5657, 5657, "32M")];
    for (width, height, label) in sizes {
        group.throughput(Throughput::Elements((width * height) as u64));
        group.bench_with_input(
            BenchmarkId::new("GPU_Tiled", label),
            &(width, height),
            |bencher, &(w, h)| {
                // Input generation and backend setup happen once, outside the timed loop.
                let data = generate_matrix_data(w, h);
                let mut gpu = GpuBackend::new();
                bencher.iter(|| {
                    black_box(gpu.tiled_sum_2d_gpu(&data, w, h).unwrap());
                });
            },
        );
        group.bench_with_input(
            BenchmarkId::new("Scalar", label),
            &(width, height),
            |bencher, &(w, h)| {
                let data = generate_matrix_data(w, h);
                bencher.iter(|| {
                    let sum: f32 = data.iter().sum();
                    black_box(sum);
                });
            },
        );
    }
    group.finish();
}
/// Benchmarks the GPU tiled 2D max reduction against a scalar CPU fold.
///
/// Bails out early (with a message on stderr) when no GPU backend is available.
fn bench_tiled_max(c: &mut Criterion) {
    if !GpuBackend::is_available() {
        eprintln!("GPU not available, skipping tiled reduction benchmarks");
        return;
    }
    let mut group = c.benchmark_group("tiled_max_reduction");
    // Large reductions are slow; a reduced sample count keeps the run time reasonable.
    group.sample_size(20);
    // Matrix dimensions chosen so width * height lands near each labeled element count.
    let cases = [(1000usize, 1000usize, "1M"), (3162, 3163, "10M"), (5657, 5657, "32M")];
    for (width, height, label) in cases {
        group.throughput(Throughput::Elements((width * height) as u64));
        let dims = (width, height);
        group.bench_with_input(BenchmarkId::new("GPU_Tiled", label), &dims, |bencher, &(w, h)| {
            // Data and backend are prepared outside the timed closure.
            let data = generate_matrix_data(w, h);
            let mut gpu = GpuBackend::new();
            bencher.iter(|| {
                black_box(gpu.tiled_max_2d_gpu(&data, w, h).unwrap());
            });
        });
        group.bench_with_input(BenchmarkId::new("Scalar", label), &dims, |bencher, &(w, h)| {
            let data = generate_matrix_data(w, h);
            bencher.iter(|| {
                let max_val = data.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
                black_box(max_val);
            });
        });
    }
    group.finish();
}
/// Benchmarks the GPU tiled 2D min reduction against a scalar CPU fold.
///
/// Bails out early (with a message on stderr) when no GPU backend is available.
fn bench_tiled_min(c: &mut Criterion) {
    if !GpuBackend::is_available() {
        eprintln!("GPU not available, skipping tiled reduction benchmarks");
        return;
    }
    let mut group = c.benchmark_group("tiled_min_reduction");
    // Large reductions are slow; a reduced sample count keeps the run time reasonable.
    group.sample_size(20);
    // Matrix dimensions chosen so width * height lands near each labeled element count.
    let cases = [(1000usize, 1000usize, "1M"), (3162, 3163, "10M"), (5657, 5657, "32M")];
    for (width, height, label) in cases {
        group.throughput(Throughput::Elements((width * height) as u64));
        let dims = (width, height);
        group.bench_with_input(BenchmarkId::new("GPU_Tiled", label), &dims, |bencher, &(w, h)| {
            // Data and backend are prepared outside the timed closure.
            let data = generate_matrix_data(w, h);
            let mut gpu = GpuBackend::new();
            bencher.iter(|| {
                black_box(gpu.tiled_min_2d_gpu(&data, w, h).unwrap());
            });
        });
        group.bench_with_input(BenchmarkId::new("Scalar", label), &dims, |bencher, &(w, h)| {
            let data = generate_matrix_data(w, h);
            bencher.iter(|| {
                let min_val = data.iter().cloned().fold(f32::INFINITY, f32::min);
                black_box(min_val);
            });
        });
    }
    group.finish();
}
// Register the three tiled-reduction benchmark functions as one criterion group.
criterion_group!(benches, bench_tiled_sum, bench_tiled_max, bench_tiled_min);
// Generate the `main` entry point that runs the `benches` group.
criterion_main!(benches);