#![allow(clippy::result_large_err)]
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use numrs2::nn::activation::*;
use numrs2::nn::conv::*;
use numrs2::nn::loss::*;
use numrs2::nn::normalization::*;
use numrs2::nn::pooling::*;
use numrs2::nn::simd_ops::*;
use numrs2::nn::ReductionMode;
use scirs2_core::ndarray::{Array1, Array2};
use std::hint::black_box;
fn bench_activation_functions(c: &mut Criterion) {
let mut group = c.benchmark_group("activation_functions");
let sizes = vec![100, 1000, 10000, 100000];
for size in sizes {
group.throughput(Throughput::Elements(size as u64));
let x_f32 = Array1::from_vec((0..size).map(|i| (i as f32) / 100.0 - 5.0).collect());
let x_f64 = Array1::from_vec((0..size).map(|i| (i as f64) / 100.0 - 5.0).collect());
group.bench_with_input(BenchmarkId::new("relu_f64", size), &size, |b, _| {
b.iter(|| relu(&black_box(x_f64.view())))
});
group.bench_with_input(BenchmarkId::new("simd_relu_f32", size), &size, |b, _| {
b.iter(|| simd_relu_f32(&black_box(x_f32.view())))
});
group.bench_with_input(BenchmarkId::new("sigmoid_f64", size), &size, |b, _| {
b.iter(|| sigmoid(&black_box(x_f64.view())))
});
group.bench_with_input(BenchmarkId::new("simd_sigmoid_f32", size), &size, |b, _| {
b.iter(|| simd_sigmoid_f32(&black_box(x_f32.view())))
});
group.bench_with_input(BenchmarkId::new("tanh_f64", size), &size, |b, _| {
b.iter(|| tanh(&black_box(x_f64.view())))
});
group.bench_with_input(BenchmarkId::new("simd_tanh_f32", size), &size, |b, _| {
b.iter(|| simd_tanh_f32(&black_box(x_f32.view())))
});
group.bench_with_input(BenchmarkId::new("gelu_f64", size), &size, |b, _| {
b.iter(|| gelu(&black_box(x_f64.view())))
});
group.bench_with_input(BenchmarkId::new("simd_gelu_f32", size), &size, |b, _| {
b.iter(|| simd_gelu_f32(&black_box(x_f32.view())))
});
group.bench_with_input(BenchmarkId::new("swish_f64", size), &size, |b, _| {
b.iter(|| swish(&black_box(x_f64.view())))
});
group.bench_with_input(BenchmarkId::new("simd_swish_f32", size), &size, |b, _| {
b.iter(|| simd_swish_f32(&black_box(x_f32.view())))
});
group.bench_with_input(BenchmarkId::new("mish_f64", size), &size, |b, _| {
b.iter(|| mish(&black_box(x_f64.view())))
});
group.bench_with_input(BenchmarkId::new("simd_mish_f32", size), &size, |b, _| {
b.iter(|| simd_mish_f32(&black_box(x_f32.view())))
});
group.bench_with_input(BenchmarkId::new("softmax_f64", size), &size, |b, _| {
b.iter(|| softmax(&black_box(x_f64.view())))
});
}
group.finish();
}
/// Benchmarks `conv1d` and `conv2d`.
///
/// The 1-D part convolves a 1000-element `f64` ramp (`0, 1, 2, ...`) with a 3-tap
/// kernel `[1, 0, -1]` and with an 11-tap all-ones kernel, both with a stride
/// argument of `1`.
///
/// The 2-D part runs every combination of a square input (32, 64 or 128 per side,
/// filled with `row + column`) and a square kernel (3x3 or 5x5, ones on the diagonal
/// and zeros elsewhere) with a `(1, 1)` stride argument. Throughput for the 2-D cases
/// is declared as the number of input elements.
fn bench_convolution(c: &mut Criterion) {
    let mut group = c.benchmark_group("convolution");

    // --- 1-D convolution -------------------------------------------------------------
    let signal = Array1::from_vec((0..1000).map(|k| k as f64).collect());
    let kernel_len3 = Array1::from_vec(vec![1.0, 0.0, -1.0]);
    let kernel_len11 = Array1::from_vec(vec![1.0; 11]);

    for (label, kernel) in [
        ("conv1d_small_kernel", &kernel_len3),
        ("conv1d_large_kernel", &kernel_len11),
    ] {
        group.bench_function(label, |b| {
            b.iter(|| conv1d(&black_box(signal.view()), &black_box(kernel.view()), 1))
        });
    }

    // --- 2-D convolution -------------------------------------------------------------
    let kernel_shapes = [(3, 3), (5, 5)];
    for (rows, cols) in [(32, 32), (64, 64), (128, 128)] {
        for (k_rows, k_cols) in kernel_shapes {
            let image = Array2::from_shape_fn((rows, cols), |(y, x)| (y + x) as f64);
            let kernel = Array2::from_shape_fn((k_rows, k_cols), |(y, x)| {
                if y == x {
                    1.0
                } else {
                    0.0
                }
            });
            let id = BenchmarkId::new(
                "conv2d",
                format!("{}x{}_k{}x{}", rows, cols, k_rows, k_cols),
            );

            group.throughput(Throughput::Elements((rows * cols) as u64));
            group.bench_with_input(id, &(rows, cols), |b, _| {
                b.iter(|| {
                    conv2d(
                        &black_box(image.view()),
                        &black_box(kernel.view()),
                        (1, 1),
                    )
                })
            });
        }
    }

    group.finish();
}
/// Benchmarks `max_pool2d` and `avg_pool2d`.
///
/// Every square input (32, 64, 128 or 256 per side, filled with `row + column` as
/// `f64`) is pooled with a 2x2 and a 4x4 window. The same `(height, width)` pair is
/// passed for both trailing arguments of the pooling functions. Throughput is declared
/// as the number of input elements.
fn bench_pooling(c: &mut Criterion) {
    let mut group = c.benchmark_group("pooling");

    let windows = [(2, 2), (4, 4)];
    for (rows, cols) in [(32, 32), (64, 64), (128, 128), (256, 256)] {
        for window in windows {
            let (win_h, win_w) = window;
            let image = Array2::from_shape_fn((rows, cols), |(y, x)| (y + x) as f64);
            // Both pooling variants share the same parameter label.
            let label = format!("{}x{}_p{}x{}", rows, cols, win_h, win_w);

            group.throughput(Throughput::Elements((rows * cols) as u64));

            group.bench_with_input(
                BenchmarkId::new("max_pool2d", &label),
                &(rows, cols),
                |b, _| b.iter(|| max_pool2d(&black_box(image.view()), window, window)),
            );
            group.bench_with_input(
                BenchmarkId::new("avg_pool2d", &label),
                &(rows, cols),
                |b, _| b.iter(|| avg_pool2d(&black_box(image.view()), window, window)),
            );
        }
    }

    group.finish();
}
/// Benchmarks `batch_norm_1d`, `layer_norm` and `dropout_2d` on 2-D inputs.
///
/// All nine combinations of batch size (32, 64, 128) and feature count (128, 256, 512)
/// are measured. The input holds `(row * features + column) / 100.0`; the scale vector
/// is all ones, the shift vector all zeros, and epsilon is `1e-5`. Throughput is
/// declared as `batch * features` elements.
fn bench_normalization(c: &mut Criterion) {
    let mut group = c.benchmark_group("normalization");

    let feature_counts = [128, 256, 512];
    for rows in [32, 64, 128] {
        for features in feature_counts {
            let input = Array2::from_shape_fn((rows, features), |(r, col)| {
                (r * features + col) as f64 / 100.0
            });
            let scale = Array1::ones(features);
            let shift = Array1::zeros(features);
            let eps = 1e-5;
            // All three benchmarks of one configuration share the same parameter label.
            let label = format!("b{}_f{}", rows, features);

            group.throughput(Throughput::Elements((rows * features) as u64));

            group.bench_with_input(
                BenchmarkId::new("batch_norm_1d", &label),
                &(rows, features),
                |b, _| {
                    b.iter(|| {
                        batch_norm_1d(
                            &black_box(input.view()),
                            &black_box(scale.view()),
                            &black_box(shift.view()),
                            eps,
                        )
                    })
                },
            );

            group.bench_with_input(
                BenchmarkId::new("layer_norm", &label),
                &(rows, features),
                |b, _| {
                    b.iter(|| {
                        layer_norm(
                            &black_box(input.view()),
                            &black_box(scale.view()),
                            &black_box(shift.view()),
                            eps,
                        )
                    })
                },
            );

            // NOTE(review): the meaning of the trailing `false` is not visible from this
            // file (possibly a training switch) - confirm that this exercises the code
            // path the benchmark is meant to measure.
            group.bench_with_input(
                BenchmarkId::new("dropout", &label),
                &(rows, features),
                |b, _| b.iter(|| dropout_2d(&black_box(input.view()), 0.5, false)),
            );
        }
    }

    group.finish();
}
/// Benchmarks the loss functions with `ReductionMode::Mean`.
///
/// For each size (100, 1000, 10000 and 100000 elements) the following inputs are built:
///
/// * `y_true` / `y_pred`: 1-D `f64` arrays cycling through `0.0, 0.1, ..., 0.9`, with
///   `y_pred` shifted by one position. They feed `mse_loss`, `mae_loss`, `huber_loss`
///   (third argument `1.0`), `binary_cross_entropy` and `focal_loss` (arguments `0.25`
///   and `2.0`).
/// * `y_true_2d` / `y_pred_2d`: `(batch_size, num_classes)` arrays with
///   `batch_size = min(100, size)` and `num_classes = size / batch_size`. Row `r` of
///   `y_true_2d` is one-hot at column `r % num_classes`; `y_pred_2d` is uniformly
///   `1 / num_classes`. They feed `categorical_cross_entropy`.
///
/// Throughput is declared as `size` elements.
fn bench_loss_functions(c: &mut Criterion) {
    let mut group = c.benchmark_group("loss_functions");
    let sizes = [100, 1000, 10000, 100000];

    for size in sizes {
        // NOTE(review): `y_true` is not binary and `y_pred` contains exact `0.0` entries
        // (whenever `(i + 1) % 10 == 0`). Whether `binary_cross_entropy` / `focal_loss`
        // clamp their inputs is not visible from this file - confirm that these inputs
        // are representative for those two benchmarks.
        let y_true = Array1::from_vec((0..size).map(|i| (i % 10) as f64 / 10.0).collect());
        let y_pred = Array1::from_vec((0..size).map(|i| ((i + 1) % 10) as f64 / 10.0).collect());

        let batch_size = 100.min(size);
        let num_classes = size / batch_size;

        // Built with the infallible `from_shape_fn` so there is no error path that could
        // silently substitute different data. In row-major order the flat index is
        // `row * num_classes + col`, so "flat % num_classes == (flat / num_classes) %
        // num_classes" reduces to the comparison below.
        let y_true_2d = Array2::from_shape_fn((batch_size, num_classes), |(row, col)| {
            if col == row % num_classes {
                1.0
            } else {
                0.0
            }
        });
        let uniform_probability = 1.0 / num_classes as f64;
        let y_pred_2d = Array2::from_shape_fn((batch_size, num_classes), |_| uniform_probability);

        group.throughput(Throughput::Elements(size as u64));

        group.bench_with_input(BenchmarkId::new("mse_loss", size), &size, |b, _| {
            b.iter(|| {
                mse_loss(
                    &black_box(y_true.view()),
                    &black_box(y_pred.view()),
                    ReductionMode::Mean,
                )
            })
        });

        group.bench_with_input(BenchmarkId::new("mae_loss", size), &size, |b, _| {
            b.iter(|| {
                mae_loss(
                    &black_box(y_true.view()),
                    &black_box(y_pred.view()),
                    ReductionMode::Mean,
                )
            })
        });

        group.bench_with_input(BenchmarkId::new("huber_loss", size), &size, |b, _| {
            b.iter(|| {
                huber_loss(
                    &black_box(y_true.view()),
                    &black_box(y_pred.view()),
                    1.0,
                    ReductionMode::Mean,
                )
            })
        });

        group.bench_with_input(
            BenchmarkId::new("cross_entropy_loss", size),
            &size,
            |b, _| {
                b.iter(|| {
                    categorical_cross_entropy(
                        &black_box(y_true_2d.view()),
                        &black_box(y_pred_2d.view()),
                        ReductionMode::Mean,
                    )
                })
            },
        );

        group.bench_with_input(
            BenchmarkId::new("binary_cross_entropy_loss", size),
            &size,
            |b, _| {
                b.iter(|| {
                    binary_cross_entropy(
                        &black_box(y_true.view()),
                        &black_box(y_pred.view()),
                        ReductionMode::Mean,
                    )
                })
            },
        );

        group.bench_with_input(BenchmarkId::new("focal_loss", size), &size, |b, _| {
            b.iter(|| {
                focal_loss(
                    &black_box(y_true.view()),
                    &black_box(y_pred.view()),
                    0.25,
                    2.0,
                    ReductionMode::Mean,
                )
            })
        });
    }

    group.finish();
}
/// Benchmarks `relu` / `sigmoid` against `simd_relu_f32` / `simd_sigmoid_f32`, plus
/// `simd_matmul_f32` on square matrices.
///
/// For each length (1000, 10000, 100000) the element-wise functions receive ramps that
/// start at `-5.0` with a step of `0.01`. Note that the functions labelled `scalar`
/// run on `f64` data while the `simd` ones run on `f32` data, so the two sides differ
/// in element type as well as in implementation.
///
/// The matrix product uses `n x n` operands with `n = floor(sqrt(length))` and declares
/// a throughput of `n * n * n`; the element-wise benchmarks declare `length`.
fn bench_simd_vs_scalar(c: &mut Criterion) {
    let mut group = c.benchmark_group("simd_vs_scalar");

    for len in [1000, 10000, 100000] {
        let ramp_f32 = Array1::from_vec((0..len).map(|k| (k as f32) / 100.0 - 5.0).collect());
        let ramp_f64 = Array1::from_vec((0..len).map(|k| (k as f64) / 100.0 - 5.0).collect());

        // Reset on every pass because the matmul section below overrides it.
        group.throughput(Throughput::Elements(len as u64));

        group.bench_with_input(BenchmarkId::new("relu_scalar_f64", len), &len, |b, _| {
            b.iter(|| relu(&black_box(ramp_f64.view())))
        });
        group.bench_with_input(BenchmarkId::new("relu_simd_f32", len), &len, |b, _| {
            b.iter(|| simd_relu_f32(&black_box(ramp_f32.view())))
        });
        group.bench_with_input(BenchmarkId::new("sigmoid_scalar_f64", len), &len, |b, _| {
            b.iter(|| sigmoid(&black_box(ramp_f64.view())))
        });
        group.bench_with_input(BenchmarkId::new("sigmoid_simd_f32", len), &len, |b, _| {
            b.iter(|| simd_sigmoid_f32(&black_box(ramp_f32.view())))
        });

        // Square matrices holding roughly `len` elements each.
        let side = (len as f64).sqrt() as usize;
        if side <= 1 {
            continue;
        }

        let lhs = Array2::from_shape_fn((side, side), |(r, k)| (r + k) as f32);
        let rhs = Array2::from_shape_fn((side, side), |(r, k)| (r * 2 + k) as f32);
        let id = BenchmarkId::new("matmul_simd_f32", format!("{}x{}", side, side));

        group.throughput(Throughput::Elements((side * side * side) as u64));
        group.bench_with_input(id, &side, |b, _| {
            b.iter(|| simd_matmul_f32(&black_box(lhs.view()), &black_box(rhs.view())))
        });
    }

    group.finish();
}
fn bench_simd_reductions(c: &mut Criterion) {
let mut group = c.benchmark_group("simd_reductions");
let sizes = vec![1000, 10000, 100000, 1000000];
for size in sizes {
let x = Array1::from_vec((0..size).map(|i| i as f32).collect());
group.throughput(Throughput::Elements(size as u64));
group.bench_with_input(BenchmarkId::new("sum", size), &size, |b, _| {
b.iter(|| simd_sum_f32(&black_box(x.view())))
});
group.bench_with_input(BenchmarkId::new("mean", size), &size, |b, _| {
b.iter(|| simd_mean_f32(&black_box(x.view())))
});
group.bench_with_input(BenchmarkId::new("norm", size), &size, |b, _| {
b.iter(|| simd_norm_f32(&black_box(x.view())))
});
group.bench_with_input(BenchmarkId::new("min", size), &size, |b, _| {
b.iter(|| simd_min_f32(&black_box(x.view())))
});
group.bench_with_input(BenchmarkId::new("max", size), &size, |b, _| {
b.iter(|| simd_max_f32(&black_box(x.view())))
});
let y = Array1::from_vec((0..size).map(|i| (i + 1) as f32).collect());
group.bench_with_input(BenchmarkId::new("dot", size), &size, |b, _| {
b.iter(|| simd_dot_f32(&black_box(x.view()), &black_box(y.view())))
});
}
group.finish();
}
/// Benchmarks a three-step pipeline: `simd_matmul_f32`, then `simd_relu_2d_f32`, then
/// `batch_norm_1d`.
///
/// The input is a 128 x 512 `f32` matrix holding `(row * 512 + column) / 1000.0`, the
/// weight matrix is 512 x 256 holding `(row + column) / 1000.0 - 0.5`, and batch
/// normalisation uses an all-ones scale, an all-zeros shift and an epsilon of `1e-5`.
/// Throughput is declared as `128 * 256` elements.
///
/// # Panics
///
/// Panics with "matmul failed" if `simd_matmul_f32` returns an error.
fn bench_combined_operations(c: &mut Criterion) {
    let mut group = c.benchmark_group("combined_operations");

    let (rows, in_features, out_features) = (128, 512, 256);

    let input = Array2::from_shape_fn((rows, in_features), |(r, k)| {
        (r * in_features + k) as f32 / 1000.0
    });
    let weight_matrix = Array2::from_shape_fn((in_features, out_features), |(r, k)| {
        ((r + k) as f32 / 1000.0) - 0.5
    });
    let scale = Array1::ones(out_features);
    let shift = Array1::zeros(out_features);

    // One full pass through the pipeline. Each stage's input goes through `black_box`,
    // exactly as in the measured routine this closure was lifted from.
    let forward_pass = || {
        let projected =
            simd_matmul_f32(&black_box(input.view()), &black_box(weight_matrix.view()))
                .expect("matmul failed");
        let rectified = simd_relu_2d_f32(&black_box(projected.view()));
        batch_norm_1d(
            &black_box(rectified.view()),
            &black_box(scale.view()),
            &black_box(shift.view()),
            1e-5,
        )
    };

    group.throughput(Throughput::Elements((rows * out_features) as u64));
    group.bench_function("forward_pass_linear_relu_batchnorm", |b| {
        b.iter(&forward_pass)
    });

    group.finish();
}
// Collects every benchmark function defined in this file into the `benches` group.
// They are listed in the same order in which the functions appear in the file.
criterion_group!(
benches,
bench_activation_functions,
bench_convolution,
bench_pooling,
bench_normalization,
bench_loss_functions,
bench_simd_vs_scalar,
bench_simd_reductions,
bench_combined_operations,
);
// Expands to the benchmark binary's entry point, which runs the `benches` group.
criterion_main!(benches);