#![allow(clippy::disallowed_methods, clippy::float_cmp)]
use criterion::{BenchmarkId, Criterion, Throughput};
use std::hint::black_box;
use trueno::{Backend, Vector};
/// Benchmarks `Vector::relu` on the scalar backend and, on x86_64, the
/// SSE2 and AVX2 backends. Throughput is reported in elements processed.
pub fn bench_relu(c: &mut Criterion) {
    let mut group = c.benchmark_group("relu");
    for &size in &[100, 1000, 10000, 100_000, 1_000_000] {
        group.throughput(Throughput::Elements(size as u64));
        // Inputs straddle zero so both the negative and positive paths run.
        let data: Vec<f32> = (0..size)
            .map(|i| (i as f32) * 0.5 - (size as f32) * 0.25)
            .collect();
        // Scalar is always available; SIMD backends only exist on x86_64.
        let mut backends = Vec::with_capacity(3);
        backends.push(("Scalar", Backend::Scalar));
        #[cfg(target_arch = "x86_64")]
        {
            backends.push(("SSE2", Backend::SSE2));
            backends.push(("AVX2", Backend::AVX2));
        }
        for (label, backend) in backends {
            // Built outside `iter`, so construction cost is not measured.
            let v = Vector::from_slice_with_backend(&data, backend);
            group.bench_with_input(BenchmarkId::new(label, size), &size, |bencher, _| {
                bencher.iter(|| black_box(v.relu().unwrap()));
            });
        }
    }
    group.finish();
}
/// Benchmarks `Vector::softmax` on the scalar backend and, on x86_64, the
/// SSE2 and AVX2 backends. Throughput is reported in elements processed.
pub fn bench_softmax(c: &mut Criterion) {
    let mut group = c.benchmark_group("softmax");
    for &size in &[100, 1000, 10000, 100_000] {
        group.throughput(Throughput::Elements(size as u64));
        // Small positive ramp keeps exp() inputs in a numerically tame range.
        let data: Vec<f32> = (0..size).map(|i| (i as f32) * 0.01).collect();
        // Scalar is always available; SIMD backends only exist on x86_64.
        let mut backends = Vec::with_capacity(3);
        backends.push(("Scalar", Backend::Scalar));
        #[cfg(target_arch = "x86_64")]
        {
            backends.push(("SSE2", Backend::SSE2));
            backends.push(("AVX2", Backend::AVX2));
        }
        for (label, backend) in backends {
            // Built outside `iter`, so construction cost is not measured.
            let v = Vector::from_slice_with_backend(&data, backend);
            group.bench_with_input(BenchmarkId::new(label, size), &size, |bencher, _| {
                bencher.iter(|| black_box(v.softmax().unwrap()));
            });
        }
    }
    group.finish();
}
/// Benchmarks `Vector::log_softmax` on the scalar backend and, on x86_64,
/// the SSE2 and AVX2 backends. Throughput is reported in elements processed.
pub fn bench_log_softmax(c: &mut Criterion) {
    let mut group = c.benchmark_group("log_softmax");
    for &size in &[100, 1000, 10000, 100_000] {
        group.throughput(Throughput::Elements(size as u64));
        // Same ramp as the softmax benchmark so results are comparable.
        let data: Vec<f32> = (0..size).map(|i| (i as f32) * 0.01).collect();
        // Scalar is always available; SIMD backends only exist on x86_64.
        let mut backends = Vec::with_capacity(3);
        backends.push(("Scalar", Backend::Scalar));
        #[cfg(target_arch = "x86_64")]
        {
            backends.push(("SSE2", Backend::SSE2));
            backends.push(("AVX2", Backend::AVX2));
        }
        for (label, backend) in backends {
            // Built outside `iter`, so construction cost is not measured.
            let v = Vector::from_slice_with_backend(&data, backend);
            group.bench_with_input(BenchmarkId::new(label, size), &size, |bencher, _| {
                bencher.iter(|| black_box(v.log_softmax().unwrap()));
            });
        }
    }
    group.finish();
}
/// Benchmarks `Vector::clip` on the scalar backend and, on x86_64, the
/// SSE2 and AVX2 backends. Throughput is reported in elements processed.
pub fn bench_clip(c: &mut Criterion) {
    let mut group = c.benchmark_group("clip");
    for &size in &[100, 1000, 10000, 100_000, 1_000_000] {
        group.throughput(Throughput::Elements(size as u64));
        let data: Vec<f32> = (0..size).map(|i| (i as f32) * 0.5).collect();
        // Bounds let larger inputs exercise below-, inside-, and above-range values.
        let (lo, hi) = (100.0, 5000.0);
        // Scalar is always available; SIMD backends only exist on x86_64.
        let mut backends = Vec::with_capacity(3);
        backends.push(("Scalar", Backend::Scalar));
        #[cfg(target_arch = "x86_64")]
        {
            backends.push(("SSE2", Backend::SSE2));
            backends.push(("AVX2", Backend::AVX2));
        }
        for (label, backend) in backends {
            // Built outside `iter`, so construction cost is not measured.
            let v = Vector::from_slice_with_backend(&data, backend);
            group.bench_with_input(BenchmarkId::new(label, size), &size, |bencher, _| {
                bencher.iter(|| black_box(v.clip(lo, hi).unwrap()));
            });
        }
    }
    group.finish();
}
/// Benchmarks `Vector::sigmoid` on the scalar backend and, on x86_64, the
/// SSE2 and AVX2 backends. Throughput is reported in elements processed.
pub fn bench_sigmoid(c: &mut Criterion) {
    let mut group = c.benchmark_group("sigmoid");
    for &size in &[100, 1000, 10000, 100_000] {
        group.throughput(Throughput::Elements(size as u64));
        // Evenly spaced values spanning [-6, 6), covering both saturation tails.
        let data: Vec<f32> = (0..size)
            .map(|i| (i as f32 / size as f32) * 12.0 - 6.0)
            .collect();
        // Scalar is always available; SIMD backends only exist on x86_64.
        let mut backends = Vec::with_capacity(3);
        backends.push(("Scalar", Backend::Scalar));
        #[cfg(target_arch = "x86_64")]
        {
            backends.push(("SSE2", Backend::SSE2));
            backends.push(("AVX2", Backend::AVX2));
        }
        for (label, backend) in backends {
            // Built outside `iter`, so construction cost is not measured.
            let v = Vector::from_slice_with_backend(&data, backend);
            group.bench_with_input(BenchmarkId::new(label, size), &size, |bencher, _| {
                bencher.iter(|| black_box(v.sigmoid().unwrap()));
            });
        }
    }
    group.finish();
}
/// Benchmarks `Vector::gelu` on the scalar backend and, on x86_64, the
/// SSE2 and AVX2 backends. Throughput is reported in elements processed.
pub fn bench_gelu(c: &mut Criterion) {
    let mut group = c.benchmark_group("gelu");
    for &size in &[100, 1000, 10000, 100_000] {
        group.throughput(Throughput::Elements(size as u64));
        // Inputs centered around zero, where gelu's curvature is most active.
        let data: Vec<f32> = (0..size)
            .map(|i| (i as f32) * 0.1 - (size as f32) * 0.05)
            .collect();
        // Scalar is always available; SIMD backends only exist on x86_64.
        let mut backends = Vec::with_capacity(3);
        backends.push(("Scalar", Backend::Scalar));
        #[cfg(target_arch = "x86_64")]
        {
            backends.push(("SSE2", Backend::SSE2));
            backends.push(("AVX2", Backend::AVX2));
        }
        for (label, backend) in backends {
            // Built outside `iter`, so construction cost is not measured.
            let v = Vector::from_slice_with_backend(&data, backend);
            group.bench_with_input(BenchmarkId::new(label, size), &size, |bencher, _| {
                bencher.iter(|| black_box(v.gelu().unwrap()));
            });
        }
    }
    group.finish();
}
/// Benchmarks `Vector::swish` on the scalar backend and, on x86_64, the
/// SSE2 and AVX2 backends. Throughput is reported in elements processed.
pub fn bench_swish(c: &mut Criterion) {
    let mut group = c.benchmark_group("swish");
    for &size in &[100, 1000, 10000, 100_000] {
        group.throughput(Throughput::Elements(size as u64));
        // Same zero-centered inputs as the gelu benchmark for comparability.
        let data: Vec<f32> = (0..size)
            .map(|i| (i as f32) * 0.1 - (size as f32) * 0.05)
            .collect();
        // Scalar is always available; SIMD backends only exist on x86_64.
        let mut backends = Vec::with_capacity(3);
        backends.push(("Scalar", Backend::Scalar));
        #[cfg(target_arch = "x86_64")]
        {
            backends.push(("SSE2", Backend::SSE2));
            backends.push(("AVX2", Backend::AVX2));
        }
        for (label, backend) in backends {
            // Built outside `iter`, so construction cost is not measured.
            let v = Vector::from_slice_with_backend(&data, backend);
            group.bench_with_input(BenchmarkId::new(label, size), &size, |bencher, _| {
                bencher.iter(|| black_box(v.swish().unwrap()));
            });
        }
    }
    group.finish();
}
/// Benchmarks `Vector::tanh` on the scalar backend and, on x86_64, the
/// SSE2 and AVX2 backends. Throughput is reported in elements processed.
pub fn bench_tanh(c: &mut Criterion) {
    let mut group = c.benchmark_group("tanh");
    for &size in &[100, 1000, 10000, 100_000] {
        group.throughput(Throughput::Elements(size as u64));
        // Evenly spaced values spanning [-3.5, 3.5), covering both saturation tails.
        let data: Vec<f32> = (0..size)
            .map(|i| (i as f32) / (size as f32) * 7.0 - 3.5)
            .collect();
        // Scalar is always available; SIMD backends only exist on x86_64.
        let mut backends = Vec::with_capacity(3);
        backends.push(("Scalar", Backend::Scalar));
        #[cfg(target_arch = "x86_64")]
        {
            backends.push(("SSE2", Backend::SSE2));
            backends.push(("AVX2", Backend::AVX2));
        }
        for (label, backend) in backends {
            // Built outside `iter`, so construction cost is not measured.
            let v = Vector::from_slice_with_backend(&data, backend);
            group.bench_with_input(BenchmarkId::new(label, size), &size, |bencher, _| {
                bencher.iter(|| black_box(v.tanh().unwrap()));
            });
        }
    }
    group.finish();
}