use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use quantize_rs::quantization::{QuantizedTensor, QuantizedTensorInt4};
use quantize_rs::{pack_int4, unpack_int4, QuantConfig, Quantizer};
/// Builds `n` f32 samples covering one full sine period in [-1, 1].
///
/// Deterministic, so repeated benchmark runs quantize identical data.
fn synthetic_f32(n: usize) -> Vec<f32> {
    let mut samples = Vec::with_capacity(n);
    for i in 0..n {
        // Phase in [0, 1); multiplied by TAU to sweep one full period.
        let phase = (i as f32) / (n as f32);
        samples.push((phase * std::f32::consts::TAU).sin());
    }
    samples
}
/// Builds `n` i8 values cycling through the full signed int4 range [-8, 7].
///
/// Exercises every nibble pattern for the pack/unpack benchmarks.
fn synthetic_i8_int4(n: usize) -> Vec<i8> {
    let mut values = Vec::with_capacity(n);
    for i in 0..n {
        values.push((i % 16) as i8 - 8);
    }
    values
}
/// Measures element throughput of int8 vs int4 per-tensor quantization
/// across three input sizes (1e3, 1e5, 1e6 elements).
fn bench_quantize_throughput(c: &mut Criterion) {
    let mut group = c.benchmark_group("quantize_throughput");
    for n in [1_000_usize, 100_000, 1_000_000] {
        let data = synthetic_f32(n);
        let shape = vec![n];
        // Report results as elements/second so sizes are comparable.
        group.throughput(Throughput::Elements(n as u64));
        // NOTE(review): `shape.clone()` runs inside the timed closure, so one
        // small Vec allocation is counted in every measurement. Negligible for
        // the larger sizes, but it inflates the 1_000-element numbers slightly.
        group.bench_with_input(BenchmarkId::new("int8", n), &data, |b, d| {
            b.iter(|| QuantizedTensor::from_f32(black_box(d), black_box(shape.clone())).unwrap())
        });
        group.bench_with_input(BenchmarkId::new("int4", n), &data, |b, d| {
            b.iter(|| {
                QuantizedTensorInt4::from_f32(black_box(d), black_box(shape.clone())).unwrap()
            })
        });
    }
    group.finish();
}
/// Compares per-tensor vs per-channel quantization cost, for both int8 and
/// int4, on a fixed 64x27 tensor (64 channels of 27 elements each — e.g. a
/// 3x3x3 conv kernel per channel, flattened).
fn bench_per_channel_vs_per_tensor(c: &mut Criterion) {
    let mut group = c.benchmark_group("per_channel_vs_per_tensor");
    let (channels, elems_per_channel) = (64_usize, 27_usize);
    let total = channels * elems_per_channel;
    let data = synthetic_f32(total);
    let shape = vec![channels, elems_per_channel];
    group.throughput(Throughput::Elements(total as u64));
    group.bench_function("int8_per_tensor", |b| {
        b.iter(|| QuantizedTensor::from_f32(black_box(&data), black_box(shape.clone())).unwrap())
    });
    group.bench_function("int8_per_channel", |b| {
        b.iter(|| {
            QuantizedTensor::from_f32_per_channel(black_box(&data), black_box(shape.clone()))
                .unwrap()
        })
    });
    group.bench_function("int4_per_tensor", |b| {
        b.iter(|| {
            QuantizedTensorInt4::from_f32(black_box(&data), black_box(shape.clone())).unwrap()
        })
    });
    group.bench_function("int4_per_channel", |b| {
        b.iter(|| {
            QuantizedTensorInt4::from_f32_per_channel(black_box(&data), black_box(shape.clone()))
                .unwrap()
        })
    });
    group.finish();
}
/// Measures nibble pack/unpack throughput for int4 payloads of
/// 1e4, 1e5, and 1e6 elements.
fn bench_pack_int4(c: &mut Criterion) {
    let mut group = c.benchmark_group("pack_int4");
    for n in [10_000_usize, 100_000, 1_000_000] {
        let unpacked = synthetic_i8_int4(n);
        // Pre-pack once so the unpack benchmark measures only unpacking.
        let packed = pack_int4(&unpacked);
        group.throughput(Throughput::Elements(n as u64));
        group.bench_with_input(BenchmarkId::new("pack", n), &unpacked, |b, v| {
            b.iter(|| pack_int4(black_box(v)))
        });
        group.bench_with_input(BenchmarkId::new("unpack", n), &packed, |b, p| {
            b.iter(|| unpack_int4(black_box(p), black_box(n)))
        });
    }
    group.finish();
}
fn bench_quantize_model(c: &mut Criterion) {
let mut group = c.benchmark_group("quantize_model");
let shapes: &[&[usize]] = &[
&[64, 3, 3, 3], &[64], &[128, 64, 3, 3], &[128], &[256, 128, 3, 3], &[256], &[1024, 4096], &[1024], ];
let tensors: Vec<(Vec<f32>, Vec<usize>)> = shapes
.iter()
.map(|&s| {
let n: usize = s.iter().product();
(synthetic_f32(n), s.to_vec())
})
.collect();
let total_elements: u64 = tensors.iter().map(|(d, _)| d.len() as u64).sum();
group.throughput(Throughput::Elements(total_elements));
group.bench_function("int8", |b| {
let cfg = QuantConfig {
bits: 8,
per_channel: false,
..Default::default()
};
let quantizer = Quantizer::new(cfg);
b.iter(|| {
for (data, shape) in black_box(&tensors) {
let _ = quantizer
.quantize_tensor(black_box(data), black_box(shape.clone()))
.unwrap();
}
});
});
group.bench_function("int4", |b| {
let cfg = QuantConfig {
bits: 4,
per_channel: false,
..Default::default()
};
let quantizer = Quantizer::new(cfg);
b.iter(|| {
for (data, shape) in black_box(&tensors) {
let _ = quantizer
.quantize_tensor(black_box(data), black_box(shape.clone()))
.unwrap();
}
});
});
group.bench_function("int8_per_channel", |b| {
let cfg = QuantConfig {
bits: 8,
per_channel: true,
..Default::default()
};
let quantizer = Quantizer::new(cfg);
b.iter(|| {
for (data, shape) in black_box(&tensors) {
let _ = quantizer
.quantize_tensor(black_box(data), black_box(shape.clone()))
.unwrap();
}
});
});
group.finish();
}
// Register all benchmark entry points into a single `benches` group and
// generate the harness `main` that runs them.
criterion_group!(
    benches,
    bench_quantize_throughput,
    bench_per_channel_vs_per_tensor,
    bench_pack_int4,
    bench_quantize_model,
);
criterion_main!(benches);