use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use edgevec::sparse::{SparseId, SparseStorage, SparseVector};
use rand::prelude::*;
use rand_chacha::ChaCha8Rng;
use std::hint::black_box;
use std::time::Duration;
fn random_sparse(dim: u32, nnz: usize, seed: u64) -> SparseVector {
let mut rng = ChaCha8Rng::seed_from_u64(seed);
let mut indices: Vec<u32> = (0..dim).collect();
indices.shuffle(&mut rng);
indices.truncate(nnz);
indices.sort();
let values: Vec<f32> = (0..nnz)
.map(|_| {
let mut v = rng.gen_range(-1.0..1.0);
if v == 0.0 {
v = 0.001;
}
v
})
.collect();
SparseVector::new(indices, values, dim).expect("Generated vector should be valid")
}
fn create_storage(count: usize, dim: u32, nnz: usize) -> SparseStorage {
let mut storage = SparseStorage::new();
for i in 0..count {
let vec = random_sparse(dim, nnz, i as u64);
storage.insert(&vec).expect("Insert should succeed");
}
storage
}
fn bench_storage_insert(c: &mut Criterion) {
let mut group = c.benchmark_group("storage_insert");
group.sample_size(500);
group.measurement_time(Duration::from_secs(10));
let dim = 10_000u32;
let nnz = 50usize;
group.throughput(Throughput::Elements(1));
group.bench_function("single", |b| {
let mut storage = SparseStorage::new();
let vec = random_sparse(dim, nnz, 42);
b.iter(|| storage.insert(black_box(&vec)).unwrap());
});
for initial_count in [100, 1000, 10000] {
group.bench_with_input(
BenchmarkId::new("into_existing", initial_count),
&initial_count,
|b, &count| {
let mut storage = create_storage(count, dim, nnz);
let vec = random_sparse(dim, nnz, 999);
b.iter(|| storage.insert(black_box(&vec)).unwrap());
},
);
}
for batch_size in [100, 1000, 10000] {
group.throughput(Throughput::Elements(batch_size as u64));
group.bench_with_input(
BenchmarkId::new("batch", batch_size),
&batch_size,
|b, &count| {
let vectors: Vec<SparseVector> = (0..count)
.map(|i| random_sparse(dim, nnz, i as u64))
.collect();
b.iter(|| {
let mut storage = SparseStorage::new();
for vec in &vectors {
storage.insert(black_box(vec)).unwrap();
}
storage
});
},
);
}
group.finish();
}
fn bench_storage_get(c: &mut Criterion) {
let mut group = c.benchmark_group("storage_get");
group.sample_size(1000);
group.measurement_time(Duration::from_secs(5));
let dim = 10_000u32;
let nnz = 50usize;
let storage = create_storage(10_000, dim, nnz);
group.bench_function("random_from_10k", |b| {
let mut rng = ChaCha8Rng::seed_from_u64(42);
b.iter(|| {
let id = SparseId::new(rng.gen_range(0..10_000u64));
storage.get(black_box(id))
});
});
group.bench_function("first_from_10k", |b| {
let id = SparseId::new(0);
b.iter(|| storage.get(black_box(id)));
});
group.bench_function("last_from_10k", |b| {
let id = SparseId::new(9999);
b.iter(|| storage.get(black_box(id)));
});
group.bench_function("missing_from_10k", |b| {
let id = SparseId::new(99999);
b.iter(|| storage.get(black_box(id)));
});
group.finish();
}
fn bench_storage_iter(c: &mut Criterion) {
let mut group = c.benchmark_group("storage_iter");
group.sample_size(100);
group.measurement_time(Duration::from_secs(10));
let dim = 10_000u32;
let nnz = 50usize;
for count in [1000, 10000, 100000] {
group.throughput(Throughput::Elements(count as u64));
group.bench_with_input(
BenchmarkId::from_parameter(format!("count_{count}")),
&count,
|b, &count| {
let storage = create_storage(count, dim, nnz);
b.iter(|| {
let mut sum = 0.0f32;
for (id, vec) in storage.iter() {
sum += black_box(id.as_u64()) as f32 + vec.nnz() as f32;
}
sum
});
},
);
}
let mut storage_with_deletions = create_storage(10_000, dim, nnz);
for i in (0..10_000).step_by(10) {
let _ = storage_with_deletions.delete(SparseId::new(i));
}
group.bench_function("10k_with_deletions", |b| {
b.iter(|| {
let mut sum = 0.0f32;
for (id, vec) in storage_with_deletions.iter() {
sum += black_box(id.as_u64()) as f32 + vec.nnz() as f32;
}
sum
});
});
group.finish();
}
fn bench_storage_roundtrip(c: &mut Criterion) {
let mut group = c.benchmark_group("storage_roundtrip");
group.sample_size(50);
group.measurement_time(Duration::from_secs(15));
let dim = 10_000u32;
let nnz = 50usize;
for count in [1000, 10000] {
group.throughput(Throughput::Elements(count as u64));
group.bench_with_input(BenchmarkId::new("serialize", count), &count, |b, &count| {
let storage = create_storage(count, dim, nnz);
b.iter(|| storage.to_bytes());
});
let storage = create_storage(count, dim, nnz);
let bytes = storage.to_bytes();
group.bench_with_input(
BenchmarkId::new("deserialize", count),
&bytes,
|b, bytes| {
b.iter(|| SparseStorage::from_bytes(black_box(bytes)));
},
);
group.bench_with_input(
BenchmarkId::new("full_roundtrip", count),
&count,
|b, &count| {
let storage = create_storage(count, dim, nnz);
b.iter(|| {
let bytes = storage.to_bytes();
SparseStorage::from_bytes(black_box(&bytes))
});
},
);
}
group.finish();
}
fn bench_storage_delete(c: &mut Criterion) {
let mut group = c.benchmark_group("storage_delete");
group.sample_size(500);
let dim = 10_000u32;
let nnz = 50usize;
group.bench_function("single", |b| {
b.iter_batched(
|| create_storage(1000, dim, nnz),
|mut storage| storage.delete(black_box(SparseId::new(500))).unwrap(),
criterion::BatchSize::SmallInput,
);
});
group.bench_function("batch_10_percent", |b| {
b.iter_batched(
|| create_storage(10_000, dim, nnz),
|mut storage| {
for i in (0..10_000).step_by(10) {
let _ = storage.delete(SparseId::new(i));
}
},
criterion::BatchSize::LargeInput,
);
});
group.finish();
}
fn bench_storage_memory(c: &mut Criterion) {
let mut group = c.benchmark_group("storage_memory");
group.sample_size(100);
let dim = 10_000u32;
for nnz in [10, 50, 100, 200] {
group.bench_with_input(BenchmarkId::new("create_10k", nnz), &nnz, |b, &nnz| {
b.iter(|| create_storage(10_000, dim, nnz));
});
}
group.finish();
}
criterion_group!(
benches,
bench_storage_insert,
bench_storage_get,
bench_storage_iter,
bench_storage_roundtrip,
bench_storage_delete,
bench_storage_memory
);
criterion_main!(benches);