use criterion::{criterion_group, criterion_main, Criterion, SamplingMode};
use edgevec::hnsw::{HnswConfig, HnswIndex};
use edgevec::storage::VectorStorage;
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;
use std::hint::black_box;
use std::time::{Duration, Instant};
const SEED: u64 = 42;
const DIMS: u32 = 128;
const INDEX_SIZE: usize = 10_000;
const QUERY_COUNT: usize = 1000;
const SEARCH_K: usize = 10;
fn generate_vectors(count: usize, dims: u32, seed: u64) -> Vec<Vec<f32>> {
let mut rng = ChaCha8Rng::seed_from_u64(seed);
(0..count)
.map(|_| (0..dims).map(|_| rng.gen_range(-1.0..1.0)).collect())
.collect()
}
fn percentile(sorted_latencies: &[f64], p: f64) -> f64 {
if sorted_latencies.is_empty() {
return 0.0;
}
let idx = ((p / 100.0) * sorted_latencies.len() as f64) as usize;
let idx = idx.min(sorted_latencies.len() - 1);
sorted_latencies[idx]
}
fn collect_latencies(index: &HnswIndex, storage: &VectorStorage, queries: &[Vec<f32>]) -> Vec<f64> {
let mut latencies = Vec::with_capacity(queries.len());
for query in queries {
let start = Instant::now();
let _ = black_box(index.search(query, SEARCH_K, storage));
latencies.push(start.elapsed().as_nanos() as f64);
}
latencies.sort_by(|a, b| a.partial_cmp(b).unwrap());
latencies
}
fn bench_p99_latency(c: &mut Criterion) {
let config = HnswConfig::new(DIMS);
let mut storage = VectorStorage::new(&config, None);
let mut index = HnswIndex::new(config, &storage).unwrap();
let vectors = generate_vectors(INDEX_SIZE, DIMS, SEED);
for vector in &vectors {
index.insert(vector, &mut storage).unwrap();
}
let queries = generate_vectors(QUERY_COUNT, DIMS, SEED + 1);
let mut group = c.benchmark_group("p99_latency");
group.sample_size(10);
group.measurement_time(Duration::from_secs(30));
group.sampling_mode(SamplingMode::Flat);
group.bench_function("search_10k_percentiles", |b| {
b.iter_custom(|iters| {
let mut total_duration = Duration::ZERO;
for _ in 0..iters {
let latencies = collect_latencies(&index, &storage, &queries);
let p50 = percentile(&latencies, 50.0);
let p99 = percentile(&latencies, 99.0);
let p999 = percentile(&latencies, 99.9);
let max = latencies.last().copied().unwrap_or(0.0);
println!(
" Latencies: P50={:.2}µs, P99={:.2}µs, P999={:.2}µs, Max={:.2}µs",
p50 / 1000.0,
p99 / 1000.0,
p999 / 1000.0,
max / 1000.0
);
total_duration += Duration::from_nanos(p99 as u64);
}
total_duration
});
});
group.finish();
}
fn bench_p99_with_tombstones(c: &mut Criterion) {
let config = HnswConfig::new(DIMS);
let mut storage = VectorStorage::new(&config, None);
let mut index = HnswIndex::new(config, &storage).unwrap();
let vectors = generate_vectors(INDEX_SIZE, DIMS, SEED);
let mut ids = Vec::new();
for vector in &vectors {
let id = index.insert(vector, &mut storage).unwrap();
ids.push(id);
}
for id in ids.iter().take(INDEX_SIZE * 30 / 100) {
index.soft_delete(*id).unwrap();
}
let queries = generate_vectors(QUERY_COUNT, DIMS, SEED + 2);
let mut group = c.benchmark_group("p99_latency");
group.sample_size(10);
group.measurement_time(Duration::from_secs(30));
group.sampling_mode(SamplingMode::Flat);
group.bench_function("search_10k_30pct_tombstones", |b| {
b.iter_custom(|iters| {
let mut total_duration = Duration::ZERO;
for _ in 0..iters {
let latencies = collect_latencies(&index, &storage, &queries);
let p50 = percentile(&latencies, 50.0);
let p99 = percentile(&latencies, 99.0);
let p999 = percentile(&latencies, 99.9);
let max = latencies.last().copied().unwrap_or(0.0);
println!(
" [30% tombstones] P50={:.2}µs, P99={:.2}µs, P999={:.2}µs, Max={:.2}µs",
p50 / 1000.0,
p99 / 1000.0,
p999 / 1000.0,
max / 1000.0
);
total_duration += Duration::from_nanos(p99 as u64);
}
total_duration
});
});
group.finish();
}
fn bench_p99_scaling(c: &mut Criterion) {
let sizes = [1_000, 5_000, 10_000, 25_000];
let queries = generate_vectors(100, DIMS, SEED + 3);
let mut group = c.benchmark_group("p99_scaling");
group.sample_size(10);
group.measurement_time(Duration::from_secs(20));
group.sampling_mode(SamplingMode::Flat);
for size in sizes {
let config = HnswConfig::new(DIMS);
let mut storage = VectorStorage::new(&config, None);
let mut index = HnswIndex::new(config, &storage).unwrap();
let vectors = generate_vectors(size, DIMS, SEED);
for vector in &vectors {
index.insert(vector, &mut storage).unwrap();
}
group.bench_function(format!("search_{}_p99", size), |b| {
b.iter_custom(|iters| {
let mut total_duration = Duration::ZERO;
for _ in 0..iters {
let latencies = collect_latencies(&index, &storage, &queries);
let p99 = percentile(&latencies, 99.0);
total_duration += Duration::from_nanos(p99 as u64);
}
total_duration
});
});
}
group.finish();
}
criterion_group!(
p99_benches,
bench_p99_latency,
bench_p99_with_tombstones,
bench_p99_scaling
);
criterion_main!(p99_benches);