use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
use sqlitegraph::hnsw::DistanceMetric;
use sqlitegraph::hnsw::simd::{
compute_norm_squared, compute_norm_squared_scalar, cosine_similarity, cosine_similarity_scalar,
dot_product, dot_product_scalar, euclidean_distance, euclidean_distance_scalar,
};
mod bench_utils;
use bench_utils::{MEASURE, WARM_UP};
fn generate_test_vectors(count: usize, dimension: usize) -> Vec<Vec<f32>> {
let mut vectors = Vec::with_capacity(count);
for i in 0..count {
let mut vector = Vec::with_capacity(dimension);
for j in 0..dimension {
let value = ((i as f32 * 0.1) + (j as f32 * 0.01)).sin();
vector.push(value);
}
vectors.push(vector);
}
vectors
}
fn create_hnsw_index(
dimension: usize,
ef_construction: usize,
ef_search: usize,
) -> sqlitegraph::hnsw::HnswIndex {
let config = sqlitegraph::hnsw::hnsw_config()
.dimension(dimension)
.m_connections(16)
.ef_construction(ef_construction)
.ef_search(ef_search)
.distance_metric(DistanceMetric::Cosine)
.build()
.expect("HNSW configuration should be valid");
sqlitegraph::hnsw::HnswIndex::new("benchmark_index", config)
.expect("Failed to create HNSW index")
}
fn hnsw_vector_insertion(criterion: &mut Criterion) {
let mut group = criterion.benchmark_group("hnsw_insertion");
group.measurement_time(MEASURE);
group.warm_up_time(WARM_UP);
let dimensions = vec![64, 128, 256, 512, 768, 1536];
let dataset_sizes = vec![100, 500, 1000];
for &dimension in &dimensions {
for &dataset_size in &dataset_sizes {
let bench_id = BenchmarkId::new(
"insertion",
format!("dim{}_size{}", dimension, dataset_size),
);
group.bench_function(bench_id, |b| {
b.iter(|| {
let mut hnsw = create_hnsw_index(dimension, 200, 50);
let vectors = generate_test_vectors(dataset_size, dimension);
for vector in &vectors {
hnsw.insert_vector(&vector, None)
.expect("Failed to insert vector");
}
})
});
}
}
group.finish();
}
fn hnsw_search_performance(criterion: &mut Criterion) {
let mut group = criterion.benchmark_group("hnsw_search");
group.measurement_time(MEASURE);
group.warm_up_time(WARM_UP);
let dimensions = vec![64, 128, 256, 512, 768, 1536];
let dataset_sizes = vec![100, 500, 1000];
let k_values = vec![1, 5, 10];
for &dimension in &dimensions {
for &dataset_size in &dataset_sizes {
for &k in &k_values {
let bench_id = BenchmarkId::new(
"search",
format!("dim{}_size{}_k{}", dimension, dataset_size, k),
);
group.bench_function(bench_id, |b| {
let mut hnsw = create_hnsw_index(dimension, 200, 50);
let vectors = generate_test_vectors(dataset_size, dimension);
for vector in &vectors {
hnsw.insert_vector(&vector, None)
.expect("Failed to insert vector");
}
let query = &vectors[0];
b.iter(|| hnsw.search(&query, k).expect("Failed to search"))
});
}
}
}
group.finish();
}
fn hnsw_distance_metrics(criterion: &mut Criterion) {
let mut group = criterion.benchmark_group("hnsw_metrics");
group.measurement_time(MEASURE);
group.warm_up_time(WARM_UP);
let test_dimensions = vec![512, 768, 1536];
let dataset_size = 1000;
let k = 10;
let metrics = vec![
DistanceMetric::Cosine,
DistanceMetric::Euclidean,
DistanceMetric::DotProduct,
DistanceMetric::Manhattan,
];
for &dimension in &test_dimensions {
for metric in &metrics {
let bench_id = BenchmarkId::new("metrics", format!("dim{}_{:?}", dimension, metric));
group.bench_function(bench_id, |b| {
b.iter(|| {
let config = sqlitegraph::hnsw::hnsw_config()
.dimension(dimension)
.m_connections(16)
.ef_construction(200)
.ef_search(50)
.distance_metric(*metric)
.build()
.expect("HNSW configuration should be valid");
let mut hnsw = sqlitegraph::hnsw::HnswIndex::new("benchmark_metrics", config)
.expect("Failed to create HNSW index");
let vectors = generate_test_vectors(dataset_size, dimension);
for vector in &vectors {
hnsw.insert_vector(&vector, None)
.expect("Failed to insert vector");
}
let query = &vectors[0];
hnsw.search(&query, k).expect("Failed to search")
})
});
}
}
group.finish();
}
fn hnsw_end_to_end_performance(criterion: &mut Criterion) {
let mut group = criterion.benchmark_group("hnsw_e2e");
group.measurement_time(MEASURE);
group.warm_up_time(WARM_UP);
let dimensions = vec![64, 128, 256, 512, 768, 1536];
let dataset_sizes = vec![100, 500, 1000];
for &dimension in &dimensions {
for &dataset_size in &dataset_sizes {
let bench_id =
BenchmarkId::new("e2e", format!("dim{}_size{}", dimension, dataset_size));
group.bench_function(bench_id, |b| {
b.iter(|| {
let mut hnsw = create_hnsw_index(dimension, 200, 50);
let vectors = generate_test_vectors(dataset_size, dimension);
for vector in &vectors {
hnsw.insert_vector(&vector, None)
.expect("Failed to insert vector");
}
let query = &vectors[0];
for _ in 0..10 {
hnsw.search(query, 10).expect("Failed to search");
}
hnsw
})
});
}
}
group.finish();
}
fn hnsw_openai_embeddings(criterion: &mut Criterion) {
let mut group = criterion.benchmark_group("hnsw_openai");
group.measurement_time(MEASURE);
group.warm_up_time(WARM_UP);
let openai_dimension = 1536;
let realistic_dataset_sizes = vec![1000, 5000, 10000];
let k_values = vec![5, 10, 20];
for &dataset_size in &realistic_dataset_sizes {
for &k in &k_values {
let bench_id = BenchmarkId::new("openai_1536", format!("size{}_k{}", dataset_size, k));
group.bench_function(bench_id, |b| {
let mut hnsw = create_hnsw_index(openai_dimension, 200, 50);
let vectors = generate_test_vectors(dataset_size, openai_dimension);
for vector in &vectors {
hnsw.insert_vector(&vector, None)
.expect("Failed to insert vector");
}
let query = &vectors[0];
b.iter(|| hnsw.search(&query, k).expect("Failed to search"))
});
}
}
group.finish();
}
fn benchmark_vectors(dim: usize) -> (Vec<f32>, Vec<f32>) {
let a: Vec<f32> = (0..dim).map(|i| i as f32 * 0.1).collect();
let b: Vec<f32> = (dim..dim * 2).map(|i| i as f32 * 0.1).collect();
(a, b)
}
fn simd_dot_product_benchmarks(criterion: &mut Criterion) {
let mut group = criterion.benchmark_group("simd_dot_product");
group.measurement_time(MEASURE);
group.warm_up_time(WARM_UP);
let vector_sizes = vec![128, 768, 1536];
for &size in &vector_sizes {
group.bench_with_input(BenchmarkId::new("scalar", size), &size, |b, &size| {
let (a, b_vec) = benchmark_vectors(size);
b.iter(|| dot_product_scalar(&a, &b_vec));
});
group.bench_with_input(BenchmarkId::new("simd", size), &size, |b, &size| {
let (a, b_vec) = benchmark_vectors(size);
b.iter(|| dot_product(&a, &b_vec));
});
}
group.finish();
}
fn simd_euclidean_distance_benchmarks(criterion: &mut Criterion) {
let mut group = criterion.benchmark_group("simd_euclidean_distance");
group.measurement_time(MEASURE);
group.warm_up_time(WARM_UP);
let vector_sizes = vec![128, 768, 1536];
for &size in &vector_sizes {
group.bench_with_input(BenchmarkId::new("scalar", size), &size, |b, &size| {
let (a, b_vec) = benchmark_vectors(size);
b.iter(|| euclidean_distance_scalar(&a, &b_vec));
});
group.bench_with_input(BenchmarkId::new("simd", size), &size, |b, &size| {
let (a, b_vec) = benchmark_vectors(size);
b.iter(|| euclidean_distance(&a, &b_vec));
});
}
group.finish();
}
fn simd_cosine_similarity_benchmarks(criterion: &mut Criterion) {
let mut group = criterion.benchmark_group("simd_cosine_similarity");
group.measurement_time(MEASURE);
group.warm_up_time(WARM_UP);
let vector_sizes = vec![128, 768, 1536];
for &size in &vector_sizes {
group.bench_with_input(BenchmarkId::new("scalar", size), &size, |b, &size| {
let (a, b_vec) = benchmark_vectors(size);
let norm_a = compute_norm_squared_scalar(&a).sqrt();
let norm_b = compute_norm_squared_scalar(&b_vec).sqrt();
let a_norm: Vec<f32> = a.iter().map(|x| x / norm_a).collect();
let b_norm: Vec<f32> = b_vec.iter().map(|x| x / norm_b).collect();
b.iter(|| cosine_similarity_scalar(&a_norm, &b_norm));
});
group.bench_with_input(BenchmarkId::new("simd", size), &size, |b, &size| {
let (a, b_vec) = benchmark_vectors(size);
let norm_a = compute_norm_squared(&a).sqrt();
let norm_b = compute_norm_squared(&b_vec).sqrt();
let a_norm: Vec<f32> = a.iter().map(|x| x / norm_a).collect();
let b_norm: Vec<f32> = b_vec.iter().map(|x| x / norm_b).collect();
b.iter(|| cosine_similarity(&a_norm, &b_norm));
});
}
group.finish();
}
fn simd_norm_squared_benchmarks(criterion: &mut Criterion) {
let mut group = criterion.benchmark_group("simd_norm_squared");
group.measurement_time(MEASURE);
group.warm_up_time(WARM_UP);
let vector_sizes = vec![128, 768, 1536];
for &size in &vector_sizes {
group.bench_with_input(BenchmarkId::new("scalar", size), &size, |b, &size| {
let (a, _) = benchmark_vectors(size);
b.iter(|| compute_norm_squared_scalar(&a));
});
group.bench_with_input(BenchmarkId::new("simd", size), &size, |b, &size| {
let (a, _) = benchmark_vectors(size);
b.iter(|| compute_norm_squared(&a));
});
}
group.finish();
}
fn simd_batch_filter_benchmarks(criterion: &mut Criterion) {
use sqlitegraph::hnsw::batch_filter::{filter_allowed_scalar, filter_batch};
let mut group = criterion.benchmark_group("simd_batch_filter");
group.measurement_time(MEASURE);
group.warm_up_time(WARM_UP);
let batch_sizes = vec![100, 1000, 10000];
for &size in &batch_sizes {
let ids: Vec<u64> = (0..size).collect();
let allowed: Vec<u64> = (0..size / 2).collect();
group.bench_with_input(
BenchmarkId::new("scalar", size),
&(ids.clone(), allowed.clone()),
|b, (ids, allowed)| {
b.iter(|| filter_allowed_scalar(ids, allowed));
},
);
group.bench_with_input(
BenchmarkId::new("simd", size),
&(ids.clone(), allowed.clone()),
|b, (ids, allowed)| {
b.iter(|| filter_batch(ids, allowed, true));
},
);
}
group.finish();
}
fn simd_delta_encode_benchmarks(criterion: &mut Criterion) {
use sqlitegraph::hnsw::serialization::{delta_encode, delta_encode_scalar};
let mut group = criterion.benchmark_group("simd_delta_encode");
group.measurement_time(MEASURE);
group.warm_up_time(WARM_UP);
let array_sizes = vec![100, 1000, 10000];
for &size in &array_sizes {
let values: Vec<u32> = (0..size).map(|i| (i * 10) as u32).collect();
group.bench_with_input(BenchmarkId::new("scalar", size), &values, |b, values| {
b.iter(|| delta_encode_scalar(values));
});
group.bench_with_input(BenchmarkId::new("simd", size), &values, |b, values| {
b.iter(|| delta_encode(values));
});
}
group.finish();
}
criterion_group!(
benches,
hnsw_vector_insertion,
hnsw_search_performance,
hnsw_distance_metrics,
hnsw_end_to_end_performance,
hnsw_openai_embeddings,
simd_dot_product_benchmarks,
simd_euclidean_distance_benchmarks,
simd_cosine_similarity_benchmarks,
simd_norm_squared_benchmarks,
simd_batch_filter_benchmarks,
simd_delta_encode_benchmarks
);
criterion_main!(benches);