use common::Vector;
use criterion::{
    black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput,
};
use std::sync::Arc;
use storage::InMemoryStorage;
use storage::VectorStorage;
/// Deterministically generates `count` pseudo-random vectors of the given
/// `dimension`, with ids of the form `{prefix}{index}`.
///
/// Uses `DefaultHasher` as a cheap, seedless PRNG so benchmark inputs are
/// reproducible across runs without pulling in a `rand` dependency. Each
/// component is mapped into approximately [-1.0, 1.0).
fn generate_vectors(count: usize, dimension: usize, prefix: &str) -> Vec<Vector> {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};
    (0..count)
        .map(|i| {
            // Derive a per-vector seed from the vector index.
            let mut hasher = DefaultHasher::new();
            i.hash(&mut hasher);
            let seed = hasher.finish();
            let values: Vec<f32> = (0..dimension)
                .map(|j| {
                    let mut h = DefaultHasher::new();
                    // wrapping_add: `seed` is an arbitrary u64, so `seed + j`
                    // can overflow (a panic in debug builds).
                    seed.wrapping_add(j as u64).hash(&mut h);
                    // Scale the hash from [0, 1] to [-1.0, 1.0].
                    (h.finish() as f32 / u64::MAX as f32) * 2.0 - 1.0
                })
                .collect();
            Vector {
                id: format!("{}{}", prefix, i),
                values,
                metadata: None,
                ttl_seconds: None,
                expires_at: None,
            }
        })
        .collect()
}
fn bench_insert_batch_sizes(c: &mut Criterion) {
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.unwrap();
let mut group = c.benchmark_group("insert_batch_sizes");
let dimension = 128;
for batch_size in [1, 10, 100, 1000, 5000].iter() {
group.throughput(Throughput::Elements(*batch_size as u64));
group.bench_with_input(
BenchmarkId::from_parameter(batch_size),
batch_size,
|b, &size| {
b.iter(|| {
rt.block_on(async {
let storage = Arc::new(InMemoryStorage::new());
let namespace = "bench".to_string();
storage.ensure_namespace(&namespace).await.unwrap();
let vectors = generate_vectors(size, dimension, "v");
let count = storage
.upsert(&namespace, black_box(vectors))
.await
.unwrap();
black_box(count)
})
});
},
);
}
group.finish();
}
fn bench_insert_dimensions(c: &mut Criterion) {
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.unwrap();
let mut group = c.benchmark_group("insert_dimensions");
let batch_size = 100;
for dimension in [32, 128, 384, 768, 1536].iter() {
group.throughput(Throughput::Elements(batch_size as u64));
group.bench_with_input(
BenchmarkId::from_parameter(dimension),
dimension,
|b, &dim| {
b.iter(|| {
rt.block_on(async {
let storage = Arc::new(InMemoryStorage::new());
let namespace = "bench".to_string();
storage.ensure_namespace(&namespace).await.unwrap();
let vectors = generate_vectors(batch_size, dim, "v");
let count = storage
.upsert(&namespace, black_box(vectors))
.await
.unwrap();
black_box(count)
})
});
},
);
}
group.finish();
}
fn bench_insert_with_metadata(c: &mut Criterion) {
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.unwrap();
let mut group = c.benchmark_group("insert_with_metadata");
let dimension = 128;
let batch_size = 100;
group.throughput(Throughput::Elements(batch_size as u64));
group.bench_function("no_metadata", |b| {
b.iter(|| {
rt.block_on(async {
let storage = Arc::new(InMemoryStorage::new());
let namespace = "bench".to_string();
storage.ensure_namespace(&namespace).await.unwrap();
let vectors = generate_vectors(batch_size, dimension, "v");
let count = storage
.upsert(&namespace, black_box(vectors))
.await
.unwrap();
black_box(count)
})
});
});
group.bench_function("simple_metadata", |b| {
b.iter(|| {
rt.block_on(async {
let storage = Arc::new(InMemoryStorage::new());
let namespace = "bench".to_string();
storage.ensure_namespace(&namespace).await.unwrap();
let mut vectors = generate_vectors(batch_size, dimension, "v");
for (i, v) in vectors.iter_mut().enumerate() {
v.metadata = Some(serde_json::json!({
"category": if i % 2 == 0 { "A" } else { "B" },
"value": i as f64
}));
}
let count = storage
.upsert(&namespace, black_box(vectors))
.await
.unwrap();
black_box(count)
})
});
});
group.bench_function("complex_metadata", |b| {
b.iter(|| {
rt.block_on(async {
let storage = Arc::new(InMemoryStorage::new());
let namespace = "bench".to_string();
storage.ensure_namespace(&namespace).await.unwrap();
let mut vectors = generate_vectors(batch_size, dimension, "v");
for (i, v) in vectors.iter_mut().enumerate() {
v.metadata = Some(serde_json::json!({
"category": if i % 2 == 0 { "A" } else { "B" },
"value": i as f64,
"tags": ["tag1", "tag2", "tag3"],
"nested": {
"field1": "value1",
"field2": i * 10,
"deep": {
"data": [1, 2, 3, 4, 5]
}
}
}));
}
let count = storage
.upsert(&namespace, black_box(vectors))
.await
.unwrap();
black_box(count)
})
});
});
group.finish();
}
fn bench_concurrent_inserts(c: &mut Criterion) {
let rt = tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()
.unwrap();
let mut group = c.benchmark_group("concurrent_inserts");
let dimension = 128;
let vectors_per_task = 100;
for num_tasks in [1, 2, 4, 8].iter() {
let total_vectors = *num_tasks * vectors_per_task;
group.throughput(Throughput::Elements(total_vectors as u64));
group.bench_with_input(
BenchmarkId::from_parameter(num_tasks),
num_tasks,
|b, &tasks| {
b.iter(|| {
rt.block_on(async {
let storage = Arc::new(InMemoryStorage::new());
let handles: Vec<_> = (0..tasks)
.map(|t| {
let storage = storage.clone();
let namespace = format!("bench_{}", t);
tokio::spawn(async move {
storage.ensure_namespace(&namespace).await.unwrap();
let vectors = generate_vectors(
vectors_per_task,
dimension,
&format!("t{}_", t),
);
storage.upsert(&namespace, vectors).await.unwrap()
})
})
.collect();
let mut total = 0;
for h in handles {
total += h.await.unwrap();
}
black_box(total)
})
});
},
);
}
group.finish();
}
// Register all insert-path benchmarks and generate criterion's `main`.
criterion_group!(
    benches,
    bench_insert_batch_sizes,
    bench_insert_dimensions,
    bench_insert_with_metadata,
    bench_concurrent_inserts,
);
criterion_main!(benches);