use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion};
use overgraph::{
DatabaseEngine, DbOptions, DenseMetric, DenseVectorConfig, HnswConfig, NodeInput, PropValue,
UpsertNodeOptions, VectorSearchMode, VectorSearchRequest,
};
use std::collections::BTreeMap;
/// Number of upserts issued by every measured routine in this file.
const BURST_SIZE: u64 = 10_000;
/// Value assigned to `memtable_flush_threshold` by `async_opts` (2^20).
/// NOTE(review): presumably a byte count, i.e. 1 MiB; confirm against `DbOptions`.
const ASYNC_THRESHOLD: usize = 1 << 20;
/// The sync baselines call `flush()` explicitly after every this many writes.
const SYNC_FLUSH_INTERVAL: u64 = 3_300;
/// Creates a fresh temporary directory and opens a `DatabaseEngine` in it
/// using `opts`.
///
/// The `TempDir` guard is handed back together with the engine so the caller
/// decides how long the directory lives. Panics if either the directory or
/// the engine cannot be created.
fn temp_db_with_opts(opts: DbOptions) -> (tempfile::TempDir, DatabaseEngine) {
    let tmp = tempfile::tempdir().unwrap();
    let db = DatabaseEngine::open(tmp.path(), &opts).unwrap();
    (tmp, db)
}
/// Options used by the `sync_baseline` benchmarks.
///
/// Both `memtable_flush_threshold` and `compact_after_n_flushes` are set to
/// zero; every other field keeps its default.
/// NOTE(review): zero presumably disables automatic flushing/compaction, so
/// that only the explicit `flush()` calls in the baselines flush — confirm
/// against the `DbOptions` documentation.
fn sync_opts() -> DbOptions {
    DbOptions {
        compact_after_n_flushes: 0,
        memtable_flush_threshold: 0,
        create_if_missing: true,
        ..Default::default()
    }
}
/// Options used by the `async_auto_flush` benchmarks.
///
/// `memtable_flush_threshold` is set to `ASYNC_THRESHOLD` and
/// `compact_after_n_flushes` to zero; every other field keeps its default.
fn async_opts() -> DbOptions {
    DbOptions {
        compact_after_n_flushes: 0,
        memtable_flush_threshold: ASYNC_THRESHOLD,
        create_if_missing: true,
        ..Default::default()
    }
}
/// Builds the upsert options for the `i`-th benchmark write.
///
/// The node carries three properties: a per-node `name`, a constant
/// `category`, and a `score` derived from `i`. All remaining options keep
/// their defaults.
fn write_opts(i: u64) -> UpsertNodeOptions {
    let props = BTreeMap::from([
        (
            "name".to_string(),
            PropValue::String(format!("benchmark_node_{}", i)),
        ),
        (
            "category".to_string(),
            PropValue::String("perf_test_data".to_string()),
        ),
        ("score".to_string(), PropValue::Float(i as f64 * 0.001)),
    ]);
    UpsertNodeOptions {
        props,
        ..Default::default()
    }
}
/// Builds a deterministic, L2-normalised dense vector of length `dim`.
///
/// Three slots are populated from `index` before normalisation:
/// * `index % dim` is set to `1.0`,
/// * the slot seven positions further on (wrapping) is set to `0.25`,
/// * slot `(index * 3 + 13) % dim` is incremented by `0.05`.
///
/// For small `dim` these slots can coincide, in which case later writes
/// overwrite or add to earlier ones.
///
/// Returns an empty vector when `dim == 0` instead of panicking on the
/// modulo, and never overflows for large `index` values.
fn simple_dense_vector(dim: usize, index: usize) -> Vec<f32> {
    if dim == 0 {
        return Vec::new();
    }
    let mut v = vec![0.0f32; dim];
    let primary = index % dim;
    v[primary] = 1.0;
    v[(primary + 7) % dim] = 0.25;
    // (index * 3 + 13) % dim == (primary * 3 + 13) % dim; reducing first keeps
    // the multiplication from overflowing when `index` is very large.
    v[(primary * 3 + 13) % dim] += 0.05;
    let norm = v.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm > 0.0 {
        v.iter_mut().for_each(|x| *x /= norm);
    }
    v
}
/// Produces a deterministic sparse vector as `(dimension, weight)` pairs.
///
/// For each `j` in `0..nnz` the dimension is `(index * 7 + j * 31 + 13) % 4096`
/// and the weight is `1.0 - j * 0.08`. The pairs are then sorted by dimension
/// and entries sharing a dimension are collapsed to a single one, so the
/// result may hold fewer than `nnz` pairs.
fn simple_sparse_vector(index: usize, nnz: usize) -> Vec<(u32, f32)> {
    let mut entries: Vec<(u32, f32)> = (0..nnz)
        .map(|slot| {
            let dim_id = ((index * 7 + slot * 31 + 13) % 4096) as u32;
            let weight = 1.0 - slot as f32 * 0.08;
            (dim_id, weight)
        })
        .collect();
    entries.sort_unstable_by_key(|entry| entry.0);
    entries.dedup_by_key(|entry| entry.0);
    entries
}
/// Seeds `engine` with `count` nodes of type 1 that carry no properties and
/// no vectors (keys `seed_0`, `seed_1`, ...), then flushes.
///
/// Returns whatever `batch_upsert_nodes` yields for the batch (the node ids).
/// Panics if the upsert or the flush fails.
fn pre_populate_plain(engine: &mut DatabaseEngine, count: usize) -> Vec<u64> {
    let mut batch = Vec::with_capacity(count);
    for n in 0..count {
        batch.push(NodeInput {
            type_id: 1,
            key: format!("seed_{}", n),
            props: BTreeMap::new(),
            weight: 1.0,
            dense_vector: None,
            sparse_vector: None,
        });
    }
    let node_ids = engine.batch_upsert_nodes(&batch).unwrap();
    engine.flush().unwrap();
    node_ids
}
/// Seeds `engine` with `count` nodes of type 1 (keys `vec_0`, `vec_1`, ...),
/// each carrying a `dim`-dimensional dense vector from
/// `simple_dense_vector`, then flushes.
///
/// Panics if the upsert or the flush fails.
fn pre_populate_dense(engine: &mut DatabaseEngine, count: usize, dim: usize) {
    let mut batch = Vec::with_capacity(count);
    for n in 0..count {
        batch.push(NodeInput {
            type_id: 1,
            key: format!("vec_{}", n),
            props: BTreeMap::new(),
            weight: 1.0,
            dense_vector: Some(simple_dense_vector(dim, n)),
            sparse_vector: None,
        });
    }
    engine.batch_upsert_nodes(&batch).unwrap();
    engine.flush().unwrap();
}
/// Seeds `engine` with `count` nodes of type 1 (keys `svec_0`, `svec_1`, ...),
/// each carrying a sparse vector from `simple_sparse_vector(i, nnz)`, then
/// flushes.
///
/// Panics if the upsert or the flush fails.
fn pre_populate_sparse(engine: &mut DatabaseEngine, count: usize, nnz: usize) {
    let mut batch = Vec::with_capacity(count);
    for n in 0..count {
        batch.push(NodeInput {
            type_id: 1,
            key: format!("svec_{}", n),
            props: BTreeMap::new(),
            weight: 1.0,
            dense_vector: None,
            sparse_vector: Some(simple_sparse_vector(n, nnz)),
        });
    }
    engine.batch_upsert_nodes(&batch).unwrap();
    engine.flush().unwrap();
}
/// Benchmarks a burst of `BURST_SIZE` upserts into a freshly opened, empty
/// database.
///
/// * `sync_baseline` uses `sync_opts()` and calls `flush()` explicitly after
///   every `SYNC_FLUSH_INTERVAL` writes.
/// * `async_auto_flush` uses `async_opts()` and never calls `flush()` itself.
///
/// NOTE(review): the engine and the temp dir are bound inside the routine
/// closure, so they are dropped before it returns and their teardown is part
/// of the measured time — confirm that this is intended.
fn bench_sustained_writes_threshold(c: &mut Criterion) {
    let mut grp = c.benchmark_group("async_flush_sustained_writes");
    grp.sample_size(30);

    grp.bench_function("sync_baseline", |bencher| {
        bencher.iter_batched(
            || temp_db_with_opts(sync_opts()),
            |(_dir, engine)| {
                for i in 0..BURST_SIZE {
                    let key = format!("n{}", i);
                    engine.upsert_node(1, &key, write_opts(i)).unwrap();
                    let written = i + 1;
                    if written % SYNC_FLUSH_INTERVAL == 0 {
                        engine.flush().unwrap();
                    }
                }
            },
            BatchSize::PerIteration,
        );
    });

    grp.bench_function("async_auto_flush", |bencher| {
        bencher.iter_batched(
            || temp_db_with_opts(async_opts()),
            |(_dir, engine)| {
                for i in 0..BURST_SIZE {
                    let key = format!("n{}", i);
                    engine.upsert_node(1, &key, write_opts(i)).unwrap();
                }
            },
            BatchSize::PerIteration,
        );
    });

    grp.finish();
}
/// Benchmarks a burst of `BURST_SIZE` upserts into a database that already
/// received 2000 un-timed writes during setup.
///
/// * `sync_baseline` uses `sync_opts()` and flushes explicitly every
///   `SYNC_FLUSH_INTERVAL` writes, counted separately in setup and in the
///   measured loop.
/// * `async_auto_flush` uses `async_opts()` and never calls `flush()` itself.
///
/// The measured writes use keys `n2000`, `n2001`, ... so they do not reuse
/// the `pre_*` keys written by the setup.
fn bench_writes_with_queued_epochs(c: &mut Criterion) {
    // Number of writes issued by the (untimed) setup closure.
    const PRELOADED: u64 = 2000;

    let mut grp = c.benchmark_group("async_flush_queued_epochs");
    grp.sample_size(30);

    grp.bench_function("sync_baseline", |bencher| {
        bencher.iter_batched(
            || {
                let (dir, engine) = temp_db_with_opts(sync_opts());
                for j in 0..PRELOADED {
                    let key = format!("pre_{}", j);
                    engine.upsert_node(1, &key, write_opts(j)).unwrap();
                    // NOTE(review): with the current constants
                    // (2000 < SYNC_FLUSH_INTERVAL) this branch never fires.
                    if (j + 1) % SYNC_FLUSH_INTERVAL == 0 {
                        engine.flush().unwrap();
                    }
                }
                (dir, engine)
            },
            |(_dir, engine)| {
                for i in 0..BURST_SIZE {
                    let k = PRELOADED + i;
                    let key = format!("n{}", k);
                    engine.upsert_node(1, &key, write_opts(k)).unwrap();
                    if (i + 1) % SYNC_FLUSH_INTERVAL == 0 {
                        engine.flush().unwrap();
                    }
                }
            },
            BatchSize::PerIteration,
        );
    });

    grp.bench_function("async_auto_flush", |bencher| {
        bencher.iter_batched(
            || {
                let (dir, engine) = temp_db_with_opts(async_opts());
                for j in 0..PRELOADED {
                    let key = format!("pre_{}", j);
                    engine.upsert_node(1, &key, write_opts(j)).unwrap();
                }
                (dir, engine)
            },
            |(_dir, engine)| {
                for k in PRELOADED..PRELOADED + BURST_SIZE {
                    let key = format!("n{}", k);
                    engine.upsert_node(1, &key, write_opts(k)).unwrap();
                }
            },
            BatchSize::PerIteration,
        );
    });

    grp.finish();
}
/// Benchmarks `BURST_SIZE` upserts interleaved one-for-one with point reads.
///
/// Setup seeds 1000 plain nodes via `pre_populate_plain`; the measured loop
/// writes a new node and then reads one of the seeded nodes, cycling through
/// the seeded ids in order.
///
/// * `sync_baseline` uses `sync_opts()` and flushes explicitly every
///   `SYNC_FLUSH_INTERVAL` writes.
/// * `async_auto_flush` uses `async_opts()` and never calls `flush()` itself.
fn bench_mixed_writes_reads(c: &mut Criterion) {
    // Number of nodes inserted by the (untimed) setup closure.
    const SEEDED_NODES: usize = 1000;

    let mut grp = c.benchmark_group("async_flush_mixed_reads");
    grp.sample_size(30);

    grp.bench_function("sync_baseline", |bencher| {
        bencher.iter_batched(
            || {
                let (dir, mut engine) = temp_db_with_opts(sync_opts());
                let seeded = pre_populate_plain(&mut engine, SEEDED_NODES);
                (dir, engine, seeded)
            },
            |(_dir, engine, ids)| {
                for i in 0..BURST_SIZE {
                    let key = format!("w{}", i);
                    engine.upsert_node(1, &key, write_opts(i)).unwrap();
                    if (i + 1) % SYNC_FLUSH_INTERVAL == 0 {
                        engine.flush().unwrap();
                    }
                    let target = ids[(i as usize) % ids.len()];
                    black_box(engine.get_node(target).unwrap());
                }
            },
            BatchSize::PerIteration,
        );
    });

    grp.bench_function("async_auto_flush", |bencher| {
        bencher.iter_batched(
            || {
                let (dir, mut engine) = temp_db_with_opts(async_opts());
                let seeded = pre_populate_plain(&mut engine, SEEDED_NODES);
                (dir, engine, seeded)
            },
            |(_dir, engine, ids)| {
                for i in 0..BURST_SIZE {
                    let key = format!("w{}", i);
                    engine.upsert_node(1, &key, write_opts(i)).unwrap();
                    let target = ids[(i as usize) % ids.len()];
                    black_box(engine.get_node(target).unwrap());
                }
            },
            BatchSize::PerIteration,
        );
    });

    grp.finish();
}
fn bench_mixed_writes_dense_vector(c: &mut Criterion) {
let mut group = c.benchmark_group("async_flush_mixed_dense_vector");
group.sample_size(10);
let dim = 32usize;
let dense_config = DenseVectorConfig {
dimension: dim as u32,
metric: DenseMetric::Cosine,
hnsw: HnswConfig::default(),
};
let request = VectorSearchRequest {
mode: VectorSearchMode::Dense,
dense_query: Some(simple_dense_vector(dim, 999)),
sparse_query: None,
k: 10,
type_filter: None,
ef_search: None,
scope: None,
dense_weight: None,
sparse_weight: None,
fusion_mode: None,
};
group.bench_function("sync_baseline", |b| {
b.iter_batched(
|| {
let mut opts = sync_opts();
opts.dense_vector = Some(dense_config.clone());
let (_dir, mut engine) = temp_db_with_opts(opts);
pre_populate_dense(&mut engine, 1000, dim);
(_dir, engine)
},
|(_dir, engine)| {
for i in 0..BURST_SIZE {
engine
.upsert_node(1, &format!("w{}", i), write_opts(i))
.unwrap();
if (i + 1) % SYNC_FLUSH_INTERVAL == 0 {
engine.flush().unwrap();
}
black_box(engine.vector_search(&request).unwrap());
}
},
BatchSize::PerIteration,
);
});
group.bench_function("async_auto_flush", |b| {
b.iter_batched(
|| {
let mut opts = async_opts();
opts.dense_vector = Some(dense_config.clone());
let (_dir, mut engine) = temp_db_with_opts(opts);
pre_populate_dense(&mut engine, 1000, dim);
(_dir, engine)
},
|(_dir, engine)| {
for i in 0..BURST_SIZE {
engine
.upsert_node(1, &format!("w{}", i), write_opts(i))
.unwrap();
black_box(engine.vector_search(&request).unwrap());
}
},
BatchSize::PerIteration,
);
});
group.finish();
}
/// Benchmarks `BURST_SIZE` upserts interleaved one-for-one with sparse vector
/// searches (`k = 10`).
///
/// Setup seeds 1000 nodes with 8-entry sparse vectors via
/// `pre_populate_sparse`. The nodes written in the measured loop come from
/// `write_opts` and carry only properties.
///
/// * `sync_baseline` uses `sync_opts()` and flushes explicitly every
///   `SYNC_FLUSH_INTERVAL` writes.
/// * `async_auto_flush` uses `async_opts()` and never calls `flush()` itself.
fn bench_mixed_writes_sparse_vector(c: &mut Criterion) {
    // Number of vector-bearing nodes inserted by the (untimed) setup closure.
    const SEEDED_VECTORS: usize = 1000;
    // Entries requested per generated sparse vector (seed data and query).
    const NNZ: usize = 8;

    let mut grp = c.benchmark_group("async_flush_mixed_sparse_vector");
    grp.sample_size(10);

    // The same query is reused for every search in both variants.
    let query = VectorSearchRequest {
        mode: VectorSearchMode::Sparse,
        dense_query: None,
        sparse_query: Some(simple_sparse_vector(999, NNZ)),
        k: 10,
        type_filter: None,
        ef_search: None,
        scope: None,
        dense_weight: None,
        sparse_weight: None,
        fusion_mode: None,
    };

    grp.bench_function("sync_baseline", |bencher| {
        bencher.iter_batched(
            || {
                let (dir, mut engine) = temp_db_with_opts(sync_opts());
                pre_populate_sparse(&mut engine, SEEDED_VECTORS, NNZ);
                (dir, engine)
            },
            |(_dir, engine)| {
                for i in 0..BURST_SIZE {
                    let key = format!("w{}", i);
                    engine.upsert_node(1, &key, write_opts(i)).unwrap();
                    if (i + 1) % SYNC_FLUSH_INTERVAL == 0 {
                        engine.flush().unwrap();
                    }
                    black_box(engine.vector_search(&query).unwrap());
                }
            },
            BatchSize::PerIteration,
        );
    });

    grp.bench_function("async_auto_flush", |bencher| {
        bencher.iter_batched(
            || {
                let (dir, mut engine) = temp_db_with_opts(async_opts());
                pre_populate_sparse(&mut engine, SEEDED_VECTORS, NNZ);
                (dir, engine)
            },
            |(_dir, engine)| {
                for i in 0..BURST_SIZE {
                    let key = format!("w{}", i);
                    engine.upsert_node(1, &key, write_opts(i)).unwrap();
                    black_box(engine.vector_search(&query).unwrap());
                }
            },
            BatchSize::PerIteration,
        );
    });

    grp.finish();
}
// Registers every benchmark in this file under the `benches` group, using
// Criterion's default configuration, and generates the harness entry point.
criterion_group! {
    name = benches;
    config = Criterion::default();
    targets =
        bench_sustained_writes_threshold,
        bench_writes_with_queued_epochs,
        bench_mixed_writes_reads,
        bench_mixed_writes_dense_vector,
        bench_mixed_writes_sparse_vector
}
criterion_main!(benches);