use super::harness::*;
use luci::index::Index;
use luci::search::expression::parse_search;
use rayon::ThreadPoolBuilder;
use serde_json::json;
use std::collections::HashSet;
use std::time::Instant;
/// Prints the wall-clock time and throughput of building a vector corpus for
/// several `(vector count, dimensions)` configurations.
///
/// This test only reports numbers; it asserts nothing.
#[test]
fn hnsw_build_time() {
    println!("\n=== Vector: HNSW Build Time ===\n");
    // Each entry is (number of vectors, vector dimensionality).
    let configs = [
        (1_000, 64),
        (5_000, 64),
        (10_000, 64),
        (1_000, 128),
        (1_000, 256),
    ];
    for (count, dims) in configs {
        let timer = Instant::now();
        let corpus_name = format!("hnsw_build_{count}_{dims}");
        let (path, _index) = build_vector_corpus(&corpus_name, count, dims);
        let elapsed = timer.elapsed();
        let throughput = count as f64 / elapsed.as_secs_f64();
        println!(
            "{count:>6} vectors x {dims}d: {:.0}ms ({:.0} vecs/s)",
            elapsed.as_millis(),
            throughput
        );
        // Remove the on-disk corpus before building the next configuration.
        cleanup(&path);
    }
}
/// Measures kNN query latency on a 5,000-vector, 64-dimensional corpus for
/// several `(k, num_candidates)` settings and fails if p99 reaches 25 ms.
///
/// Every search result is checked with `expect`, so a query that fails fast
/// cannot register as a (misleadingly) low latency and slip past the guard.
#[test]
fn knn_latency() {
    println!("\n=== Vector: kNN Query Latency (5K vectors, 64d) ===\n");
    let dims = 64;
    let (path, index) = build_vector_corpus("knn_latency", 5_000, dims);
    // Deterministic query vector: a linear ramp from -1.0 towards 1.0.
    let query_vec: Vec<f32> = (0..dims)
        .map(|i| (i as f32 / dims as f32) * 2.0 - 1.0)
        .collect();
    println!("{:<20} {:>12} {:>12}", "config", "p50", "p99");
    println!("{}", "-".repeat(50));
    for &(k, ef) in &[(10, 50), (10, 100), (10, 200), (50, 100), (100, 200)] {
        let expr = parse_search(
            json!({
                "knn": {
                    "field": "embedding",
                    "query_vector": query_vec,
                    "k": k,
                    "num_candidates": ef
                }
            }),
            k,
        )
        .unwrap();

        // Warm-up iterations; their timings are not recorded.
        for _ in 0..5 {
            let _ = index.search(&expr).expect("kNN warm-up search failed");
        }

        // Timed iterations.
        let mut times = Vec::with_capacity(20);
        for _ in 0..20 {
            let start = Instant::now();
            let _ = index.search(&expr).expect("kNN timed search failed");
            times.push(start.elapsed());
        }
        times.sort();
        let p50 = times[times.len() / 2];
        // With 20 samples this index is 19, i.e. the slowest observation.
        let p99 = times[times.len() * 99 / 100];
        // Use fractional microseconds so the `.1` precision is meaningful;
        // `as_micros()` would truncate to whole microseconds.
        println!(
            "k={k:<3} ef={ef:<4} p50={:>6.1}us p99={:>6.1}us",
            p50.as_secs_f64() * 1e6,
            p99.as_secs_f64() * 1e6
        );
        assert!(
            p99.as_millis() < 25,
            "REGRESSION: kNN k={k} ef={ef} p99={}ms exceeds 25ms",
            p99.as_millis()
        );
    }
    cleanup(&path);
}
/// Prints mean recall@10 for a sweep of `num_candidates` (ef) values on a
/// 5,000-vector, 64-dimensional corpus.
///
/// The random query set and its reference top-10 (from `brute_force_knn`) are
/// generated once and reused for every ef value. Each row of the table is
/// therefore scored against the same queries, and the only variable between
/// rows is ef itself. This test only reports numbers; it asserts nothing.
#[test]
fn knn_recall_vs_ef() {
    println!("\n=== Vector: Recall@10 vs ef_search (5K vectors, 64d) ===\n");
    let dims = 64;
    let n = 5_000;
    let (path, mut index) = build_vector_corpus("knn_recall", n, dims);
    let mut rng: u64 = 777;
    let num_queries = 10;

    // Build the fixed query set together with the reference ids per query.
    let mut queries: Vec<(Vec<f32>, HashSet<u32>)> = Vec::with_capacity(num_queries);
    for _ in 0..num_queries {
        let mut query_vec = Vec::with_capacity(dims);
        for _ in 0..dims {
            // xorshift-style step; the state is then mapped into [-1.0, 1.0].
            rng ^= rng << 13;
            rng ^= rng >> 7;
            rng ^= rng << 17;
            query_vec.push((rng as f32 / u64::MAX as f32) * 2.0 - 1.0);
        }
        let brute_ids: HashSet<u32> = brute_force_knn(&mut index, &query_vec, 10)
            .into_iter()
            .collect();
        queries.push((query_vec, brute_ids));
    }

    println!("{:<10} {:>10}", "ef", "recall@10");
    println!("{}", "-".repeat(25));
    for &ef in &[10, 20, 50, 100, 200] {
        let mut total_recall = 0.0;
        for (query_vec, brute_ids) in &queries {
            let expr = parse_search(
                json!({
                    "knn": {
                        "field": "embedding",
                        "query_vector": query_vec,
                        "k": 10,
                        "num_candidates": ef
                    }
                }),
                10,
            )
            .unwrap();
            let results = index.search(&expr).unwrap();
            let hnsw_ids: HashSet<u32> = results.iter().map(|h| h.doc_id().as_u32()).collect();
            // Fraction of the 10 reference ids that the ef-limited search returned.
            let recall = hnsw_ids.intersection(brute_ids).count() as f64 / 10.0;
            total_recall += recall;
        }
        let avg_recall = total_recall / queries.len() as f64;
        println!("ef={ef:<6} recall={avg_recall:.3}");
    }
    cleanup(&path);
}
/// Compares the time of `force_merge(1)` on an index populated by
/// `build_vector_corpus_two_segments` (2,500 vectors per bulk call) against
/// the time to build a single 2,500-vector corpus from scratch.
///
/// Fails when the merge takes more than twice the single-corpus build time.
#[test]
fn merge_reuses_seed_graph() {
    use luci::index::Index;
    use std::path::PathBuf;
    println!("\n=== Vector: Merge Reuses Seed HNSW Graph ===\n");
    let dims = 64;
    let n_per_seg = 2_500;

    // Baseline: time a fresh build of one corpus of `n_per_seg` vectors.
    let baseline_elapsed = {
        let timer = Instant::now();
        let (baseline_path, _) = build_vector_corpus("merge_reuse_baseline", n_per_seg, dims);
        let took = timer.elapsed();
        cleanup(&baseline_path);
        took
    };

    // Candidate: reopen the two-batch index and time only the forced merge.
    let path: PathBuf = build_vector_corpus_two_segments("merge_reuse_test", n_per_seg, dims);
    let merge_index = Index::open(&path).unwrap();
    let merge_elapsed = {
        let timer = Instant::now();
        merge_index.force_merge(1).unwrap();
        timer.elapsed()
    };

    let ratio = merge_elapsed.as_secs_f64() / baseline_elapsed.as_secs_f64();
    println!(
        "single-segment build: {:.0}ms | merge of two segments: {:.0}ms | ratio {ratio:.2}×",
        baseline_elapsed.as_millis(),
        merge_elapsed.as_millis(),
    );
    assert!(
        ratio <= 2.0,
        "REGRESSION: merge took {ratio:.2}× single-segment-build (target ≤ 2.0×); \
         seed reuse may have regressed back to full rebuild"
    );
    cleanup(&path);
}
/// Creates an index under `profile_dir(name)` and loads it with two separate
/// `bulk` calls of `n_per_seg` documents each, then closes it.
///
/// Each batch uses its own RNG seed (42, then 9999), and every document
/// carries a `dims`-length `embedding` with components in `[-1.0, 1.0]`.
/// Returns the index path so the caller can reopen it.
///
/// NOTE(review): the name implies each `bulk` call yields its own segment;
/// that depends on `Index::bulk` and is not visible here — confirm.
fn build_vector_corpus_two_segments(
    name: &str,
    n_per_seg: usize,
    dims: usize,
) -> std::path::PathBuf {
    use luci::index::Index;
    let path = profile_dir(name);
    let index = Index::create_with_mapping(&path, vector_schema(dims)).unwrap();
    for seed in [42u64, 9999u64] {
        // The RNG state is shared by all documents of one batch.
        let mut state = seed;
        let mut docs: Vec<serde_json::Value> = Vec::with_capacity(n_per_seg);
        for i in 0..n_per_seg {
            let embedding: Vec<f64> = (0..dims)
                .map(|_| {
                    // xorshift-style step, then scale the state into [-1.0, 1.0].
                    state ^= state << 13;
                    state ^= state >> 7;
                    state ^= state << 17;
                    (state as f64 / u64::MAX as f64) * 2.0 - 1.0
                })
                .collect();
            docs.push(json!({
                "title": format!("vector doc {i}"),
                "tag": "x",
                "embedding": embedding,
            }));
        }
        index.bulk(docs).unwrap();
    }
    // Close the index explicitly before handing the path back.
    drop(index);
    path
}
/// Returns the doc ids of the `k` hits for `query`, used as the reference
/// set for recall calculations.
///
/// This is not a separate linear scan: it issues the same `knn` search as
/// the tests, with `num_candidates` fixed at 5000. The callers in this file
/// use 5,000-vector corpora.
fn brute_force_knn(index: &mut Index, query: &[f32], k: usize) -> Vec<u32> {
    let request = json!({
        "knn": {
            "field": "embedding",
            "query_vector": query,
            "k": k,
            "num_candidates": 5000
        }
    });
    let expr = parse_search(request, k).unwrap();
    let hits = index.search(&expr).unwrap();
    let mut ids = Vec::new();
    for hit in hits.iter() {
        ids.push(hit.doc_id().as_u32());
    }
    ids
}
/// Recall regression guard on a 5,000-vector, 64-dimensional corpus.
///
/// For `num_candidates` of 16, 64 and 128 it runs 20 random queries each,
/// compares the hits with `brute_force_knn`, and asserts that mean recall@10
/// stays at or above 0.55, 0.80 and 0.90 respectively. The RNG stream runs
/// on across the ef values, so each ef sees its own 20 queries.
#[test]
fn knn_recall_regression() {
    println!("\n=== Vector: Recall Regression Guard (5K vectors, 64d) ===\n");
    let dims = 64;
    let n = 5_000;
    // The corpus is built inside a one-thread rayon pool.
    // NOTE(review): presumably to keep graph construction deterministic — confirm.
    let single_threaded = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
    let (path, mut index) =
        single_threaded.install(|| build_vector_corpus("knn_recall_regression", n, dims));
    let mut state: u64 = 31415;
    let num_queries = 20;

    let ef_values: [usize; 3] = [16, 64, 128];
    // Mean recall per ef, in the same order as `ef_values`.
    let mut means: Vec<f64> = Vec::with_capacity(ef_values.len());
    for ef in ef_values {
        let samples: Vec<f64> = (0..num_queries)
            .map(|_| {
                let query_vec: Vec<f32> = (0..dims)
                    .map(|_| {
                        // xorshift-style step, then scale into [-1.0, 1.0].
                        state ^= state << 13;
                        state ^= state >> 7;
                        state ^= state << 17;
                        (state as f32 / u64::MAX as f32) * 2.0 - 1.0
                    })
                    .collect();
                let expr = parse_search(
                    json!({
                        "knn": {
                            "field": "embedding",
                            "query_vector": query_vec,
                            "k": 10,
                            "num_candidates": ef
                        }
                    }),
                    10,
                )
                .unwrap();
                let hits = index.search(&expr).unwrap();
                let hnsw_ids: HashSet<u32> = hits.iter().map(|h| h.doc_id().as_u32()).collect();
                let reference = brute_force_knn(&mut index, &query_vec, 10);
                let brute_ids: HashSet<u32> = reference.into_iter().collect();
                hnsw_ids.intersection(&brute_ids).count() as f64 / 10.0
            })
            .collect();
        let mean = samples.iter().sum::<f64>() / samples.len() as f64;
        means.push(mean);
        println!("ef={ef:<4} mean_recall@10 = {mean:.3}");
    }

    // All rows are printed before any threshold is checked.
    let (r16, r64, r128) = (means[0], means[1], means[2]);
    assert!(
        r16 >= 0.55,
        "REGRESSION: recall@10 at ef=16 was {r16:.3} (threshold 0.55)"
    );
    assert!(
        r64 >= 0.80,
        "REGRESSION: recall@10 at ef=64 was {r64:.3} (threshold 0.80)"
    );
    assert!(
        r128 >= 0.90,
        "REGRESSION: recall@10 at ef=128 was {r128:.3} (threshold 0.90)"
    );
    cleanup(&path);
}