use super::distance::{CpuDistance, SimdDistance};
use super::graph::NativeHnsw;
use super::layer::NodeId;
use crate::distance::DistanceMetric;
const HIGH_DIM: usize = 128;
const LOW_DIM: usize = 8;
const NUM_VECTORS: usize = 500;
const EF_SEARCH: usize = 64;
const K: usize = 10;
#[allow(clippy::cast_precision_loss)]
fn build_index_cpu(
metric: DistanceMetric,
dim: usize,
num: usize,
) -> (NativeHnsw<CpuDistance>, Vec<Vec<f32>>) {
let engine = CpuDistance::new(metric);
let hnsw = NativeHnsw::new(engine, 16, 100, num + 100);
let mut vectors = Vec::with_capacity(num);
for i in 0..num {
let v: Vec<f32> = (0..dim).map(|j| ((i * dim + j) as f32) * 0.01).collect();
hnsw.insert(&v).expect("insert should succeed");
vectors.push(v);
}
(hnsw, vectors)
}
#[allow(clippy::cast_precision_loss)]
fn build_index_simd(
metric: DistanceMetric,
dim: usize,
num: usize,
) -> (NativeHnsw<SimdDistance>, Vec<Vec<f32>>) {
let engine = SimdDistance::new(metric);
let hnsw = NativeHnsw::new(engine, 16, 100, num + 100);
let mut vectors = Vec::with_capacity(num);
for i in 0..num {
let v: Vec<f32> = (0..dim).map(|j| ((i * dim + j) as f32) * 0.01).collect();
hnsw.insert(&v).expect("insert should succeed");
vectors.push(v);
}
(hnsw, vectors)
}
fn result_ids(results: &[(NodeId, f32)]) -> Vec<NodeId> {
results.iter().map(|(id, _)| *id).collect()
}
fn recall_at_k(predicted: &[NodeId], ground_truth: &[NodeId]) -> f64 {
if ground_truth.is_empty() {
return 1.0;
}
let hits = predicted
.iter()
.filter(|id| ground_truth.contains(id))
.count();
#[allow(clippy::cast_precision_loss)]
let recall = hits as f64 / ground_truth.len() as f64;
recall
}
#[allow(deprecated)] #[test]
fn test_pipelined_recall_matches_sequential_euclidean() {
let (hnsw, vectors) = build_index_simd(DistanceMetric::Euclidean, HIGH_DIM, NUM_VECTORS);
let query = &vectors[0];
let results = hnsw.search(query, K, EF_SEARCH);
assert!(!results.is_empty(), "search should return results");
assert!(
results.len() <= K,
"should return at most k={K} results, got {}",
results.len()
);
assert_eq!(
results[0].0, 0,
"nearest neighbor of vector[0] should be node 0"
);
}
#[allow(deprecated)]
#[test]
fn test_pipelined_recall_matches_sequential_cosine() {
let (hnsw, vectors) = build_index_simd(DistanceMetric::Cosine, HIGH_DIM, NUM_VECTORS);
let query = &vectors[5];
let results = hnsw.search(query, K, EF_SEARCH);
assert!(!results.is_empty());
assert_eq!(
results[0].0, 5,
"nearest neighbor of vector[5] should be node 5"
);
}
#[allow(deprecated)]
#[test]
fn test_pipelined_recall_matches_sequential_dot_product() {
let (hnsw, vectors) = build_index_simd(DistanceMetric::DotProduct, HIGH_DIM, NUM_VECTORS);
let query = &vectors[10];
let results = hnsw.search(query, K, EF_SEARCH);
assert!(
!results.is_empty(),
"dot product search should return results"
);
assert!(
results.len() <= K,
"should return at most k={K} results, got {}",
results.len()
);
for window in results.windows(2) {
assert!(
window[0].1 <= window[1].1,
"results must be sorted by distance: {} <= {}",
window[0].1,
window[1].1,
);
}
}
#[allow(deprecated, clippy::cast_precision_loss)]
#[test]
fn test_pipelined_recall_above_threshold() {
let (hnsw, vectors) = build_index_simd(DistanceMetric::Euclidean, HIGH_DIM, NUM_VECTORS);
let mut total_recall = 0.0;
let num_queries = 20;
for q_idx in (0..NUM_VECTORS).step_by(NUM_VECTORS / num_queries) {
let query = &vectors[q_idx];
let results = hnsw.search(query, K, EF_SEARCH);
let result_node_ids = result_ids(&results);
let mut brute: Vec<(NodeId, f32)> = vectors
.iter()
.enumerate()
.map(|(i, v)| {
let d: f32 = v
.iter()
.zip(query.iter())
.map(|(a, b)| (a - b).powi(2))
.sum();
(i, d)
})
.collect();
brute.sort_by(|a, b| a.1.total_cmp(&b.1));
let gt_ids: Vec<NodeId> = brute.iter().take(K).map(|(id, _)| *id).collect();
total_recall += recall_at_k(&result_node_ids, >_ids);
}
let avg_recall = total_recall / num_queries as f64;
assert!(
avg_recall >= 0.95,
"Pipelined path recall@{K} must be >= 95% (got {:.1}%)",
avg_recall * 100.0,
);
}
#[test]
fn test_pipelined_cpu_engine_euclidean() {
let (hnsw, vectors) = build_index_cpu(DistanceMetric::Euclidean, HIGH_DIM, 200);
let query = &vectors[0];
let results = hnsw.search(query, K, EF_SEARCH);
assert!(!results.is_empty());
assert_eq!(results[0].0, 0);
}
#[allow(deprecated)]
#[test]
fn test_pipelined_empty_index() {
let engine = SimdDistance::new(DistanceMetric::Euclidean);
let hnsw = NativeHnsw::new(engine, 16, 100, 1000);
let query = vec![0.0_f32; HIGH_DIM];
let results = hnsw.search(&query, K, EF_SEARCH);
assert!(results.is_empty(), "empty index should return no results");
}
#[allow(deprecated)]
#[test]
fn test_pipelined_single_vector() {
let engine = SimdDistance::new(DistanceMetric::Euclidean);
let hnsw = NativeHnsw::new(engine, 16, 100, 100);
let v = vec![1.0_f32; HIGH_DIM];
hnsw.insert(&v).expect("insert should succeed");
let results = hnsw.search(&v, K, EF_SEARCH);
assert_eq!(results.len(), 1, "single-element index returns 1 result");
assert_eq!(results[0].0, 0);
}
#[allow(deprecated)]
#[test]
fn test_pipelined_multi_entry_search() {
let (hnsw, vectors) = build_index_simd(DistanceMetric::Euclidean, HIGH_DIM, NUM_VECTORS);
let query = &vectors[42];
let results = hnsw.search_multi_entry(query, K, 256, 3);
assert!(!results.is_empty());
assert_eq!(
results[0].0, 42,
"multi-entry search should still find exact match"
);
}
#[test]
fn test_low_dim_uses_sequential_path() {
let (hnsw, vectors) = build_index_cpu(DistanceMetric::Euclidean, LOW_DIM, 200);
let query = &vectors[0];
let results = hnsw.search(query, K, EF_SEARCH);
assert!(!results.is_empty());
assert_eq!(
results[0].0, 0,
"low-dim sequential path should find exact match"
);
}