#![allow(clippy::cast_precision_loss)]
use super::distance::{CachedSimdDistance, CpuDistance};
use super::graph::{NativeHnsw, DEFAULT_ALPHA};
use crate::distance::DistanceMetric;
#[test]
fn test_native_hnsw_basic_insert_search() {
let engine = CachedSimdDistance::new(DistanceMetric::Cosine, 128);
let hnsw = NativeHnsw::new(engine, 16, 100, 1000);
for i in 0..100_u64 {
let v: Vec<f32> = (0..128).map(|j| ((i + j) as f32 * 0.01).sin()).collect();
hnsw.insert(&v).expect("test");
}
assert_eq!(hnsw.len(), 100);
let query: Vec<f32> = (0..128).map(|j| (j as f32 * 0.01).sin()).collect();
let results = hnsw.search(&query, 10, 50);
assert_eq!(results.len(), 10);
assert!(results[0].1 < 0.1, "First result should be very close");
}
#[test]
fn test_native_hnsw_recall() {
let engine = CachedSimdDistance::new(DistanceMetric::Cosine, 128);
let hnsw = NativeHnsw::new(engine, 16, 100, 500);
let vectors: Vec<Vec<f32>> = (0..200)
.map(|i| {
(0..128)
.map(|j| ((i * 128 + j) as f32 * 0.001).sin())
.collect()
})
.collect();
for v in &vectors {
hnsw.insert(v).expect("test");
}
let mut total_recall = 0.0;
let n_queries = 5;
let k = 10;
for q_idx in 0..n_queries {
let query = &vectors[q_idx * 40];
let hnsw_results: Vec<usize> = hnsw
.search(query, k, 128)
.iter()
.map(|(id, _)| *id)
.collect();
let mut distances: Vec<(usize, f32)> = vectors
.iter()
.enumerate()
.map(|(i, v)| {
let dist = cosine_distance(query, v);
(i, dist)
})
.collect();
distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
let ground_truth: Vec<usize> = distances.iter().take(k).map(|(i, _)| *i).collect();
let hits = hnsw_results
.iter()
.filter(|id| ground_truth.contains(id))
.count();
total_recall += hits as f64 / k as f64;
}
let avg_recall = total_recall / n_queries as f64;
assert!(
avg_recall >= 0.8,
"Recall should be at least 80%, got {:.1}%",
avg_recall * 100.0
);
}
fn cosine_distance(a: &[f32], b: &[f32]) -> f32 {
let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
if norm_a == 0.0 || norm_b == 0.0 {
1.0
} else {
1.0 - (dot / (norm_a * norm_b))
}
}
#[test]
fn test_cpu_vs_simd_consistency() {
let cpu_engine = CpuDistance::new(DistanceMetric::Euclidean);
let simd_engine = CachedSimdDistance::new(DistanceMetric::Euclidean, 64);
let cpu_hnsw = NativeHnsw::new(cpu_engine, 16, 100, 100);
let simd_hnsw = NativeHnsw::new(simd_engine, 16, 100, 100);
for i in 0..50_u64 {
let v: Vec<f32> = (0..64).map(|j| (i + j) as f32).collect();
cpu_hnsw.insert(&v).expect("test");
simd_hnsw.insert(&v).expect("test");
}
let query: Vec<f32> = (0..64).map(|j| j as f32).collect();
let cpu_results = cpu_hnsw.search(&query, 5, 30);
let simd_results = simd_hnsw.search(&query, 5, 30);
assert_eq!(
cpu_results[0].0, simd_results[0].0,
"CPU and SIMD should find same nearest neighbor"
);
}
#[test]
fn test_native_hnsw_with_alpha_diversification() {
let engine = CachedSimdDistance::new(DistanceMetric::Cosine, 32);
let hnsw = NativeHnsw::with_alpha(engine, 16, 100, 100, 1.2);
for i in 0..25_u64 {
let v: Vec<f32> = (0..32)
.map(|j| {
if j == 0 {
1.0
} else {
(i as f32 + j as f32) * 0.001
}
})
.collect();
hnsw.insert(&v).expect("test");
}
for i in 0..25_u64 {
let v: Vec<f32> = (0..32)
.map(|j| {
if j == 1 {
1.0
} else {
(i as f32 + j as f32) * 0.001
}
})
.collect();
hnsw.insert(&v).expect("test");
}
assert_eq!(hnsw.len(), 50);
let query: Vec<f32> = (0..32).map(|j| if j == 0 { 0.9 } else { 0.01 }).collect();
let results = hnsw.search(&query, 5, 50);
assert!(!results.is_empty(), "Should return results");
}
#[test]
fn test_native_hnsw_alpha_default_is_vamana() {
let engine = CachedSimdDistance::new(DistanceMetric::Euclidean, 32);
let hnsw = NativeHnsw::new(engine, 16, 100, 100);
assert!(
(hnsw.get_alpha() - DEFAULT_ALPHA).abs() < f32::EPSILON,
"Default alpha should be {DEFAULT_ALPHA}"
);
}
#[test]
fn test_native_hnsw_alpha_affects_graph_structure() {
let engine1 = CachedSimdDistance::new(DistanceMetric::Euclidean, 32);
let engine2 = CachedSimdDistance::new(DistanceMetric::Euclidean, 32);
let hnsw_standard = NativeHnsw::new(engine1, 16, 100, 100);
let hnsw_diverse = NativeHnsw::with_alpha(engine2, 16, 100, 100, 1.2);
for i in 0..30_u64 {
let v: Vec<f32> = (0..32).map(|j| (i + j) as f32 * 0.1).collect();
hnsw_standard.insert(&v).expect("test");
hnsw_diverse.insert(&v).expect("test");
}
assert_eq!(hnsw_standard.len(), hnsw_diverse.len());
}
#[test]
fn test_search_multi_entry_returns_results() {
let engine = CachedSimdDistance::new(DistanceMetric::Cosine, 32);
let hnsw = NativeHnsw::new(engine, 16, 100, 100);
for i in 0..50_u64 {
let v: Vec<f32> = (0..32).map(|j| ((i + j) as f32 * 0.01).sin()).collect();
hnsw.insert(&v).expect("test");
}
let query: Vec<f32> = (0..32).map(|j| (j as f32 * 0.01).sin()).collect();
let results = hnsw.search_multi_entry(&query, 5, 50, 3);
assert!(!results.is_empty(), "Should return results");
assert!(results.len() <= 5, "Should not exceed k");
}
#[test]
fn test_search_multi_entry_vs_standard() {
let engine = CachedSimdDistance::new(DistanceMetric::Euclidean, 32);
let hnsw = NativeHnsw::new(engine, 16, 100, 100);
for i in 0..30_u64 {
let v: Vec<f32> = (0..32).map(|j| (i + j) as f32 * 0.1).collect();
hnsw.insert(&v).expect("test");
}
let query: Vec<f32> = (0..32).map(|j| j as f32 * 0.05).collect();
let standard = hnsw.search(&query, 5, 50);
let multi = hnsw.search_multi_entry(&query, 5, 50, 2);
assert!(!standard.is_empty());
assert!(!multi.is_empty());
}
#[test]
fn test_concurrent_insert_search_no_deadlock() {
use std::sync::Arc;
use std::thread;
let engine = CachedSimdDistance::new(DistanceMetric::Euclidean, 32);
let hnsw = Arc::new(NativeHnsw::new(engine, 16, 100, 500));
for i in 0..50_u64 {
let v: Vec<f32> = (0..32).map(|j| (i + j) as f32 * 0.1).collect();
hnsw.insert(&v).expect("test");
}
let mut handles = vec![];
for t in 0..4 {
let hnsw_clone = Arc::clone(&hnsw);
handles.push(thread::spawn(move || {
for i in 0..25_u64 {
let v: Vec<f32> = (0..32).map(|j| ((t * 100 + i) + j) as f32 * 0.01).collect();
hnsw_clone.insert(&v).expect("test");
}
}));
}
for _ in 0..4 {
let hnsw_clone = Arc::clone(&hnsw);
handles.push(thread::spawn(move || {
for i in 0..25_u64 {
let query: Vec<f32> = (0..32).map(|j| (i + j) as f32 * 0.05).collect();
let _ = hnsw_clone.search(&query, 5, 30);
}
}));
}
for handle in handles {
let result = handle.join();
assert!(result.is_ok(), "Thread should complete without panic");
}
assert!(hnsw.len() >= 50, "Should have at least initial vectors");
}
#[test]
fn test_parallel_insert_stress_no_deadlock() {
use std::sync::Arc;
use std::thread;
let engine = CachedSimdDistance::new(DistanceMetric::Cosine, 64);
let hnsw = Arc::new(NativeHnsw::new(engine, 32, 200, 1000));
let num_threads = 8;
let vectors_per_thread = 50;
let mut handles = vec![];
for t in 0..num_threads {
let hnsw_clone = Arc::clone(&hnsw);
handles.push(thread::spawn(move || {
for i in 0..vectors_per_thread {
let idx = t * vectors_per_thread + i;
let v: Vec<f32> = (0..64)
.map(|j| ((idx * 64 + j) as f32 * 0.001).sin())
.collect();
hnsw_clone.insert(&v).expect("test");
}
}));
}
for handle in handles {
handle.join().expect("Thread should not panic");
}
let final_count = hnsw.len();
assert!(
final_count >= (num_threads * vectors_per_thread) / 2,
"Should have inserted many vectors, got {final_count}"
);
let query: Vec<f32> = (0..64).map(|j| (j as f32 * 0.001).sin()).collect();
let results = hnsw.search(&query, 10, 50);
assert!(
!results.is_empty(),
"Search should return results after parallel inserts"
);
}
#[test]
fn test_mixed_operations_no_deadlock() {
use std::sync::Arc;
use std::thread;
let engine = CachedSimdDistance::new(DistanceMetric::Euclidean, 32);
let hnsw = Arc::new(NativeHnsw::new(engine, 16, 100, 300));
for i in 0..30_u64 {
let v: Vec<f32> = (0..32).map(|j| (i + j) as f32 * 0.1).collect();
hnsw.insert(&v).expect("test");
}
let mut handles = vec![];
for t in 0..3 {
let hnsw_clone = Arc::clone(&hnsw);
handles.push(thread::spawn(move || {
for i in 0..20_u64 {
let v: Vec<f32> = (0..32).map(|j| ((t * 100 + i) + j) as f32 * 0.01).collect();
hnsw_clone.insert(&v).expect("test");
}
}));
}
for _ in 0..2 {
let hnsw_clone = Arc::clone(&hnsw);
handles.push(thread::spawn(move || {
for i in 0..30_u64 {
let query: Vec<f32> = (0..32).map(|j| (i + j) as f32 * 0.05).collect();
let _ = hnsw_clone.search(&query, 5, 30);
}
}));
}
for _ in 0..2 {
let hnsw_clone = Arc::clone(&hnsw);
handles.push(thread::spawn(move || {
for i in 0..20_u64 {
let query: Vec<f32> = (0..32).map(|j| (i + j) as f32 * 0.03).collect();
let _ = hnsw_clone.search_multi_entry(&query, 5, 30, 2);
}
}));
}
for handle in handles {
handle
.join()
.expect("Thread should complete without deadlock");
}
assert!(hnsw.len() >= 30, "Index should have vectors");
}
#[test]
fn test_concurrent_insert_deterministic_count() {
use std::sync::Arc;
use std::thread;
let engine = CachedSimdDistance::new(DistanceMetric::Cosine, 64);
let hnsw = Arc::new(NativeHnsw::new(engine, 16, 100, 2000));
let num_threads = 8;
let vectors_per_thread = 100;
let mut handles = vec![];
for t in 0..num_threads {
let hnsw_clone = Arc::clone(&hnsw);
handles.push(thread::spawn(move || {
for i in 0..vectors_per_thread {
let idx = t * vectors_per_thread + i;
let v: Vec<f32> = (0..64)
.map(|j| ((idx * 64 + j) as f32 * 0.001).sin())
.collect();
hnsw_clone.insert(&v).expect("test");
}
}));
}
for handle in handles {
handle.join().expect("Thread should complete without panic");
}
let final_count = hnsw.len();
assert_eq!(
final_count,
num_threads * vectors_per_thread,
"Every insert must be counted; got {final_count} expected {}",
num_threads * vectors_per_thread
);
let query: Vec<f32> = (0..64).map(|j| (j as f32 * 0.001).sin()).collect();
let results = hnsw.search(&query, 20, 50);
assert_eq!(
results.len(),
20,
"Search should return exactly k results from populated graph"
);
for window in results.windows(2) {
assert!(
window[0].1 <= window[1].1,
"Results must be sorted by distance: {} > {}",
window[0].1,
window[1].1
);
}
}
#[test]
fn test_concurrent_insert_search_correctness() {
use std::sync::Arc;
use std::thread;
let engine = CachedSimdDistance::new(DistanceMetric::Euclidean, 32);
let hnsw = Arc::new(NativeHnsw::new(engine, 16, 100, 1000));
for i in 0..100_u64 {
let v: Vec<f32> = (0..32).map(|j| (i + j) as f32 * 0.1).collect();
hnsw.insert(&v).expect("test");
}
let mut handles = vec![];
for t in 0..4_u64 {
let hnsw_clone = Arc::clone(&hnsw);
handles.push(thread::spawn(move || {
for i in 0..50_u64 {
let v: Vec<f32> = (0..32)
.map(|j| ((t * 1000 + i) + j) as f32 * 0.01)
.collect();
hnsw_clone.insert(&v).expect("test");
}
}));
}
for t in 0..4_u64 {
let hnsw_clone = Arc::clone(&hnsw);
handles.push(thread::spawn(move || {
for i in 0..50_u64 {
let query: Vec<f32> = (0..32).map(|j| ((t * 500 + i) + j) as f32 * 0.02).collect();
let results = hnsw_clone.search(&query, 5, 30);
assert!(results.len() <= 5, "Search must return at most k results");
let current_len = hnsw_clone.len();
for &(node_id, dist) in &results {
assert!(
node_id < current_len + 200,
"Node ID {node_id} should be in valid range"
);
assert!(
dist.is_finite(),
"Distance must be finite, got {dist} for node {node_id}"
);
}
for window in results.windows(2) {
assert!(
window[0].1 <= window[1].1,
"Results must be sorted by distance"
);
}
}
}));
}
for handle in handles {
handle
.join()
.expect("Thread should complete without deadlock");
}
let final_count = hnsw.len();
assert!(
final_count >= 300,
"Should have at least 100 initial + 200 inserted, got {final_count}"
);
let snapshot = super::graph::safety_counters::HNSW_COUNTERS.snapshot();
assert_eq!(
snapshot.invariant_violation_total, 0,
"Concurrent insert+search must not trigger lock-order violations"
);
}
#[test]
fn test_concurrent_insert_multi_entry_search() {
use std::sync::Arc;
use std::thread;
let engine = CachedSimdDistance::new(DistanceMetric::Cosine, 32);
let hnsw = Arc::new(NativeHnsw::new(engine, 16, 100, 600));
for i in 0..50_u64 {
let v: Vec<f32> = (0..32).map(|j| ((i + j) as f32 * 0.01).sin()).collect();
hnsw.insert(&v).expect("test");
}
let mut handles = vec![];
for t in 0..3_u64 {
let hnsw_clone = Arc::clone(&hnsw);
handles.push(thread::spawn(move || {
for i in 0..30_u64 {
let v: Vec<f32> = (0..32)
.map(|j| ((t * 100 + i) + j) as f32 * 0.005)
.collect();
hnsw_clone.insert(&v).expect("test");
}
}));
}
for _ in 0..3_u64 {
let hnsw_clone = Arc::clone(&hnsw);
handles.push(thread::spawn(move || {
for i in 0..30_u64 {
let query: Vec<f32> = (0..32).map(|j| (i + j) as f32 * 0.03).collect();
let results = hnsw_clone.search_multi_entry(&query, 5, 30, 3);
assert!(results.len() <= 5, "Multi-entry search must respect k");
for window in results.windows(2) {
assert!(
window[0].1 <= window[1].1,
"Multi-entry results must be sorted by distance"
);
}
}
}));
}
for handle in handles {
handle.join().expect("Thread must complete (no deadlock)");
}
assert!(
hnsw.len() >= 50,
"Index must retain at least pre-populated vectors"
);
}
#[test]
fn test_hnsw_no_deadlock_during_parallel_insert_search() {
use std::sync::Arc;
use std::thread;
let engine = CachedSimdDistance::new(DistanceMetric::Cosine, 64);
let hnsw = Arc::new(NativeHnsw::new(engine, 16, 100, 500));
for i in 0..100_u64 {
let v: Vec<f32> = (0..64).map(|j| ((i + j) as f32 * 0.01).sin()).collect();
hnsw.insert(&v).expect("test");
}
let mut handles = vec![];
for t in 0..4 {
let hnsw_clone = Arc::clone(&hnsw);
handles.push(thread::spawn(move || {
for i in 0..50_u64 {
let v: Vec<f32> = (0..64)
.map(|j| ((t * 1000 + i) + j) as f32 * 0.001)
.collect();
hnsw_clone.insert(&v).expect("test");
}
}));
}
for t in 0..4 {
let hnsw_clone = Arc::clone(&hnsw);
handles.push(thread::spawn(move || {
for i in 0..50_u64 {
let query: Vec<f32> = (0..64)
.map(|j| ((t * 500 + i) + j) as f32 * 0.002)
.collect();
let results = hnsw_clone.search(&query, 10, 50);
assert!(
results.len() <= 10,
"Search should return at most k results"
);
}
}));
}
for handle in handles {
handle
.join()
.expect("Thread should complete without deadlock");
}
let final_count = hnsw.len();
assert!(
final_count >= 100,
"Should have at least initial 100 vectors, got {final_count}"
);
let snapshot = super::graph::safety_counters::HNSW_COUNTERS.snapshot();
assert_eq!(
snapshot.invariant_violation_total, 0,
"No lock-order violations should occur with correct lock ordering"
);
}
#[test]
fn test_concurrent_insert_delete_search_at_index_level() {
use crate::distance::DistanceMetric as DM;
use crate::index::hnsw::native_index::NativeHnswIndex;
use std::sync::Arc;
use std::thread;
let index = Arc::new(NativeHnswIndex::new(32, DM::Euclidean).expect("test"));
for i in 0u64..100 {
let v: Vec<f32> = (0..32).map(|j| (i + j) as f32 * 0.1).collect();
index.insert(i, &v).expect("test");
}
assert_eq!(index.len(), 100);
let mut handles = vec![];
for t in 0..2u64 {
let idx = Arc::clone(&index);
handles.push(thread::spawn(move || {
for i in 0..50u64 {
let id = 1000 + t * 50 + i;
let v: Vec<f32> = (0..32).map(|j| (id + j) as f32 * 0.01).collect();
idx.insert(id, &v).expect("test");
}
}));
}
for t in 0..2u64 {
let idx = Arc::clone(&index);
handles.push(thread::spawn(move || {
for i in 0..25u64 {
let id = t * 25 + i;
let _ = idx.remove(id);
}
}));
}
for _ in 0..2 {
let idx = Arc::clone(&index);
handles.push(thread::spawn(move || {
for i in 0..30u64 {
let query: Vec<f32> = (0..32).map(|j| (i + j) as f32 * 0.05).collect();
let results = idx.search(&query, 10);
assert!(results.len() <= 10, "Search must respect k limit");
for sr in &results {
assert!(
sr.score.is_finite(),
"Distance must be finite for ID {}",
sr.id
);
}
}
}));
}
for handle in handles {
handle
.join()
.expect("Thread must complete without deadlock");
}
let live_len = index.len();
assert_eq!(
live_len, 150,
"Live count must reflect inserts minus deletes: got {live_len}"
);
let query: Vec<f32> = (0..32).map(|j| j as f32 * 0.1).collect();
let results = index.search(&query, 50);
for sr in &results {
assert!(
!(0..50).contains(&sr.id),
"Soft-deleted ID {} must not appear in search results",
sr.id
);
}
let query_new: Vec<f32> = (0..32).map(|j| (1050 + j) as f32 * 0.01).collect();
let results_new = index.search(&query_new, 10);
assert!(
!results_new.is_empty(),
"Newly inserted vectors must be searchable"
);
}
#[test]
fn test_delete_exclusion_under_concurrent_search() {
use crate::distance::DistanceMetric as DM;
use crate::index::hnsw::native_index::NativeHnswIndex;
use std::sync::atomic::{AtomicBool, Ordering as AtomOrd};
use std::sync::Arc;
use std::thread;
let index = Arc::new(NativeHnswIndex::new(16, DM::Cosine).expect("test"));
for i in 0u64..200 {
let v: Vec<f32> = (0..16).map(|j| ((i + j) as f32 * 0.01).sin()).collect();
index.insert(i, &v).expect("test");
}
let violation_found = Arc::new(AtomicBool::new(false));
let mut handles = vec![];
{
let idx = Arc::clone(&index);
handles.push(thread::spawn(move || {
for i in (0u64..200).step_by(2) {
idx.remove(i);
}
}));
}
for _ in 0..4 {
let idx = Arc::clone(&index);
let vf = Arc::clone(&violation_found);
handles.push(thread::spawn(move || {
for i in 0..50u64 {
let query: Vec<f32> = (0..16).map(|j| ((i + j) as f32 * 0.02).sin()).collect();
let results = idx.search(&query, 20);
for sr in &results {
if !sr.score.is_finite() {
vf.store(true, AtomOrd::Relaxed);
}
}
}
}));
}
for handle in handles {
handle.join().expect("Thread must complete");
}
assert!(
!violation_found.load(AtomOrd::Relaxed),
"No non-finite distances should appear during concurrent delete+search"
);
let live_len = index.len();
assert_eq!(live_len, 100, "Live count must exclude soft-deleted IDs");
let query: Vec<f32> = (0..16).map(|j| (j as f32 * 0.01).sin()).collect();
let results = index.search(&query, 200);
for sr in &results {
assert!(
sr.id % 2 != 0,
"Soft-deleted even ID {} must not appear in post-delete search",
sr.id
);
}
assert!(
!results.is_empty(),
"Odd IDs should still be searchable after deleting even IDs"
);
}
#[test]
fn test_prenormalized_cosine_recall_matches_standard() {
use super::distance::CachedSimdDistance;
let dim = 128;
let engine_std = CachedSimdDistance::new(DistanceMetric::Cosine, dim);
let hnsw_std = NativeHnsw::new(engine_std, 16, 100, 500);
let engine_pre = CachedSimdDistance::new_prenormalized(DistanceMetric::Cosine, dim);
let hnsw_pre = NativeHnsw::new(engine_pre, 16, 100, 500);
let vectors: Vec<Vec<f32>> = (0..200)
.map(|i| {
(0..dim)
.map(|j| ((i * dim + j) as f32 * 0.001).sin())
.collect()
})
.collect();
for v in &vectors {
hnsw_std.insert(v).expect("test");
hnsw_pre.insert(v).expect("test");
}
let k = 10;
for q_idx in [0, 40, 80, 120, 160] {
let query = &vectors[q_idx];
let results_std = hnsw_std.search(query, k, 128);
let results_pre = hnsw_pre.search(query, k, 128);
assert_eq!(results_std.len(), k, "standard should return {k} results");
assert_eq!(results_pre.len(), k, "prenorm should return {k} results");
assert!(
(results_std[0].1 - results_pre[0].1).abs() < 1e-4,
"Best distance should stay aligned across cosine paths (q={q_idx})"
);
let std_ids: Vec<usize> = results_std.iter().map(|(id, _)| *id).collect();
let pre_ids: Vec<usize> = results_pre.iter().map(|(id, _)| *id).collect();
let overlap = std_ids.iter().filter(|id| pre_ids.contains(id)).count();
assert!(
overlap >= k * 8 / 10,
"Recall overlap too low at q={q_idx}: {overlap}/{k}"
);
}
}
#[test]
fn test_prenormalized_search_distances_are_consistent() {
use super::distance::CachedSimdDistance;
let dim = 64;
let engine = CachedSimdDistance::new_prenormalized(DistanceMetric::Cosine, dim);
let hnsw = NativeHnsw::new(engine, 16, 100, 100);
for i in 0..50_u64 {
let v: Vec<f32> = (0..dim)
.map(|j| ((i + j as u64) as f32 * 0.01).sin())
.collect();
hnsw.insert(&v).expect("test");
}
let query: Vec<f32> = (0..dim).map(|j| (j as f32 * 0.01).sin()).collect();
let results = hnsw.search(&query, 10, 50);
for &(_, dist) in &results {
assert!(
dist.is_finite() && (-1e-6..=2.0 + 1e-6).contains(&dist),
"Cosine distance {dist} out of valid range"
);
}
for window in results.windows(2) {
assert!(
window[0].1 <= window[1].1,
"Results must be sorted: {} > {}",
window[0].1,
window[1].1
);
}
}
#[test]
fn test_safety_counters_accessible_after_operations() {
let engine = CachedSimdDistance::new(DistanceMetric::Euclidean, 32);
let hnsw = NativeHnsw::new(engine, 16, 100, 100);
for i in 0..20_u64 {
let v: Vec<f32> = (0..32).map(|j| (i + j) as f32 * 0.1).collect();
hnsw.insert(&v).expect("test");
}
let query: Vec<f32> = (0..32).map(|j| j as f32 * 0.05).collect();
let _ = hnsw.search(&query, 5, 30);
let snapshot = super::graph::safety_counters::HNSW_COUNTERS.snapshot();
assert_eq!(
snapshot.invariant_violation_total, 0,
"Correct lock ordering should produce zero violations"
);
assert_eq!(
snapshot.corruption_detected_total, 0,
"Normal operations should not trigger corruption signals"
);
}
#[test]
fn test_backward_pruning_preserves_diversity_across_clusters() {
let engine = CachedSimdDistance::new(DistanceMetric::Euclidean, 32);
let hnsw = NativeHnsw::with_alpha(engine, 16, 200, 200, 1.2);
for cluster in 0..4_usize {
for i in 0..10_u64 {
let v: Vec<f32> = (0..32)
.map(|j| {
if j == cluster {
10.0 + i as f32 * 0.01 } else {
i as f32 * 0.001 }
})
.collect();
hnsw.insert(&v).expect("test: cluster insert");
}
}
assert_eq!(hnsw.len(), 40);
let hub: Vec<f32> = (0..32).map(|j| if j < 4 { 5.0 } else { 0.0 }).collect();
hnsw.insert(&hub).expect("test: hub insert");
let hub_id = 40;
hnsw.with_vectors_and_layers_read(|vectors, layers| {
let neighbors = layers[0].get_neighbors(hub_id);
assert!(
!neighbors.is_empty(),
"Hub node must have at least one neighbor"
);
let mut clusters_seen = std::collections::HashSet::new();
for &n in &neighbors {
let vec = unsafe { vectors.get_unchecked(n) };
let cluster_id = vec[..4]
.iter()
.enumerate()
.max_by(|a, b| a.1.partial_cmp(b.1).unwrap())
.unwrap()
.0;
clusters_seen.insert(cluster_id);
}
assert!(
clusters_seen.len() >= 2,
"Backward pruning should preserve diversity: \
hub neighbors span only {} cluster(s), expected >= 2. \
Neighbor IDs: {:?}",
clusters_seen.len(),
neighbors
);
});
}
#[test]
fn test_diversity_backward_pruning_recall_not_degraded() {
let engine = CachedSimdDistance::new(DistanceMetric::Euclidean, 64);
let hnsw = NativeHnsw::new(engine, 16, 100, 500);
let vectors: Vec<Vec<f32>> = (0..200)
.map(|i| {
(0..64)
.map(|j| ((i * 64 + j) as f32 * 0.001).sin())
.collect()
})
.collect();
for v in &vectors {
hnsw.insert(v).expect("test: insert");
}
let k = 10;
let mut total_recall = 0.0;
let n_queries = 10;
for q_idx in 0..n_queries {
let query = &vectors[q_idx * 20];
let hnsw_results: Vec<usize> = hnsw
.search(query, k, 128)
.iter()
.map(|(id, _)| *id)
.collect();
let mut distances: Vec<(usize, f32)> = vectors
.iter()
.enumerate()
.map(|(i, v)| {
let d: f32 = query
.iter()
.zip(v.iter())
.map(|(a, b)| (a - b).powi(2))
.sum::<f32>()
.sqrt();
(i, d)
})
.collect();
distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
let ground_truth: Vec<usize> = distances.iter().take(k).map(|(i, _)| *i).collect();
let hits = hnsw_results
.iter()
.filter(|id| ground_truth.contains(id))
.count();
total_recall += hits as f64 / k as f64;
}
let avg_recall = total_recall / n_queries as f64;
assert!(
avg_recall >= 0.80,
"Diversity-aware backward pruning must not degrade recall below 80%: \
got {:.1}%",
avg_recall * 100.0
);
}
#[test]
fn test_eviction_respects_distance_when_all_equally_diverse() {
let engine = CachedSimdDistance::new(DistanceMetric::Euclidean, 8);
let hnsw = NativeHnsw::with_alpha(engine, 2, 100, 50, 1.2);
for i in 0..5_usize {
let v: Vec<f32> = (0..8).map(|j| if j == i { 1.0 } else { 0.0 }).collect();
hnsw.insert(&v).expect("test: axis insert");
}
let close_to_0: Vec<f32> = (0..8).map(|j| if j == 0 { 0.99 } else { 0.001 }).collect();
hnsw.insert(&close_to_0).expect("test: close insert");
assert_eq!(hnsw.len(), 6);
let query: Vec<f32> = (0..8).map(|j| if j == 0 { 1.0 } else { 0.0 }).collect();
let results = hnsw.search(&query, 3, 50);
assert!(
!results.is_empty(),
"Search should return results after eviction"
);
for window in results.windows(2) {
assert!(
window[0].1 <= window[1].1,
"Results must be sorted by distance: {} > {}",
window[0].1,
window[1].1
);
}
}