#![allow(clippy::cast_precision_loss)]
use super::index::HnswIndex;
use super::params::SearchQuality;
use crate::distance::DistanceMetric;
use crate::index::VectorIndex;
/// Fills a `dim`-element vector with deterministic pseudo-random components in `[-1.0, 1.0)`.
///
/// `seed` is advanced in place by one wrapping 64-bit linear congruential step per
/// component, so repeated calls sharing the same `seed` variable continue one sequence
/// and identical starting seeds always reproduce identical vectors.
fn pseudo_random_vector(dim: usize, seed: &mut u64) -> Vec<f32> {
    // 2^24: number of distinct values carried by the top 24 bits of the state.
    const SCALE: f32 = (1u32 << 24) as f32;
    (0..dim)
        .map(|_| {
            *seed = seed.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
            // Use the top 24 bits of the state: they convert to f32 without rounding, so
            // the unit sample is exactly in [0, 1) and the scaled component spans both
            // signs. (Dividing a 31-bit value by `u32::MAX` only reached [0, 0.5), which
            // made every component negative.)
            let unit = (*seed >> 40) as f32 / SCALE;
            unit * 2.0 - 1.0
        })
        .collect()
}
/// Creates an [`HnswIndex`] of dimension `dim` using `metric` and inserts `count`
/// deterministic pseudo-random vectors under the IDs `0..count`.
///
/// The generator state starts from a fixed constant, so every call with the same
/// arguments inserts the same vectors in the same order.
///
/// # Panics
///
/// Panics if `HnswIndex::new` returns an error.
fn build_small_index(dim: usize, count: u64, metric: DistanceMetric) -> HnswIndex {
    let hnsw = HnswIndex::new(dim, metric).expect("test: new HNSW index");
    let mut rng_state: u64 = 0x00C0_FFEE;
    (0..count).for_each(|vector_id| {
        let vector = pseudo_random_vector(dim, &mut rng_state);
        hnsw.insert(vector_id, &vector);
    });
    hnsw
}
/// Cosine parity check on a 2000-vector index: the results of `search_hnsw_only` must
/// agree in count, ID order, and score (within a small tolerance) with a reference
/// assembled by hand from `inner.search`, `mappings.get_id`, and `transform_score`.
#[test]
fn search_auto_matches_cpu_search_below_gpu_threshold_cosine() {
    let dim = 64;
    let top_k = 10;
    let ef = 128;
    let index = build_small_index(dim, 2000, DistanceMetric::Cosine);

    let mut query: Vec<f32> = Vec::with_capacity(dim);
    for j in 0..dim {
        query.push((j as f32 * 0.0173).sin());
    }

    let actual = index.search_hnsw_only(&query, top_k, ef);

    // Reference path: raw graph search, then map internal node ids back to external ids
    // and convert raw distances to scores. The read guard is released at the end of
    // this block.
    let expected = {
        let graph = index.inner.read();
        let mut scored = Vec::new();
        for (node_id, raw_dist) in graph.search(&query, top_k, ef) {
            // Nodes without an external id mapping are skipped.
            if let Some(id) = index.mappings.get_id(node_id) {
                let score = graph.transform_score(raw_dist);
                scored.push(crate::scored_result::ScoredResult::new(id, score));
            }
        }
        scored
    };

    assert_eq!(
        actual.len(),
        expected.len(),
        "search_auto and direct search must return the same number of results below the GPU threshold",
    );

    let actual_ids: Vec<u64> = actual.iter().map(|hit| hit.id).collect();
    let expected_ids: Vec<u64> = expected.iter().map(|hit| hit.id).collect();
    assert_eq!(
        actual_ids, expected_ids,
        "search_auto must return the same IDs in the same order as CPU search below the GPU threshold",
    );

    let tolerance = f32::EPSILON * 16.0;
    actual.iter().zip(expected.iter()).for_each(|(got, want)| {
        assert!(
            (got.score - want.score).abs() < tolerance,
            "scores must match exactly: auto={} vs ref={}",
            got.score,
            want.score,
        );
    });
}
/// Euclidean parity check on a 1500-vector index: `search_hnsw_only` must return the
/// same IDs, in the same order, as a reference assembled from `inner.search`,
/// `mappings.get_id`, and `transform_score`.
#[test]
fn search_auto_matches_cpu_search_below_gpu_threshold_euclidean() {
    let dim = 32;
    let top_k = 5;
    let ef = 96;
    let index = build_small_index(dim, 1500, DistanceMetric::Euclidean);

    let mut query: Vec<f32> = Vec::with_capacity(dim);
    for j in 0..dim {
        query.push((j as f32 * 0.021).cos());
    }

    let actual = index.search_hnsw_only(&query, top_k, ef);

    // Reference results built directly from the inner graph while holding its read guard.
    let expected: Vec<_> = {
        let graph = index.inner.read();
        graph
            .search(&query, top_k, ef)
            .into_iter()
            .filter_map(|(node_id, raw_dist)| {
                // Skip nodes that have no external id mapping.
                let id = index.mappings.get_id(node_id)?;
                let score = graph.transform_score(raw_dist);
                Some(crate::scored_result::ScoredResult::new(id, score))
            })
            .collect()
    };

    let actual_ids: Vec<u64> = actual.iter().map(|hit| hit.id).collect();
    let expected_ids: Vec<u64> = expected.iter().map(|hit| hit.id).collect();
    assert_eq!(actual_ids, expected_ids, "Euclidean parity below GPU threshold");
}
/// Recall check through the `VectorIndex::search` trait entry point: on a 1000-vector
/// Cosine index, at least 80% of the brute-force top-`k` neighbours (ranked by
/// `cosine_similarity_native`) must appear in the search results.
#[test]
fn search_auto_wiring_preserves_recall_small_dataset() {
    let dim = 48;
    let count = 1000;
    let k = 10;
    let index = HnswIndex::new(dim, DistanceMetric::Cosine).expect("test");

    // Generate each deterministic vector and insert it right away; IDs follow the
    // generation order.
    let mut dataset: Vec<Vec<f32>> = Vec::with_capacity(count);
    for i in 0..count {
        let row: Vec<f32> = (0..dim)
            .map(|j| ((i * dim + j) as f32 * 0.0013).sin())
            .collect();
        index.insert(i as u64, &row);
        dataset.push(row);
    }

    let query: Vec<f32> = (0..dim).map(|j| (j as f32 * 0.007).cos()).collect();

    // Brute-force ground truth: score every vector, then rank by descending similarity.
    let mut ranked: Vec<(u64, f32)> = Vec::with_capacity(dataset.len());
    for (idx, vec) in dataset.iter().enumerate() {
        let similarity = crate::simd_native::cosine_similarity_native(&query, vec);
        ranked.push((idx as u64, similarity));
    }
    ranked.sort_by(|lhs, rhs| {
        rhs.1
            .partial_cmp(&lhs.1)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    let ground_truth: std::collections::HashSet<u64> =
        ranked.iter().take(k).map(|&(id, _)| id).collect();

    let results = <HnswIndex as VectorIndex>::search(&index, &query, k);
    let retrieved: std::collections::HashSet<u64> = results.iter().map(|hit| hit.id).collect();

    let hits = ground_truth
        .iter()
        .filter(|id| retrieved.contains(*id))
        .count();
    let recall = hits as f64 / k as f64;
    assert!(
        recall >= 0.80,
        "Recall@{k} via search_auto-wired path must be >= 80% on a small CPU-fallback index; got {:.1}%",
        recall * 100.0,
    );
}
/// Ordering check for `search_with_quality` at `SearchQuality::Accurate` on a 200-vector
/// Cosine index: the result list must be non-empty and its scores must be
/// non-increasing, allowing a small floating-point tolerance between neighbours.
#[test]
fn search_auto_wiring_preserves_score_ordering_on_accurate_quality() {
    let dim = 16;
    let count = 200;
    let index = build_small_index(dim, count, DistanceMetric::Cosine);

    let mut query: Vec<f32> = Vec::with_capacity(dim);
    for j in 0..dim {
        query.push((j as f32 * 0.045).sin());
    }

    let results = index
        .search_with_quality(&query, 20, SearchQuality::Accurate)
        .expect("test: search_with_quality");
    assert!(!results.is_empty(), "Accurate search must return results");

    // Compare every result with its immediate successor.
    let tolerance = f32::EPSILON * 16.0;
    for (earlier, later) in results.iter().zip(results.iter().skip(1)) {
        assert!(
            earlier.score >= later.score - tolerance,
            "scores must be non-increasing for Cosine: {} then {}",
            earlier.score,
            later.score,
        );
    }
}
/// Compiled only with the `gpu` feature. After asserting that `should_traverse_gpu`
/// rejects this index size as a pre-condition, checks that the inner graph's
/// `search_auto` returns exactly what its `search` returns for the same query.
#[test]
#[cfg(feature = "gpu")]
fn search_auto_below_threshold_matches_cpu_under_gpu_feature() {
    let dim = 64;
    let top_k = 10;
    let ef = 128;
    let index = build_small_index(dim, 3000, DistanceMetric::Cosine);

    let gpu_selected = crate::gpu::should_traverse_gpu(index.len(), dim);
    assert!(
        !gpu_selected,
        "test pre-condition: 3000 vectors must be below the GPU threshold",
    );

    let mut query: Vec<f32> = Vec::with_capacity(dim);
    for j in 0..dim {
        query.push((j as f32 * 0.011).sin());
    }

    // Both searches run against the same read guard, so they see the same graph state.
    let graph = index.inner.read();
    let via_auto = graph.search_auto(&query, top_k, ef);
    let via_cpu = graph.search(&query, top_k, ef);
    assert_eq!(
        via_auto, via_cpu,
        "Under gpu feature at sub-threshold size, search_auto must tail-call search() verbatim",
    );
}