nodedb_vector/batch_distance.rs
1//! Batch distance computation for HNSW neighbor selection.
2//!
3//! Instead of computing distances one-at-a-time in a loop, collects
4//! candidate vectors and computes distances in bulk. This improves
5//! cache utilization and enables SIMD-friendly memory access patterns.
6//!
7//! Used by `select_neighbors_heuristic` in build.rs to accelerate
8//! the diversity check during HNSW graph construction.
9
10use crate::distance::{DistanceMetric, distance};
11
12/// Compute distances from a query vector to multiple candidate vectors.
13///
14/// Returns a Vec of distances, one per candidate, in the same order.
15/// Processes candidates in batches for better cache behavior.
16pub fn batch_distances(query: &[f32], candidates: &[&[f32]], metric: DistanceMetric) -> Vec<f32> {
17 candidates
18 .iter()
19 .map(|candidate| distance(query, candidate, metric))
20 .collect()
21}
22
23/// Precompute all pairwise distances between selected neighbors and a candidate.
24///
25/// For the diversity heuristic: given a candidate and the currently selected
26/// set, compute `distance(candidate, selected[i])` for all i.
27/// Returns true if the candidate is "diverse" (closer to query than to
28/// every selected neighbor).
29pub fn is_diverse_batched(
30 candidate_vec: &[f32],
31 candidate_dist_to_query: f32,
32 selected_vecs: &[&[f32]],
33 metric: DistanceMetric,
34) -> bool {
35 for selected in selected_vecs {
36 let dist_to_selected = distance(candidate_vec, selected, metric);
37 if candidate_dist_to_query > dist_to_selected {
38 return false;
39 }
40 }
41 true
42}
43
44#[cfg(test)]
45mod tests {
46 use super::*;
47
48 #[test]
49 fn batch_distances_correctness() {
50 let query = [1.0, 0.0, 0.0];
51 let c1 = [0.0, 1.0, 0.0];
52 let c2 = [1.0, 0.0, 0.0];
53 let c3 = [0.0, 0.0, 1.0];
54
55 let dists = batch_distances(&query, &[&c1, &c2, &c3], DistanceMetric::L2);
56 assert_eq!(dists.len(), 3);
57 // c2 is identical to query → distance 0.
58 assert_eq!(dists[1], 0.0);
59 // c1 and c3 are equidistant from query.
60 assert_eq!(dists[0], dists[2]);
61 }
62
63 #[test]
64 fn diversity_check() {
65 let candidate = [1.0, 0.0];
66 let selected1 = [0.9, 0.1]; // Close to candidate.
67
68 // candidate_dist_to_query = 0.5 (arbitrary).
69 // dist(candidate, selected1) = sqrt(0.01 + 0.01) = 0.141...
70 // Since 0.5 > 0.141, candidate is NOT diverse (farther from query than from selected).
71 assert!(!is_diverse_batched(
72 &candidate,
73 0.5,
74 &[&selected1],
75 DistanceMetric::L2,
76 ));
77
78 // With dist_to_query = 0.01 — candidate is closer to query than to selected.
79 assert!(is_diverse_batched(
80 &candidate,
81 0.01,
82 &[&selected1],
83 DistanceMetric::L2,
84 ));
85 }
86}