Skip to main content

nodedb_vector/
batch_distance.rs

1//! Batch distance computation for HNSW neighbor selection.
2//!
3//! Instead of computing distances one-at-a-time in a loop, collects
4//! candidate vectors and computes distances in bulk. This improves
5//! cache utilization and enables SIMD-friendly memory access patterns.
6//!
7//! Used by `select_neighbors_heuristic` in build.rs to accelerate
8//! the diversity check during HNSW graph construction.
9
10use crate::distance::{DistanceMetric, distance};
11
12/// Compute distances from a query vector to multiple candidate vectors.
13///
14/// Returns a Vec of distances, one per candidate, in the same order.
15/// Processes candidates in batches for better cache behavior.
16pub fn batch_distances(query: &[f32], candidates: &[&[f32]], metric: DistanceMetric) -> Vec<f32> {
17    candidates
18        .iter()
19        .map(|candidate| distance(query, candidate, metric))
20        .collect()
21}
22
23/// Precompute all pairwise distances between selected neighbors and a candidate.
24///
25/// For the diversity heuristic: given a candidate and the currently selected
26/// set, compute `distance(candidate, selected[i])` for all i.
27/// Returns true if the candidate is "diverse" (closer to query than to
28/// every selected neighbor).
29pub fn is_diverse_batched(
30    candidate_vec: &[f32],
31    candidate_dist_to_query: f32,
32    selected_vecs: &[&[f32]],
33    metric: DistanceMetric,
34) -> bool {
35    for selected in selected_vecs {
36        let dist_to_selected = distance(candidate_vec, selected, metric);
37        if candidate_dist_to_query > dist_to_selected {
38            return false;
39        }
40    }
41    true
42}
43
44#[cfg(test)]
45mod tests {
46    use super::*;
47
48    #[test]
49    fn batch_distances_correctness() {
50        let query = [1.0, 0.0, 0.0];
51        let c1 = [0.0, 1.0, 0.0];
52        let c2 = [1.0, 0.0, 0.0];
53        let c3 = [0.0, 0.0, 1.0];
54
55        let dists = batch_distances(&query, &[&c1, &c2, &c3], DistanceMetric::L2);
56        assert_eq!(dists.len(), 3);
57        // c2 is identical to query → distance 0.
58        assert_eq!(dists[1], 0.0);
59        // c1 and c3 are equidistant from query.
60        assert_eq!(dists[0], dists[2]);
61    }
62
63    #[test]
64    fn diversity_check() {
65        let candidate = [1.0, 0.0];
66        let selected1 = [0.9, 0.1]; // Close to candidate.
67
68        // candidate_dist_to_query = 0.5 (arbitrary).
69        // dist(candidate, selected1) = sqrt(0.01 + 0.01) = 0.141...
70        // Since 0.5 > 0.141, candidate is NOT diverse (farther from query than from selected).
71        assert!(!is_diverse_batched(
72            &candidate,
73            0.5,
74            &[&selected1],
75            DistanceMetric::L2,
76        ));
77
78        // With dist_to_query = 0.01 — candidate is closer to query than to selected.
79        assert!(is_diverse_batched(
80            &candidate,
81            0.01,
82            &[&selected1],
83            DistanceMetric::L2,
84        ));
85    }
86}