Skip to main content

nodedb_types/
vector_distance.rs

1//! Shared scalar distance metric implementations.
2//!
3//! Pure scalar functions that the compiler auto-vectorizes. Used by both
4//! `nodedb` (with optional SIMD dispatch) and `nodedb-lite` (scalar only).
5
6/// Distance metric selection.
7#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
8pub enum DistanceMetric {
9    /// Euclidean (L2) squared distance.
10    L2 = 0,
11    /// Cosine distance (1 - cosine_similarity).
12    Cosine = 1,
13    /// Negative inner product (for max-inner-product search via min-heap).
14    InnerProduct = 2,
15    /// Manhattan (L1) distance: sum of absolute differences.
16    Manhattan = 3,
17    /// Chebyshev (L-infinity) distance: max absolute difference.
18    Chebyshev = 4,
19    /// Hamming distance for binary-like vectors (threshold > 0.5).
20    Hamming = 5,
21    /// Jaccard distance for binary-like vectors (threshold > 0.5).
22    Jaccard = 6,
23    /// Pearson distance: 1 - Pearson correlation coefficient.
24    Pearson = 7,
25}
26
27/// Euclidean (L2) squared distance.
28#[inline]
29pub fn l2_squared(a: &[f32], b: &[f32]) -> f32 {
30    debug_assert_eq!(a.len(), b.len());
31    let mut sum = 0.0f32;
32    for i in 0..a.len() {
33        let d = a[i] - b[i];
34        sum += d * d;
35    }
36    sum
37}
38
39/// Cosine distance: 1.0 - cosine_similarity(a, b).
40///
41/// Returns 0.0 for identical directions, 2.0 for opposite directions.
42#[inline]
43pub fn cosine_distance(a: &[f32], b: &[f32]) -> f32 {
44    debug_assert_eq!(a.len(), b.len());
45    let mut dot = 0.0f32;
46    let mut norm_a = 0.0f32;
47    let mut norm_b = 0.0f32;
48
49    for i in 0..a.len() {
50        dot += a[i] * b[i];
51        norm_a += a[i] * a[i];
52        norm_b += b[i] * b[i];
53    }
54
55    let denom = (norm_a * norm_b).sqrt();
56    if denom < f32::EPSILON {
57        return 1.0;
58    }
59    (1.0 - (dot / denom)).max(0.0)
60}
61
62/// Negative inner product (for max-inner-product search via min-heap).
63#[inline]
64pub fn neg_inner_product(a: &[f32], b: &[f32]) -> f32 {
65    debug_assert_eq!(a.len(), b.len());
66    let mut dot = 0.0f32;
67    for i in 0..a.len() {
68        dot += a[i] * b[i];
69    }
70    -dot
71}
72
73/// Manhattan (L1) distance: sum of absolute differences.
74#[inline]
75pub fn manhattan(a: &[f32], b: &[f32]) -> f32 {
76    debug_assert_eq!(a.len(), b.len());
77    let mut sum = 0.0f32;
78    for i in 0..a.len() {
79        sum += (a[i] - b[i]).abs();
80    }
81    sum
82}
83
84/// Chebyshev (L-infinity) distance: max absolute difference.
85#[inline]
86pub fn chebyshev(a: &[f32], b: &[f32]) -> f32 {
87    debug_assert_eq!(a.len(), b.len());
88    let mut max = 0.0f32;
89    for i in 0..a.len() {
90        let d = (a[i] - b[i]).abs();
91        if d > max {
92            max = d;
93        }
94    }
95    max
96}
97
98/// Hamming distance for f32 vectors (values > 0.5 treated as 1).
99#[inline]
100pub fn hamming_f32(a: &[f32], b: &[f32]) -> f32 {
101    debug_assert_eq!(a.len(), b.len());
102    let mut count = 0u32;
103    for i in 0..a.len() {
104        let ba = a[i] > 0.5;
105        let bb = b[i] > 0.5;
106        if ba != bb {
107            count += 1;
108        }
109    }
110    count as f32
111}
112
113/// Jaccard distance for f32 vectors (values > 0.5 treated as set membership).
114///
115/// Returns 1 - |intersection|/|union|. If both are zero-sets, returns 0.0.
116#[inline]
117pub fn jaccard(a: &[f32], b: &[f32]) -> f32 {
118    debug_assert_eq!(a.len(), b.len());
119    let mut intersection = 0u32;
120    let mut union = 0u32;
121    for i in 0..a.len() {
122        let ba = a[i] > 0.5;
123        let bb = b[i] > 0.5;
124        if ba || bb {
125            union += 1;
126        }
127        if ba && bb {
128            intersection += 1;
129        }
130    }
131    if union == 0 {
132        0.0
133    } else {
134        1.0 - (intersection as f32 / union as f32)
135    }
136}
137
138/// Pearson distance: 1 - Pearson correlation coefficient.
139///
140/// Returns 0.0 for perfectly correlated, 1.0 for uncorrelated, ~2.0 for
141/// perfectly anti-correlated.
142#[inline]
143pub fn pearson(a: &[f32], b: &[f32]) -> f32 {
144    debug_assert_eq!(a.len(), b.len());
145    let n = a.len() as f32;
146    if n < 2.0 {
147        return 1.0;
148    }
149    let mut sum_a = 0.0f32;
150    let mut sum_b = 0.0f32;
151    for i in 0..a.len() {
152        sum_a += a[i];
153        sum_b += b[i];
154    }
155    let mean_a = sum_a / n;
156    let mean_b = sum_b / n;
157
158    let mut cov = 0.0f32;
159    let mut var_a = 0.0f32;
160    let mut var_b = 0.0f32;
161    for i in 0..a.len() {
162        let da = a[i] - mean_a;
163        let db = b[i] - mean_b;
164        cov += da * db;
165        var_a += da * da;
166        var_b += db * db;
167    }
168    let denom = (var_a * var_b).sqrt();
169    if denom < f32::EPSILON {
170        return 1.0;
171    }
172    (1.0 - cov / denom).max(0.0)
173}
174
175/// Compute distance using the specified metric (scalar dispatch).
176#[inline]
177pub fn distance(a: &[f32], b: &[f32], metric: DistanceMetric) -> f32 {
178    match metric {
179        DistanceMetric::L2 => l2_squared(a, b),
180        DistanceMetric::Cosine => cosine_distance(a, b),
181        DistanceMetric::InnerProduct => neg_inner_product(a, b),
182        DistanceMetric::Manhattan => manhattan(a, b),
183        DistanceMetric::Chebyshev => chebyshev(a, b),
184        DistanceMetric::Hamming => hamming_f32(a, b),
185        DistanceMetric::Jaccard => jaccard(a, b),
186        DistanceMetric::Pearson => pearson(a, b),
187    }
188}