dynvec/distance.rs
1//! Distance metrics for vector search.
2//!
3//! Three metrics are supported: [`Distance::Euclidean`] (L2),
4//! [`Distance::Cosine`], and [`Distance::DotProduct`]. All routines
5//! operate on `f32` slices and run in scalar code; portable SIMD
6//! is not used because the workspace forbids `unsafe_code` and
7//! `std::simd` is still nightly-only.
8//!
9//! For ANN search, smaller scores mean closer. Cosine and
10//! euclidean already have that orientation; dot product is
11//! negated so the same comparator works across metrics.
12//!
13//! # Examples
14//!
15//! ```
16//! use dynvec::distance::Distance;
17//! let a = [1.0, 0.0, 0.0];
18//! let b = [0.0, 1.0, 0.0];
19//! assert!((Distance::Euclidean.score(&a, &b) - 2.0_f32.sqrt()).abs() < 1e-6);
20//! assert!((Distance::Cosine.score(&a, &b) - 1.0).abs() < 1e-6);
21//! ```
22
23use serde::{Deserialize, Serialize};
24
25/// A distance / similarity metric over `f32` vectors.
26#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
27#[serde(rename_all = "snake_case")]
28#[non_exhaustive]
29pub enum Distance {
30 /// L2 distance: `sqrt(sum((a_i - b_i)^2))`.
31 Euclidean,
32 /// `1 - cos(theta)`, where `cos(theta) = (a . b) / (|a| |b|)`.
33 /// Returns 0 for identical direction, 2 for opposite.
34 Cosine,
35 /// Negated dot product: `-(a . b)`. The negation makes the
36 /// metric monotonically smaller-is-closer so a single
37 /// comparator works across all three.
38 DotProduct,
39}
40
41impl Distance {
42 /// Score `a` against `b`. Smaller scores mean closer.
43 ///
44 /// # Panics
45 ///
46 /// Does not panic. Mismatched lengths return `f32::INFINITY`
47 /// to signal "incomparable" without bringing the search
48 /// down; production callers should validate dimensions
49 /// upstream of this routine.
50 #[must_use]
51 pub fn score(self, a: &[f32], b: &[f32]) -> f32 {
52 if a.len() != b.len() {
53 return f32::INFINITY;
54 }
55 match self {
56 Self::Euclidean => euclidean(a, b),
57 Self::Cosine => cosine(a, b),
58 Self::DotProduct => -dot(a, b),
59 }
60 }
61
62 /// Human-readable name suitable for logging or JSON
63 /// serialisation.
64 #[must_use]
65 pub fn name(self) -> &'static str {
66 match self {
67 Self::Euclidean => "euclidean",
68 Self::Cosine => "cosine",
69 Self::DotProduct => "dot",
70 }
71 }
72}
73
74/// L2 (euclidean) distance.
75fn euclidean(a: &[f32], b: &[f32]) -> f32 {
76 let mut acc = 0.0_f32;
77 for (x, y) in a.iter().zip(b) {
78 let d = x - y;
79 acc += d * d;
80 }
81 acc.sqrt()
82}
83
84/// `1 - cos(theta)`. Matches the orientation used by FAISS and
85/// other ANN libraries: identical direction -> 0, orthogonal -> 1,
86/// antiparallel -> 2.
87///
88/// Zero-norm vectors return 1.0 (orthogonal); this avoids
89/// dividing by zero and matches the "no information" intuition.
90fn cosine(a: &[f32], b: &[f32]) -> f32 {
91 let mut dot_acc = 0.0_f32;
92 let mut na = 0.0_f32;
93 let mut nb = 0.0_f32;
94 for (x, y) in a.iter().zip(b) {
95 dot_acc += x * y;
96 na += x * x;
97 nb += y * y;
98 }
99 let denom = (na * nb).sqrt();
100 if denom <= 0.0 {
101 return 1.0;
102 }
103 1.0 - (dot_acc / denom)
104}
105
106/// Raw inner product `a . b`.
107fn dot(a: &[f32], b: &[f32]) -> f32 {
108 let mut acc = 0.0_f32;
109 for (x, y) in a.iter().zip(b) {
110 acc += x * y;
111 }
112 acc
113}
114
115#[cfg(test)]
116mod tests {
117 use super::*;
118
119 fn approx(a: f32, b: f32) -> bool {
120 (a - b).abs() < 1e-5
121 }
122
123 #[test]
124 fn euclidean_known_values() {
125 assert!(approx(
126 Distance::Euclidean.score(&[0.0, 0.0], &[3.0, 4.0]),
127 5.0
128 ));
129 assert!(approx(
130 Distance::Euclidean.score(&[1.0, 1.0, 1.0], &[1.0, 1.0, 1.0]),
131 0.0
132 ));
133 }
134
135 #[test]
136 fn cosine_known_values() {
137 // Identical direction -> 0
138 assert!(approx(
139 Distance::Cosine.score(&[1.0, 0.0], &[2.0, 0.0]),
140 0.0
141 ));
142 // Orthogonal -> 1
143 assert!(approx(
144 Distance::Cosine.score(&[1.0, 0.0], &[0.0, 1.0]),
145 1.0
146 ));
147 // Antiparallel -> 2
148 assert!(approx(
149 Distance::Cosine.score(&[1.0, 0.0], &[-1.0, 0.0]),
150 2.0
151 ));
152 }
153
154 #[test]
155 fn dot_product_known_values() {
156 // dot([1,2,3], [4,5,6]) = 4 + 10 + 18 = 32 -> -32
157 assert!(approx(
158 Distance::DotProduct.score(&[1.0, 2.0, 3.0], &[4.0, 5.0, 6.0]),
159 -32.0
160 ));
161 }
162
163 #[test]
164 fn mismatched_dim_is_infinity() {
165 assert!(Distance::Euclidean
166 .score(&[1.0, 2.0], &[1.0, 2.0, 3.0])
167 .is_infinite());
168 }
169
170 #[test]
171 fn cosine_zero_vector_is_orthogonal() {
172 assert!(approx(
173 Distance::Cosine.score(&[0.0, 0.0], &[1.0, 0.0]),
174 1.0
175 ));
176 }
177}