sevensense_vector/
distance.rs

1//! Distance metrics for vector similarity computation.
2//!
3//! This module provides optimized implementations of common distance metrics:
4//! - Cosine distance/similarity
5//! - Euclidean (L2) distance
6//! - Dot product
7//!
8//! ## SIMD Optimization
9//!
10//! When the `simd` feature is enabled, these functions use SIMD intrinsics
11//! for improved performance on supported architectures.
12
13#![allow(dead_code)]  // Utility functions for future use
14
15/// Compute the cosine distance between two vectors.
16///
17/// Cosine distance = 1 - cosine_similarity
18///
19/// Returns a value in [0, 2]:
20/// - 0 = identical direction
21/// - 1 = orthogonal
22/// - 2 = opposite direction
23///
24/// # Panics
25/// Panics in debug mode if vectors have different lengths.
26#[inline]
27pub fn cosine_distance(a: &[f32], b: &[f32]) -> f32 {
28    1.0 - cosine_similarity(a, b)
29}
30
31/// Compute the cosine similarity between two vectors.
32///
33/// Returns a value in [-1, 1]:
34/// - 1 = identical direction
35/// - 0 = orthogonal
36/// - -1 = opposite direction
37///
38/// # Panics
39/// Panics in debug mode if vectors have different lengths.
40#[inline]
41pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
42    debug_assert_eq!(a.len(), b.len(), "Vector length mismatch");
43
44    let dot = dot_product(a, b);
45    let norm_a = l2_norm(a);
46    let norm_b = l2_norm(b);
47
48    if norm_a < 1e-10 || norm_b < 1e-10 {
49        return 0.0;
50    }
51
52    (dot / (norm_a * norm_b)).clamp(-1.0, 1.0)
53}
54
55/// Compute the Euclidean (L2) distance between two vectors.
56///
57/// Returns the straight-line distance in n-dimensional space.
58///
59/// # Panics
60/// Panics in debug mode if vectors have different lengths.
61#[inline]
62pub fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
63    squared_euclidean_distance(a, b).sqrt()
64}
65
66/// Compute the squared Euclidean distance.
67///
68/// This is faster than `euclidean_distance` when only comparing distances,
69/// as it avoids the square root operation.
70#[inline]
71pub fn squared_euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
72    debug_assert_eq!(a.len(), b.len(), "Vector length mismatch");
73
74    #[cfg(feature = "simd")]
75    {
76        simd_squared_euclidean(a, b)
77    }
78
79    #[cfg(not(feature = "simd"))]
80    {
81        a.iter()
82            .zip(b.iter())
83            .map(|(x, y)| {
84                let diff = x - y;
85                diff * diff
86            })
87            .sum()
88    }
89}
90
91/// Compute the dot product of two vectors.
92///
93/// # Panics
94/// Panics in debug mode if vectors have different lengths.
95#[inline]
96pub fn dot_product(a: &[f32], b: &[f32]) -> f32 {
97    debug_assert_eq!(a.len(), b.len(), "Vector length mismatch");
98
99    #[cfg(feature = "simd")]
100    {
101        simd_dot_product(a, b)
102    }
103
104    #[cfg(not(feature = "simd"))]
105    {
106        a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
107    }
108}
109
110/// Compute the L2 (Euclidean) norm of a vector.
111#[inline]
112pub fn l2_norm(v: &[f32]) -> f32 {
113    dot_product(v, v).sqrt()
114}
115
116/// Compute the L1 (Manhattan) norm of a vector.
117#[inline]
118pub fn l1_norm(v: &[f32]) -> f32 {
119    v.iter().map(|x| x.abs()).sum()
120}
121
122/// Normalize a vector to unit length (L2 normalization).
123///
124/// Returns a zero vector if the input has zero or near-zero norm.
125#[inline]
126pub fn normalize_vector(v: &[f32]) -> Vec<f32> {
127    let norm = l2_norm(v);
128    if norm < 1e-10 {
129        return vec![0.0; v.len()];
130    }
131    v.iter().map(|x| x / norm).collect()
132}
133
134/// Normalize a vector in place.
135#[inline]
136pub fn normalize_vector_inplace(v: &mut [f32]) {
137    let norm = l2_norm(v);
138    if norm < 1e-10 {
139        v.fill(0.0);
140        return;
141    }
142    for x in v.iter_mut() {
143        *x /= norm;
144    }
145}
146
147/// Compute the Manhattan (L1) distance between two vectors.
148#[inline]
149pub fn manhattan_distance(a: &[f32], b: &[f32]) -> f32 {
150    debug_assert_eq!(a.len(), b.len(), "Vector length mismatch");
151
152    a.iter().zip(b.iter()).map(|(x, y)| (x - y).abs()).sum()
153}
154
155/// Compute the Chebyshev (L-infinity) distance between two vectors.
156///
157/// This is the maximum absolute difference along any dimension.
158#[inline]
159pub fn chebyshev_distance(a: &[f32], b: &[f32]) -> f32 {
160    debug_assert_eq!(a.len(), b.len(), "Vector length mismatch");
161
162    a.iter()
163        .zip(b.iter())
164        .map(|(x, y)| (x - y).abs())
165        .fold(0.0f32, f32::max)
166}
167
168/// Compute angular distance (based on cosine).
169///
170/// Returns the angle in radians between two vectors, normalized to [0, 1].
171#[inline]
172pub fn angular_distance(a: &[f32], b: &[f32]) -> f32 {
173    let cos_sim = cosine_similarity(a, b);
174    cos_sim.acos() / std::f32::consts::PI
175}
176
177/// Add two vectors element-wise.
178#[inline]
179pub fn vector_add(a: &[f32], b: &[f32]) -> Vec<f32> {
180    debug_assert_eq!(a.len(), b.len(), "Vector length mismatch");
181    a.iter().zip(b.iter()).map(|(x, y)| x + y).collect()
182}
183
184/// Subtract two vectors element-wise (a - b).
185#[inline]
186pub fn vector_sub(a: &[f32], b: &[f32]) -> Vec<f32> {
187    debug_assert_eq!(a.len(), b.len(), "Vector length mismatch");
188    a.iter().zip(b.iter()).map(|(x, y)| x - y).collect()
189}
190
191/// Scale a vector by a scalar.
192#[inline]
193pub fn vector_scale(v: &[f32], scalar: f32) -> Vec<f32> {
194    v.iter().map(|x| x * scalar).collect()
195}
196
197/// Compute the centroid (average) of multiple vectors.
198pub fn centroid(vectors: &[&[f32]]) -> Option<Vec<f32>> {
199    if vectors.is_empty() {
200        return None;
201    }
202
203    let dim = vectors[0].len();
204    let n = vectors.len() as f32;
205
206    let mut result = vec![0.0; dim];
207    for v in vectors {
208        debug_assert_eq!(v.len(), dim, "Vector dimension mismatch");
209        for (i, &x) in v.iter().enumerate() {
210            result[i] += x;
211        }
212    }
213
214    for x in result.iter_mut() {
215        *x /= n;
216    }
217
218    Some(result)
219}
220
221/// Check if a vector is normalized (unit length).
222#[inline]
223pub fn is_normalized(v: &[f32], tolerance: f32) -> bool {
224    let norm = l2_norm(v);
225    (norm - 1.0).abs() < tolerance
226}
227
228// SIMD implementations
229
230#[cfg(feature = "simd")]
231fn simd_dot_product(a: &[f32], b: &[f32]) -> f32 {
232    // Fall back to scalar for now - can be enhanced with platform-specific SIMD
233    // when needed (e.g., using std::arch for AVX/AVX2/AVX-512)
234    a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
235}
236
237#[cfg(feature = "simd")]
238fn simd_squared_euclidean(a: &[f32], b: &[f32]) -> f32 {
239    a.iter()
240        .zip(b.iter())
241        .map(|(x, y)| {
242            let diff = x - y;
243            diff * diff
244        })
245        .sum()
246}
247
248#[cfg(test)]
249mod tests {
250    use super::*;
251    use approx::assert_relative_eq;
252
253    #[test]
254    fn test_cosine_similarity_identical() {
255        let v = vec![1.0, 2.0, 3.0];
256        let sim = cosine_similarity(&v, &v);
257        assert_relative_eq!(sim, 1.0, epsilon = 1e-6);
258    }
259
260    #[test]
261    fn test_cosine_similarity_opposite() {
262        let v1 = vec![1.0, 0.0, 0.0];
263        let v2 = vec![-1.0, 0.0, 0.0];
264        let sim = cosine_similarity(&v1, &v2);
265        assert_relative_eq!(sim, -1.0, epsilon = 1e-6);
266    }
267
268    #[test]
269    fn test_cosine_similarity_orthogonal() {
270        let v1 = vec![1.0, 0.0, 0.0];
271        let v2 = vec![0.0, 1.0, 0.0];
272        let sim = cosine_similarity(&v1, &v2);
273        assert_relative_eq!(sim, 0.0, epsilon = 1e-6);
274    }
275
276    #[test]
277    fn test_cosine_distance() {
278        let v1 = vec![1.0, 0.0, 0.0];
279        let v2 = vec![0.0, 1.0, 0.0];
280        let dist = cosine_distance(&v1, &v2);
281        assert_relative_eq!(dist, 1.0, epsilon = 1e-6);
282    }
283
284    #[test]
285    fn test_euclidean_distance() {
286        let v1 = vec![0.0, 0.0, 0.0];
287        let v2 = vec![3.0, 4.0, 0.0];
288        let dist = euclidean_distance(&v1, &v2);
289        assert_relative_eq!(dist, 5.0, epsilon = 1e-6);
290    }
291
292    #[test]
293    fn test_normalize_vector() {
294        let v = vec![3.0, 4.0];
295        let normalized = normalize_vector(&v);
296        assert_relative_eq!(l2_norm(&normalized), 1.0, epsilon = 1e-6);
297        assert_relative_eq!(normalized[0], 0.6, epsilon = 1e-6);
298        assert_relative_eq!(normalized[1], 0.8, epsilon = 1e-6);
299    }
300
301    #[test]
302    fn test_normalize_zero_vector() {
303        let v = vec![0.0, 0.0, 0.0];
304        let normalized = normalize_vector(&v);
305        assert!(normalized.iter().all(|&x| x == 0.0));
306    }
307
308    #[test]
309    fn test_dot_product() {
310        let v1 = vec![1.0, 2.0, 3.0];
311        let v2 = vec![4.0, 5.0, 6.0];
312        let dot = dot_product(&v1, &v2);
313        assert_relative_eq!(dot, 32.0, epsilon = 1e-6); // 1*4 + 2*5 + 3*6 = 32
314    }
315
316    #[test]
317    fn test_manhattan_distance() {
318        let v1 = vec![0.0, 0.0];
319        let v2 = vec![3.0, 4.0];
320        let dist = manhattan_distance(&v1, &v2);
321        assert_relative_eq!(dist, 7.0, epsilon = 1e-6);
322    }
323
324    #[test]
325    fn test_chebyshev_distance() {
326        let v1 = vec![0.0, 0.0];
327        let v2 = vec![3.0, 4.0];
328        let dist = chebyshev_distance(&v1, &v2);
329        assert_relative_eq!(dist, 4.0, epsilon = 1e-6);
330    }
331
332    #[test]
333    fn test_centroid() {
334        let v1 = vec![0.0, 0.0];
335        let v2 = vec![2.0, 2.0];
336        let v3 = vec![4.0, 4.0];
337
338        let c = centroid(&[&v1, &v2, &v3]).unwrap();
339        assert_relative_eq!(c[0], 2.0, epsilon = 1e-6);
340        assert_relative_eq!(c[1], 2.0, epsilon = 1e-6);
341    }
342
343    #[test]
344    fn test_is_normalized() {
345        let v = normalize_vector(&[3.0, 4.0]);
346        assert!(is_normalized(&v, 1e-6));
347
348        let v2 = vec![1.0, 2.0, 3.0];
349        assert!(!is_normalized(&v2, 1e-6));
350    }
351
352    #[test]
353    fn test_vector_operations() {
354        let v1 = vec![1.0, 2.0];
355        let v2 = vec![3.0, 4.0];
356
357        let sum = vector_add(&v1, &v2);
358        assert_eq!(sum, vec![4.0, 6.0]);
359
360        let diff = vector_sub(&v1, &v2);
361        assert_eq!(diff, vec![-2.0, -2.0]);
362
363        let scaled = vector_scale(&v1, 2.0);
364        assert_eq!(scaled, vec![2.0, 4.0]);
365    }
366
367    #[test]
368    fn test_angular_distance() {
369        let v1 = vec![1.0, 0.0];
370        let v2 = vec![0.0, 1.0];
371        let dist = angular_distance(&v1, &v2);
372        // 90 degrees = pi/2, normalized = 0.5
373        assert_relative_eq!(dist, 0.5, epsilon = 1e-6);
374    }
375}