ruvector_core/
distance.rs

1//! SIMD-optimized distance metrics
//! Uses SimSIMD on native targets when the `simd` feature is enabled; otherwise (feature disabled or WASM target) falls back to pure Rust
3
4use crate::error::{Result, RuvectorError};
5use crate::types::DistanceMetric;
6
7/// Calculate distance between two vectors using the specified metric
8#[inline]
9pub fn distance(a: &[f32], b: &[f32], metric: DistanceMetric) -> Result<f32> {
10    if a.len() != b.len() {
11        return Err(RuvectorError::DimensionMismatch {
12            expected: a.len(),
13            actual: b.len(),
14        });
15    }
16
17    match metric {
18        DistanceMetric::Euclidean => Ok(euclidean_distance(a, b)),
19        DistanceMetric::Cosine => Ok(cosine_distance(a, b)),
20        DistanceMetric::DotProduct => Ok(dot_product_distance(a, b)),
21        DistanceMetric::Manhattan => Ok(manhattan_distance(a, b)),
22    }
23}
24
/// Euclidean (L2) distance: the square root of the summed squared differences.
///
/// With the `simd` feature on a non-WASM target the squared distance is
/// delegated to SimSIMD; every other configuration uses the pure Rust loop.
///
/// # Panics
///
/// On the SimSIMD path, panics if the SimSIMD call yields no result.
#[inline]
pub fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
    #[cfg(all(feature = "simd", not(target_arch = "wasm32")))]
    {
        let squared =
            simsimd::SpatialSimilarity::sqeuclidean(a, b).expect("SimSIMD euclidean failed");
        squared.sqrt() as f32
    }
    #[cfg(any(not(feature = "simd"), target_arch = "wasm32"))]
    {
        // Portable path: pair the components up, square each difference, then sum.
        let squared_diffs = a.iter().zip(b).map(|(&p, &q)| {
            let delta = p - q;
            delta * delta
        });
        squared_diffs.sum::<f32>().sqrt()
    }
}
44
/// Cosine distance (`1 - cosine_similarity`).
///
/// With the `simd` feature on a non-WASM target the computation is delegated
/// to SimSIMD. Every other configuration uses the pure Rust implementation
/// below, which:
///
/// * returns `1.0` when either vector's norm is not greater than `1e-8`
///   (the direction of a near-zero vector is undefined), and
/// * clamps the similarity to `[-1, 1]`, so the returned distance always lies
///   in `[0, 2]` even when floating-point rounding would otherwise push the
///   ratio marginally past ±1 (e.g. for identical vectors).
///
/// # Panics
///
/// On the SimSIMD path, panics if the SimSIMD call yields no result.
#[inline]
pub fn cosine_distance(a: &[f32], b: &[f32]) -> f32 {
    #[cfg(all(feature = "simd", not(target_arch = "wasm32")))]
    {
        simsimd::SpatialSimilarity::cosine(a, b).expect("SimSIMD cosine failed") as f32
    }
    #[cfg(any(not(feature = "simd"), target_arch = "wasm32"))]
    {
        // Pure Rust fallback (no SIMD feature, or WASM target)
        let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
        let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
        let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
        if norm_a > 1e-8 && norm_b > 1e-8 {
            // Mathematically |similarity| <= 1, but rounding in the dot product
            // and norms can overshoot slightly; clamp so callers never observe
            // a negative distance or one above 2. NaN still propagates.
            let similarity = (dot / (norm_a * norm_b)).clamp(-1.0, 1.0);
            1.0 - similarity
        } else {
            1.0
        }
    }
}
65
/// Dot product "distance": the negated inner product of `a` and `b`.
///
/// The sign is flipped so that a larger inner product yields a smaller value.
///
/// # Panics
///
/// On the SimSIMD path (`simd` feature, non-WASM target), panics if the
/// SimSIMD call yields no result.
#[inline]
pub fn dot_product_distance(a: &[f32], b: &[f32]) -> f32 {
    #[cfg(all(feature = "simd", not(target_arch = "wasm32")))]
    {
        let negated =
            -simsimd::SpatialSimilarity::dot(a, b).expect("SimSIMD dot product failed");
        negated as f32
    }
    #[cfg(any(not(feature = "simd"), target_arch = "wasm32"))]
    {
        // Portable path: accumulate the component-wise products, then negate.
        let products = a.iter().zip(b).map(|(&lhs, &rhs)| lhs * rhs);
        -products.sum::<f32>()
    }
}
81
/// Manhattan (L1) distance: the sum of absolute component-wise differences.
///
/// Components are paired positionally; pairing stops at the shorter slice.
#[inline]
pub fn manhattan_distance(a: &[f32], b: &[f32]) -> f32 {
    let abs_diffs = a.iter().zip(b).map(|(&p, &q)| f32::abs(p - q));
    abs_diffs.sum()
}
87
88/// Batch distance calculation optimized with Rayon (native) or sequential (WASM)
89pub fn batch_distances(
90    query: &[f32],
91    vectors: &[Vec<f32>],
92    metric: DistanceMetric,
93) -> Result<Vec<f32>> {
94    #[cfg(all(feature = "parallel", not(target_arch = "wasm32")))]
95    {
96        use rayon::prelude::*;
97        vectors
98            .par_iter()
99            .map(|v| distance(query, v, metric))
100            .collect()
101    }
102    #[cfg(any(not(feature = "parallel"), target_arch = "wasm32"))]
103    {
104        // Sequential fallback for WASM
105        vectors
106            .iter()
107            .map(|v| distance(query, v, metric))
108            .collect()
109    }
110}
111
#[cfg(test)]
mod tests {
    use super::*;

    /// sqrt(3^2 + 3^2 + 3^2) = sqrt(27) ≈ 5.196
    #[test]
    fn test_euclidean_distance() {
        let a = [1.0_f32, 2.0, 3.0];
        let b = [4.0_f32, 5.0, 6.0];
        let error = (euclidean_distance(&a, &b) - 5.196).abs();
        assert!(error < 0.01);
    }

    /// Checks both ends of the cosine range: identical and opposite vectors.
    #[test]
    fn test_cosine_distance() {
        // Identical vectors: distance should be ~0
        let same = [1.0_f32, 2.0, 3.0];
        let dist = cosine_distance(&same, &same);
        assert!(
            dist < 0.01,
            "Identical vectors should have ~0 distance, got {}",
            dist
        );

        // Opposite vectors: distance should be close to the maximum
        let forward = [1.0_f32, 0.0, 0.0];
        let backward = [-1.0_f32, 0.0, 0.0];
        let dist = cosine_distance(&forward, &backward);
        assert!(
            dist > 1.5,
            "Opposite vectors should have high distance, got {}",
            dist
        );
    }

    /// -(1*4 + 2*5 + 3*6) = -32
    #[test]
    fn test_dot_product_distance() {
        let a = [1.0_f32, 2.0, 3.0];
        let b = [4.0_f32, 5.0, 6.0];
        let error = (dot_product_distance(&a, &b) + 32.0).abs();
        assert!(error < 0.01);
    }

    /// |1-4| + |2-5| + |3-6| = 9
    #[test]
    fn test_manhattan_distance() {
        let a = [1.0_f32, 2.0, 3.0];
        let b = [4.0_f32, 5.0, 6.0];
        let error = (manhattan_distance(&a, &b) - 9.0).abs();
        assert!(error < 0.01);
    }

    /// Slices of different lengths must be rejected by `distance`.
    #[test]
    fn test_dimension_mismatch() {
        let short = [1.0_f32, 2.0];
        let long = [1.0_f32, 2.0, 3.0];
        assert!(distance(&short, &long, DistanceMetric::Euclidean).is_err());
    }
}