Skip to main content

ruvector_core/
distance.rs

1//! SIMD-optimized distance metrics
2//! Uses SimSIMD when available (native), falls back to pure Rust for WASM
3
4use crate::error::{Result, RuvectorError};
5use crate::types::DistanceMetric;
6
7/// Calculate distance between two vectors using the specified metric
8#[inline]
9pub fn distance(a: &[f32], b: &[f32], metric: DistanceMetric) -> Result<f32> {
10    if a.len() != b.len() {
11        return Err(RuvectorError::DimensionMismatch {
12            expected: a.len(),
13            actual: b.len(),
14        });
15    }
16
17    match metric {
18        DistanceMetric::Euclidean => Ok(euclidean_distance(a, b)),
19        DistanceMetric::Cosine => Ok(cosine_distance(a, b)),
20        DistanceMetric::DotProduct => Ok(dot_product_distance(a, b)),
21        DistanceMetric::Manhattan => Ok(manhattan_distance(a, b)),
22    }
23}
24
/// Euclidean (L2) distance between `a` and `b`.
///
/// With the `simd` feature on a non-WASM target this takes the square root of
/// SimSIMD's squared Euclidean distance; otherwise a scalar loop processes the
/// inputs four lanes at a time.
///
/// # Panics
///
/// * SIMD path: panics if the SimSIMD call returns `None`.
/// * Scalar path: panics if `b` is shorter than `a`. A longer `b` is accepted
///   and only its first `a.len()` elements are used.
#[inline]
pub fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
    #[cfg(all(feature = "simd", not(target_arch = "wasm32")))]
    {
        let squared = simsimd::SpatialSimilarity::sqeuclidean(a, b)
            .expect("SimSIMD euclidean failed");
        squared.sqrt() as f32
    }
    #[cfg(any(not(feature = "simd"), target_arch = "wasm32"))]
    {
        // Scalar fallback: walk both slices in groups of four, then mop up
        // the 0..=3 trailing elements. Trimming `b` to `a`'s length up front
        // keeps the two chunk iterators in lock-step.
        let b = &b[..a.len()];
        let a_quads = a.chunks_exact(4);
        let b_quads = b.chunks_exact(4);
        let a_tail = a_quads.remainder();
        let b_tail = b_quads.remainder();

        let mut acc = 0.0f32;
        for (qa, qb) in a_quads.zip(b_quads) {
            let d0 = qa[0] - qb[0];
            let d1 = qa[1] - qb[1];
            let d2 = qa[2] - qb[2];
            let d3 = qa[3] - qb[3];
            acc += d0 * d0 + d1 * d1 + d2 * d2 + d3 * d3;
        }
        for (&x, &y) in a_tail.iter().zip(b_tail) {
            let d = x - y;
            acc += d * d;
        }
        acc.sqrt()
    }
}
55
/// Cosine distance (`1 - cosine_similarity`) between `a` and `b`.
///
/// With the `simd` feature on a non-WASM target the computation is delegated
/// to SimSIMD. Otherwise a single-pass scalar loop accumulates the dot product
/// and both squared norms together.
///
/// Scalar-path behavior:
/// * Elements are paired with `zip`, so only the common prefix of the two
///   slices contributes if their lengths differ.
/// * If the product of the norms is not strictly positive (e.g. either input
///   is all zeros or empty), the similarity is undefined and `1.0` is returned.
/// * The similarity is clamped to `[-1, 1]`, so the returned distance lies in
///   `[0, 2]` for finite inputs even when rounding pushes the raw ratio
///   slightly out of range.
///
/// # Panics
///
/// SIMD path only: panics if the SimSIMD call returns `None`.
#[inline]
pub fn cosine_distance(a: &[f32], b: &[f32]) -> f32 {
    #[cfg(all(feature = "simd", not(target_arch = "wasm32")))]
    {
        simsimd::SpatialSimilarity::cosine(a, b).expect("SimSIMD cosine failed") as f32
    }
    #[cfg(any(not(feature = "simd"), target_arch = "wasm32"))]
    {
        // Single-pass cosine fallback — one traversal instead of three.
        let (mut dot, mut norm_a_sq, mut norm_b_sq) = (0.0f32, 0.0f32, 0.0f32);
        for (&ai, &bi) in a.iter().zip(b.iter()) {
            dot += ai * bi;
            norm_a_sq += ai * ai;
            norm_b_sq += bi * bi;
        }
        let denom = norm_a_sq.sqrt() * norm_b_sq.sqrt();
        // Compare against exactly zero: cosine similarity is scale-invariant,
        // so an absolute epsilon on the norm product would misclassify valid
        // small-magnitude vectors as degenerate.
        if denom > 0.0 {
            1.0 - (dot / denom).clamp(-1.0, 1.0)
        } else {
            1.0
        }
    }
}
80
/// Dot-product "distance": the negated inner product of `a` and `b`.
///
/// Negating the dot product means a larger inner product yields a smaller
/// returned value.
///
/// With the `simd` feature on a non-WASM target the inner product comes from
/// SimSIMD. Otherwise it is computed with a plain iterator, which pairs
/// elements with `zip` and therefore stops at the shorter slice.
///
/// # Panics
///
/// SIMD path only: panics if the SimSIMD call returns `None`.
#[inline]
pub fn dot_product_distance(a: &[f32], b: &[f32]) -> f32 {
    #[cfg(all(feature = "simd", not(target_arch = "wasm32")))]
    {
        let similarity =
            simsimd::SpatialSimilarity::dot(a, b).expect("SimSIMD dot product failed");
        // Negate at SimSIMD's native precision, then narrow to f32.
        let negated = -similarity;
        negated as f32
    }
    #[cfg(any(not(feature = "simd"), target_arch = "wasm32"))]
    {
        // Portable path: multiply pairwise, sum, and flip the sign.
        -a.iter().zip(b).map(|(&x, &y)| x * y).sum::<f32>()
    }
}
96
97/// Manhattan (L1) distance — delegates to SIMD when available
98#[inline]
99pub fn manhattan_distance(a: &[f32], b: &[f32]) -> f32 {
100    crate::simd_intrinsics::manhattan_distance_simd(a, b)
101}
102
103/// Batch distance calculation optimized with Rayon (native) or sequential (WASM)
104pub fn batch_distances(
105    query: &[f32],
106    vectors: &[Vec<f32>],
107    metric: DistanceMetric,
108) -> Result<Vec<f32>> {
109    #[cfg(all(feature = "parallel", not(target_arch = "wasm32")))]
110    {
111        use rayon::prelude::*;
112        vectors
113            .par_iter()
114            .map(|v| distance(query, v, metric))
115            .collect()
116    }
117    #[cfg(any(not(feature = "parallel"), target_arch = "wasm32"))]
118    {
119        // Sequential fallback for WASM
120        vectors.iter().map(|v| distance(query, v, metric)).collect()
121    }
122}
123
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used by the closed-form checks below.
    const TOLERANCE: f32 = 0.01;

    /// Shared 3-D fixtures for the Euclidean, dot-product and Manhattan checks.
    const LHS: [f32; 3] = [1.0, 2.0, 3.0];
    const RHS: [f32; 3] = [4.0, 5.0, 6.0];

    /// True when `actual` is within `TOLERANCE` of `expected`.
    fn close_to(actual: f32, expected: f32) -> bool {
        (actual - expected).abs() < TOLERANCE
    }

    #[test]
    fn test_euclidean_distance() {
        // sqrt(3^2 + 3^2 + 3^2) = sqrt(27) ≈ 5.196
        assert!(close_to(euclidean_distance(&LHS, &RHS), 5.196));
    }

    #[test]
    fn test_cosine_distance() {
        // Identical vectors: distance should be ~0.
        let identical = cosine_distance(&LHS, &LHS);
        assert!(
            identical < 0.01,
            "Identical vectors should have ~0 distance, got {}",
            identical
        );

        // Opposite vectors: distance should be high.
        let opposite = cosine_distance(&[1.0, 0.0, 0.0], &[-1.0, 0.0, 0.0]);
        assert!(
            opposite > 1.5,
            "Opposite vectors should have high distance, got {}",
            opposite
        );
    }

    #[test]
    fn test_dot_product_distance() {
        // -(4 + 10 + 18) = -32
        assert!(close_to(dot_product_distance(&LHS, &RHS), -32.0));
    }

    #[test]
    fn test_manhattan_distance() {
        // |1-4| + |2-5| + |3-6| = 9
        assert!(close_to(manhattan_distance(&LHS, &RHS), 9.0));
    }

    #[test]
    fn test_dimension_mismatch() {
        let short = [1.0, 2.0];
        let long = [1.0, 2.0, 3.0];
        assert!(distance(&short, &long, DistanceMetric::Euclidean).is_err());
    }
}