ruvector_data_framework/
utils.rs

1//! Shared utility functions for the RuVector Data Framework
2//!
3//! This module contains common utilities used across multiple modules,
4//! including vector operations and mathematical functions.
5
/// Compute cosine similarity between two vectors
///
/// Returns a value in [-1, 1] where:
/// - 1 = identical direction
/// - 0 = orthogonal
/// - -1 = opposite direction
///
/// The dot product and the two squared norms are accumulated in `f32`, in
/// element order. The denominator and the final division are evaluated in
/// `f64`: the product of two squared norms easily exceeds `f32::MAX` even
/// when each squared norm is finite, which would otherwise collapse the
/// result to 0.0 for large-magnitude vectors.
///
/// # Arguments
///
/// * `a` - First vector
/// * `b` - Second vector (must be same length as `a`)
///
/// # Returns
///
/// Cosine similarity score, or 0.0 if vectors are empty, have different
/// lengths, or the product of their norms is not greater than 1e-10
/// (e.g. one of them is the zero vector)
///
/// # Example
///
/// ```
/// use ruvector_data_framework::utils::cosine_similarity;
///
/// let a = vec![1.0, 0.0, 0.0];
/// let b = vec![1.0, 0.0, 0.0];
/// assert!((cosine_similarity(&a, &b) - 1.0).abs() < 1e-6);
///
/// let c = vec![0.0, 1.0, 0.0];
/// assert!(cosine_similarity(&a, &c).abs() < 1e-6);
/// ```
#[inline]
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }

    // One sequential pass over both slices. Zipping avoids per-index bounds
    // checks; the accumulation order is plain element order.
    let (dot, norm_a, norm_b) = a.iter().zip(b).fold(
        (0.0f32, 0.0f32, 0.0f32),
        |(dot, norm_a, norm_b), (&ai, &bi)| (dot + ai * bi, norm_a + ai * ai, norm_b + bi * bi),
    );

    // The product of two finite f32 values is exact in f64 and cannot
    // overflow or underflow there, so the denominator stays finite whenever
    // both squared norms are finite.
    let denom = (f64::from(norm_a) * f64::from(norm_b)).sqrt();
    if denom > 1e-10 {
        // Rounding in the f32 accumulators can push the ratio marginally
        // outside the mathematical range; clamp to honour the documented one.
        ((f64::from(dot) / denom) as f32).clamp(-1.0, 1.0)
    } else {
        0.0
    }
}
75
/// Compute Euclidean (L2) distance between two vectors
///
/// The distance is the square root of the sum of squared element-wise
/// differences, accumulated in `f32` in element order.
///
/// # Arguments
///
/// * `a` - First vector
/// * `b` - Second vector (must be same length as `a`)
///
/// # Returns
///
/// Euclidean distance, or 0.0 if vectors are empty or different lengths
#[inline]
pub fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
    // Degenerate inputs are reported as 0.0 rather than as an error.
    if a.is_empty() || a.len() != b.len() {
        return 0.0;
    }

    let mut squared_total = 0.0f32;
    for (&x, &y) in a.iter().zip(b) {
        let delta = x - y;
        squared_total += delta * delta;
    }

    squared_total.sqrt()
}
102
/// Normalize a vector to unit length (L2 normalization)
///
/// Every element is divided by the vector's L2 norm. A vector whose norm is
/// not greater than 1e-10 (including the empty and all-zero vectors) is left
/// unchanged, so no division by zero occurs.
///
/// # Arguments
///
/// * `v` - Vector to normalize (modified in place)
#[inline]
pub fn normalize_vector(v: &mut [f32]) {
    let mut sum_of_squares = 0.0f32;
    for &component in v.iter() {
        sum_of_squares += component * component;
    }
    let magnitude = sum_of_squares.sqrt();

    // Only rescale when the magnitude is meaningfully non-zero.
    if magnitude > 1e-10 {
        v.iter_mut().for_each(|component| *component /= magnitude);
    }
}
117
#[cfg(test)]
mod tests {
    //! Unit tests for the vector utilities, including the degenerate inputs
    //! (empty, mismatched length, zero vector) that each function special-cases.

    use super::*;

    /// Absolute tolerance used for floating-point comparisons
    const EPS: f32 = 1e-6;

    #[test]
    fn test_cosine_similarity_identical() {
        let a = [1.0f32, 0.0, 0.0, 0.0];
        let b = [1.0f32, 0.0, 0.0, 0.0];
        assert!((cosine_similarity(&a, &b) - 1.0).abs() < EPS);
    }

    #[test]
    fn test_cosine_similarity_orthogonal() {
        let a = [1.0f32, 0.0, 0.0, 0.0];
        let b = [0.0f32, 1.0, 0.0, 0.0];
        assert!(cosine_similarity(&a, &b).abs() < EPS);
    }

    #[test]
    fn test_cosine_similarity_opposite() {
        let a = [1.0f32, 0.0, 0.0, 0.0];
        let b = [-1.0f32, 0.0, 0.0, 0.0];
        assert!((cosine_similarity(&a, &b) + 1.0).abs() < EPS);
    }

    #[test]
    fn test_cosine_similarity_empty() {
        let a: Vec<f32> = vec![];
        let b: Vec<f32> = vec![];
        assert_eq!(cosine_similarity(&a, &b), 0.0);
    }

    #[test]
    fn test_cosine_similarity_different_lengths() {
        let a = [1.0f32, 0.0];
        let b = [1.0f32, 0.0, 0.0];
        assert_eq!(cosine_similarity(&a, &b), 0.0);
    }

    #[test]
    fn test_cosine_similarity_zero_vector() {
        // A zero vector has no direction; the function reports 0.0.
        let zero = [0.0f32, 0.0, 0.0];
        let other = [1.0f32, 2.0, 3.0];
        assert_eq!(cosine_similarity(&zero, &other), 0.0);
        assert_eq!(cosine_similarity(&other, &zero), 0.0);
    }

    #[test]
    fn test_cosine_similarity_many_dimensions() {
        // More than eight elements, with a length that is not a multiple of eight.
        let a: Vec<f32> = (1..=11).map(|i| i as f32).collect();
        let b: Vec<f32> = a.iter().map(|x| 2.0 * x).collect();
        assert!((cosine_similarity(&a, &b) - 1.0).abs() < EPS);
    }

    #[test]
    fn test_euclidean_distance() {
        let a = [0.0f32, 0.0];
        let b = [3.0f32, 4.0];
        assert!((euclidean_distance(&a, &b) - 5.0).abs() < EPS);
    }

    #[test]
    fn test_euclidean_distance_identical() {
        let a = [1.5f32, -2.5, 4.0];
        assert_eq!(euclidean_distance(&a, &a), 0.0);
    }

    #[test]
    fn test_euclidean_distance_empty() {
        let a: Vec<f32> = vec![];
        let b: Vec<f32> = vec![];
        assert_eq!(euclidean_distance(&a, &b), 0.0);
    }

    #[test]
    fn test_euclidean_distance_different_lengths() {
        let a = [1.0f32, 2.0];
        let b = [1.0f32];
        assert_eq!(euclidean_distance(&a, &b), 0.0);
    }

    #[test]
    fn test_normalize_vector() {
        let mut v = [3.0f32, 4.0];
        normalize_vector(&mut v);
        assert!((v[0] - 0.6).abs() < EPS);
        assert!((v[1] - 0.8).abs() < EPS);
    }

    #[test]
    fn test_normalize_vector_result_has_unit_norm() {
        let mut v = [1.0f32, 2.0, 2.0];
        normalize_vector(&mut v);
        let norm_sq: f32 = v.iter().map(|x| x * x).sum();
        assert!((norm_sq - 1.0).abs() < EPS);
    }

    #[test]
    fn test_normalize_vector_zero() {
        // Zero vectors cannot be normalized and must be left as-is.
        let mut v = [0.0f32, 0.0, 0.0];
        normalize_vector(&mut v);
        assert_eq!(v, [0.0f32, 0.0, 0.0]);
    }

    #[test]
    fn test_normalize_vector_empty() {
        let mut v: Vec<f32> = vec![];
        normalize_vector(&mut v);
        assert!(v.is_empty());
    }
}