Skip to main content

foxstash_core/vector/
ops.rs

1//! Vector operations for RAG system
2//!
3//! This module provides high-performance vector operations for similarity search
4//! and embedding manipulation. All functions are optimized for hot-path performance.
5
6use crate::{RagError, Result};
7
8/// Computes the cosine similarity between two vectors.
9///
10/// Cosine similarity measures the cosine of the angle between two vectors,
11/// returning a value in the range [-1, 1] where:
12/// - 1.0 indicates identical direction
13/// - 0.0 indicates orthogonal vectors
14/// - -1.0 indicates opposite direction
15///
16/// # Arguments
17///
18/// * `a` - First vector
19/// * `b` - Second vector
20///
21/// # Returns
22///
23/// Returns the cosine similarity score in the range [-1, 1].
24///
25/// # Errors
26///
27/// Returns `RagError::DimensionMismatch` if the vectors have different dimensions.
28///
29/// # Performance
30///
31/// This function is optimized for hot-path performance and should be inlined
32/// in most cases. For large batches, consider pre-normalizing vectors and using
33/// dot product directly.
34///
35/// # Examples
36///
37/// ```
38/// use foxstash_core::vector::ops::cosine_similarity;
39///
40/// let a = vec![1.0, 0.0, 0.0];
41/// let b = vec![0.0, 1.0, 0.0];
42/// let similarity = cosine_similarity(&a, &b).unwrap();
43/// assert!((similarity - 0.0).abs() < 1e-6);
44/// ```
45#[inline]
46pub fn cosine_similarity(a: &[f32], b: &[f32]) -> Result<f32> {
47    if a.len() != b.len() {
48        return Err(RagError::DimensionMismatch {
49            expected: a.len(),
50            actual: b.len(),
51        });
52    }
53
54    if a.is_empty() {
55        return Ok(1.0); // Convention: empty vectors are maximally similar
56    }
57
58    let dot = dot_product_unchecked(a, b);
59    let norm_a = magnitude(a);
60    let norm_b = magnitude(b);
61
62    // Handle zero vectors
63    if norm_a == 0.0 || norm_b == 0.0 {
64        return Ok(0.0);
65    }
66
67    // Clamp to [-1, 1] to handle numerical errors
68    let similarity = dot / (norm_a * norm_b);
69    Ok(similarity.clamp(-1.0, 1.0))
70}
71
72/// Computes the Euclidean (L2) distance between two vectors.
73///
74/// The L2 distance is the straight-line distance between two points in
75/// Euclidean space, calculated as: sqrt(sum((a\[i\] - b\[i\])^2))
76///
77/// # Arguments
78///
79/// * `a` - First vector
80/// * `b` - Second vector
81///
82/// # Returns
83///
84/// Returns the non-negative Euclidean distance.
85///
86/// # Errors
87///
88/// Returns `RagError::DimensionMismatch` if the vectors have different dimensions.
89///
90/// # Performance
91///
92/// This function uses optimized iterators and should be inlined for small vectors.
93/// For distance-based sorting, consider using squared distance to avoid the sqrt.
94///
95/// # Examples
96///
97/// ```
98/// use foxstash_core::vector::ops::l2_distance;
99///
100/// let a = vec![0.0, 0.0];
101/// let b = vec![3.0, 4.0];
102/// let distance = l2_distance(&a, &b).unwrap();
103/// assert!((distance - 5.0).abs() < 1e-6);
104/// ```
105#[inline]
106pub fn l2_distance(a: &[f32], b: &[f32]) -> Result<f32> {
107    if a.len() != b.len() {
108        return Err(RagError::DimensionMismatch {
109            expected: a.len(),
110            actual: b.len(),
111        });
112    }
113
114    let squared_sum: f32 = a
115        .iter()
116        .zip(b.iter())
117        .map(|(x, y)| {
118            let diff = x - y;
119            diff * diff
120        })
121        .sum();
122
123    Ok(squared_sum.sqrt())
124}
125
126/// Computes the dot product of two vectors.
127///
128/// The dot product is the sum of element-wise products: sum(a\[i\] * b\[i\])
129///
130/// # Arguments
131///
132/// * `a` - First vector
133/// * `b` - Second vector
134///
135/// # Returns
136///
137/// Returns the dot product as a scalar value.
138///
139/// # Errors
140///
141/// Returns `RagError::DimensionMismatch` if the vectors have different dimensions.
142///
143/// # Performance
144///
145/// This is a critical hot-path function. The implementation uses optimized
146/// iteration and will benefit from auto-vectorization on most platforms.
147///
148/// # Examples
149///
150/// ```
151/// use foxstash_core::vector::ops::dot_product;
152///
153/// let a = vec![1.0, 2.0, 3.0];
154/// let b = vec![4.0, 5.0, 6.0];
155/// let product = dot_product(&a, &b).unwrap();
156/// assert!((product - 32.0).abs() < 1e-6);
157/// ```
158#[inline]
159pub fn dot_product(a: &[f32], b: &[f32]) -> Result<f32> {
160    if a.len() != b.len() {
161        return Err(RagError::DimensionMismatch {
162            expected: a.len(),
163            actual: b.len(),
164        });
165    }
166
167    Ok(dot_product_unchecked(a, b))
168}
169
170/// Normalizes a vector to unit length (L2 norm = 1).
171///
172/// This operation modifies the vector in-place, scaling all components
173/// so that the resulting vector has a magnitude of 1.0.
174///
175/// # Arguments
176///
177/// * `vector` - Mutable reference to the vector to normalize
178///
179/// # Behavior
180///
181/// - If the vector has zero magnitude, it remains unchanged
182/// - Empty vectors remain unchanged
183///
184/// # Performance
185///
186/// This function performs two passes over the data (magnitude calculation
187/// and scaling). For better performance when normalizing many vectors,
188/// consider batching or using SIMD operations.
189///
190/// # Examples
191///
192/// ```
193/// use foxstash_core::vector::ops::normalize;
194///
195/// let mut v = vec![3.0, 4.0];
196/// normalize(&mut v);
197/// assert!((v[0] - 0.6).abs() < 1e-6);
198/// assert!((v[1] - 0.8).abs() < 1e-6);
199/// ```
200#[inline]
201pub fn normalize(vector: &mut [f32]) {
202    if vector.is_empty() {
203        return;
204    }
205
206    let norm = magnitude(vector);
207
208    if norm == 0.0 {
209        return; // Don't modify zero vectors
210    }
211
212    let inv_norm = 1.0 / norm;
213    for x in vector.iter_mut() {
214        *x *= inv_norm;
215    }
216}
217
/// Tests whether two vectors agree component-by-component within a tolerance.
///
/// Each pair of corresponding elements is compared by absolute difference,
/// which absorbs small floating-point rounding discrepancies.
///
/// # Arguments
///
/// * `a` - Left-hand vector
/// * `b` - Right-hand vector
/// * `epsilon` - Largest absolute difference tolerated for any single component
///
/// # Returns
///
/// `true` when the vectors have the same length and every pair of
/// corresponding elements differs by no more than `epsilon` (two empty
/// vectors therefore compare equal); `false` otherwise, including whenever
/// the lengths differ.
///
/// # Examples
///
/// ```
/// use foxstash_core::vector::ops::approx_equal;
///
/// let a = vec![1.0, 2.0, 3.0];
/// let b = vec![1.0001, 2.0001, 3.0001];
/// assert!(approx_equal(&a, &b, 0.001));
/// assert!(!approx_equal(&a, &b, 0.00001));
/// ```
#[inline]
pub fn approx_equal(a: &[f32], b: &[f32], epsilon: f32) -> bool {
    let same_length = a.len() == b.len();

    same_length
        && a.iter()
            .zip(b)
            .all(|(&lhs, &rhs)| (lhs - rhs).abs() <= epsilon)
}
254
/// Internal helper: computes dot product without dimension checking.
///
/// # Preconditions
///
/// Caller must ensure vectors have the same length. This is a logic
/// requirement, not a memory-safety one: on a length mismatch the extra
/// elements of the longer slice are silently ignored in release builds, while
/// debug builds panic so the misuse is caught early.
#[inline(always)]
fn dot_product_unchecked(a: &[f32], b: &[f32]) -> f32 {
    debug_assert_eq!(
        a.len(),
        b.len(),
        "dot_product_unchecked requires slices of equal length"
    );

    a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
}
264
/// Internal helper: the Euclidean (L2) norm of `vector`.
///
/// Squares every component, adds the squares up in `f32`, and takes the
/// square root of the total: sqrt(sum(x\[i\]^2)).
#[inline(always)]
fn magnitude(vector: &[f32]) -> f32 {
    let sum_of_squares: f32 = vector
        .iter()
        .map(|component| component * component)
        .sum();

    sum_of_squares.sqrt()
}
272
#[cfg(test)]
mod tests {
    use super::*;

    /// Default absolute tolerance for floating-point comparisons.
    const EPSILON: f32 = 1e-6;

    /// Asserts that `actual` lies strictly within `EPSILON` of `expected`.
    #[track_caller]
    fn assert_close(actual: f32, expected: f32) {
        assert_close_within(actual, expected, EPSILON);
    }

    /// Asserts that `actual` lies strictly within `tolerance` of `expected`.
    #[track_caller]
    fn assert_close_within(actual: f32, expected: f32, tolerance: f32) {
        assert!(
            (actual - expected).abs() < tolerance,
            "expected {} (tolerance {}), got {}",
            expected,
            tolerance,
            actual
        );
    }

    // ---- cosine_similarity -------------------------------------------------

    #[test]
    fn test_cosine_similarity_identical() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![1.0, 2.0, 3.0];
        assert_close(cosine_similarity(&a, &b).unwrap(), 1.0);
    }

    #[test]
    fn test_cosine_similarity_opposite() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![-1.0, -2.0, -3.0];
        assert_close(cosine_similarity(&a, &b).unwrap(), -1.0);
    }

    #[test]
    fn test_cosine_similarity_orthogonal() {
        let a = vec![1.0, 0.0, 0.0];
        let b = vec![0.0, 1.0, 0.0];
        assert_close(cosine_similarity(&a, &b).unwrap(), 0.0);
    }

    #[test]
    fn test_cosine_similarity_dimension_mismatch() {
        let a = vec![1.0, 2.0];
        let b = vec![1.0, 2.0, 3.0];
        let result = cosine_similarity(&a, &b);
        assert!(matches!(result, Err(RagError::DimensionMismatch { .. })));
    }

    #[test]
    fn test_cosine_similarity_zero_vector() {
        let a = vec![0.0, 0.0, 0.0];
        let b = vec![1.0, 2.0, 3.0];
        assert_close(cosine_similarity(&a, &b).unwrap(), 0.0);
    }

    #[test]
    fn test_cosine_similarity_empty() {
        let a: Vec<f32> = vec![];
        let b: Vec<f32> = vec![];
        assert_close(cosine_similarity(&a, &b).unwrap(), 1.0);
    }

    #[test]
    fn test_cosine_similarity_normalized() {
        // Pre-normalized vectors
        let a = vec![0.6, 0.8];
        let b = vec![0.8, 0.6];
        let expected = 0.6 * 0.8 + 0.8 * 0.6; // = 0.96
        assert_close(cosine_similarity(&a, &b).unwrap(), expected);
    }

    // ---- l2_distance -------------------------------------------------------

    #[test]
    fn test_l2_distance_zero() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![1.0, 2.0, 3.0];
        assert_close(l2_distance(&a, &b).unwrap(), 0.0);
    }

    #[test]
    fn test_l2_distance_unit() {
        let a = vec![0.0, 0.0];
        let b = vec![1.0, 0.0];
        assert_close(l2_distance(&a, &b).unwrap(), 1.0);
    }

    #[test]
    fn test_l2_distance_pythagorean() {
        let a = vec![0.0, 0.0];
        let b = vec![3.0, 4.0];
        assert_close(l2_distance(&a, &b).unwrap(), 5.0);
    }

    #[test]
    fn test_l2_distance_dimension_mismatch() {
        let a = vec![1.0, 2.0];
        let b = vec![1.0, 2.0, 3.0];
        let result = l2_distance(&a, &b);
        assert!(matches!(result, Err(RagError::DimensionMismatch { .. })));
    }

    #[test]
    fn test_l2_distance_negative_values() {
        let a = vec![-1.0, -1.0];
        let b = vec![1.0, 1.0];
        let expected = (8.0_f32).sqrt(); // sqrt(4 + 4)
        assert_close(l2_distance(&a, &b).unwrap(), expected);
    }

    #[test]
    fn test_l2_distance_high_dimension() {
        let a = vec![1.0; 384]; // Typical embedding dimension
        let b = vec![2.0; 384];
        let expected = (384.0_f32).sqrt(); // sqrt(384 * 1.0)
        assert_close(l2_distance(&a, &b).unwrap(), expected);
    }

    // ---- dot_product -------------------------------------------------------

    #[test]
    fn test_dot_product_positive() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![4.0, 5.0, 6.0];
        assert_close(dot_product(&a, &b).unwrap(), 32.0); // 1*4 + 2*5 + 3*6 = 32
    }

    #[test]
    fn test_dot_product_zero() {
        let a = vec![1.0, 0.0, 0.0];
        let b = vec![0.0, 1.0, 0.0];
        assert_close(dot_product(&a, &b).unwrap(), 0.0);
    }

    #[test]
    fn test_dot_product_negative() {
        let a = vec![1.0, 2.0];
        let b = vec![-1.0, -2.0];
        assert_close(dot_product(&a, &b).unwrap(), -5.0);
    }

    #[test]
    fn test_dot_product_dimension_mismatch() {
        let a = vec![1.0, 2.0];
        let b = vec![1.0, 2.0, 3.0];
        let result = dot_product(&a, &b);
        assert!(matches!(result, Err(RagError::DimensionMismatch { .. })));
    }

    #[test]
    fn test_dot_product_empty() {
        let a: Vec<f32> = vec![];
        let b: Vec<f32> = vec![];
        assert_close(dot_product(&a, &b).unwrap(), 0.0);
    }

    // ---- normalize ---------------------------------------------------------

    #[test]
    fn test_normalize_unit_vector() {
        let mut v = vec![1.0, 0.0, 0.0];
        normalize(&mut v);
        assert_close(v[0], 1.0);
        assert_close(v[1], 0.0);
        assert_close(v[2], 0.0);
    }

    #[test]
    fn test_normalize_standard() {
        let mut v = vec![3.0, 4.0];
        normalize(&mut v);
        assert_close(v[0], 0.6);
        assert_close(v[1], 0.8);

        // Verify it's actually unit length
        let length = (v[0] * v[0] + v[1] * v[1]).sqrt();
        assert_close(length, 1.0);
    }

    #[test]
    fn test_normalize_zero_vector() {
        let mut v = vec![0.0, 0.0, 0.0];
        normalize(&mut v);
        for &component in &v {
            assert_close(component, 0.0);
        }
    }

    #[test]
    fn test_normalize_negative_values() {
        let mut v = vec![-3.0, -4.0];
        normalize(&mut v);
        assert_close(v[0], -0.6);
        assert_close(v[1], -0.8);
    }

    #[test]
    fn test_normalize_empty() {
        let mut v: Vec<f32> = vec![];
        normalize(&mut v);
        assert!(v.is_empty());
    }

    #[test]
    fn test_normalize_high_dimension() {
        let mut v = vec![1.0; 384];
        normalize(&mut v);

        // Each component should be 1/sqrt(384)
        let expected = 1.0 / (384.0_f32).sqrt();
        for &val in &v {
            assert_close(val, expected);
        }

        // Verify unit length (relaxed tolerance for error accumulated over
        // many components)
        let length = v.iter().map(|x| x * x).sum::<f32>().sqrt();
        assert_close_within(length, 1.0, 1e-5);
    }

    // ---- approx_equal ------------------------------------------------------

    #[test]
    fn test_approx_equal_identical() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![1.0, 2.0, 3.0];
        assert!(approx_equal(&a, &b, EPSILON));
    }

    #[test]
    fn test_approx_equal_within_epsilon() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![1.0001, 2.0001, 3.0001];
        assert!(approx_equal(&a, &b, 0.001));
    }

    #[test]
    fn test_approx_equal_outside_epsilon() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![1.0001, 2.0001, 3.0001];
        assert!(!approx_equal(&a, &b, 0.00001));
    }

    #[test]
    fn test_approx_equal_dimension_mismatch() {
        let a = vec![1.0, 2.0];
        let b = vec![1.0, 2.0, 3.0];
        assert!(!approx_equal(&a, &b, EPSILON));
    }

    #[test]
    fn test_approx_equal_negative_values() {
        let a = vec![-1.0, -2.0];
        let b = vec![-1.0001, -2.0001];
        assert!(approx_equal(&a, &b, 0.001));
    }

    #[test]
    fn test_approx_equal_empty() {
        let a: Vec<f32> = vec![];
        let b: Vec<f32> = vec![];
        assert!(approx_equal(&a, &b, EPSILON));
    }

    #[test]
    fn test_approx_equal_zero_epsilon() {
        let a = vec![1.0, 2.0];
        let b = vec![1.0, 2.0];
        assert!(approx_equal(&a, &b, 0.0));

        let c = vec![1.0001, 2.0];
        assert!(!approx_equal(&a, &c, 0.0));
    }

    // ---- Integration tests -------------------------------------------------

    #[test]
    fn test_normalized_vectors_cosine_similarity() {
        let mut a = vec![3.0, 4.0];
        let mut b = vec![5.0, 12.0];

        normalize(&mut a);
        normalize(&mut b);

        // For normalized vectors, cosine similarity equals dot product
        let similarity = cosine_similarity(&a, &b).unwrap();
        let dot = dot_product(&a, &b).unwrap();

        assert_close(similarity, dot);
    }

    #[test]
    fn test_l2_distance_relationship_to_cosine() {
        // For unit vectors: L2^2 = 2(1 - cosine_similarity)
        let mut a = vec![1.0, 2.0, 3.0];
        let mut b = vec![4.0, 5.0, 6.0];

        normalize(&mut a);
        normalize(&mut b);

        let similarity = cosine_similarity(&a, &b).unwrap();
        let distance = l2_distance(&a, &b).unwrap();
        let expected_distance_squared = 2.0 * (1.0 - similarity);

        assert_close(distance * distance, expected_distance_squared);
    }

    #[test]
    fn test_performance_typical_embedding() {
        // Smoke test with realistic embedding dimensions (MiniLM-L6-v2: 384).
        // This does not measure time; it checks that all three operations
        // succeed and produce sane values at this size.
        let a: Vec<f32> = (0..384).map(|i| (i as f32) / 384.0).collect();
        let b: Vec<f32> = (0..384).map(|i| 1.0 - (i as f32) / 384.0).collect();

        let similarity = cosine_similarity(&a, &b).unwrap();
        let distance = l2_distance(&a, &b).unwrap();
        let dot = dot_product(&a, &b).unwrap();

        assert!((-1.0..=1.0).contains(&similarity));
        assert!(distance.is_finite() && distance >= 0.0);
        assert!(dot.is_finite());
    }

    #[test]
    fn test_numerical_stability_large_values() {
        let a = vec![1e6, 2e6, 3e6];
        let b = vec![4e6, 5e6, 6e6];

        let similarity = cosine_similarity(&a, &b).unwrap();

        // Should still be in valid range despite large values
        assert!((-1.0..=1.0).contains(&similarity));
    }

    #[test]
    fn test_numerical_stability_small_values() {
        let a = vec![1e-6, 2e-6, 3e-6];
        let b = vec![4e-6, 5e-6, 6e-6];

        let similarity = cosine_similarity(&a, &b).unwrap();

        // Should still compute valid similarity
        assert!((-1.0..=1.0).contains(&similarity));
    }
}