vecnorm_core/
lib.rs

1//! Pure-Rust core for `vecnorm`. Bulk f32 matrix operations:
2//!
3//! - [`l2_normalize`] / [`l2_normalize_copy`] — row-wise unit-length scaling.
//!   Rows whose norm is not above `EPS` are set to all-zero rather than
//!   being divided by a (near-)zero norm.
6//! - [`cosine_similarity`] — single pair on 1-D vectors. Returns 0 for
7//!   any pair where either side has zero norm.
8//! - [`top_k_argmax`] / [`batch_top_k_argmax`] — partial-heap top-k that
9//!   runs in `O(n log k)`. Tied scores are broken by the original index
10//!   ascending (deterministic).
11
12#![deny(unsafe_code)]
13#![warn(missing_docs)]
14#![warn(rust_2018_idioms)]
15
16use std::cmp::Reverse;
17use std::collections::BinaryHeap;
18
19use ndarray::{ArrayView1, ArrayView2, ArrayViewMut2, Axis};
20use rayon::prelude::*;
21use thiserror::Error;
22
/// Norm threshold used to detect degenerate (near-zero) vectors.
///
/// Norm-based checks in this crate compare against this value instead of
/// exact zero, so no division by a vanishing norm takes place.
pub const EPS: f32 = 1e-12;
25
/// Crate-wide result alias: `std::result::Result` with the error type fixed
/// to [`VecNormError`].
pub type Result<T> = std::result::Result<T, VecNormError>;
28
29/// All errors surfaced by `vecnorm-core`.
30#[derive(Error, Debug)]
31pub enum VecNormError {
32    /// Two arrays had incompatible shapes.
33    #[error("dimension mismatch: a={a:?}, b={b:?}")]
34    DimensionMismatch {
35        /// Shape of the first input.
36        a: Vec<usize>,
37        /// Shape of the second input.
38        b: Vec<usize>,
39    },
40    /// Caller asked for more elements than the input has.
41    #[error("k ({k}) must be <= len ({len})")]
42    KTooLarge {
43        /// Requested k.
44        k: usize,
45        /// Available length.
46        len: usize,
47    },
48    /// Caller passed `k = 0`.
49    #[error("k must be > 0")]
50    KZero,
51}
52
53/// L2-normalize `matrix` in place, row by row. Rows with norm below `EPS`
54/// are zeroed out (i.e. left unchanged at all-zero) to avoid NaN.
55pub fn l2_normalize(matrix: &mut ArrayViewMut2<'_, f32>) {
56    matrix
57        .axis_iter_mut(Axis(0))
58        .into_par_iter()
59        .for_each(|mut row| {
60            let mut sum_sq = 0.0_f32;
61            for &x in row.iter() {
62                sum_sq += x * x;
63            }
64            let norm = sum_sq.sqrt();
65            if norm > EPS {
66                for x in row.iter_mut() {
67                    *x /= norm;
68                }
69            } else {
70                for x in row.iter_mut() {
71                    *x = 0.0;
72                }
73            }
74        });
75}
76
77/// L2-normalize a copy. Same semantics as [`l2_normalize`].
78pub fn l2_normalize_copy(matrix: &ArrayView2<'_, f32>) -> ndarray::Array2<f32> {
79    let mut out = matrix.to_owned();
80    l2_normalize(&mut out.view_mut());
81    out
82}
83
84/// Cosine similarity between two 1-D vectors. Returns 0 if either side is
85/// all-zero.
86pub fn cosine_similarity(a: &ArrayView1<'_, f32>, b: &ArrayView1<'_, f32>) -> Result<f32> {
87    if a.len() != b.len() {
88        return Err(VecNormError::DimensionMismatch {
89            a: a.shape().to_vec(),
90            b: b.shape().to_vec(),
91        });
92    }
93    let mut dot = 0.0_f32;
94    let mut norm_a = 0.0_f32;
95    let mut norm_b = 0.0_f32;
96    for (&x, &y) in a.iter().zip(b.iter()) {
97        dot += x * y;
98        norm_a += x * x;
99        norm_b += y * y;
100    }
101    let denom = norm_a.sqrt() * norm_b.sqrt();
102    if denom <= EPS {
103        return Ok(0.0);
104    }
105    Ok(dot / denom)
106}
107
108/// Top-k argmax over a 1-D score vector. Returns `(index, score)` pairs in
109/// descending order. Ties broken by ascending index.
110pub fn top_k_argmax(scores: &ArrayView1<'_, f32>, k: usize) -> Result<Vec<(usize, f32)>> {
111    if k == 0 {
112        return Err(VecNormError::KZero);
113    }
114    if k > scores.len() {
115        return Err(VecNormError::KTooLarge {
116            k,
117            len: scores.len(),
118        });
119    }
120    // Maintain a min-heap of size k. The smallest element on the heap is
121    // the threshold to beat. We compare on `(Reverse(score), idx)` so equal
122    // scores order ascending by index, which matches the stable convention.
123    let mut heap: BinaryHeap<(Reverse<OrdFloat>, usize)> = BinaryHeap::with_capacity(k);
124    for (i, &s) in scores.iter().enumerate() {
125        let entry = (Reverse(OrdFloat(s)), i);
126        if heap.len() < k {
127            heap.push(entry);
128        } else if let Some(top) = heap.peek() {
129            // Heap is a min-heap on score (because of Reverse); the *largest*
130            // Reverse-key is the smallest score on the heap.
131            if entry.0 < top.0 {
132                heap.pop();
133                heap.push(entry);
134            }
135        }
136    }
137    // Drain heap and sort descending.
138    let mut out: Vec<(usize, f32)> = heap.into_iter().map(|(rs, i)| (i, rs.0 .0)).collect();
139    out.sort_by(|a, b| {
140        b.1.partial_cmp(&a.1)
141            .unwrap_or(std::cmp::Ordering::Equal)
142            .then(a.0.cmp(&b.0))
143    });
144    Ok(out)
145}
146
147/// Batch top-k argmax over an `(n_rows, n_cols)` matrix. With `parallel = true`
148/// distributes rows across rayon's pool.
149pub fn batch_top_k_argmax(
150    scores: &ArrayView2<'_, f32>,
151    k: usize,
152    parallel: bool,
153) -> Result<Vec<Vec<(usize, f32)>>> {
154    if k == 0 {
155        return Err(VecNormError::KZero);
156    }
157    if k > scores.ncols() {
158        return Err(VecNormError::KTooLarge {
159            k,
160            len: scores.ncols(),
161        });
162    }
163    if parallel {
164        scores
165            .axis_iter(Axis(0))
166            .into_par_iter()
167            .map(|row| top_k_argmax(&row, k))
168            .collect()
169    } else {
170        scores
171            .axis_iter(Axis(0))
172            .map(|row| top_k_argmax(&row, k))
173            .collect()
174    }
175}
176
177/// Cosine distance matrix between two `(n_a, d)` and `(n_b, d)` matrices.
178/// Returns an `(n_a, n_b)` matrix where `out[i, j]` is the cosine distance
179/// `1 - cos(a_i, b_j)`. Inputs are not modified; this normalizes copies
180/// internally so accuracy is preserved on un-normalized inputs.
181pub fn cosine_distances(
182    a: &ArrayView2<'_, f32>,
183    b: &ArrayView2<'_, f32>,
184) -> Result<ndarray::Array2<f32>> {
185    if a.ncols() != b.ncols() {
186        return Err(VecNormError::DimensionMismatch {
187            a: a.shape().to_vec(),
188            b: b.shape().to_vec(),
189        });
190    }
191    let an = l2_normalize_copy(a);
192    let bn = l2_normalize_copy(b);
193    let n_a = an.nrows();
194    let n_b = bn.nrows();
195    let mut out = ndarray::Array2::<f32>::zeros((n_a, n_b));
196    out.axis_iter_mut(Axis(0))
197        .into_par_iter()
198        .enumerate()
199        .for_each(|(i, mut row)| {
200            for (j, cell) in row.iter_mut().enumerate() {
201                let mut dot = 0.0_f32;
202                for (&x, &y) in an.row(i).iter().zip(bn.row(j).iter()) {
203                    dot += x * y;
204                }
205                *cell = 1.0 - dot;
206            }
207        });
208    Ok(out)
209}
210
211// ---- internal: Ord-able f32 wrapper ----
212
/// `f32` wrapper with a total order, usable as a `BinaryHeap` key.
///
/// Ordering follows `f32::partial_cmp` for comparable values; NaN compares
/// equal to NaN and below every non-NaN value. Equality is defined through
/// the same ordering so that `PartialEq`, `Eq`, `PartialOrd` and `Ord` all
/// agree (a derived `PartialEq` would report `NaN != NaN` while `cmp` says
/// `Equal`).
#[derive(Debug, Clone, Copy)]
struct OrdFloat(f32);

impl PartialEq for OrdFloat {
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == std::cmp::Ordering::Equal
    }
}

impl Eq for OrdFloat {}

impl Ord for OrdFloat {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        // `partial_cmp` only fails when at least one side is NaN. Comparing
        // the "is NaN" flags in swapped order then ranks NaN as the smallest
        // value and two NaNs as equal.
        self.0
            .partial_cmp(&other.0)
            .unwrap_or_else(|| other.0.is_nan().cmp(&self.0.is_nan()))
    }
}

impl PartialOrd for OrdFloat {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
242
#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::{arr1, arr2, Array1, Array2};

    /// Absolute tolerance used by every approximate comparison in this module.
    const TOL: f32 = 1e-6;

    /// Returns `true` when `got` lies strictly within `TOL` of `want`.
    fn close(got: f32, want: f32) -> bool {
        (got - want).abs() < TOL
    }

    #[test]
    fn l2_normalize_basic() {
        let mut m = arr2(&[[3.0_f32, 4.0], [1.0, 0.0]]);
        l2_normalize(&mut m.view_mut());
        // Row 0 has norm 5; row 1 is already unit length.
        let expected = [[0.6_f32, 0.8], [1.0, 0.0]];
        for (r, want_row) in expected.iter().enumerate() {
            for (c, &want) in want_row.iter().enumerate() {
                assert!(close(m[[r, c]], want), "mismatch at ({}, {})", r, c);
            }
        }
    }

    #[test]
    fn l2_normalize_zero_row_left_zero() {
        let mut m = arr2(&[[0.0_f32, 0.0], [3.0, 4.0]]);
        l2_normalize(&mut m.view_mut());
        let first = m[[0, 0]];
        let second = m[[0, 1]];
        assert_eq!(first, 0.0);
        assert_eq!(second, 0.0);
        assert!(!first.is_nan());
    }

    #[test]
    fn l2_normalize_copy_does_not_mutate_input() {
        let original = arr2(&[[3.0_f32, 4.0]]);
        let _normalized = l2_normalize_copy(&original.view());
        assert_eq!(original[[0, 0]], 3.0);
        assert_eq!(original[[0, 1]], 4.0);
    }

    #[test]
    fn cosine_basic() {
        let x_axis = arr1(&[1.0_f32, 0.0]);
        let x_axis_again = arr1(&[1.0_f32, 0.0]);
        let y_axis = arr1(&[0.0_f32, 1.0]);

        let parallel = cosine_similarity(&x_axis.view(), &x_axis_again.view()).unwrap();
        let orthogonal = cosine_similarity(&x_axis.view(), &y_axis.view()).unwrap();

        assert!(close(parallel, 1.0));
        assert!(close(orthogonal, 0.0));
    }

    #[test]
    fn cosine_zero_for_zero_vector() {
        let zero = arr1(&[0.0_f32, 0.0]);
        let ones = arr1(&[1.0_f32, 1.0]);
        let sim = cosine_similarity(&zero.view(), &ones.view()).unwrap();
        assert_eq!(sim, 0.0);
    }

    #[test]
    fn cosine_dim_mismatch() {
        let short = arr1(&[1.0_f32, 0.0]);
        let long = arr1(&[1.0_f32, 0.0, 1.0]);
        let outcome = cosine_similarity(&short.view(), &long.view());
        assert!(outcome.is_err());
    }

    #[test]
    fn top_k_correct_order() {
        let scores = arr1(&[1.0, 5.0, 3.0, 4.0, 2.0]);
        let picked = top_k_argmax(&scores.view(), 3).unwrap();
        assert_eq!(picked, vec![(1, 5.0), (3, 4.0), (2, 3.0)]);
    }

    #[test]
    fn top_k_full_length_returns_full_sort() {
        let scores = arr1(&[1.0, 5.0, 3.0]);
        let picked = top_k_argmax(&scores.view(), 3).unwrap();
        assert_eq!(picked, vec![(1, 5.0), (2, 3.0), (0, 1.0)]);
    }

    #[test]
    fn top_k_ties_broken_by_lower_index() {
        let scores = arr1(&[1.0, 1.0, 1.0]);
        let picked = top_k_argmax(&scores.view(), 2).unwrap();
        assert_eq!(picked, vec![(0, 1.0), (1, 1.0)]);
    }

    #[test]
    fn top_k_zero_rejected() {
        let scores = arr1(&[1.0, 2.0]);
        let outcome = top_k_argmax(&scores.view(), 0);
        assert!(outcome.is_err());
    }

    #[test]
    fn top_k_too_large_rejected() {
        let scores = arr1(&[1.0, 2.0]);
        let outcome = top_k_argmax(&scores.view(), 3);
        assert!(outcome.is_err());
    }

    #[test]
    fn batch_top_k_serial_and_parallel_match() {
        let grid = Array2::from_shape_fn((10, 50), |(i, j)| (i * 50 + j) as f32);
        let serial = batch_top_k_argmax(&grid.view(), 5, false).unwrap();
        let threaded = batch_top_k_argmax(&grid.view(), 5, true).unwrap();
        assert_eq!(serial, threaded);
        assert_eq!(serial.len(), 10);
        // Row 0 holds 0..50, so its best entry is the last column.
        assert_eq!(serial[0][0], (49, 49.0));
    }

    #[test]
    fn cosine_distances_zero_diagonal() {
        let basis = arr2(&[[1.0_f32, 0.0], [0.0, 1.0]]);
        let dist = cosine_distances(&basis.view(), &basis.view()).unwrap();
        // A vector is at distance 0 from itself.
        for &(r, c) in &[(0, 0), (1, 1)] {
            assert!(close(dist[[r, c]], 0.0));
        }
        // Orthogonal vectors are at distance 1.
        for &(r, c) in &[(0, 1), (1, 0)] {
            assert!(close(dist[[r, c]], 1.0));
        }
    }

    #[test]
    fn cosine_distances_dim_mismatch() {
        let three_cols = Array2::<f32>::zeros((4, 3));
        let five_cols = Array2::<f32>::zeros((4, 5));
        let outcome = cosine_distances(&three_cols.view(), &five_cols.view());
        assert!(outcome.is_err());
    }

    #[test]
    fn nan_in_top_k_does_not_panic() {
        let scores = Array1::from(vec![1.0_f32, f32::NAN, 3.0]);
        // NaN handling is not specified beyond "must not panic".
        let outcome = top_k_argmax(&scores.view(), 2);
        assert!(outcome.is_ok());
    }
}
vecnorm_core/lib.rs

vecnorm_core/
lib.rs