Skip to main content

innr/
lib.rs

//! SIMD-accelerated vector similarity primitives.
//!
//! Fast building blocks for embedding similarity with automatic hardware dispatch.
//!
//! # Which Function Should I Use?
//!
//! | Task | Function | Notes |
//! |------|----------|-------|
//! | **Similarity (normalized)** | [`cosine`] | Most embeddings are normalized |
//! | **Similarity (raw)** | [`dot`] | When you know norms |
//! | **Distance (L2)** | [`l2_distance`] | For k-NN, clustering |
//! | **Token-level matching** | [`maxsim`] | ColBERT-style late interaction |
//! | **Sparse vectors** | [`sparse_dot`] | BM25 scores, SPLADE |
//!
//! # SIMD Dispatch
//!
//! All functions automatically dispatch to the fastest available instruction set:
//!
//! | Architecture | Instructions | Detection |
//! |--------------|--------------|-----------|
//! | x86_64 | AVX-512F | Runtime |
//! | x86_64 | AVX2 + FMA | Runtime |
//! | aarch64 | NEON | Always available |
//! | Other | Portable | LLVM auto-vectorizes |
//!
//! Vectors shorter than 16 dimensions use portable code (SIMD overhead not worthwhile).
//!
//! # Historical Context
//!
//! The inner product (dot product) dates to Grassmann's 1844 "Ausdehnungslehre" and
//! Hamilton's quaternions, formalized in Gibbs and Heaviside's vector calculus (~1880s).
//! Modern embedding similarity (Word2Vec 2013, BERT 2018) relies on inner products
//! in high-dimensional spaces where SIMD acceleration is essential.
//!
//! ColBERT's MaxSim (Khattab & Zaharia, 2020) extends this to token-level late
//! interaction, requiring O(|Q| × |D|) inner products per query-document pair.
//!
//! # Example
//!
//! ```rust
//! use innr::{dot, cosine, norm};
//!
//! let a = [1.0_f32, 0.0, 0.0];
//! let b = [0.707, 0.707, 0.0];
//!
//! // Dot product
//! let d = dot(&a, &b);
//! assert!((d - 0.707).abs() < 0.01);
//!
//! // Cosine similarity (normalized dot product)
//! let c = cosine(&a, &b);
//! assert!((c - 0.707).abs() < 0.01);
//!
//! // L2 norm
//! let n = norm(&a);
//! assert!((n - 1.0).abs() < 1e-6);
//! ```
//!
//! # References
//!
//! - Gibbs, J.W. (1881). "Elements of Vector Analysis"
//! - Mikolov et al. (2013). "Efficient Estimation of Word Representations" (Word2Vec)
//! - Khattab & Zaharia (2020). "ColBERT: Efficient and Effective Passage Search"

65#![warn(missing_docs)]
66#![warn(clippy::all)]
67
68mod arch;
69
70/// Binary (1-bit) quantization: encode, Hamming distance, dot product, Jaccard.
71pub mod binary;
72
73/// Dense vector primitives: dot, cosine, norm, L2/L1 distance, matryoshka.
74pub mod dense;
75
76/// Fast math operations using hardware-aware approximations (rsqrt, NR iteration).
77pub mod fast_math;
78
79/// Batch vector operations with columnar (PDX-style) layout.
80pub mod batch;
81
82/// Sparse vector dot product via sorted-index merge join.
83mod sparse;
84
85/// ColBERT MaxSim late interaction scoring for multi-vector retrieval.
86mod maxsim;
87
88// Re-export core operations
89pub use dense::{
90    angular_distance, cosine, dot, l1_distance, l2_distance, l2_distance_squared,
91    matryoshka_cosine, matryoshka_dot, norm,
92};
93
94// Re-export binary operations
95pub use binary::{binary_dot, binary_hamming, binary_jaccard, encode_binary, PackedBinary};
96
97// Re-export fast math (rsqrt-based approximations)
98pub use fast_math::{fast_cosine, fast_cosine_dispatch, fast_rsqrt, fast_rsqrt_precise};
99
100/// Ternary quantization (1.58-bit) for ultra-compressed embeddings.
101pub mod ternary;
102
103/// Scalar quantization (uint8) for memory-efficient asymmetric similarity.
104pub mod scalar;
105
106pub use sparse::{sparse_dot, sparse_maxsim};
107
108pub use maxsim::{maxsim, maxsim_cosine};
109
/// Minimum vector dimension for SIMD dispatch to be worthwhile.
///
/// Below this, runtime feature-detection and function call overhead
/// outweigh SIMD benefits, so callers fall back to portable code.
/// Only defined on architectures where SIMD paths exist.
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
pub(crate) const MIN_DIM_SIMD: usize = 16;
115
/// Threshold below which a norm is treated as "effectively zero".
///
/// Deliberately tiny — well *below* `f32::EPSILON` (~1.19e-7) — so that
/// only genuinely degenerate (near-zero) vectors are caught, while any
/// vector with meaningful magnitude passes through unaffected.
///
/// Used by [`cosine`] to avoid division by zero.
pub(crate) const NORM_EPSILON: f32 = 1e-9;
123
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_dot_basic() {
        let a = [1.0_f32, 2.0, 3.0];
        let b = [4.0_f32, 5.0, 6.0];
        // 1*4 + 2*5 + 3*6 = 32
        let result = dot(&a, &b);
        assert!((result - 32.0).abs() < 1e-6);
    }

    #[test]
    fn test_dot_empty() {
        // Empty vectors have a well-defined dot product of zero.
        let a: [f32; 0] = [];
        let b: [f32; 0] = [];
        assert_eq!(dot(&a, &b), 0.0);
    }

    #[test]
    fn test_norm() {
        // Classic 3-4-5 triangle.
        let v = [3.0_f32, 4.0];
        assert!((norm(&v) - 5.0).abs() < 1e-6);
    }

    #[test]
    fn test_cosine_orthogonal() {
        let a = [1.0_f32, 0.0];
        let b = [0.0_f32, 1.0];
        assert!(cosine(&a, &b).abs() < 1e-6);
    }

    #[test]
    fn test_cosine_parallel() {
        // Cosine is scale-invariant: parallel vectors score 1.
        let a = [1.0_f32, 0.0];
        let b = [2.0_f32, 0.0];
        assert!((cosine(&a, &b) - 1.0).abs() < 1e-6);
    }

    #[test]
    fn test_cosine_zero_vector() {
        // Degenerate (zero-norm) input yields 0 rather than NaN.
        let a = [1.0_f32, 2.0];
        let zero = [0.0_f32, 0.0];
        assert_eq!(cosine(&a, &zero), 0.0);
    }

    #[test]
    fn test_l2_distance() {
        let a = [0.0_f32, 0.0];
        let b = [3.0_f32, 4.0];
        assert!((l2_distance(&a, &b) - 5.0).abs() < 1e-6);
    }

    #[test]
    fn test_l2_distance_same_point() {
        let a = [1.0_f32, 2.0, 3.0];
        assert!(l2_distance(&a, &a) < 1e-9);
    }

    #[test]
    fn test_l1_distance() {
        let a = [1.0_f32, 2.0, 3.0];
        let b = [4.0_f32, 0.0, 1.0];
        // |1-4| + |2-0| + |3-1| = 3 + 2 + 2 = 7
        assert!((l1_distance(&a, &b) - 7.0).abs() < 1e-6);
    }
}