// innr/lib.rs
1//! SIMD-accelerated vector similarity primitives.
2//!
3//! Fast building blocks for embedding similarity with automatic hardware dispatch.
4//!
5//! # Which Function Should I Use?
6//!
7//! | Task | Function | Notes |
8//! |------|----------|-------|
9//! | **Similarity (normalized)** | [`cosine`] | Most embeddings are normalized |
10//! | **Similarity (raw)** | [`dot`] | When you know norms |
11//! | **Distance (L2)** | [`l2_distance`] | For k-NN, clustering |
12//! | **Token-level matching** | [`maxsim`] | ColBERT-style late interaction |
13//! | **Sparse vectors** | [`sparse_dot`] | BM25 scores, SPLADE |
14//!
15//! # SIMD Dispatch
16//!
17//! All functions automatically dispatch to the fastest available instruction set:
18//!
19//! | Architecture | Instructions | Detection |
20//! |--------------|--------------|-----------|
21//! | x86_64 | AVX-512F | Runtime |
22//! | x86_64 | AVX2 + FMA | Runtime |
23//! | aarch64 | NEON | Always available |
24//! | Other | Portable | LLVM auto-vectorizes |
25//!
26//! Vectors shorter than 16 dimensions use portable code (SIMD overhead not worthwhile).
27//!
28//! # Historical Context
29//!
30//! The inner product (dot product) dates to Grassmann's 1844 "Ausdehnungslehre" and
31//! Hamilton's quaternions, formalized in Gibbs and Heaviside's vector calculus (~1880s).
32//! Modern embedding similarity (Word2Vec 2013, BERT 2018) relies on inner products
33//! in high-dimensional spaces where SIMD acceleration is essential.
34//!
35//! ColBERT's MaxSim (Khattab & Zaharia, 2020) extends this to token-level late
//! interaction, requiring O(|Q| × |D|) inner products per query-document pair.
37//!
38//! # Example
39//!
40//! ```rust
41//! use innr::{dot, cosine, norm};
42//!
43//! let a = [1.0_f32, 0.0, 0.0];
44//! let b = [0.707, 0.707, 0.0];
45//!
46//! // Dot product
47//! let d = dot(&a, &b);
48//! assert!((d - 0.707).abs() < 0.01);
49//!
50//! // Cosine similarity (normalized dot product)
51//! let c = cosine(&a, &b);
52//! assert!((c - 0.707).abs() < 0.01);
53//!
54//! // L2 norm
55//! let n = norm(&a);
56//! assert!((n - 1.0).abs() < 1e-6);
57//! ```
58//!
59//! # References
60//!
61//! - Gibbs, J.W. (1881). "Elements of Vector Analysis"
//! - Mikolov et al. (2013). "Efficient Estimation of Word Representations in Vector Space" (Word2Vec)
63//! - Khattab & Zaharia (2020). "ColBERT: Efficient and Effective Passage Search"
64
65#![warn(missing_docs)]
66#![warn(clippy::all)]
67
68mod arch;
69
70/// Binary (1-bit) quantization: encode, Hamming distance, dot product, Jaccard.
71pub mod binary;
72
73/// Dense vector primitives: dot, cosine, norm, L2/L1 distance, matryoshka.
74pub mod dense;
75
76/// Fast math operations using hardware-aware approximations (rsqrt, NR iteration).
77pub mod fast_math;
78
79/// Batch vector operations with columnar (PDX-style) layout.
80pub mod batch;
81
82/// Sparse vector dot product via sorted-index merge join.
83mod sparse;
84
85/// ColBERT MaxSim late interaction scoring for multi-vector retrieval.
86mod maxsim;
87
88// Re-export core operations
89pub use dense::{
90 angular_distance, cosine, dot, l1_distance, l2_distance, l2_distance_squared,
91 matryoshka_cosine, matryoshka_dot, norm,
92};
93
94// Re-export binary operations
95pub use binary::{binary_dot, binary_hamming, binary_jaccard, encode_binary, PackedBinary};
96
97// Re-export fast math (rsqrt-based approximations)
98pub use fast_math::{fast_cosine, fast_cosine_dispatch, fast_rsqrt, fast_rsqrt_precise};
99
100/// Ternary quantization (1.58-bit) for ultra-compressed embeddings.
101pub mod ternary;
102
103/// Scalar quantization (uint8) for memory-efficient asymmetric similarity.
104pub mod scalar;
105
106pub use sparse::{sparse_dot, sparse_maxsim};
107
108pub use maxsim::{maxsim, maxsim_cosine};
109
/// Minimum vector dimension for SIMD to be worthwhile.
///
/// Below this, dispatch/function-call overhead outweighs SIMD benefits,
/// so short vectors take the portable scalar path instead (see the
/// "SIMD Dispatch" section of the crate docs).
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
pub(crate) const MIN_DIM_SIMD: usize = 16;
115
/// Threshold for treating a norm as "effectively zero".
///
/// Note: `1e-9` is deliberately *smaller* than `f32::EPSILON` (~1.19e-7),
/// so only truly degenerate (essentially zero-length) vectors are caught
/// while any vector with meaningful magnitude passes the check.
///
/// Used by [`cosine`] to avoid division by zero.
pub(crate) const NORM_EPSILON: f32 = 1e-9;
123
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute-tolerance comparison shared by the float tests below.
    fn close(actual: f32, expected: f32, tol: f32) -> bool {
        (actual - expected).abs() < tol
    }

    #[test]
    fn test_dot_basic() {
        // 1*4 + 2*5 + 3*6 = 32
        let lhs = [1.0_f32, 2.0, 3.0];
        let rhs = [4.0_f32, 5.0, 6.0];
        assert!(close(dot(&lhs, &rhs), 32.0, 1e-6));
    }

    #[test]
    fn test_dot_empty() {
        // Empty inputs must produce exactly zero.
        let empty: [f32; 0] = [];
        assert_eq!(dot(&empty, &empty), 0.0);
    }

    #[test]
    fn test_norm() {
        // Classic 3-4-5 triangle.
        assert!(close(norm(&[3.0_f32, 4.0]), 5.0, 1e-6));
    }

    #[test]
    fn test_cosine_orthogonal() {
        // Perpendicular unit vectors have zero similarity.
        let x_axis = [1.0_f32, 0.0];
        let y_axis = [0.0_f32, 1.0];
        assert!(cosine(&x_axis, &y_axis).abs() < 1e-6);
    }

    #[test]
    fn test_cosine_parallel() {
        // Scaling does not change direction, so similarity is 1.
        let unit = [1.0_f32, 0.0];
        let scaled = [2.0_f32, 0.0];
        assert!(close(cosine(&unit, &scaled), 1.0, 1e-6));
    }

    #[test]
    fn test_cosine_zero_vector() {
        // A zero vector yields exactly 0 rather than dividing by zero.
        let v = [1.0_f32, 2.0];
        let origin = [0.0_f32, 0.0];
        assert_eq!(cosine(&v, &origin), 0.0);
    }

    #[test]
    fn test_l2_distance() {
        // Distance from the origin to (3, 4) is 5.
        let origin = [0.0_f32, 0.0];
        let point = [3.0_f32, 4.0];
        assert!(close(l2_distance(&origin, &point), 5.0, 1e-6));
    }

    #[test]
    fn test_l2_distance_same_point() {
        // A point is at (effectively) zero distance from itself.
        let p = [1.0_f32, 2.0, 3.0];
        assert!(l2_distance(&p, &p) < 1e-9);
    }

    #[test]
    fn test_l1_distance() {
        // |1-4| + |2-0| + |3-1| = 3 + 2 + 2 = 7
        let lhs = [1.0_f32, 2.0, 3.0];
        let rhs = [4.0_f32, 0.0, 1.0];
        assert!(close(l1_distance(&lhs, &rhs), 7.0, 1e-6));
    }
}
190}