1#[inline]
25#[must_use]
26pub fn sparse_dot_product(a: &[(u32, f32)], b: &[(u32, f32)]) -> f32 {
27 let mut result = 0.0;
28 let mut i = 0;
29 let mut j = 0;
30
31 while i < a.len() && j < b.len() {
32 let (idx_a, val_a) = a[i];
33 let (idx_b, val_b) = b[j];
34
35 match idx_a.cmp(&idx_b) {
36 std::cmp::Ordering::Less => i += 1,
37 std::cmp::Ordering::Greater => j += 1,
38 std::cmp::Ordering::Equal => {
39 result += val_a * val_b;
40 i += 1;
41 j += 1;
42 }
43 }
44 }
45
46 result
47}
48
49#[inline]
51#[must_use]
52pub fn sparse_sum_of_squares(v: &[(u32, f32)]) -> f32 {
53 v.iter().map(|&(_, val)| val * val).sum()
54}
55
56#[inline]
58#[must_use]
59pub fn sparse_l2_norm(v: &[(u32, f32)]) -> f32 {
60 sparse_sum_of_squares(v).sqrt()
61}
62
63#[inline]
72#[must_use]
73pub fn sparse_cosine_similarity(a: &[(u32, f32)], b: &[(u32, f32)]) -> f32 {
74 let dot = sparse_dot_product(a, b);
75 let norm_a = sparse_l2_norm(a);
76 let norm_b = sparse_l2_norm(b);
77
78 if norm_a == 0.0 || norm_b == 0.0 {
79 return 0.0;
80 }
81
82 dot / (norm_a * norm_b)
83}
84
85#[inline]
89#[must_use]
90pub fn sparse_cosine_similarity_with_norms(
91 a: &[(u32, f32)],
92 b: &[(u32, f32)],
93 norm_a: f32,
94 norm_b: f32,
95) -> Option<f32> {
96 if norm_a == 0.0 || norm_b == 0.0 {
97 return None;
98 }
99
100 let dot = sparse_dot_product(a, b);
101 Some(dot / (norm_a * norm_b))
102}
103
104#[inline]
108#[must_use]
109pub fn sparse_cosine_distance(a: &[(u32, f32)], b: &[(u32, f32)]) -> f32 {
110 1.0 - sparse_cosine_similarity(a, b)
111}
112
113#[inline]
120#[must_use]
121pub fn sparse_euclidean_distance_squared(a: &[(u32, f32)], b: &[(u32, f32)]) -> f32 {
122 let norm_a_sq = sparse_sum_of_squares(a);
123 let norm_b_sq = sparse_sum_of_squares(b);
124 let dot = sparse_dot_product(a, b);
125
126 (norm_a_sq + norm_b_sq - 2.0 * dot).max(0.0)
128}
129
130#[inline]
132#[must_use]
133pub fn sparse_euclidean_distance(a: &[(u32, f32)], b: &[(u32, f32)]) -> f32 {
134 sparse_euclidean_distance_squared(a, b).sqrt()
135}
136
137#[derive(Debug, Clone, Copy)]
139pub struct SparseCachedNorm {
140 norm_squared: f32,
142 norm: f32,
144}
145
146impl SparseCachedNorm {
147 #[must_use]
149 pub fn new(v: &[(u32, f32)]) -> Self {
150 let norm_squared = sparse_sum_of_squares(v);
151 let norm = norm_squared.sqrt();
152 Self { norm_squared, norm }
153 }
154
155 #[inline]
157 #[must_use]
158 pub const fn norm(&self) -> f32 {
159 self.norm
160 }
161
162 #[inline]
164 #[must_use]
165 pub const fn norm_squared(&self) -> f32 {
166 self.norm_squared
167 }
168
169 #[inline]
171 #[must_use]
172 pub fn is_zero(&self) -> bool {
173 self.norm == 0.0
174 }
175}
176
177#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
179pub enum SparseDistanceMetric {
180 Euclidean,
182 Cosine,
184 DotProduct,
186}
187
188impl SparseDistanceMetric {
189 #[inline]
191 #[must_use]
192 pub fn calculate(&self, a: &[(u32, f32)], b: &[(u32, f32)]) -> f32 {
193 match self {
194 Self::Euclidean => sparse_euclidean_distance(a, b),
195 Self::Cosine => sparse_cosine_distance(a, b),
196 Self::DotProduct => -sparse_dot_product(a, b),
197 }
198 }
199
200 #[inline]
202 #[must_use]
203 pub fn calculate_with_norms(
204 &self,
205 a: &[(u32, f32)],
206 b: &[(u32, f32)],
207 norm_a: f32,
208 norm_b: f32,
209 ) -> f32 {
210 match self {
211 Self::Euclidean => {
212 let norm_a_sq = norm_a * norm_a;
214 let norm_b_sq = norm_b * norm_b;
215 let dot = sparse_dot_product(a, b);
216 (norm_a_sq + norm_b_sq - 2.0 * dot).max(0.0).sqrt()
217 }
218 Self::Cosine => {
219 sparse_cosine_similarity_with_norms(a, b, norm_a, norm_b).map_or(1.0, |s| 1.0 - s)
220 }
221 Self::DotProduct => -sparse_dot_product(a, b),
222 }
223 }
224}
225
226#[cfg(test)]
227mod tests {
228 use super::*;
229
230 const EPSILON: f32 = 1e-5;
231
232 fn assert_near(a: f32, b: f32, epsilon: f32) {
233 assert!(
234 (a - b).abs() < epsilon,
235 "assertion failed: {} !~ {} (diff: {})",
236 a,
237 b,
238 (a - b).abs()
239 );
240 }
241
242 #[test]
243 fn test_sparse_dot_product_no_overlap() {
244 let a = [(0, 1.0), (2, 2.0)];
245 let b = [(1, 1.0), (3, 1.0)];
246 assert_near(sparse_dot_product(&a, &b), 0.0, EPSILON);
247 }
248
249 #[test]
250 fn test_sparse_dot_product_full_overlap() {
251 let a = [(0, 1.0), (2, 2.0), (5, 3.0)];
252 let b = [(0, 1.0), (2, 2.0), (5, 3.0)];
253 assert_near(sparse_dot_product(&a, &b), 14.0, EPSILON);
255 }
256
257 #[test]
258 fn test_sparse_dot_product_partial_overlap() {
259 let a = [(0, 1.0), (2, 2.0), (5, 3.0)];
260 let b = [(1, 1.0), (2, 2.0), (5, 1.0)];
261 assert_near(sparse_dot_product(&a, &b), 7.0, EPSILON);
263 }
264
265 #[test]
266 fn test_sparse_dot_product_empty() {
267 let a: [(u32, f32); 0] = [];
268 let b = [(0, 1.0)];
269 assert_near(sparse_dot_product(&a, &b), 0.0, EPSILON);
270 }
271
272 #[test]
273 fn test_sparse_l2_norm() {
274 let v = [(0, 3.0), (5, 4.0)];
275 assert_near(sparse_l2_norm(&v), 5.0, EPSILON);
276 }
277
278 #[test]
279 fn test_sparse_cosine_similarity_identical() {
280 let a = [(0, 1.0), (1, 2.0)];
281 assert_near(sparse_cosine_similarity(&a, &a), 1.0, EPSILON);
282 }
283
284 #[test]
285 fn test_sparse_cosine_similarity_orthogonal() {
286 let a = [(0, 1.0)];
287 let b = [(1, 1.0)];
288 assert_near(sparse_cosine_similarity(&a, &b), 0.0, EPSILON);
289 }
290
291 #[test]
292 fn test_sparse_cosine_similarity_opposite() {
293 let a = [(0, 1.0)];
294 let b = [(0, -1.0)];
295 assert_near(sparse_cosine_similarity(&a, &b), -1.0, EPSILON);
296 }
297
298 #[test]
299 fn test_sparse_cosine_distance() {
300 let a = [(0, 1.0), (1, 2.0)];
301 assert_near(sparse_cosine_distance(&a, &a), 0.0, EPSILON);
302
303 let b = [(2, 1.0)];
304 assert_near(sparse_cosine_distance(&a, &b), 1.0, EPSILON);
305 }
306
307 #[test]
308 fn test_sparse_euclidean_distance() {
309 let a = [(0, 0.0)];
310 let b = [(0, 3.0), (1, 4.0)];
311 assert_near(sparse_euclidean_distance(&a, &b), 5.0, EPSILON);
312 }
313
314 #[test]
315 fn test_sparse_euclidean_distance_no_overlap() {
316 let a = [(0, 3.0)];
318 let b = [(1, 4.0)];
319 assert_near(sparse_euclidean_distance(&a, &b), 5.0, EPSILON);
321 }
322
323 #[test]
324 fn test_sparse_cached_norm() {
325 let v = [(0, 3.0), (5, 4.0)];
326 let cached = SparseCachedNorm::new(&v);
327
328 assert_near(cached.norm(), 5.0, EPSILON);
329 assert_near(cached.norm_squared(), 25.0, EPSILON);
330 assert!(!cached.is_zero());
331 }
332
333 #[test]
334 fn test_sparse_cached_norm_empty() {
335 let v: [(u32, f32); 0] = [];
336 let cached = SparseCachedNorm::new(&v);
337 assert!(cached.is_zero());
338 }
339
340 #[test]
341 fn test_sparse_distance_metric() {
342 let a = [(0, 3.0), (1, 4.0)];
343 let b = [(0, 3.0), (1, 4.0)];
344
345 assert_near(SparseDistanceMetric::Euclidean.calculate(&a, &b), 0.0, EPSILON);
346 assert_near(SparseDistanceMetric::Cosine.calculate(&a, &b), 0.0, EPSILON);
347 assert_near(SparseDistanceMetric::DotProduct.calculate(&a, &b), -25.0, EPSILON);
348 }
349
350 #[test]
351 fn test_sparse_cosine_with_norms() {
352 let a = [(0, 3.0), (1, 4.0)];
353 let b = [(0, 3.0), (1, 4.0)];
354 let norm = sparse_l2_norm(&a);
355
356 let sim = sparse_cosine_similarity_with_norms(&a, &b, norm, norm);
357 assert!(sim.is_some());
358 assert_near(sim.unwrap(), 1.0, EPSILON);
359 }
360
361 #[test]
362 fn test_sparse_cosine_with_zero_norm() {
363 let a: [(u32, f32); 0] = [];
364 let b = [(0, 1.0)];
365
366 let sim = sparse_cosine_similarity_with_norms(&a, &b, 0.0, 1.0);
367 assert!(sim.is_none());
368 }
369}