// velesdb_core/simd_explicit.rs
use wide::f32x8;
26
27#[inline]
49#[must_use]
50pub fn dot_product_simd(a: &[f32], b: &[f32]) -> f32 {
51 assert_eq!(a.len(), b.len(), "Vector dimensions must match");
52
53 let len = a.len();
54 let simd_len = len / 8;
55 let remainder = len % 8;
56
57 let mut sum = f32x8::ZERO;
58
59 for i in 0..simd_len {
62 let offset = i * 8;
63 let va = f32x8::from(&a[offset..offset + 8]);
64 let vb = f32x8::from(&b[offset..offset + 8]);
65 sum = va.mul_add(vb, sum); }
67
68 let mut result = sum.reduce_add();
70
71 let base = simd_len * 8;
73 for i in 0..remainder {
74 result += a[base + i] * b[base + i];
75 }
76
77 result
78}
79
80#[inline]
90#[must_use]
91pub fn euclidean_distance_simd(a: &[f32], b: &[f32]) -> f32 {
92 squared_l2_distance_simd(a, b).sqrt()
93}
94
95#[inline]
103#[must_use]
104pub fn squared_l2_distance_simd(a: &[f32], b: &[f32]) -> f32 {
105 assert_eq!(a.len(), b.len(), "Vector dimensions must match");
106
107 let len = a.len();
108 let simd_len = len / 8;
109 let remainder = len % 8;
110
111 let mut sum = f32x8::ZERO;
112
113 for i in 0..simd_len {
114 let offset = i * 8;
115 let va = f32x8::from(&a[offset..offset + 8]);
116 let vb = f32x8::from(&b[offset..offset + 8]);
117 let diff = va - vb;
118 sum = diff.mul_add(diff, sum); }
120
121 let mut result = sum.reduce_add();
122
123 let base = simd_len * 8;
124 for i in 0..remainder {
125 let diff = a[base + i] - b[base + i];
126 result += diff * diff;
127 }
128
129 result
130}
131
132#[inline]
143#[must_use]
144#[allow(clippy::similar_names)]
145pub fn cosine_similarity_simd(a: &[f32], b: &[f32]) -> f32 {
146 assert_eq!(a.len(), b.len(), "Vector dimensions must match");
147
148 let len = a.len();
149 let simd_len = len / 8;
150 let remainder = len % 8;
151
152 let mut dot_sum = f32x8::ZERO;
153 let mut norm_a_sum = f32x8::ZERO;
154 let mut norm_b_sum = f32x8::ZERO;
155
156 for i in 0..simd_len {
158 let offset = i * 8;
159 let va = f32x8::from(&a[offset..offset + 8]);
160 let vb = f32x8::from(&b[offset..offset + 8]);
161
162 dot_sum = va.mul_add(vb, dot_sum);
163 norm_a_sum = va.mul_add(va, norm_a_sum);
164 norm_b_sum = vb.mul_add(vb, norm_b_sum);
165 }
166
167 let mut dot = dot_sum.reduce_add();
168 let mut norm_a_sq = norm_a_sum.reduce_add();
169 let mut norm_b_sq = norm_b_sum.reduce_add();
170
171 let base = simd_len * 8;
173 for i in 0..remainder {
174 let ai = a[base + i];
175 let bi = b[base + i];
176 dot += ai * bi;
177 norm_a_sq += ai * ai;
178 norm_b_sq += bi * bi;
179 }
180
181 let norm_a = norm_a_sq.sqrt();
182 let norm_b = norm_b_sq.sqrt();
183
184 if norm_a == 0.0 || norm_b == 0.0 {
185 return 0.0;
186 }
187
188 dot / (norm_a * norm_b)
189}
190
191#[inline]
193#[must_use]
194pub fn norm_simd(v: &[f32]) -> f32 {
195 let len = v.len();
196 let simd_len = len / 8;
197 let remainder = len % 8;
198
199 let mut sum = f32x8::ZERO;
200
201 for i in 0..simd_len {
202 let offset = i * 8;
203 let vv = f32x8::from(&v[offset..offset + 8]);
204 sum = vv.mul_add(vv, sum); }
206
207 let mut result = sum.reduce_add();
208
209 let base = simd_len * 8;
210 for i in 0..remainder {
211 result += v[base + i] * v[base + i];
212 }
213
214 result.sqrt()
215}
216
/// Computes the Hamming distance between two `f32` vectors treated as bits.
///
/// Each element is binarized with the threshold `> 0.5` (so exactly `0.5`
/// and NaN count as "off"), and the number of positions where the two
/// binarized values differ is returned as an `f32`.
///
/// Returns `0.0` for two empty slices.
///
/// # Panics
///
/// Panics if `a` and `b` have different lengths.
#[inline]
#[must_use]
// The count is returned as `f32` by contract; counts above 2^24 cannot be
// represented exactly in an `f32`.
#[allow(clippy::cast_precision_loss)]
pub fn hamming_distance_simd(a: &[f32], b: &[f32]) -> f32 {
    assert_eq!(a.len(), b.len(), "Vector dimensions must match");

    // One zipped pass replaces the hand-unrolled indexed loop and its
    // separate remainder loop; the count is the same.
    let mismatches: u32 = a
        .iter()
        .zip(b)
        .map(|(&x, &y)| u32::from((x > 0.5) != (y > 0.5)))
        .sum();

    mismatches as f32
}
264
/// Computes the Hamming distance between two bit-packed vectors.
///
/// Each pair of `u64` words is XOR-ed and the set bits of the result are
/// counted with `count_ones`; the per-word counts are summed.
///
/// Returns `0` for two empty slices.
///
/// # Panics
///
/// Panics if `a` and `b` have different lengths.
#[inline]
#[must_use]
pub fn hamming_distance_binary(a: &[u64], b: &[u64]) -> u32 {
    assert_eq!(a.len(), b.len(), "Vector dimensions must match");

    let mut differing_bits = 0u32;
    for (word_a, word_b) in a.iter().zip(b) {
        differing_bits += (word_a ^ word_b).count_ones();
    }
    differing_bits
}
293
/// Computes the Hamming distance between two bit-packed vectors, processing
/// eight `u64` words per iteration with four independent counters.
///
/// Produces the same result as `hamming_distance_binary`: each pair of words
/// is XOR-ed and its set bits counted with `count_ones`. Within a group of
/// eight, the counts are spread over four accumulators that are summed at
/// the end; trailing words (fewer than eight) go into the first accumulator.
///
/// Returns `0` for two empty slices.
///
/// # Panics
///
/// Panics if `a` and `b` have different lengths.
#[inline]
#[must_use]
pub fn hamming_distance_binary_fast(a: &[u64], b: &[u64]) -> u32 {
    assert_eq!(a.len(), b.len(), "Vector dimensions must match");

    // `chunks_exact` hands out only full 8-word groups, so no base-offset
    // arithmetic is needed inside the loop.
    let groups_a = a.chunks_exact(8);
    let groups_b = b.chunks_exact(8);
    let tail_a = groups_a.remainder();
    let tail_b = groups_b.remainder();

    let mut c0 = 0u32;
    let mut c1 = 0u32;
    let mut c2 = 0u32;
    let mut c3 = 0u32;

    for (ga, gb) in groups_a.zip(groups_b) {
        let bits = |k: usize| (ga[k] ^ gb[k]).count_ones();
        // Same word-to-accumulator assignment as before: 0,2 -> c0; 1,3 -> c1;
        // 4,6 -> c2; 5,7 -> c3.
        c0 += bits(0) + bits(2);
        c1 += bits(1) + bits(3);
        c2 += bits(4) + bits(6);
        c3 += bits(5) + bits(7);
    }

    for (&x, &y) in tail_a.iter().zip(tail_b) {
        c0 += (x ^ y).count_ones();
    }

    c0 + c1 + c2 + c3
}
337
/// Computes the Jaccard similarity of two `f32` vectors treated as sets.
///
/// Position `i` belongs to a set when its value is `> 0.5` (so exactly `0.5`
/// and NaN count as "absent"). The result is `|A ∩ B| / |A ∪ B|`.
///
/// Returns `1.0` when the union is empty, i.e. both sets are empty (this
/// includes two empty slices).
///
/// # Panics
///
/// Panics if `a` and `b` have different lengths.
#[inline]
#[must_use]
// The ratio is returned as `f32` by contract; counts above 2^24 cannot be
// represented exactly in an `f32`.
#[allow(clippy::cast_precision_loss)]
pub fn jaccard_similarity_simd(a: &[f32], b: &[f32]) -> f32 {
    assert_eq!(a.len(), b.len(), "Vector dimensions must match");

    let mut intersection = 0u32;
    let mut union_count = 0u32;

    // One zipped pass replaces the chunked index loop and its duplicate
    // remainder loop; the counts are the same.
    for (&x, &y) in a.iter().zip(b) {
        let in_a = x > 0.5;
        let in_b = y > 0.5;
        intersection += u32::from(in_a && in_b);
        union_count += u32::from(in_a || in_b);
    }

    // Two empty sets are treated as identical.
    if union_count == 0 {
        return 1.0;
    }

    intersection as f32 / union_count as f32
}
386
387#[inline]
389pub fn normalize_inplace_simd(v: &mut [f32]) {
390 let norm = norm_simd(v);
391
392 if norm == 0.0 {
393 return;
394 }
395
396 let inv_norm = 1.0 / norm;
397 let inv_norm_simd = f32x8::splat(inv_norm);
398
399 let len = v.len();
400 let simd_len = len / 8;
401 let remainder = len % 8;
402
403 for i in 0..simd_len {
404 let offset = i * 8;
405 let vv = f32x8::from(&v[offset..offset + 8]);
406 let normalized = vv * inv_norm_simd;
407 let arr: [f32; 8] = normalized.into();
408 v[offset..offset + 8].copy_from_slice(&arr);
409 }
410
411 let base = simd_len * 8;
412 for i in 0..remainder {
413 v[base + i] *= inv_norm;
414 }
415}