// vecstasy/slice.rs
1use crate::VecLike;
2use std::simd::Simd;
3use std::simd::num::SimdFloat;
4
5pub const SIMD_LANECOUNT: usize = 8;
6pub type SimdUnit = Simd<f32, SIMD_LANECOUNT>;
7
8impl<'a> VecLike for &'a [f32] {
9 type Owned = Vec<f32>;
10
11 /// Computes the squared L2 (Euclidean) distance between `self` and `othr`.
12 ///
13 /// Operates on fixed‐size chunks; any trailing elements when the slice length
14 /// is not a multiple of the chunk size will be silently ignored in release mode.
15 ///
16 /// # Panics
17 /// - In debug mode, if `self.len() != othr.len()`.
18 /// - In debug mode, if the slice length is not a multiple of the internal chunk size.
19 #[inline]
20 fn l2_dist_squared(&self, othr: &Self) -> f32 {
21 debug_assert!(self.len() == othr.len());
22 debug_assert!(self.len() % SIMD_LANECOUNT == 0);
23
24 let mut intermediate_sum_x8 = SimdUnit::splat(0.0);
25
26 let self_chunks = self.chunks_exact(SIMD_LANECOUNT);
27 let othr_chunks = othr.chunks_exact(SIMD_LANECOUNT);
28
29 for (slice_self, slice_othr) in self_chunks.zip(othr_chunks) {
30 let f32x8_slf = SimdUnit::from_slice(slice_self);
31 let f32x8_oth = SimdUnit::from_slice(slice_othr);
32 let diff = f32x8_slf - f32x8_oth;
33 intermediate_sum_x8 += diff * diff;
34 }
35
36 intermediate_sum_x8.reduce_sum() // 8-to-1 sum
37 }
38
39 /// Computes the dot product of `self` and `othr`.
40 ///
41 /// Operates on fixed‐size chunks; any trailing elements when the slice length
42 /// is not a multiple of the chunk size will be silently ignored in release mode.
43 ///
44 /// # Panics
45 /// - In debug mode, if `self.len() != othr.len()`.
46 /// - In debug mode, if the slice length is not a multiple of the internal chunk size.
47 #[inline]
48 fn dot(&self, othr: &Self) -> f32 {
49 debug_assert!(self.len() == othr.len());
50 debug_assert!(self.len() % SIMD_LANECOUNT == 0);
51
52 // accumulator vector of zeroes
53 let mut accumulated = SimdUnit::splat(0.0);
54
55 let self_chunks = self.chunks_exact(SIMD_LANECOUNT);
56 let othr_chunks = othr.chunks_exact(SIMD_LANECOUNT);
57
58 for (slice_self, slice_othr) in self_chunks.zip(othr_chunks) {
59 // load each chunk into a SIMD register
60 let vx = SimdUnit::from_slice(slice_self);
61 let vy = SimdUnit::from_slice(slice_othr);
62 // multiply-and-accumulate
63 accumulated += vx * vy;
64 }
65
66 // horizontal sum across lanes
67 accumulated.reduce_sum()
68 }
69
70 /// Returns a normalized copy of the input slice.
71 ///
72 /// Operates on fixed‐size chunks; any trailing elements when the slice length
73 /// is not a multiple of the chunk size will be silently ignored in release mode.
74 ///
75 /// If the input norm is zero, returns a zero vector of the same length.
76 ///
77 /// # Panics
78 /// - In debug mode, if the slice length is not a multiple of the internal chunk size.
79 #[inline]
80 fn normalized(&self) -> Self::Owned {
81 let norm = self.dot(self).sqrt();
82 if norm == 0.0 {
83 // avoid division by zero; return zero vector
84 return vec![0.0; self.len()];
85 }
86 let inv_norm = SimdUnit::splat(1.0 / norm);
87
88 let mut out = Vec::with_capacity(self.len());
89
90 for chunk in self.chunks_exact(SIMD_LANECOUNT) {
91 let v = SimdUnit::from_slice(chunk);
92 let scaled = v * inv_norm;
93 out.extend_from_slice(&scaled.to_array());
94 }
95
96 out
97 }
98}