vecstasy/
slice.rs

1use crate::VecLike;
2use std::simd::Simd;
3use std::simd::num::SimdFloat;
4
5pub const SIMD_LANECOUNT: usize = 8;
6pub type SimdUnit = Simd<f32, SIMD_LANECOUNT>;
7
8impl<'a> VecLike for &'a [f32] {
9    type Owned = Vec<f32>;
10
11    /// Computes the squared L2 (Euclidean) distance between `self` and `othr`.
12    ///
13    /// Operates on fixed‐size chunks; any trailing elements when the slice length
14    /// is not a multiple of the chunk size will be silently ignored in release mode.
15    ///
16    /// # Panics
17    /// - In debug mode, if `self.len() != othr.len()`.
18    /// - In debug mode, if the slice length is not a multiple of the internal chunk size.
19    #[inline]
20    fn l2_dist_squared(&self, othr: &Self) -> f32 {
21        debug_assert!(self.len() == othr.len());
22        debug_assert!(self.len() % SIMD_LANECOUNT == 0);
23
24        let mut intermediate_sum_x8 = SimdUnit::splat(0.0);
25
26        let self_chunks = self.chunks_exact(SIMD_LANECOUNT);
27        let othr_chunks = othr.chunks_exact(SIMD_LANECOUNT);
28
29        for (slice_self, slice_othr) in self_chunks.zip(othr_chunks) {
30            let f32x8_slf = SimdUnit::from_slice(slice_self);
31            let f32x8_oth = SimdUnit::from_slice(slice_othr);
32            let diff = f32x8_slf - f32x8_oth;
33            intermediate_sum_x8 += diff * diff;
34        }
35
36        intermediate_sum_x8.reduce_sum() // 8-to-1 sum
37    }
38
39    /// Computes the dot product of `self` and `othr`.
40    ///
41    /// Operates on fixed‐size chunks; any trailing elements when the slice length
42    /// is not a multiple of the chunk size will be silently ignored in release mode.
43    ///
44    /// # Panics
45    /// - In debug mode, if `self.len() != othr.len()`.
46    /// - In debug mode, if the slice length is not a multiple of the internal chunk size.
47    #[inline]
48    fn dot(&self, othr: &Self) -> f32 {
49        debug_assert!(self.len() == othr.len());
50        debug_assert!(self.len() % SIMD_LANECOUNT == 0);
51
52        // accumulator vector of zeroes
53        let mut accumulated = SimdUnit::splat(0.0);
54
55        let self_chunks = self.chunks_exact(SIMD_LANECOUNT);
56        let othr_chunks = othr.chunks_exact(SIMD_LANECOUNT);
57
58        for (slice_self, slice_othr) in self_chunks.zip(othr_chunks) {
59            // load each chunk into a SIMD register
60            let vx = SimdUnit::from_slice(slice_self);
61            let vy = SimdUnit::from_slice(slice_othr);
62            // multiply-and-accumulate
63            accumulated += vx * vy;
64        }
65
66        // horizontal sum across lanes
67        accumulated.reduce_sum()
68    }
69
70    /// Returns a normalized copy of the input slice.
71    ///
72    /// Operates on fixed‐size chunks; any trailing elements when the slice length
73    /// is not a multiple of the chunk size will be silently ignored in release mode.
74    ///
75    /// If the input norm is zero, returns a zero vector of the same length.
76    ///
77    /// # Panics
78    /// - In debug mode, if the slice length is not a multiple of the internal chunk size.
79    #[inline]
80    fn normalized(&self) -> Self::Owned {
81        let norm = self.dot(self).sqrt();
82        if norm == 0.0 {
83            // avoid division by zero; return zero vector
84            return vec![0.0; self.len()];
85        }
86        let inv_norm = SimdUnit::splat(1.0 / norm);
87
88        let mut out = Vec::with_capacity(self.len());
89
90        for chunk in self.chunks_exact(SIMD_LANECOUNT) {
91            let v = SimdUnit::from_slice(chunk);
92            let scaled = v * inv_norm;
93            out.extend_from_slice(&scaled.to_array());
94        }
95
96        out
97    }
98}