// trueno/vector/ops/reductions/stats.rs
//! Statistical and numerically stable reduction operations for Vector<f32>
//!
//! Provides: `sum_kahan`, `sum_of_squares`, `mean`, `variance`, `stddev`,
//! `covariance`, `correlation`.

#[cfg(target_arch = "x86_64")]
use crate::backends::avx2::Avx2Backend;
#[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
use crate::backends::neon::NeonBackend;
use crate::backends::scalar::ScalarBackend;
#[cfg(target_arch = "x86_64")]
use crate::backends::sse2::Sse2Backend;
#[cfg(target_arch = "wasm32")]
use crate::backends::wasm::WasmBackend;
use crate::backends::VectorBackend;
use crate::vector::Vector;
use crate::{Backend, Result, TruenoError};

19impl Vector<f32> {
20    /// Kahan summation (numerically stable sum)
21    ///
22    /// Uses the Kahan summation algorithm to reduce floating-point rounding errors
23    /// when summing many numbers. This is more accurate than the standard sum() method
24    /// for vectors with many elements or elements of vastly different magnitudes.
25    ///
26    /// # Performance
27    ///
28    /// Note: Kahan summation is inherently sequential and cannot be effectively
29    /// parallelized with SIMD. All backends use the scalar implementation.
30    ///
31    /// # Examples
32    ///
33    /// ```
34    /// use trueno::Vector;
35    ///
36    /// let v = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0]);
37    /// assert_eq!(v.sum_kahan()?, 10.0);
38    /// # Ok::<(), trueno::TruenoError>(())
39    /// ```
40    pub fn sum_kahan(&self) -> Result<f32> {
41        if self.data.is_empty() {
42            return Ok(0.0);
43        }
44
45        // SAFETY: Unsafe block delegates to backend implementation which maintains safety invariants
46        let result = unsafe {
47            match self.backend {
48                Backend::Scalar => ScalarBackend::sum_kahan(&self.data),
49                #[cfg(target_arch = "x86_64")]
50                Backend::SSE2 | Backend::AVX => Sse2Backend::sum_kahan(&self.data),
51                #[cfg(target_arch = "x86_64")]
52                Backend::AVX2 | Backend::AVX512 => Avx2Backend::sum_kahan(&self.data),
53                #[cfg(not(target_arch = "x86_64"))]
54                Backend::SSE2 | Backend::AVX | Backend::AVX2 | Backend::AVX512 => {
55                    ScalarBackend::sum_kahan(&self.data)
56                }
57                #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
58                Backend::NEON => NeonBackend::sum_kahan(&self.data),
59                #[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
60                Backend::NEON => ScalarBackend::sum_kahan(&self.data),
61                #[cfg(target_arch = "wasm32")]
62                Backend::WasmSIMD => WasmBackend::sum_kahan(&self.data),
63                #[cfg(not(target_arch = "wasm32"))]
64                Backend::WasmSIMD => ScalarBackend::sum_kahan(&self.data),
65                Backend::GPU | Backend::Auto => ScalarBackend::sum_kahan(&self.data),
66            }
67        };
68
69        Ok(result)
70    }
71
72    /// Sum of squared elements
73    ///
74    /// Computes the sum of squares: sum(a\[i\]^2).
75    /// This is the building block for computing L2 norm and variance.
76    ///
77    /// # Examples
78    ///
79    /// ```
80    /// use trueno::Vector;
81    ///
82    /// let v = Vector::from_slice(&[1.0, 2.0, 3.0]);
83    /// let sum_sq = v.sum_of_squares()?;
84    /// assert_eq!(sum_sq, 14.0); // 1^2 + 2^2 + 3^2 = 1 + 4 + 9 = 14
85    /// # Ok::<(), trueno::TruenoError>(())
86    /// ```
87    ///
88    /// # Empty vectors
89    ///
90    /// Returns 0.0 for empty vectors.
91    pub fn sum_of_squares(&self) -> Result<f32> {
92        if self.data.is_empty() {
93            return Ok(0.0);
94        }
95
96        // Use dot product with self: dot(self, self) = sum(a[i]^2)
97        self.dot(self)
98    }
99
100    /// Arithmetic mean (average)
101    ///
102    /// Computes the arithmetic mean of all elements: sum(a\[i\]) / n.
103    ///
104    /// # Performance
105    ///
106    /// Uses optimized SIMD sum() implementation, then divides by length.
107    ///
108    /// # Examples
109    ///
110    /// ```
111    /// use trueno::Vector;
112    ///
113    /// let v = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0]);
114    /// let avg = v.mean()?;
115    /// assert!((avg - 2.5).abs() < 1e-5); // (1+2+3+4)/4 = 2.5
116    /// # Ok::<(), trueno::TruenoError>(())
117    /// ```
118    ///
119    /// # Empty vectors
120    ///
121    /// Returns an error for empty vectors (division by zero).
122    ///
123    /// ```
124    /// use trueno::{Vector, TruenoError};
125    ///
126    /// let v: Vector<f32> = Vector::from_slice(&[]);
127    /// assert!(matches!(v.mean(), Err(TruenoError::EmptyVector)));
128    /// ```
129    pub fn mean(&self) -> Result<f32> {
130        if self.data.is_empty() {
131            return Err(TruenoError::EmptyVector);
132        }
133
134        let total = self.sum()?;
135        Ok(total / self.len() as f32)
136    }
137
138    /// Population variance
139    ///
140    /// Computes the population variance: Var(X) = E\[(X - μ)²\] = E\[X²\] - μ²
141    /// Uses the computational formula to avoid two passes over the data.
142    ///
143    /// # Performance
144    ///
145    /// Uses optimized SIMD implementations via sum_of_squares() and mean().
146    ///
147    /// # Examples
148    ///
149    /// ```
150    /// use trueno::Vector;
151    ///
152    /// let v = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
153    /// let var = v.variance()?;
154    /// assert!((var - 2.0).abs() < 1e-5); // Population variance
155    /// # Ok::<(), trueno::TruenoError>(())
156    /// ```
157    ///
158    /// # Empty vectors
159    ///
160    /// Returns an error for empty vectors.
161    ///
162    /// ```
163    /// use trueno::{Vector, TruenoError};
164    ///
165    /// let v: Vector<f32> = Vector::from_slice(&[]);
166    /// assert!(matches!(v.variance(), Err(TruenoError::EmptyVector)));
167    /// ```
168    pub fn variance(&self) -> Result<f32> {
169        if self.data.is_empty() {
170            return Err(TruenoError::EmptyVector);
171        }
172
173        let mean_val = self.mean()?;
174        let sum_sq = self.sum_of_squares()?;
175        let mean_sq = sum_sq / self.len() as f32;
176
177        // Var(X) = E[X²] - μ²
178        Ok(mean_sq - mean_val * mean_val)
179    }
180
181    /// Population standard deviation
182    ///
183    /// Computes the population standard deviation: σ = sqrt(Var(X)).
184    /// This is the square root of the variance.
185    ///
186    /// # Performance
187    ///
188    /// Uses optimized SIMD implementations via variance().
189    ///
190    /// # Examples
191    ///
192    /// ```
193    /// use trueno::Vector;
194    ///
195    /// let v = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
196    /// let sd = v.stddev()?;
197    /// assert!((sd - 1.4142135).abs() < 1e-5); // sqrt(2) ≈ 1.414
198    /// # Ok::<(), trueno::TruenoError>(())
199    /// ```
200    ///
201    /// # Empty vectors
202    ///
203    /// Returns an error for empty vectors.
204    ///
205    /// ```
206    /// use trueno::{Vector, TruenoError};
207    ///
208    /// let v: Vector<f32> = Vector::from_slice(&[]);
209    /// assert!(matches!(v.stddev(), Err(TruenoError::EmptyVector)));
210    /// ```
211    pub fn stddev(&self) -> Result<f32> {
212        let var = self.variance()?;
213        Ok(var.sqrt())
214    }
215
216    /// Population covariance between two vectors
217    ///
218    /// Computes the population covariance: Cov(X,Y) = E[(X - μx)(Y - μy)]
219    /// Uses the computational formula: Cov(X,Y) = E\[XY\] - μx·μy
220    ///
221    /// # Performance
222    ///
223    /// Uses optimized SIMD implementations via dot() and mean().
224    ///
225    /// # Examples
226    ///
227    /// ```
228    /// use trueno::Vector;
229    ///
230    /// let x = Vector::from_slice(&[1.0, 2.0, 3.0]);
231    /// let y = Vector::from_slice(&[2.0, 4.0, 6.0]);
232    /// let cov = x.covariance(&y)?;
233    /// assert!((cov - 1.333).abs() < 0.01); // Perfect positive covariance
234    /// # Ok::<(), trueno::TruenoError>(())
235    /// ```
236    ///
237    /// # Size mismatch
238    ///
239    /// Returns an error if vectors have different lengths.
240    ///
241    /// ```
242    /// use trueno::{Vector, TruenoError};
243    ///
244    /// let x = Vector::from_slice(&[1.0, 2.0]);
245    /// let y = Vector::from_slice(&[1.0, 2.0, 3.0]);
246    /// assert!(matches!(x.covariance(&y), Err(TruenoError::SizeMismatch { .. })));
247    /// ```
248    ///
249    /// # Empty vectors
250    ///
251    /// Returns an error for empty vectors.
252    ///
253    /// ```
254    /// use trueno::{Vector, TruenoError};
255    ///
256    /// let x: Vector<f32> = Vector::from_slice(&[]);
257    /// let y: Vector<f32> = Vector::from_slice(&[]);
258    /// assert!(matches!(x.covariance(&y), Err(TruenoError::EmptyVector)));
259    /// ```
260    pub fn covariance(&self, other: &Self) -> Result<f32> {
261        if self.data.is_empty() {
262            return Err(TruenoError::EmptyVector);
263        }
264        if self.len() != other.len() {
265            return Err(TruenoError::SizeMismatch { expected: self.len(), actual: other.len() });
266        }
267
268        let mean_x = self.mean()?;
269        let mean_y = other.mean()?;
270        let dot_xy = self.dot(other)?;
271        let mean_xy = dot_xy / self.len().max(1) as f32;
272
273        // Cov(X,Y) = E[XY] - μx·μy
274        Ok(mean_xy - mean_x * mean_y)
275    }
276
277    /// Pearson correlation coefficient
278    ///
279    /// Computes the Pearson correlation coefficient: ρ(X,Y) = Cov(X,Y) / (σx·σy)
280    /// Normalized covariance in range [-1, 1].
281    ///
282    /// # Performance
283    ///
284    /// Uses optimized SIMD implementations via covariance() and stddev().
285    ///
286    /// # Examples
287    ///
288    /// ```
289    /// use trueno::Vector;
290    ///
291    /// let x = Vector::from_slice(&[1.0, 2.0, 3.0]);
292    /// let y = Vector::from_slice(&[2.0, 4.0, 6.0]);
293    /// let corr = x.correlation(&y)?;
294    /// assert!((corr - 1.0).abs() < 1e-5); // Perfect positive correlation
295    /// # Ok::<(), trueno::TruenoError>(())
296    /// ```
297    ///
298    /// # Size mismatch
299    ///
300    /// Returns an error if vectors have different lengths.
301    ///
302    /// # Division by zero
303    ///
304    /// Returns DivisionByZero error if either vector has zero standard deviation
305    /// (i.e., is constant).
306    ///
307    /// ```
308    /// use trueno::{Vector, TruenoError};
309    ///
310    /// let x = Vector::from_slice(&[5.0, 5.0, 5.0]); // Constant
311    /// let y = Vector::from_slice(&[1.0, 2.0, 3.0]);
312    /// assert!(matches!(x.correlation(&y), Err(TruenoError::DivisionByZero)));
313    /// ```
314    pub fn correlation(&self, other: &Self) -> Result<f32> {
315        let cov = self.covariance(other)?;
316        let std_x = self.stddev()?;
317        let std_y = other.stddev()?;
318
319        // Check for zero standard deviation (constant vectors)
320        if std_x.abs() < 1e-10 || std_y.abs() < 1e-10 {
321            return Err(TruenoError::DivisionByZero);
322        }
323
324        // ρ(X,Y) = Cov(X,Y) / (σx·σy)
325        // Clamp to [-1, 1] to handle floating-point precision errors
326        let corr = cov / (std_x * std_y);
327        Ok(corr.clamp(-1.0, 1.0))
328    }
329}