// trueno/vector/ops/reductions/stats.rs
//! Statistical and numerically stable reduction operations for Vector<f32>
//!
//! Provides: `sum_kahan`, `sum_of_squares`, `mean`, `variance`, `stddev`,
//! `covariance`, `correlation`.

#[cfg(target_arch = "x86_64")]
use crate::backends::avx2::Avx2Backend;
#[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
use crate::backends::neon::NeonBackend;
use crate::backends::scalar::ScalarBackend;
#[cfg(target_arch = "x86_64")]
use crate::backends::sse2::Sse2Backend;
#[cfg(target_arch = "wasm32")]
use crate::backends::wasm::WasmBackend;
use crate::backends::VectorBackend;
use crate::vector::Vector;
use crate::{Backend, Result, TruenoError};

19impl Vector<f32> {
20 /// Kahan summation (numerically stable sum)
21 ///
22 /// Uses the Kahan summation algorithm to reduce floating-point rounding errors
23 /// when summing many numbers. This is more accurate than the standard sum() method
24 /// for vectors with many elements or elements of vastly different magnitudes.
25 ///
26 /// # Performance
27 ///
28 /// Note: Kahan summation is inherently sequential and cannot be effectively
29 /// parallelized with SIMD. All backends use the scalar implementation.
30 ///
31 /// # Examples
32 ///
33 /// ```
34 /// use trueno::Vector;
35 ///
36 /// let v = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0]);
37 /// assert_eq!(v.sum_kahan()?, 10.0);
38 /// # Ok::<(), trueno::TruenoError>(())
39 /// ```
40 pub fn sum_kahan(&self) -> Result<f32> {
41 if self.data.is_empty() {
42 return Ok(0.0);
43 }
44
45 // SAFETY: Unsafe block delegates to backend implementation which maintains safety invariants
46 let result = unsafe {
47 match self.backend {
48 Backend::Scalar => ScalarBackend::sum_kahan(&self.data),
49 #[cfg(target_arch = "x86_64")]
50 Backend::SSE2 | Backend::AVX => Sse2Backend::sum_kahan(&self.data),
51 #[cfg(target_arch = "x86_64")]
52 Backend::AVX2 | Backend::AVX512 => Avx2Backend::sum_kahan(&self.data),
53 #[cfg(not(target_arch = "x86_64"))]
54 Backend::SSE2 | Backend::AVX | Backend::AVX2 | Backend::AVX512 => {
55 ScalarBackend::sum_kahan(&self.data)
56 }
57 #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
58 Backend::NEON => NeonBackend::sum_kahan(&self.data),
59 #[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
60 Backend::NEON => ScalarBackend::sum_kahan(&self.data),
61 #[cfg(target_arch = "wasm32")]
62 Backend::WasmSIMD => WasmBackend::sum_kahan(&self.data),
63 #[cfg(not(target_arch = "wasm32"))]
64 Backend::WasmSIMD => ScalarBackend::sum_kahan(&self.data),
65 Backend::GPU | Backend::Auto => ScalarBackend::sum_kahan(&self.data),
66 }
67 };
68
69 Ok(result)
70 }
71
72 /// Sum of squared elements
73 ///
74 /// Computes the sum of squares: sum(a\[i\]^2).
75 /// This is the building block for computing L2 norm and variance.
76 ///
77 /// # Examples
78 ///
79 /// ```
80 /// use trueno::Vector;
81 ///
82 /// let v = Vector::from_slice(&[1.0, 2.0, 3.0]);
83 /// let sum_sq = v.sum_of_squares()?;
84 /// assert_eq!(sum_sq, 14.0); // 1^2 + 2^2 + 3^2 = 1 + 4 + 9 = 14
85 /// # Ok::<(), trueno::TruenoError>(())
86 /// ```
87 ///
88 /// # Empty vectors
89 ///
90 /// Returns 0.0 for empty vectors.
91 pub fn sum_of_squares(&self) -> Result<f32> {
92 if self.data.is_empty() {
93 return Ok(0.0);
94 }
95
96 // Use dot product with self: dot(self, self) = sum(a[i]^2)
97 self.dot(self)
98 }
99
100 /// Arithmetic mean (average)
101 ///
102 /// Computes the arithmetic mean of all elements: sum(a\[i\]) / n.
103 ///
104 /// # Performance
105 ///
106 /// Uses optimized SIMD sum() implementation, then divides by length.
107 ///
108 /// # Examples
109 ///
110 /// ```
111 /// use trueno::Vector;
112 ///
113 /// let v = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0]);
114 /// let avg = v.mean()?;
115 /// assert!((avg - 2.5).abs() < 1e-5); // (1+2+3+4)/4 = 2.5
116 /// # Ok::<(), trueno::TruenoError>(())
117 /// ```
118 ///
119 /// # Empty vectors
120 ///
121 /// Returns an error for empty vectors (division by zero).
122 ///
123 /// ```
124 /// use trueno::{Vector, TruenoError};
125 ///
126 /// let v: Vector<f32> = Vector::from_slice(&[]);
127 /// assert!(matches!(v.mean(), Err(TruenoError::EmptyVector)));
128 /// ```
129 pub fn mean(&self) -> Result<f32> {
130 if self.data.is_empty() {
131 return Err(TruenoError::EmptyVector);
132 }
133
134 let total = self.sum()?;
135 Ok(total / self.len() as f32)
136 }
137
138 /// Population variance
139 ///
140 /// Computes the population variance: Var(X) = E\[(X - μ)²\] = E\[X²\] - μ²
141 /// Uses the computational formula to avoid two passes over the data.
142 ///
143 /// # Performance
144 ///
145 /// Uses optimized SIMD implementations via sum_of_squares() and mean().
146 ///
147 /// # Examples
148 ///
149 /// ```
150 /// use trueno::Vector;
151 ///
152 /// let v = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
153 /// let var = v.variance()?;
154 /// assert!((var - 2.0).abs() < 1e-5); // Population variance
155 /// # Ok::<(), trueno::TruenoError>(())
156 /// ```
157 ///
158 /// # Empty vectors
159 ///
160 /// Returns an error for empty vectors.
161 ///
162 /// ```
163 /// use trueno::{Vector, TruenoError};
164 ///
165 /// let v: Vector<f32> = Vector::from_slice(&[]);
166 /// assert!(matches!(v.variance(), Err(TruenoError::EmptyVector)));
167 /// ```
168 pub fn variance(&self) -> Result<f32> {
169 if self.data.is_empty() {
170 return Err(TruenoError::EmptyVector);
171 }
172
173 let mean_val = self.mean()?;
174 let sum_sq = self.sum_of_squares()?;
175 let mean_sq = sum_sq / self.len() as f32;
176
177 // Var(X) = E[X²] - μ²
178 Ok(mean_sq - mean_val * mean_val)
179 }
180
181 /// Population standard deviation
182 ///
183 /// Computes the population standard deviation: σ = sqrt(Var(X)).
184 /// This is the square root of the variance.
185 ///
186 /// # Performance
187 ///
188 /// Uses optimized SIMD implementations via variance().
189 ///
190 /// # Examples
191 ///
192 /// ```
193 /// use trueno::Vector;
194 ///
195 /// let v = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
196 /// let sd = v.stddev()?;
197 /// assert!((sd - 1.4142135).abs() < 1e-5); // sqrt(2) ≈ 1.414
198 /// # Ok::<(), trueno::TruenoError>(())
199 /// ```
200 ///
201 /// # Empty vectors
202 ///
203 /// Returns an error for empty vectors.
204 ///
205 /// ```
206 /// use trueno::{Vector, TruenoError};
207 ///
208 /// let v: Vector<f32> = Vector::from_slice(&[]);
209 /// assert!(matches!(v.stddev(), Err(TruenoError::EmptyVector)));
210 /// ```
211 pub fn stddev(&self) -> Result<f32> {
212 let var = self.variance()?;
213 Ok(var.sqrt())
214 }
215
216 /// Population covariance between two vectors
217 ///
218 /// Computes the population covariance: Cov(X,Y) = E[(X - μx)(Y - μy)]
219 /// Uses the computational formula: Cov(X,Y) = E\[XY\] - μx·μy
220 ///
221 /// # Performance
222 ///
223 /// Uses optimized SIMD implementations via dot() and mean().
224 ///
225 /// # Examples
226 ///
227 /// ```
228 /// use trueno::Vector;
229 ///
230 /// let x = Vector::from_slice(&[1.0, 2.0, 3.0]);
231 /// let y = Vector::from_slice(&[2.0, 4.0, 6.0]);
232 /// let cov = x.covariance(&y)?;
233 /// assert!((cov - 1.333).abs() < 0.01); // Perfect positive covariance
234 /// # Ok::<(), trueno::TruenoError>(())
235 /// ```
236 ///
237 /// # Size mismatch
238 ///
239 /// Returns an error if vectors have different lengths.
240 ///
241 /// ```
242 /// use trueno::{Vector, TruenoError};
243 ///
244 /// let x = Vector::from_slice(&[1.0, 2.0]);
245 /// let y = Vector::from_slice(&[1.0, 2.0, 3.0]);
246 /// assert!(matches!(x.covariance(&y), Err(TruenoError::SizeMismatch { .. })));
247 /// ```
248 ///
249 /// # Empty vectors
250 ///
251 /// Returns an error for empty vectors.
252 ///
253 /// ```
254 /// use trueno::{Vector, TruenoError};
255 ///
256 /// let x: Vector<f32> = Vector::from_slice(&[]);
257 /// let y: Vector<f32> = Vector::from_slice(&[]);
258 /// assert!(matches!(x.covariance(&y), Err(TruenoError::EmptyVector)));
259 /// ```
260 pub fn covariance(&self, other: &Self) -> Result<f32> {
261 if self.data.is_empty() {
262 return Err(TruenoError::EmptyVector);
263 }
264 if self.len() != other.len() {
265 return Err(TruenoError::SizeMismatch { expected: self.len(), actual: other.len() });
266 }
267
268 let mean_x = self.mean()?;
269 let mean_y = other.mean()?;
270 let dot_xy = self.dot(other)?;
271 let mean_xy = dot_xy / self.len().max(1) as f32;
272
273 // Cov(X,Y) = E[XY] - μx·μy
274 Ok(mean_xy - mean_x * mean_y)
275 }
276
277 /// Pearson correlation coefficient
278 ///
279 /// Computes the Pearson correlation coefficient: ρ(X,Y) = Cov(X,Y) / (σx·σy)
280 /// Normalized covariance in range [-1, 1].
281 ///
282 /// # Performance
283 ///
284 /// Uses optimized SIMD implementations via covariance() and stddev().
285 ///
286 /// # Examples
287 ///
288 /// ```
289 /// use trueno::Vector;
290 ///
291 /// let x = Vector::from_slice(&[1.0, 2.0, 3.0]);
292 /// let y = Vector::from_slice(&[2.0, 4.0, 6.0]);
293 /// let corr = x.correlation(&y)?;
294 /// assert!((corr - 1.0).abs() < 1e-5); // Perfect positive correlation
295 /// # Ok::<(), trueno::TruenoError>(())
296 /// ```
297 ///
298 /// # Size mismatch
299 ///
300 /// Returns an error if vectors have different lengths.
301 ///
302 /// # Division by zero
303 ///
304 /// Returns DivisionByZero error if either vector has zero standard deviation
305 /// (i.e., is constant).
306 ///
307 /// ```
308 /// use trueno::{Vector, TruenoError};
309 ///
310 /// let x = Vector::from_slice(&[5.0, 5.0, 5.0]); // Constant
311 /// let y = Vector::from_slice(&[1.0, 2.0, 3.0]);
312 /// assert!(matches!(x.correlation(&y), Err(TruenoError::DivisionByZero)));
313 /// ```
314 pub fn correlation(&self, other: &Self) -> Result<f32> {
315 let cov = self.covariance(other)?;
316 let std_x = self.stddev()?;
317 let std_y = other.stddev()?;
318
319 // Check for zero standard deviation (constant vectors)
320 if std_x.abs() < 1e-10 || std_y.abs() < 1e-10 {
321 return Err(TruenoError::DivisionByZero);
322 }
323
324 // ρ(X,Y) = Cov(X,Y) / (σx·σy)
325 // Clamp to [-1, 1] to handle floating-point precision errors
326 let corr = cov / (std_x * std_y);
327 Ok(corr.clamp(-1.0, 1.0))
328 }
329}