matten/stats.rs
1//! Small statistics reductions (RFC-040).
2//!
3//! [`Tensor::var`] / [`Tensor::std`] compute the **population** variance and
4//! standard deviation over all elements; [`Tensor::var_axis`] / [`Tensor::std_axis`]
5//! do the same along one axis (removing it from the output shape). These are the
6//! only statistics in core — quantile, percentile, histogram, covariance,
7//! correlation, and z-score are deferred to a possible future `matten-stats`
8//! companion (RFC-040 §6/§8), and there is no sample-variance (`ddof = 1`) variant
9//! in the first cut.
10//!
11//! **Variance is population variance, not sample variance:**
12//! `var = sum((x_i - mean)^2) / n` and `std = sqrt(var)`. A two-pass algorithm is
13//! used (mean first, then squared deviations) to avoid the avoidable cancellation
14//! of the naive one-pass `E[x^2] - E[x]^2`. `NaN` propagates (any `NaN` element
15//! yields `NaN`), consistent with the other `f64` reductions.
16
17use crate::{MattenError, Tensor};
18
19/// Population variance of a non-empty slice, two-pass: `sum((x - mean)^2) / n`.
20/// Callers guarantee `data` is non-empty.
21fn population_variance(data: &[f64]) -> f64 {
22 let n = data.len() as f64;
23 let mean = data.iter().sum::<f64>() / n;
24 data.iter()
25 .map(|x| {
26 let d = x - mean;
27 d * d
28 })
29 .sum::<f64>()
30 / n
31}
32
33/// Population variance reduced along `axis`, removing that axis (two-pass per
34/// slice). Shared by [`Tensor::try_var_axis`] and [`Tensor::try_std_axis`].
35fn variance_axis_impl(
36 t: &Tensor,
37 axis: usize,
38 operation: &'static str,
39) -> Result<Tensor, MattenError> {
40 crate::math::reject_dynamic(t, operation)?;
41 let rank = t.shape.len();
42 if axis >= rank {
43 return Err(MattenError::Shape {
44 operation,
45 message: format!("axis {axis} is out of range for a rank-{rank} tensor"),
46 });
47 }
48
49 let axis_len = t.shape[axis];
50 let outer: usize = t.shape[..axis].iter().product();
51 let inner: usize = t.shape[axis + 1..].iter().product();
52
53 let mut data = Vec::with_capacity(outer * inner);
54 for o in 0..outer {
55 let base = o * axis_len * inner;
56 for i in 0..inner {
57 // Two-pass over the `axis_len` values at stride `inner`.
58 let mut sum = 0.0;
59 for a in 0..axis_len {
60 sum += t.data[base + a * inner + i];
61 }
62 let mean = sum / axis_len as f64;
63 let mut acc = 0.0;
64 for a in 0..axis_len {
65 let d = t.data[base + a * inner + i] - mean;
66 acc += d * d;
67 }
68 data.push(acc / axis_len as f64);
69 }
70 }
71
72 let out_shape: Vec<usize> = t.shape[..axis]
73 .iter()
74 .chain(&t.shape[axis + 1..])
75 .copied()
76 .collect();
77
78 Ok(Tensor {
79 data,
80 shape: out_shape,
81 #[cfg(feature = "dynamic")]
82 dynamic: None,
83 })
84}
85
86impl Tensor {
87 /// Population variance over all elements: `sum((x_i - mean)^2) / n`.
88 ///
89 /// This is **population** variance (`ddof = 0`), not sample variance — it
90 /// divides by `n`, not `n - 1`. A single-element tensor has variance `0.0`.
91 /// `NaN` propagates.
92 ///
93 /// # Panics
94 /// Panics on a dynamic tensor (call [`try_numeric`](crate::Tensor::try_numeric)
95 /// first). Use [`Tensor::try_var`] for the non-panicking form.
96 ///
97 /// ```
98 /// use matten::Tensor;
99 /// // [1,2,3,4]: mean 2.5, population variance 1.25
100 /// assert_eq!(Tensor::from_vec(vec![1.0, 2.0, 3.0, 4.0]).var(), 1.25);
101 /// ```
102 #[must_use]
103 pub fn var(&self) -> f64 {
104 self.try_var().unwrap_or_else(|e| panic!("{e}"))
105 }
106
107 /// Non-panicking [`Tensor::var`].
108 ///
109 /// # Errors
110 /// Returns [`MattenError::Unsupported`] on a dynamic tensor. The empty-tensor
111 /// guard returns [`MattenError::InvalidArgument`], but `matten` already forbids
112 /// zero-sized dimensions, so an empty tensor is not constructible in practice.
113 ///
114 /// ```
115 /// use matten::Tensor;
116 /// assert_eq!(Tensor::from_vec(vec![1.0, 2.0, 3.0, 4.0]).try_var().unwrap(), 1.25);
117 /// ```
118 pub fn try_var(&self) -> Result<f64, MattenError> {
119 crate::math::reject_dynamic(self, "var")?;
120 if self.data.is_empty() {
121 return Err(MattenError::InvalidArgument {
122 operation: "var",
123 argument: "self",
124 message: "variance is undefined for an empty tensor".to_string(),
125 });
126 }
127 Ok(population_variance(&self.data))
128 }
129
130 /// Population standard deviation over all elements: `sqrt(var)`.
131 ///
132 /// Population (`ddof = 0`), not sample. A single-element tensor has std `0.0`.
133 /// `NaN` propagates.
134 ///
135 /// # Panics
136 /// Panics on a dynamic tensor. Use [`Tensor::try_std`] for the non-panicking
137 /// form.
138 ///
139 /// ```
140 /// use matten::Tensor;
141 /// let s = Tensor::from_vec(vec![1.0, 2.0, 3.0, 4.0]).std();
142 /// assert!((s - 1.25_f64.sqrt()).abs() < 1e-12);
143 /// ```
144 #[must_use]
145 pub fn std(&self) -> f64 {
146 self.try_std().unwrap_or_else(|e| panic!("{e}"))
147 }
148
149 /// Non-panicking [`Tensor::std`].
150 ///
151 /// # Errors
152 /// Returns [`MattenError::Unsupported`] on a dynamic tensor (and
153 /// [`MattenError::InvalidArgument`] on the unreachable empty-tensor case).
154 ///
155 /// ```
156 /// use matten::Tensor;
157 /// assert!(Tensor::from_vec(vec![5.0]).try_std().unwrap() == 0.0);
158 /// ```
159 pub fn try_std(&self) -> Result<f64, MattenError> {
160 crate::math::reject_dynamic(self, "std")?;
161 if self.data.is_empty() {
162 return Err(MattenError::InvalidArgument {
163 operation: "std",
164 argument: "self",
165 message: "standard deviation is undefined for an empty tensor".to_string(),
166 });
167 }
168 Ok(population_variance(&self.data).sqrt())
169 }
170
171 /// Population variance along `axis`, removing that axis from the output shape.
172 ///
173 /// Population (`ddof = 0`). `NaN` propagates within each reduced slice. No
174 /// `keepdims` (e.g. `[2, 3]` axis 0 → `[3]`, axis 1 → `[2]`).
175 ///
176 /// # Panics
177 /// Panics if `axis >= rank`, or on a dynamic tensor. Use
178 /// [`Tensor::try_var_axis`] for the non-panicking form.
179 ///
180 /// ```
181 /// use matten::Tensor;
182 /// let m = Tensor::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3]);
183 /// assert_eq!(m.var_axis(0).as_slice(), &[2.25, 2.25, 2.25]);
184 /// ```
185 #[must_use]
186 pub fn var_axis(&self, axis: usize) -> Tensor {
187 self.try_var_axis(axis).unwrap_or_else(|e| panic!("{e}"))
188 }
189
190 /// Non-panicking [`Tensor::var_axis`].
191 ///
192 /// # Errors
193 /// Returns [`MattenError::Shape`] if `axis >= rank`, or
194 /// [`MattenError::Unsupported`] on a dynamic tensor.
195 ///
196 /// ```
197 /// use matten::Tensor;
198 /// let m = Tensor::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3]);
199 /// assert!(m.try_var_axis(2).is_err()); // axis out of range
200 /// ```
201 pub fn try_var_axis(&self, axis: usize) -> Result<Tensor, MattenError> {
202 variance_axis_impl(self, axis, "var_axis")
203 }
204
205 /// Population standard deviation along `axis`, removing that axis.
206 ///
207 /// Population (`ddof = 0`). `NaN` propagates within each reduced slice.
208 ///
209 /// # Panics
210 /// Panics if `axis >= rank`, or on a dynamic tensor. Use
211 /// [`Tensor::try_std_axis`] for the non-panicking form.
212 ///
213 /// ```
214 /// use matten::Tensor;
215 /// let m = Tensor::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3]);
216 /// // each column [1,4],[2,5],[3,6] has variance 2.25, std 1.5
217 /// assert_eq!(m.std_axis(0).as_slice(), &[1.5, 1.5, 1.5]);
218 /// ```
219 #[must_use]
220 pub fn std_axis(&self, axis: usize) -> Tensor {
221 self.try_std_axis(axis).unwrap_or_else(|e| panic!("{e}"))
222 }
223
224 /// Non-panicking [`Tensor::std_axis`].
225 ///
226 /// # Errors
227 /// Returns [`MattenError::Shape`] if `axis >= rank`, or
228 /// [`MattenError::Unsupported`] on a dynamic tensor.
229 ///
230 /// ```
231 /// use matten::Tensor;
232 /// let m = Tensor::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3]);
233 /// assert!(m.try_std_axis(5).is_err());
234 /// ```
235 pub fn try_std_axis(&self, axis: usize) -> Result<Tensor, MattenError> {
236 let mut v = variance_axis_impl(self, axis, "std_axis")?;
237 for x in &mut v.data {
238 *x = x.sqrt();
239 }
240 Ok(v)
241 }
242}
243
244#[cfg(test)]
245mod tests;