Skip to main content

matten/
stats.rs

1//! Small statistics reductions (RFC-040).
2//!
3//! [`Tensor::var`] / [`Tensor::std`] compute the **population** variance and
4//! standard deviation over all elements; [`Tensor::var_axis`] / [`Tensor::std_axis`]
5//! do the same along one axis (removing it from the output shape). These are the
6//! only statistics in core — quantile, percentile, histogram, covariance,
7//! correlation, and z-score are deferred to a possible future `matten-stats`
8//! companion (RFC-040 §6/§8), and there is no sample-variance (`ddof = 1`) variant
9//! in the first cut.
10//!
11//! **Variance is population variance, not sample variance:**
12//! `var = sum((x_i - mean)^2) / n` and `std = sqrt(var)`. A two-pass algorithm is
13//! used (mean first, then squared deviations) to avoid the avoidable cancellation
14//! of the naive one-pass `E[x^2] - E[x]^2`. `NaN` propagates (any `NaN` element
15//! yields `NaN`), consistent with the other `f64` reductions.
16
17use crate::{MattenError, Tensor};
18
19/// Population variance of a non-empty slice, two-pass: `sum((x - mean)^2) / n`.
20/// Callers guarantee `data` is non-empty.
21fn population_variance(data: &[f64]) -> f64 {
22    let n = data.len() as f64;
23    let mean = data.iter().sum::<f64>() / n;
24    data.iter()
25        .map(|x| {
26            let d = x - mean;
27            d * d
28        })
29        .sum::<f64>()
30        / n
31}
32
33/// Population variance reduced along `axis`, removing that axis (two-pass per
34/// slice). Shared by [`Tensor::try_var_axis`] and [`Tensor::try_std_axis`].
35fn variance_axis_impl(
36    t: &Tensor,
37    axis: usize,
38    operation: &'static str,
39) -> Result<Tensor, MattenError> {
40    crate::math::reject_dynamic(t, operation)?;
41    let rank = t.shape.len();
42    if axis >= rank {
43        return Err(MattenError::Shape {
44            operation,
45            message: format!("axis {axis} is out of range for a rank-{rank} tensor"),
46        });
47    }
48
49    let axis_len = t.shape[axis];
50    let outer: usize = t.shape[..axis].iter().product();
51    let inner: usize = t.shape[axis + 1..].iter().product();
52
53    let mut data = Vec::with_capacity(outer * inner);
54    for o in 0..outer {
55        let base = o * axis_len * inner;
56        for i in 0..inner {
57            // Two-pass over the `axis_len` values at stride `inner`.
58            let mut sum = 0.0;
59            for a in 0..axis_len {
60                sum += t.data[base + a * inner + i];
61            }
62            let mean = sum / axis_len as f64;
63            let mut acc = 0.0;
64            for a in 0..axis_len {
65                let d = t.data[base + a * inner + i] - mean;
66                acc += d * d;
67            }
68            data.push(acc / axis_len as f64);
69        }
70    }
71
72    let out_shape: Vec<usize> = t.shape[..axis]
73        .iter()
74        .chain(&t.shape[axis + 1..])
75        .copied()
76        .collect();
77
78    Ok(Tensor {
79        data,
80        shape: out_shape,
81        #[cfg(feature = "dynamic")]
82        dynamic: None,
83    })
84}
85
86impl Tensor {
87    /// Population variance over all elements: `sum((x_i - mean)^2) / n`.
88    ///
89    /// This is **population** variance (`ddof = 0`), not sample variance — it
90    /// divides by `n`, not `n - 1`. A single-element tensor has variance `0.0`.
91    /// `NaN` propagates.
92    ///
93    /// # Panics
94    /// Panics on a dynamic tensor (call [`try_numeric`](crate::Tensor::try_numeric)
95    /// first). Use [`Tensor::try_var`] for the non-panicking form.
96    ///
97    /// ```
98    /// use matten::Tensor;
99    /// // [1,2,3,4]: mean 2.5, population variance 1.25
100    /// assert_eq!(Tensor::from_vec(vec![1.0, 2.0, 3.0, 4.0]).var(), 1.25);
101    /// ```
102    #[must_use]
103    pub fn var(&self) -> f64 {
104        self.try_var().unwrap_or_else(|e| panic!("{e}"))
105    }
106
107    /// Non-panicking [`Tensor::var`].
108    ///
109    /// # Errors
110    /// Returns [`MattenError::Unsupported`] on a dynamic tensor. The empty-tensor
111    /// guard returns [`MattenError::InvalidArgument`], but `matten` already forbids
112    /// zero-sized dimensions, so an empty tensor is not constructible in practice.
113    ///
114    /// ```
115    /// use matten::Tensor;
116    /// assert_eq!(Tensor::from_vec(vec![1.0, 2.0, 3.0, 4.0]).try_var().unwrap(), 1.25);
117    /// ```
118    pub fn try_var(&self) -> Result<f64, MattenError> {
119        crate::math::reject_dynamic(self, "var")?;
120        if self.data.is_empty() {
121            return Err(MattenError::InvalidArgument {
122                operation: "var",
123                argument: "self",
124                message: "variance is undefined for an empty tensor".to_string(),
125            });
126        }
127        Ok(population_variance(&self.data))
128    }
129
130    /// Population standard deviation over all elements: `sqrt(var)`.
131    ///
132    /// Population (`ddof = 0`), not sample. A single-element tensor has std `0.0`.
133    /// `NaN` propagates.
134    ///
135    /// # Panics
136    /// Panics on a dynamic tensor. Use [`Tensor::try_std`] for the non-panicking
137    /// form.
138    ///
139    /// ```
140    /// use matten::Tensor;
141    /// let s = Tensor::from_vec(vec![1.0, 2.0, 3.0, 4.0]).std();
142    /// assert!((s - 1.25_f64.sqrt()).abs() < 1e-12);
143    /// ```
144    #[must_use]
145    pub fn std(&self) -> f64 {
146        self.try_std().unwrap_or_else(|e| panic!("{e}"))
147    }
148
149    /// Non-panicking [`Tensor::std`].
150    ///
151    /// # Errors
152    /// Returns [`MattenError::Unsupported`] on a dynamic tensor (and
153    /// [`MattenError::InvalidArgument`] on the unreachable empty-tensor case).
154    ///
155    /// ```
156    /// use matten::Tensor;
157    /// assert!(Tensor::from_vec(vec![5.0]).try_std().unwrap() == 0.0);
158    /// ```
159    pub fn try_std(&self) -> Result<f64, MattenError> {
160        crate::math::reject_dynamic(self, "std")?;
161        if self.data.is_empty() {
162            return Err(MattenError::InvalidArgument {
163                operation: "std",
164                argument: "self",
165                message: "standard deviation is undefined for an empty tensor".to_string(),
166            });
167        }
168        Ok(population_variance(&self.data).sqrt())
169    }
170
171    /// Population variance along `axis`, removing that axis from the output shape.
172    ///
173    /// Population (`ddof = 0`). `NaN` propagates within each reduced slice. No
174    /// `keepdims` (e.g. `[2, 3]` axis 0 → `[3]`, axis 1 → `[2]`).
175    ///
176    /// # Panics
177    /// Panics if `axis >= rank`, or on a dynamic tensor. Use
178    /// [`Tensor::try_var_axis`] for the non-panicking form.
179    ///
180    /// ```
181    /// use matten::Tensor;
182    /// let m = Tensor::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3]);
183    /// assert_eq!(m.var_axis(0).as_slice(), &[2.25, 2.25, 2.25]);
184    /// ```
185    #[must_use]
186    pub fn var_axis(&self, axis: usize) -> Tensor {
187        self.try_var_axis(axis).unwrap_or_else(|e| panic!("{e}"))
188    }
189
190    /// Non-panicking [`Tensor::var_axis`].
191    ///
192    /// # Errors
193    /// Returns [`MattenError::Shape`] if `axis >= rank`, or
194    /// [`MattenError::Unsupported`] on a dynamic tensor.
195    ///
196    /// ```
197    /// use matten::Tensor;
198    /// let m = Tensor::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3]);
199    /// assert!(m.try_var_axis(2).is_err()); // axis out of range
200    /// ```
201    pub fn try_var_axis(&self, axis: usize) -> Result<Tensor, MattenError> {
202        variance_axis_impl(self, axis, "var_axis")
203    }
204
205    /// Population standard deviation along `axis`, removing that axis.
206    ///
207    /// Population (`ddof = 0`). `NaN` propagates within each reduced slice.
208    ///
209    /// # Panics
210    /// Panics if `axis >= rank`, or on a dynamic tensor. Use
211    /// [`Tensor::try_std_axis`] for the non-panicking form.
212    ///
213    /// ```
214    /// use matten::Tensor;
215    /// let m = Tensor::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3]);
216    /// // each column [1,4],[2,5],[3,6] has variance 2.25, std 1.5
217    /// assert_eq!(m.std_axis(0).as_slice(), &[1.5, 1.5, 1.5]);
218    /// ```
219    #[must_use]
220    pub fn std_axis(&self, axis: usize) -> Tensor {
221        self.try_std_axis(axis).unwrap_or_else(|e| panic!("{e}"))
222    }
223
224    /// Non-panicking [`Tensor::std_axis`].
225    ///
226    /// # Errors
227    /// Returns [`MattenError::Shape`] if `axis >= rank`, or
228    /// [`MattenError::Unsupported`] on a dynamic tensor.
229    ///
230    /// ```
231    /// use matten::Tensor;
232    /// let m = Tensor::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3]);
233    /// assert!(m.try_std_axis(5).is_err());
234    /// ```
235    pub fn try_std_axis(&self, axis: usize) -> Result<Tensor, MattenError> {
236        let mut v = variance_axis_impl(self, axis, "std_axis")?;
237        for x in &mut v.data {
238            *x = x.sqrt();
239        }
240        Ok(v)
241    }
242}
243
244#[cfg(test)]
245mod tests;