ndarray_stats/summary_statistics/
mod.rs

1//! Summary statistics (e.g. mean, variance, etc.).
2use crate::errors::{EmptyInput, MultiInputError};
3use ndarray::{Array, ArrayBase, Axis, Data, Dimension, Ix1, RemoveAxis};
4use num_traits::{Float, FromPrimitive, Zero};
5use std::ops::{Add, AddAssign, Div, Mul};
6
7/// Extension trait for `ArrayBase` providing methods
8/// to compute several summary statistics (e.g. mean, variance, etc.).
9pub trait SummaryStatisticsExt<A, S, D>
10where
11    S: Data<Elem = A>,
12    D: Dimension,
13{
14    /// Returns the [`arithmetic mean`] x̅ of all elements in the array:
15    ///
16    /// ```text
17    ///     1   n
18    /// x̅ = ―   ∑ xᵢ
19    ///     n  i=1
20    /// ```
21    ///
22    /// If the array is empty, `Err(EmptyInput)` is returned.
23    ///
24    /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array.
25    ///
26    /// [`arithmetic mean`]: https://en.wikipedia.org/wiki/Arithmetic_mean
27    fn mean(&self) -> Result<A, EmptyInput>
28    where
29        A: Clone + FromPrimitive + Add<Output = A> + Div<Output = A> + Zero;
30
31    /// Returns the [`arithmetic weighted mean`] x̅ of all elements in the array. Use `weighted_sum`
32    /// if the `weights` are normalized (they sum up to 1.0).
33    ///
34    /// ```text
35    ///       n
36    ///       ∑ wᵢxᵢ
37    ///      i=1
38    /// x̅ = ―――――――――
39    ///        n
40    ///        ∑ wᵢ
41    ///       i=1
42    /// ```
43    ///
44    /// **Panics** if division by zero panics for type A.
45    ///
46    /// The following **errors** may be returned:
47    ///
48    /// * `MultiInputError::EmptyInput` if `self` is empty
49    /// * `MultiInputError::ShapeMismatch` if `self` and `weights` don't have the same shape
50    ///
51    /// [`arithmetic weighted mean`] https://en.wikipedia.org/wiki/Weighted_arithmetic_mean
52    fn weighted_mean(&self, weights: &Self) -> Result<A, MultiInputError>
53    where
54        A: Copy + Div<Output = A> + Mul<Output = A> + Zero;
55
56    /// Returns the weighted sum of all elements in the array, that is, the dot product of the
57    /// arrays `self` and `weights`. Equivalent to `weighted_mean` if the `weights` are normalized.
58    ///
59    /// ```text
60    ///      n
61    /// x̅ =  ∑ wᵢxᵢ
62    ///     i=1
63    /// ```
64    ///
65    /// The following **errors** may be returned:
66    ///
67    /// * `MultiInputError::ShapeMismatch` if `self` and `weights` don't have the same shape
68    fn weighted_sum(&self, weights: &Self) -> Result<A, MultiInputError>
69    where
70        A: Copy + Mul<Output = A> + Zero;
71
72    /// Returns the [`arithmetic weighted mean`] x̅ along `axis`. Use `weighted_mean_axis ` if the
73    /// `weights` are normalized.
74    ///
75    /// ```text
76    ///       n
77    ///       ∑ wᵢxᵢ
78    ///      i=1
79    /// x̅ = ―――――――――
80    ///        n
81    ///        ∑ wᵢ
82    ///       i=1
83    /// ```
84    ///
85    /// **Panics** if `axis` is out of bounds.
86    ///
87    /// The following **errors** may be returned:
88    ///
89    /// * `MultiInputError::EmptyInput` if `self` is empty
90    /// * `MultiInputError::ShapeMismatch` if `self` length along axis is not equal to `weights` length
91    ///
92    /// [`arithmetic weighted mean`] https://en.wikipedia.org/wiki/Weighted_arithmetic_mean
93    fn weighted_mean_axis(
94        &self,
95        axis: Axis,
96        weights: &ArrayBase<S, Ix1>,
97    ) -> Result<Array<A, D::Smaller>, MultiInputError>
98    where
99        A: Copy + Div<Output = A> + Mul<Output = A> + Zero,
100        D: RemoveAxis;
101
102    /// Returns the weighted sum along `axis`, that is, the dot product of `weights` and each lane
103    /// of `self` along `axis`. Equivalent to `weighted_mean_axis` if the `weights` are normalized.
104    ///
105    /// ```text
106    ///      n
107    /// x̅ =  ∑ wᵢxᵢ
108    ///     i=1
109    /// ```
110    ///
111    /// **Panics** if `axis` is out of bounds.
112    ///
113    /// The following **errors** may be returned
114    ///
115    /// * `MultiInputError::ShapeMismatch` if `self` and `weights` don't have the same shape
116    fn weighted_sum_axis(
117        &self,
118        axis: Axis,
119        weights: &ArrayBase<S, Ix1>,
120    ) -> Result<Array<A, D::Smaller>, MultiInputError>
121    where
122        A: Copy + Mul<Output = A> + Zero,
123        D: RemoveAxis;
124
125    /// Returns the [`harmonic mean`] `HM(X)` of all elements in the array:
126    ///
127    /// ```text
128    ///           ⎛ n     ⎞⁻¹
129    /// HM(X) = n ⎜ ∑ xᵢ⁻¹⎟
130    ///           ⎝i=1    ⎠
131    /// ```
132    ///
133    /// If the array is empty, `Err(EmptyInput)` is returned.
134    ///
135    /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array.
136    ///
137    /// [`harmonic mean`]: https://en.wikipedia.org/wiki/Harmonic_mean
138    fn harmonic_mean(&self) -> Result<A, EmptyInput>
139    where
140        A: Float + FromPrimitive;
141
142    /// Returns the [`geometric mean`] `GM(X)` of all elements in the array:
143    ///
144    /// ```text
145    ///         ⎛ n   ⎞¹⁄ₙ
146    /// GM(X) = ⎜ ∏ xᵢ⎟
147    ///         ⎝i=1  ⎠
148    /// ```
149    ///
150    /// If the array is empty, `Err(EmptyInput)` is returned.
151    ///
152    /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array.
153    ///
154    /// [`geometric mean`]: https://en.wikipedia.org/wiki/Geometric_mean
155    fn geometric_mean(&self) -> Result<A, EmptyInput>
156    where
157        A: Float + FromPrimitive;
158
159    /// Return weighted variance of all elements in the array.
160    ///
161    /// The weighted variance is computed using the [`West, D. H. D.`] incremental algorithm.
162    /// Equivalent to `var_axis` if the `weights` are normalized.
163    ///
164    /// The parameter `ddof` specifies the "delta degrees of freedom". For example, to calculate the
165    /// population variance, use `ddof = 0`, or to calculate the sample variance, use `ddof = 1`.
166    ///
167    /// **Panics** if `ddof` is less than zero or greater than one, or if `axis` is out of bounds,
168    /// or if `A::from_usize()` fails for zero or one.
169    ///
170    /// [`West, D. H. D.`]: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
171    fn weighted_var(&self, weights: &Self, ddof: A) -> Result<A, MultiInputError>
172    where
173        A: AddAssign + Float + FromPrimitive;
174
175    /// Return weighted standard deviation of all elements in the array.
176    ///
177    /// The weighted standard deviation is computed using the [`West, D. H. D.`] incremental
178    /// algorithm. Equivalent to `var_axis` if the `weights` are normalized.
179    ///
180    /// The parameter `ddof` specifies the "delta degrees of freedom". For example, to calculate the
181    /// population variance, use `ddof = 0`, or to calculate the sample variance, use `ddof = 1`.
182    ///
183    /// **Panics** if `ddof` is less than zero or greater than one, or if `axis` is out of bounds,
184    /// or if `A::from_usize()` fails for zero or one.
185    ///
186    /// [`West, D. H. D.`]: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
187    fn weighted_std(&self, weights: &Self, ddof: A) -> Result<A, MultiInputError>
188    where
189        A: AddAssign + Float + FromPrimitive;
190
191    /// Return weighted variance along `axis`.
192    ///
193    /// The weighted variance is computed using the [`West, D. H. D.`] incremental algorithm.
194    /// Equivalent to `var_axis` if the `weights` are normalized.
195    ///
196    /// The parameter `ddof` specifies the "delta degrees of freedom". For example, to calculate the
197    /// population variance, use `ddof = 0`, or to calculate the sample variance, use `ddof = 1`.
198    ///
199    /// **Panics** if `ddof` is less than zero or greater than one, or if `axis` is out of bounds,
200    /// or if `A::from_usize()` fails for zero or one.
201    ///
202    /// [`West, D. H. D.`]: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
203    fn weighted_var_axis(
204        &self,
205        axis: Axis,
206        weights: &ArrayBase<S, Ix1>,
207        ddof: A,
208    ) -> Result<Array<A, D::Smaller>, MultiInputError>
209    where
210        A: AddAssign + Float + FromPrimitive,
211        D: RemoveAxis;
212
213    /// Return weighted standard deviation along `axis`.
214    ///
215    /// The weighted standard deviation is computed using the [`West, D. H. D.`] incremental
216    /// algorithm. Equivalent to `var_axis` if the `weights` are normalized.
217    ///
218    /// The parameter `ddof` specifies the "delta degrees of freedom". For example, to calculate the
219    /// population variance, use `ddof = 0`, or to calculate the sample variance, use `ddof = 1`.
220    ///
221    /// **Panics** if `ddof` is less than zero or greater than one, or if `axis` is out of bounds,
222    /// or if `A::from_usize()` fails for zero or one.
223    ///
224    /// [`West, D. H. D.`]: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
225    fn weighted_std_axis(
226        &self,
227        axis: Axis,
228        weights: &ArrayBase<S, Ix1>,
229        ddof: A,
230    ) -> Result<Array<A, D::Smaller>, MultiInputError>
231    where
232        A: AddAssign + Float + FromPrimitive,
233        D: RemoveAxis;
234
235    /// Returns the [kurtosis] `Kurt[X]` of all elements in the array:
236    ///
237    /// ```text
238    /// Kurt[X] = μ₄ / σ⁴
239    /// ```
240    ///
241    /// where μ₄ is the fourth central moment and σ is the standard deviation of
242    /// the elements in the array.
243    ///
244    /// This is sometimes referred to as _Pearson's kurtosis_. Fisher's kurtosis can be
245    /// computed by subtracting 3 from Pearson's kurtosis.
246    ///
247    /// If the array is empty, `Err(EmptyInput)` is returned.
248    ///
249    /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array.
250    ///
251    /// [kurtosis]: https://en.wikipedia.org/wiki/Kurtosis
252    fn kurtosis(&self) -> Result<A, EmptyInput>
253    where
254        A: Float + FromPrimitive;
255
256    /// Returns the [Pearson's moment coefficient of skewness] γ₁ of all elements in the array:
257    ///
258    /// ```text
259    /// γ₁ = μ₃ / σ³
260    /// ```
261    ///
262    /// where μ₃ is the third central moment and σ is the standard deviation of
263    /// the elements in the array.
264    ///
265    /// If the array is empty, `Err(EmptyInput)` is returned.
266    ///
267    /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array.
268    ///
269    /// [Pearson's moment coefficient of skewness]: https://en.wikipedia.org/wiki/Skewness
270    fn skewness(&self) -> Result<A, EmptyInput>
271    where
272        A: Float + FromPrimitive;
273
274    /// Returns the *p*-th [central moment] of all elements in the array, μₚ:
275    ///
276    /// ```text
277    ///      1  n
278    /// μₚ = ―  ∑ (xᵢ-x̅)ᵖ
279    ///      n i=1
280    /// ```
281    ///
282    /// If the array is empty, `Err(EmptyInput)` is returned.
283    ///
284    /// The *p*-th central moment is computed using a corrected two-pass algorithm (see Section 3.5
285    /// in [Pébay et al., 2016]). Complexity is *O(np)* when *n >> p*, *p > 1*.
286    ///
287    /// **Panics** if `A::from_usize()` fails to convert the number of elements
288    /// in the array or if `order` overflows `i32`.
289    ///
290    /// [central moment]: https://en.wikipedia.org/wiki/Central_moment
291    /// [Pébay et al., 2016]: https://www.osti.gov/pages/servlets/purl/1427275
292    fn central_moment(&self, order: u16) -> Result<A, EmptyInput>
293    where
294        A: Float + FromPrimitive;
295
296    /// Returns the first *p* [central moments] of all elements in the array, see [central moment]
297    /// for more details.
298    ///
299    /// If the array is empty, `Err(EmptyInput)` is returned.
300    ///
301    /// This method reuses the intermediate steps for the *k*-th moment to compute the *(k+1)*-th,
302    /// being thus more efficient than repeated calls to [central moment] if the computation
303    /// of central moments of multiple orders is required.
304    ///
305    /// **Panics** if `A::from_usize()` fails to convert the number of elements
306    /// in the array or if `order` overflows `i32`.
307    ///
308    /// [central moments]: https://en.wikipedia.org/wiki/Central_moment
309    /// [central moment]: #tymethod.central_moment
310    fn central_moments(&self, order: u16) -> Result<Vec<A>, EmptyInput>
311    where
312        A: Float + FromPrimitive;
313
314    private_decl! {}
315}
316
317mod means;