ndarray_stats/summary_statistics/
mod.rs

1//! Summary statistics (e.g. mean, variance, etc.).
2use crate::errors::{EmptyInput, MultiInputError};
3use ndarray::{Array, ArrayRef, Axis, Dimension, Ix1, RemoveAxis};
4use num_traits::{Float, FromPrimitive, Zero};
5use std::ops::{Add, AddAssign, Div, Mul};
6
7/// Extension trait for `ArrayRef` providing methods
8/// to compute several summary statistics (e.g. mean, variance, etc.).
9pub trait SummaryStatisticsExt<A, D>
10where
11    D: Dimension,
12{
13    /// Returns the [`arithmetic mean`] x̅ of all elements in the array:
14    ///
15    /// ```text
16    ///     1   n
17    /// x̅ = ―   ∑ xᵢ
18    ///     n  i=1
19    /// ```
20    ///
21    /// If the array is empty, `Err(EmptyInput)` is returned.
22    ///
23    /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array.
24    ///
25    /// [`arithmetic mean`]: https://en.wikipedia.org/wiki/Arithmetic_mean
26    fn mean(&self) -> Result<A, EmptyInput>
27    where
28        A: Clone + FromPrimitive + Add<Output = A> + Div<Output = A> + Zero;
29
30    /// Returns the [`arithmetic weighted mean`] x̅ of all elements in the array. Use `weighted_sum`
31    /// if the `weights` are normalized (they sum up to 1.0).
32    ///
33    /// ```text
34    ///       n
35    ///       ∑ wᵢxᵢ
36    ///      i=1
37    /// x̅ = ―――――――――
38    ///        n
39    ///        ∑ wᵢ
40    ///       i=1
41    /// ```
42    ///
43    /// **Panics** if division by zero panics for type A.
44    ///
45    /// The following **errors** may be returned:
46    ///
47    /// * `MultiInputError::EmptyInput` if `self` is empty
48    /// * `MultiInputError::ShapeMismatch` if `self` and `weights` don't have the same shape
49    ///
50    /// [`arithmetic weighted mean`] https://en.wikipedia.org/wiki/Weighted_arithmetic_mean
51    fn weighted_mean(&self, weights: &Self) -> Result<A, MultiInputError>
52    where
53        A: Copy + Div<Output = A> + Mul<Output = A> + Zero;
54
55    /// Returns the weighted sum of all elements in the array, that is, the dot product of the
56    /// arrays `self` and `weights`. Equivalent to `weighted_mean` if the `weights` are normalized.
57    ///
58    /// ```text
59    ///      n
60    /// x̅ =  ∑ wᵢxᵢ
61    ///     i=1
62    /// ```
63    ///
64    /// The following **errors** may be returned:
65    ///
66    /// * `MultiInputError::ShapeMismatch` if `self` and `weights` don't have the same shape
67    fn weighted_sum(&self, weights: &Self) -> Result<A, MultiInputError>
68    where
69        A: Copy + Mul<Output = A> + Zero;
70
71    /// Returns the [`arithmetic weighted mean`] x̅ along `axis`. Use `weighted_mean_axis ` if the
72    /// `weights` are normalized.
73    ///
74    /// ```text
75    ///       n
76    ///       ∑ wᵢxᵢ
77    ///      i=1
78    /// x̅ = ―――――――――
79    ///        n
80    ///        ∑ wᵢ
81    ///       i=1
82    /// ```
83    ///
84    /// **Panics** if `axis` is out of bounds.
85    ///
86    /// The following **errors** may be returned:
87    ///
88    /// * `MultiInputError::EmptyInput` if `self` is empty
89    /// * `MultiInputError::ShapeMismatch` if `self` length along axis is not equal to `weights` length
90    ///
91    /// [`arithmetic weighted mean`] https://en.wikipedia.org/wiki/Weighted_arithmetic_mean
92    fn weighted_mean_axis(
93        &self,
94        axis: Axis,
95        weights: &ArrayRef<A, Ix1>,
96    ) -> Result<Array<A, D::Smaller>, MultiInputError>
97    where
98        A: Copy + Div<Output = A> + Mul<Output = A> + Zero,
99        D: RemoveAxis;
100
101    /// Returns the weighted sum along `axis`, that is, the dot product of `weights` and each lane
102    /// of `self` along `axis`. Equivalent to `weighted_mean_axis` if the `weights` are normalized.
103    ///
104    /// ```text
105    ///      n
106    /// x̅ =  ∑ wᵢxᵢ
107    ///     i=1
108    /// ```
109    ///
110    /// **Panics** if `axis` is out of bounds.
111    ///
112    /// The following **errors** may be returned
113    ///
114    /// * `MultiInputError::ShapeMismatch` if `self` and `weights` don't have the same shape
115    fn weighted_sum_axis(
116        &self,
117        axis: Axis,
118        weights: &ArrayRef<A, Ix1>,
119    ) -> Result<Array<A, D::Smaller>, MultiInputError>
120    where
121        A: Copy + Mul<Output = A> + Zero,
122        D: RemoveAxis;
123
124    /// Returns the [`harmonic mean`] `HM(X)` of all elements in the array:
125    ///
126    /// ```text
127    ///           ⎛ n     ⎞⁻¹
128    /// HM(X) = n ⎜ ∑ xᵢ⁻¹⎟
129    ///           ⎝i=1    ⎠
130    /// ```
131    ///
132    /// If the array is empty, `Err(EmptyInput)` is returned.
133    ///
134    /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array.
135    ///
136    /// [`harmonic mean`]: https://en.wikipedia.org/wiki/Harmonic_mean
137    fn harmonic_mean(&self) -> Result<A, EmptyInput>
138    where
139        A: Float + FromPrimitive;
140
141    /// Returns the [`geometric mean`] `GM(X)` of all elements in the array:
142    ///
143    /// ```text
144    ///         ⎛ n   ⎞¹⁄ₙ
145    /// GM(X) = ⎜ ∏ xᵢ⎟
146    ///         ⎝i=1  ⎠
147    /// ```
148    ///
149    /// If the array is empty, `Err(EmptyInput)` is returned.
150    ///
151    /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array.
152    ///
153    /// [`geometric mean`]: https://en.wikipedia.org/wiki/Geometric_mean
154    fn geometric_mean(&self) -> Result<A, EmptyInput>
155    where
156        A: Float + FromPrimitive;
157
158    /// Return weighted variance of all elements in the array.
159    ///
160    /// The weighted variance is computed using the [`West, D. H. D.`] incremental algorithm.
161    /// Equivalent to `var_axis` if the `weights` are normalized.
162    ///
163    /// The parameter `ddof` specifies the "delta degrees of freedom". For example, to calculate the
164    /// population variance, use `ddof = 0`, or to calculate the sample variance, use `ddof = 1`.
165    ///
166    /// **Panics** if `ddof` is less than zero or greater than one, or if `axis` is out of bounds,
167    /// or if `A::from_usize()` fails for zero or one.
168    ///
169    /// [`West, D. H. D.`]: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
170    fn weighted_var(&self, weights: &Self, ddof: A) -> Result<A, MultiInputError>
171    where
172        A: AddAssign + Float + FromPrimitive;
173
174    /// Return weighted standard deviation of all elements in the array.
175    ///
176    /// The weighted standard deviation is computed using the [`West, D. H. D.`] incremental
177    /// algorithm. Equivalent to `var_axis` if the `weights` are normalized.
178    ///
179    /// The parameter `ddof` specifies the "delta degrees of freedom". For example, to calculate the
180    /// population variance, use `ddof = 0`, or to calculate the sample variance, use `ddof = 1`.
181    ///
182    /// **Panics** if `ddof` is less than zero or greater than one, or if `axis` is out of bounds,
183    /// or if `A::from_usize()` fails for zero or one.
184    ///
185    /// [`West, D. H. D.`]: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
186    fn weighted_std(&self, weights: &Self, ddof: A) -> Result<A, MultiInputError>
187    where
188        A: AddAssign + Float + FromPrimitive;
189
190    /// Return weighted variance along `axis`.
191    ///
192    /// The weighted variance is computed using the [`West, D. H. D.`] incremental algorithm.
193    /// Equivalent to `var_axis` if the `weights` are normalized.
194    ///
195    /// The parameter `ddof` specifies the "delta degrees of freedom". For example, to calculate the
196    /// population variance, use `ddof = 0`, or to calculate the sample variance, use `ddof = 1`.
197    ///
198    /// **Panics** if `ddof` is less than zero or greater than one, or if `axis` is out of bounds,
199    /// or if `A::from_usize()` fails for zero or one.
200    ///
201    /// [`West, D. H. D.`]: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
202    fn weighted_var_axis(
203        &self,
204        axis: Axis,
205        weights: &ArrayRef<A, Ix1>,
206        ddof: A,
207    ) -> Result<Array<A, D::Smaller>, MultiInputError>
208    where
209        A: AddAssign + Float + FromPrimitive,
210        D: RemoveAxis;
211
212    /// Return weighted standard deviation along `axis`.
213    ///
214    /// The weighted standard deviation is computed using the [`West, D. H. D.`] incremental
215    /// algorithm. Equivalent to `var_axis` if the `weights` are normalized.
216    ///
217    /// The parameter `ddof` specifies the "delta degrees of freedom". For example, to calculate the
218    /// population variance, use `ddof = 0`, or to calculate the sample variance, use `ddof = 1`.
219    ///
220    /// **Panics** if `ddof` is less than zero or greater than one, or if `axis` is out of bounds,
221    /// or if `A::from_usize()` fails for zero or one.
222    ///
223    /// [`West, D. H. D.`]: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
224    fn weighted_std_axis(
225        &self,
226        axis: Axis,
227        weights: &ArrayRef<A, Ix1>,
228        ddof: A,
229    ) -> Result<Array<A, D::Smaller>, MultiInputError>
230    where
231        A: AddAssign + Float + FromPrimitive,
232        D: RemoveAxis;
233
234    /// Returns the [kurtosis] `Kurt[X]` of all elements in the array:
235    ///
236    /// ```text
237    /// Kurt[X] = μ₄ / σ⁴
238    /// ```
239    ///
240    /// where μ₄ is the fourth central moment and σ is the standard deviation of
241    /// the elements in the array.
242    ///
243    /// This is sometimes referred to as _Pearson's kurtosis_. Fisher's kurtosis can be
244    /// computed by subtracting 3 from Pearson's kurtosis.
245    ///
246    /// If the array is empty, `Err(EmptyInput)` is returned.
247    ///
248    /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array.
249    ///
250    /// [kurtosis]: https://en.wikipedia.org/wiki/Kurtosis
251    fn kurtosis(&self) -> Result<A, EmptyInput>
252    where
253        A: Float + FromPrimitive;
254
255    /// Returns the [Pearson's moment coefficient of skewness] γ₁ of all elements in the array:
256    ///
257    /// ```text
258    /// γ₁ = μ₃ / σ³
259    /// ```
260    ///
261    /// where μ₃ is the third central moment and σ is the standard deviation of
262    /// the elements in the array.
263    ///
264    /// If the array is empty, `Err(EmptyInput)` is returned.
265    ///
266    /// **Panics** if `A::from_usize()` fails to convert the number of elements in the array.
267    ///
268    /// [Pearson's moment coefficient of skewness]: https://en.wikipedia.org/wiki/Skewness
269    fn skewness(&self) -> Result<A, EmptyInput>
270    where
271        A: Float + FromPrimitive;
272
273    /// Returns the *p*-th [central moment] of all elements in the array, μₚ:
274    ///
275    /// ```text
276    ///      1  n
277    /// μₚ = ―  ∑ (xᵢ-x̅)ᵖ
278    ///      n i=1
279    /// ```
280    ///
281    /// If the array is empty, `Err(EmptyInput)` is returned.
282    ///
283    /// The *p*-th central moment is computed using a corrected two-pass algorithm (see Section 3.5
284    /// in [Pébay et al., 2016]). Complexity is *O(np)* when *n >> p*, *p > 1*.
285    ///
286    /// **Panics** if `A::from_usize()` fails to convert the number of elements
287    /// in the array or if `order` overflows `i32`.
288    ///
289    /// [central moment]: https://en.wikipedia.org/wiki/Central_moment
290    /// [Pébay et al., 2016]: https://www.osti.gov/pages/servlets/purl/1427275
291    fn central_moment(&self, order: u16) -> Result<A, EmptyInput>
292    where
293        A: Float + FromPrimitive;
294
295    /// Returns the first *p* [central moments] of all elements in the array, see [central moment]
296    /// for more details.
297    ///
298    /// If the array is empty, `Err(EmptyInput)` is returned.
299    ///
300    /// This method reuses the intermediate steps for the *k*-th moment to compute the *(k+1)*-th,
301    /// being thus more efficient than repeated calls to [central moment] if the computation
302    /// of central moments of multiple orders is required.
303    ///
304    /// **Panics** if `A::from_usize()` fails to convert the number of elements
305    /// in the array or if `order` overflows `i32`.
306    ///
307    /// [central moments]: https://en.wikipedia.org/wiki/Central_moment
308    /// [central moment]: #tymethod.central_moment
309    fn central_moments(&self, order: u16) -> Result<Vec<A>, EmptyInput>
310    where
311        A: Float + FromPrimitive;
312
313    private_decl! {}
314}
315
316mod means;