sci_rs/
stats.rs

1use core::{borrow::Borrow, iter::Sum, ops::Add};
2use itertools::Itertools;
3use num_traits::{Float, Num, NumCast, Signed};
4
5#[cfg(feature = "alloc")]
6use alloc::vec::Vec;
7
8// Quick select finds the `i`th smallest element with 2N comparisons
9#[cfg(feature = "alloc")]
10fn quickselect<B, T>(y: &[B], k: usize) -> T
11where
12    B: Borrow<T>,
13    T: Num + NumCast + PartialOrd + Copy,
14{
15    use num_traits::{Num, NumCast};
16
17    let n = y.len();
18    if n == 1 {
19        return *y[0].borrow();
20    }
21
22    let pivot = y.get(n / 2).unwrap().borrow();
23    let lower = y
24        .iter()
25        .filter(|yi| *(*yi).borrow() < *pivot)
26        .map(|yi| *yi.borrow())
27        .collect::<Vec<_>>();
28    let lowers = lower.len();
29    let upper = y
30        .iter()
31        .filter(|yi| *(*yi).borrow() > *pivot)
32        .map(|yi| *yi.borrow())
33        .collect::<Vec<_>>();
34    let uppers = upper.len();
35    let pivots = n - lowers - uppers;
36
37    if k < lowers {
38        quickselect(&lower, k)
39    } else if k < lowers + pivots {
40        *pivot
41    } else {
42        quickselect(&upper, k - lowers - pivots)
43    }
44}
45
46///
47/// Compute the median of the signal, `y`
48///
49/// Return the median and the number of points averaged
50///
51/// ```
52/// use approx::assert_relative_eq;
53/// use sci_rs::stats::median;
54///
55/// let y: [f64; 5] = [1.,2.,3.,4.,5.];
56/// assert_relative_eq!(3f64, median(y.iter()).0);
57///
58/// let y: [i32; 5] = [1,2,3,4,5];
59/// assert_eq!(3, median(y.iter()).0);
60///
61/// let y: [f64; 4] = [1.,2.,3.,4.];
62/// assert_relative_eq!(2.5f64, median(y.iter()).0);
63///
64/// let y: [f32; 5] = [3.,1.,4.,2.,5.];
65/// assert_relative_eq!(3f32, median(y.iter()).0);
66///
67/// let y: [f64; 6] = [3.,1.,4.,2.,3.,5.];
68/// assert_relative_eq!(3f64, median(y.iter()).0);
69///
70/// let y: &[f32] = &[];
71/// assert_eq!((0f32, 0), median(y.iter()));
72///
73/// let y: &[f32] = &[1.];
74/// assert_eq!((1f32, 1), median(y.iter()));
75///
76/// let y: [i64; 4] = [1,2,3,4];
77/// assert_eq!(2i64, median(y.iter()).0);
78///
79/// ```
80///
81#[cfg(feature = "alloc")]
82pub fn median<YI, T>(y: YI) -> (T, usize)
83where
84    T: Num + NumCast + PartialOrd + Copy + Default,
85    YI: Iterator,
86    YI::Item: Borrow<T>,
87{
88    // Materialize the values in the iterator in order to run O(n) quick select
89
90    use num_traits::NumCast;
91    let y = y.collect::<Vec<_>>();
92    let n = y.len();
93
94    if n == 0 {
95        Default::default()
96    } else if n == 1 {
97        (*y[0].borrow(), 1)
98    } else if n % 2 == 1 {
99        (quickselect(&y, n / 2), n)
100    } else {
101        (
102            (quickselect(&y, n / 2 - 1) + quickselect(&y, n / 2)) / T::from(2).unwrap(),
103            n,
104        )
105    }
106}
107
108///
109/// Compute the mean of the signal, `y`
110///
111/// Return the mean and the number of points averaged
112///
113/// ```
114/// use approx::assert_relative_eq;
115/// use sci_rs::stats::mean;
116///
117/// let y: [f64; 5] = [1.,2.,3.,4.,5.];
118/// assert_relative_eq!(3f64, mean(y.iter()).0);
119///
120/// let y: [i64; 5] = [1,2,3,4,5];
121/// assert_eq!(3i64, mean(y.iter()).0);
122///
123/// let y: &[f32] = &[];
124/// assert_eq!((0f32, 0), mean(y.iter()));
125///
126/// ```
127///
128pub fn mean<YI, F>(y: YI) -> (F, usize)
129where
130    F: Num + NumCast + Default + Copy + Add,
131    YI: Iterator,
132    YI::Item: Borrow<F>,
133{
134    let (sum, count) = y.fold(Default::default(), |acc: (F, usize), yi| {
135        (acc.0 + *yi.borrow(), acc.1 + 1)
136    });
137    if count > 0 {
138        (sum / F::from(count).unwrap(), count)
139    } else {
140        Default::default()
141    }
142}
143
144///
145/// Compute the variance of the signal, `y`
146///
147/// Return the variance and the number of points averaged
148///
149/// ```
150/// use approx::assert_relative_eq;
151/// use sci_rs::stats::variance;
152///
153/// let y: [f64; 5] = [1.,2.,3.,4.,5.];
154/// assert_relative_eq!(2f64, variance(y.iter()).0);
155///
156/// let y: &[f32] = &[];
157/// assert_eq!((0f32, 0), variance(y.iter()));
158///
159/// ```
160///
161pub fn variance<YI, F>(y: YI) -> (F, usize)
162where
163    F: Float + Default + Sum,
164    YI: Iterator + Clone,
165    YI::Item: Borrow<F>,
166{
167    let (avg, n) = mean(y.clone());
168    let sum: F = y
169        .map(|f| {
170            let delta = *f.borrow() - avg;
171            delta * delta
172        })
173        .sum::<F>();
174    if n > 0 {
175        (sum / F::from(n).unwrap(), n)
176    } else {
177        Default::default()
178    }
179}
180
181///
182/// Compute the standard deviation of the signal, `y`
183///
184/// Return the standard deviation and the number of points averaged
185///
186/// ```
187/// use approx::assert_relative_eq;
188/// use sci_rs::stats::stdev;
189///
190/// let y: [f64; 5] = [1.,2.,3.,4.,5.];
191/// assert_relative_eq!(1.41421356237, stdev(y.iter()).0, max_relative = 1e-8);
192///
193/// let y: &[f32] = &[];
194/// assert_eq!((0f32, 0), stdev(y.iter()));
195///
196/// ```
197pub fn stdev<YI, F>(y: YI) -> (F, usize)
198where
199    F: Float + Default + Sum,
200    YI: Iterator + Clone,
201    YI::Item: Borrow<F>,
202{
203    match variance(y) {
204        (_, 0) => Default::default(),
205        (v, n) => (v.sqrt(), n),
206    }
207}
208
209///
210/// Autocorrelate the signal `y` with lag `k`,
211/// using 1/N formulation
212///
213/// <https://www.itl.nist.gov/div898/handbook/eda/section3/autocopl.htm>
214///
215pub fn autocorr<YI, F>(y: YI, k: usize) -> F
216where
217    F: Float + Add + Sum + Default,
218    YI: Iterator + Clone,
219    YI::Item: Borrow<F>,
220{
221    let (avg, n) = mean(y.clone());
222    let n = F::from(n).unwrap();
223    let (var, _) = variance(y.clone());
224    let autocovariance: F = y
225        .clone()
226        .zip(y.skip(k))
227        .map(|(fi, fik)| (*fi.borrow() - avg) * (*fik.borrow() - avg))
228        .sum::<F>()
229        / n;
230    autocovariance / var
231}
232
233///
234/// Unscaled tiled autocorrelation of signal `y` with itself into `x`.
235///
236/// This skips variance normalization and only computes lags in `SKIP..SKIP+x.len()`
237///
238/// The autocorrelation is not normalized by 1/y.len() or variance. The variance of the signal
239/// is returned. The returned variance may be used to normalize lags of interest after the fact.
240///
241/// <https://www.itl.nist.gov/div898/handbook/eda/section3/autocopl.htm>
242///
243pub fn autocorr_fast32<const N: usize, const M: usize, const SKIP: usize>(
244    y: &mut [f32; N],
245    x: &mut [f32; M],
246) -> f32 {
247    assert!(N >= M + SKIP);
248
249    // Subtract the mean
250    let sum = y.iter().sum::<f32>();
251    let avg = sum / y.len() as f32;
252    y.iter_mut().for_each(|yi| *yi -= avg);
253
254    // Compute the variance of the signal
255    let var = y.iter().map(|yi| yi * yi).sum::<f32>() / y.len() as f32;
256
257    // Compute the autocorrelation for lag 1 to lag n
258    let lag_skip = y.len() - x.len();
259    for (h, xi) in (SKIP..y.len()).zip(x.iter_mut()) {
260        let left = &y[..y.len() - h];
261        let right = &y[h..];
262        const TILE: usize = 4;
263        let left = left.chunks_exact(TILE);
264        let right = right.chunks_exact(TILE);
265        *xi = left
266            .remainder()
267            .iter()
268            .zip(right.remainder().iter())
269            .map(|(a, b)| a * b)
270            .sum::<f32>();
271        *xi = left
272            .zip(right)
273            .map(|(left, right)| {
274                left.iter()
275                    .zip(right.iter())
276                    .map(|(a, b)| a * b)
277                    .sum::<f32>()
278            })
279            .sum();
280    }
281
282    var
283}
284
285///
286/// Root Mean Square (RMS) of signal `y`.
287///
288/// It is assumed that the mean of the signal is zero.
289///
290pub fn rms_fast32<const N: usize>(y: &[f32; N]) -> f32 {
291    const TILE: usize = 4;
292    let tiles = y.chunks_exact(TILE);
293    let sum = tiles.remainder().iter().map(|yi| yi * yi).sum::<f32>()
294        + tiles
295            .map(|yi| yi.iter().map(|yi| yi * yi).sum::<f32>())
296            .sum::<f32>();
297    (sum / y.len() as f32).sqrt()
298}
299
300///
301/// Produce an iterator yielding the lag difference, yi1 - yi0,
302///
303/// <https://www.itl.nist.gov/div898/handbook/eda/section3/lagplot.htm>
304///
305/// ```
306/// use approx::assert_relative_eq;
307/// use sci_rs::stats::lag_diff;
308///
309/// // Flat signal perfectly correlates with itself
310/// let y: [f64; 4] = [1.,2.,4.,7.];
311/// let z = lag_diff(y.iter()).collect::<Vec<_>>();
312/// for i in 0..3 {
313///     assert_relative_eq!(i as f64 + 1f64, z[i]);
314/// }
315/// ```
316///
317pub fn lag_diff<'a, YI, F>(y: YI) -> impl Iterator<Item = F>
318where
319    F: Float + 'a,
320    YI: Iterator + Clone,
321    YI::Item: Borrow<F>,
322{
323    y.clone()
324        .zip(y.skip(1))
325        .map(|(yi0, yi1)| *yi1.borrow() - *yi0.borrow())
326}
327
328///
329/// Compute the root mean square of successive differences
330///
331/// ```
332/// use approx::assert_relative_eq;
333/// use sci_rs::stats::rmssd;
334///
335/// // Differences are 1, 2, 3
336/// // Square differences are 1, 4, 9
337/// // Mean is 4.666666666666667
338/// // RMSSD is 2.1602468995
339/// let y: [f64; 4] = [1.,2.,4.,7.];
340/// assert_relative_eq!(2.1602468995, rmssd(y.iter()), max_relative = 1e-8);
341/// ```
342///
343pub fn rmssd<YI, F>(y: YI) -> F
344where
345    F: Float + Add + Sum + Default,
346    YI: Iterator + Clone,
347    YI::Item: Borrow<F> + Copy,
348{
349    let square_diffs = y
350        .tuple_windows()
351        .map(|(yi0, yi1)| (*yi1.borrow() - *yi0.borrow()).powi(2));
352    let (sum, n): (F, usize) = mean(square_diffs);
353    sum.sqrt()
354}
355
356///
357/// Compute the z score of each value in the sample, relative to the sample mean and standard deviation.
358///
359/// <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.zscore.html>
360///
361/// # Arguments
362///
363/// * `y` - An array of floating point values
364///
365/// # Examples
366///
367/// ```
368/// use sci_rs::stats::zscore;
369/// use approx::assert_relative_eq;
370///
371/// let y: [f32; 5] = [1.,2.,3.,4.,5.];
372/// let z : Vec<f32> = zscore(y.iter()).collect::<Vec<_>>();
373/// let answer: [f32; 5] = [-1.4142135, -0.70710677, 0.,  0.70710677,  1.4142135];
374/// for i in 0..5 {
375///     assert_relative_eq!(answer[i], z[i], epsilon = 1e-6);
376/// }
377///
378/// // Example from scipy docs
379/// let a: [f32; 10] = [ 0.7972,  0.0767,  0.4383,  0.7866,  0.8091, 0.1954,  0.6307,  0.6599,  0.1065,  0.0508];
380/// let z : Vec<f32> = zscore(a.iter()).collect::<Vec<_>>();
381/// let answer: [f32; 10] =[ 1.12724554, -1.2469956 , -0.05542642,  1.09231569,  1.16645923, -0.8558472 ,  0.57858329,  0.67480514, -1.14879659, -1.33234306];
382/// for i in 0..10 {
383///    assert_relative_eq!(answer[i], z[i], epsilon = 1e-6);
384/// }
385/// ```
386pub fn zscore<YI, F>(y: YI) -> impl Iterator<Item = F>
387where
388    F: Float + Default + Copy + Add + Sum,
389    YI: Iterator + Clone,
390    YI::Item: Borrow<F>,
391{
392    let mean = mean(y.clone()).0;
393    let standard_deviation = stdev(y.clone()).0;
394    y.map(move |yi| ((*yi.borrow() - mean) / standard_deviation))
395}
396
397///
398/// Compute the modified Z-score of each value in the sample, relative to the sample median over the mean absolute deviation.
399///
400/// <https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm>
401///
402/// # Arguments
403///
404/// * `y` - An array of floating point values
405///
406/// # Examples
407///
408/// ```
409/// use sci_rs::stats::mod_zscore;
410/// use approx::assert_relative_eq;
411///
412/// let y: [f32; 5] = [1.,2.,3.,4.,5.];
413/// let z : Vec<f32> = mod_zscore(y.iter()).collect::<Vec<_>>();
414/// let answer: [f32; 5] = [-1.349, -0.6745, 0.,  0.6745,  1.349];
415/// for i in 0..5 {
416///     assert_relative_eq!(answer[i], z[i], epsilon = 1e-5);
417/// }
418/// ```
419pub fn mod_zscore<YI, F>(y: YI) -> impl Iterator<Item = F>
420where
421    F: Float + Default + Copy + Add + Sum,
422    YI: Iterator + Clone,
423    YI::Item: Borrow<F>,
424{
425    let median = median(y.clone()).0;
426
427    let mad = median_abs_deviation(y.clone()).0;
428    y.map(move |yi| ((*yi.borrow() - median) * F::from(0.6745).unwrap() / mad))
429}
430
431/// The median absolute deviation (MAD, [1]) computes the median over the absolute deviations from the median.
432/// It is a measure of dispersion similar to the standard deviation but more robust to outliers
433///
434/// # Arguments
435///
436/// * `y` - An array of floating point values
437///
438/// # Examples
439///
440/// ```
441/// use sci_rs::stats::median_abs_deviation;
442/// use approx::assert_relative_eq;
443///
444/// let y: [f64; 16] = [6., 7., 7., 8., 12., 14., 15., 16., 16., 19., 22., 24., 26., 26., 29., 46.];
445/// let z = median_abs_deviation(y.iter());
446///
447/// assert_relative_eq!(8., z.0);
448/// ```
449pub fn median_abs_deviation<YI, F>(y: YI) -> (F, usize)
450where
451    F: Float + Default + Sum,
452    YI: Iterator + Clone,
453    YI::Item: Borrow<F>,
454{
455    let med = median(y.clone()).0;
456
457    let abs_vals = y.map(|yi| (*yi.borrow() - med).abs());
458
459    median(abs_vals.into_iter())
460}
461
462#[cfg(test)]
463mod tests {
464    use super::*;
465    use approx::assert_relative_eq;
466
467    #[cfg(feature = "std")]
468    use {std::f64::consts::PI, std::vec::Vec};
469
470    #[cfg(feature = "std")]
471    #[test]
472    fn can_median() {
473        let y: [f64; 4] = [1., 2., 3., 4.];
474        println!("y = {:?}", y);
475        println!("y = {:?}", median::<_, f64>(y.iter()));
476        assert_relative_eq!(2.5, median::<_, f64>(y.iter()).0);
477        let y: [f64; 5] = [1., 2., 3., 4., 5.];
478        println!("y = {:?}", y);
479        println!("y = {:?}", median::<_, f64>(y.iter()));
480        assert_relative_eq!(3.0, median::<_, f64>(y.iter()).0);
481    }
482
483    #[cfg(feature = "std")]
484    #[test]
485    fn can_autocorrelate() {
486        // sin wave w/ multiple periods
487        let periods = 1.;
488        let points = 100;
489        let radians_per_pt = (periods * 2. * PI) / points as f64;
490        let sin_wave = (0..points)
491            .map(|i| (i as f64 * radians_per_pt).sin())
492            .collect::<Vec<_>>();
493        // println!("sin_wave = {:?}", sin_wave);
494
495        let _correlations: Vec<f64> = (0..points)
496            .map(|i| autocorr(sin_wave.iter(), i))
497            .collect::<Vec<_>>();
498        let correlations: Vec<f32> = (0..points)
499            .map(|i| autocorr(sin_wave.iter().map(|f| *f as f32), i))
500            .collect::<Vec<_>>();
501        println!("correlations = {:?}", correlations);
502    }
503
504    #[test]
505    fn it_works() {
506        let result = 2 + 2;
507        assert_eq!(result, 4);
508    }
509}