sci_rs/stats.rs
1use core::{borrow::Borrow, iter::Sum, ops::Add};
2use itertools::Itertools;
3use num_traits::{Float, Num, NumCast, Signed};
4
5#[cfg(feature = "alloc")]
6use alloc::vec::Vec;
7
8// Quick select finds the `i`th smallest element with 2N comparisons
9#[cfg(feature = "alloc")]
10fn quickselect<B, T>(y: &[B], k: usize) -> T
11where
12 B: Borrow<T>,
13 T: Num + NumCast + PartialOrd + Copy,
14{
15 use num_traits::{Num, NumCast};
16
17 let n = y.len();
18 if n == 1 {
19 return *y[0].borrow();
20 }
21
22 let pivot = y.get(n / 2).unwrap().borrow();
23 let lower = y
24 .iter()
25 .filter(|yi| *(*yi).borrow() < *pivot)
26 .map(|yi| *yi.borrow())
27 .collect::<Vec<_>>();
28 let lowers = lower.len();
29 let upper = y
30 .iter()
31 .filter(|yi| *(*yi).borrow() > *pivot)
32 .map(|yi| *yi.borrow())
33 .collect::<Vec<_>>();
34 let uppers = upper.len();
35 let pivots = n - lowers - uppers;
36
37 if k < lowers {
38 quickselect(&lower, k)
39 } else if k < lowers + pivots {
40 *pivot
41 } else {
42 quickselect(&upper, k - lowers - pivots)
43 }
44}
45
46///
47/// Compute the median of the signal, `y`
48///
49/// Return the median and the number of points averaged
50///
51/// ```
52/// use approx::assert_relative_eq;
53/// use sci_rs::stats::median;
54///
55/// let y: [f64; 5] = [1.,2.,3.,4.,5.];
56/// assert_relative_eq!(3f64, median(y.iter()).0);
57///
58/// let y: [i32; 5] = [1,2,3,4,5];
59/// assert_eq!(3, median(y.iter()).0);
60///
61/// let y: [f64; 4] = [1.,2.,3.,4.];
62/// assert_relative_eq!(2.5f64, median(y.iter()).0);
63///
64/// let y: [f32; 5] = [3.,1.,4.,2.,5.];
65/// assert_relative_eq!(3f32, median(y.iter()).0);
66///
67/// let y: [f64; 6] = [3.,1.,4.,2.,3.,5.];
68/// assert_relative_eq!(3f64, median(y.iter()).0);
69///
70/// let y: &[f32] = &[];
71/// assert_eq!((0f32, 0), median(y.iter()));
72///
73/// let y: &[f32] = &[1.];
74/// assert_eq!((1f32, 1), median(y.iter()));
75///
76/// let y: [i64; 4] = [1,2,3,4];
77/// assert_eq!(2i64, median(y.iter()).0);
78///
79/// ```
80///
81#[cfg(feature = "alloc")]
82pub fn median<YI, T>(y: YI) -> (T, usize)
83where
84 T: Num + NumCast + PartialOrd + Copy + Default,
85 YI: Iterator,
86 YI::Item: Borrow<T>,
87{
88 // Materialize the values in the iterator in order to run O(n) quick select
89
90 use num_traits::NumCast;
91 let y = y.collect::<Vec<_>>();
92 let n = y.len();
93
94 if n == 0 {
95 Default::default()
96 } else if n == 1 {
97 (*y[0].borrow(), 1)
98 } else if n % 2 == 1 {
99 (quickselect(&y, n / 2), n)
100 } else {
101 (
102 (quickselect(&y, n / 2 - 1) + quickselect(&y, n / 2)) / T::from(2).unwrap(),
103 n,
104 )
105 }
106}
107
108///
109/// Compute the mean of the signal, `y`
110///
111/// Return the mean and the number of points averaged
112///
113/// ```
114/// use approx::assert_relative_eq;
115/// use sci_rs::stats::mean;
116///
117/// let y: [f64; 5] = [1.,2.,3.,4.,5.];
118/// assert_relative_eq!(3f64, mean(y.iter()).0);
119///
120/// let y: [i64; 5] = [1,2,3,4,5];
121/// assert_eq!(3i64, mean(y.iter()).0);
122///
123/// let y: &[f32] = &[];
124/// assert_eq!((0f32, 0), mean(y.iter()));
125///
126/// ```
127///
128pub fn mean<YI, F>(y: YI) -> (F, usize)
129where
130 F: Num + NumCast + Default + Copy + Add,
131 YI: Iterator,
132 YI::Item: Borrow<F>,
133{
134 let (sum, count) = y.fold(Default::default(), |acc: (F, usize), yi| {
135 (acc.0 + *yi.borrow(), acc.1 + 1)
136 });
137 if count > 0 {
138 (sum / F::from(count).unwrap(), count)
139 } else {
140 Default::default()
141 }
142}
143
144///
145/// Compute the variance of the signal, `y`
146///
147/// Return the variance and the number of points averaged
148///
149/// ```
150/// use approx::assert_relative_eq;
151/// use sci_rs::stats::variance;
152///
153/// let y: [f64; 5] = [1.,2.,3.,4.,5.];
154/// assert_relative_eq!(2f64, variance(y.iter()).0);
155///
156/// let y: &[f32] = &[];
157/// assert_eq!((0f32, 0), variance(y.iter()));
158///
159/// ```
160///
161pub fn variance<YI, F>(y: YI) -> (F, usize)
162where
163 F: Float + Default + Sum,
164 YI: Iterator + Clone,
165 YI::Item: Borrow<F>,
166{
167 let (avg, n) = mean(y.clone());
168 let sum: F = y
169 .map(|f| {
170 let delta = *f.borrow() - avg;
171 delta * delta
172 })
173 .sum::<F>();
174 if n > 0 {
175 (sum / F::from(n).unwrap(), n)
176 } else {
177 Default::default()
178 }
179}
180
181///
182/// Compute the standard deviation of the signal, `y`
183///
184/// Return the standard deviation and the number of points averaged
185///
186/// ```
187/// use approx::assert_relative_eq;
188/// use sci_rs::stats::stdev;
189///
190/// let y: [f64; 5] = [1.,2.,3.,4.,5.];
191/// assert_relative_eq!(1.41421356237, stdev(y.iter()).0, max_relative = 1e-8);
192///
193/// let y: &[f32] = &[];
194/// assert_eq!((0f32, 0), stdev(y.iter()));
195///
196/// ```
197pub fn stdev<YI, F>(y: YI) -> (F, usize)
198where
199 F: Float + Default + Sum,
200 YI: Iterator + Clone,
201 YI::Item: Borrow<F>,
202{
203 match variance(y) {
204 (_, 0) => Default::default(),
205 (v, n) => (v.sqrt(), n),
206 }
207}
208
209///
210/// Autocorrelate the signal `y` with lag `k`,
211/// using 1/N formulation
212///
213/// <https://www.itl.nist.gov/div898/handbook/eda/section3/autocopl.htm>
214///
215pub fn autocorr<YI, F>(y: YI, k: usize) -> F
216where
217 F: Float + Add + Sum + Default,
218 YI: Iterator + Clone,
219 YI::Item: Borrow<F>,
220{
221 let (avg, n) = mean(y.clone());
222 let n = F::from(n).unwrap();
223 let (var, _) = variance(y.clone());
224 let autocovariance: F = y
225 .clone()
226 .zip(y.skip(k))
227 .map(|(fi, fik)| (*fi.borrow() - avg) * (*fik.borrow() - avg))
228 .sum::<F>()
229 / n;
230 autocovariance / var
231}
232
233///
234/// Unscaled tiled autocorrelation of signal `y` with itself into `x`.
235///
236/// This skips variance normalization and only computes lags in `SKIP..SKIP+x.len()`
237///
238/// The autocorrelation is not normalized by 1/y.len() or variance. The variance of the signal
239/// is returned. The returned variance may be used to normalize lags of interest after the fact.
240///
241/// <https://www.itl.nist.gov/div898/handbook/eda/section3/autocopl.htm>
242///
243pub fn autocorr_fast32<const N: usize, const M: usize, const SKIP: usize>(
244 y: &mut [f32; N],
245 x: &mut [f32; M],
246) -> f32 {
247 assert!(N >= M + SKIP);
248
249 // Subtract the mean
250 let sum = y.iter().sum::<f32>();
251 let avg = sum / y.len() as f32;
252 y.iter_mut().for_each(|yi| *yi -= avg);
253
254 // Compute the variance of the signal
255 let var = y.iter().map(|yi| yi * yi).sum::<f32>() / y.len() as f32;
256
257 // Compute the autocorrelation for lag 1 to lag n
258 let lag_skip = y.len() - x.len();
259 for (h, xi) in (SKIP..y.len()).zip(x.iter_mut()) {
260 let left = &y[..y.len() - h];
261 let right = &y[h..];
262 const TILE: usize = 4;
263 let left = left.chunks_exact(TILE);
264 let right = right.chunks_exact(TILE);
265 *xi = left
266 .remainder()
267 .iter()
268 .zip(right.remainder().iter())
269 .map(|(a, b)| a * b)
270 .sum::<f32>();
271 *xi = left
272 .zip(right)
273 .map(|(left, right)| {
274 left.iter()
275 .zip(right.iter())
276 .map(|(a, b)| a * b)
277 .sum::<f32>()
278 })
279 .sum();
280 }
281
282 var
283}
284
285///
286/// Root Mean Square (RMS) of signal `y`.
287///
288/// It is assumed that the mean of the signal is zero.
289///
290pub fn rms_fast32<const N: usize>(y: &[f32; N]) -> f32 {
291 const TILE: usize = 4;
292 let tiles = y.chunks_exact(TILE);
293 let sum = tiles.remainder().iter().map(|yi| yi * yi).sum::<f32>()
294 + tiles
295 .map(|yi| yi.iter().map(|yi| yi * yi).sum::<f32>())
296 .sum::<f32>();
297 (sum / y.len() as f32).sqrt()
298}
299
300///
301/// Produce an iterator yielding the lag difference, yi1 - yi0,
302///
303/// <https://www.itl.nist.gov/div898/handbook/eda/section3/lagplot.htm>
304///
305/// ```
306/// use approx::assert_relative_eq;
307/// use sci_rs::stats::lag_diff;
308///
309/// // Flat signal perfectly correlates with itself
310/// let y: [f64; 4] = [1.,2.,4.,7.];
311/// let z = lag_diff(y.iter()).collect::<Vec<_>>();
312/// for i in 0..3 {
313/// assert_relative_eq!(i as f64 + 1f64, z[i]);
314/// }
315/// ```
316///
317pub fn lag_diff<'a, YI, F>(y: YI) -> impl Iterator<Item = F>
318where
319 F: Float + 'a,
320 YI: Iterator + Clone,
321 YI::Item: Borrow<F>,
322{
323 y.clone()
324 .zip(y.skip(1))
325 .map(|(yi0, yi1)| *yi1.borrow() - *yi0.borrow())
326}
327
328///
329/// Compute the root mean square of successive differences
330///
331/// ```
332/// use approx::assert_relative_eq;
333/// use sci_rs::stats::rmssd;
334///
335/// // Differences are 1, 2, 3
336/// // Square differences are 1, 4, 9
337/// // Mean is 4.666666666666667
338/// // RMSSD is 2.1602468995
339/// let y: [f64; 4] = [1.,2.,4.,7.];
340/// assert_relative_eq!(2.1602468995, rmssd(y.iter()), max_relative = 1e-8);
341/// ```
342///
343pub fn rmssd<YI, F>(y: YI) -> F
344where
345 F: Float + Add + Sum + Default,
346 YI: Iterator + Clone,
347 YI::Item: Borrow<F> + Copy,
348{
349 let square_diffs = y
350 .tuple_windows()
351 .map(|(yi0, yi1)| (*yi1.borrow() - *yi0.borrow()).powi(2));
352 let (sum, n): (F, usize) = mean(square_diffs);
353 sum.sqrt()
354}
355
356///
357/// Compute the z score of each value in the sample, relative to the sample mean and standard deviation.
358///
359/// <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.zscore.html>
360///
361/// # Arguments
362///
363/// * `y` - An array of floating point values
364///
365/// # Examples
366///
367/// ```
368/// use sci_rs::stats::zscore;
369/// use approx::assert_relative_eq;
370///
371/// let y: [f32; 5] = [1.,2.,3.,4.,5.];
372/// let z : Vec<f32> = zscore(y.iter()).collect::<Vec<_>>();
373/// let answer: [f32; 5] = [-1.4142135, -0.70710677, 0., 0.70710677, 1.4142135];
374/// for i in 0..5 {
375/// assert_relative_eq!(answer[i], z[i], epsilon = 1e-6);
376/// }
377///
378/// // Example from scipy docs
379/// let a: [f32; 10] = [ 0.7972, 0.0767, 0.4383, 0.7866, 0.8091, 0.1954, 0.6307, 0.6599, 0.1065, 0.0508];
380/// let z : Vec<f32> = zscore(a.iter()).collect::<Vec<_>>();
381/// let answer: [f32; 10] =[ 1.12724554, -1.2469956 , -0.05542642, 1.09231569, 1.16645923, -0.8558472 , 0.57858329, 0.67480514, -1.14879659, -1.33234306];
382/// for i in 0..10 {
383/// assert_relative_eq!(answer[i], z[i], epsilon = 1e-6);
384/// }
385/// ```
386pub fn zscore<YI, F>(y: YI) -> impl Iterator<Item = F>
387where
388 F: Float + Default + Copy + Add + Sum,
389 YI: Iterator + Clone,
390 YI::Item: Borrow<F>,
391{
392 let mean = mean(y.clone()).0;
393 let standard_deviation = stdev(y.clone()).0;
394 y.map(move |yi| ((*yi.borrow() - mean) / standard_deviation))
395}
396
397///
398/// Compute the modified Z-score of each value in the sample, relative to the sample median over the mean absolute deviation.
399///
400/// <https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm>
401///
402/// # Arguments
403///
404/// * `y` - An array of floating point values
405///
406/// # Examples
407///
408/// ```
409/// use sci_rs::stats::mod_zscore;
410/// use approx::assert_relative_eq;
411///
412/// let y: [f32; 5] = [1.,2.,3.,4.,5.];
413/// let z : Vec<f32> = mod_zscore(y.iter()).collect::<Vec<_>>();
414/// let answer: [f32; 5] = [-1.349, -0.6745, 0., 0.6745, 1.349];
415/// for i in 0..5 {
416/// assert_relative_eq!(answer[i], z[i], epsilon = 1e-5);
417/// }
418/// ```
419pub fn mod_zscore<YI, F>(y: YI) -> impl Iterator<Item = F>
420where
421 F: Float + Default + Copy + Add + Sum,
422 YI: Iterator + Clone,
423 YI::Item: Borrow<F>,
424{
425 let median = median(y.clone()).0;
426
427 let mad = median_abs_deviation(y.clone()).0;
428 y.map(move |yi| ((*yi.borrow() - median) * F::from(0.6745).unwrap() / mad))
429}
430
431/// The median absolute deviation (MAD, [1]) computes the median over the absolute deviations from the median.
432/// It is a measure of dispersion similar to the standard deviation but more robust to outliers
433///
434/// # Arguments
435///
436/// * `y` - An array of floating point values
437///
438/// # Examples
439///
440/// ```
441/// use sci_rs::stats::median_abs_deviation;
442/// use approx::assert_relative_eq;
443///
444/// let y: [f64; 16] = [6., 7., 7., 8., 12., 14., 15., 16., 16., 19., 22., 24., 26., 26., 29., 46.];
445/// let z = median_abs_deviation(y.iter());
446///
447/// assert_relative_eq!(8., z.0);
448/// ```
449pub fn median_abs_deviation<YI, F>(y: YI) -> (F, usize)
450where
451 F: Float + Default + Sum,
452 YI: Iterator + Clone,
453 YI::Item: Borrow<F>,
454{
455 let med = median(y.clone()).0;
456
457 let abs_vals = y.map(|yi| (*yi.borrow() - med).abs());
458
459 median(abs_vals.into_iter())
460}
461
462#[cfg(test)]
463mod tests {
464 use super::*;
465 use approx::assert_relative_eq;
466
467 #[cfg(feature = "std")]
468 use {std::f64::consts::PI, std::vec::Vec};
469
470 #[cfg(feature = "std")]
471 #[test]
472 fn can_median() {
473 let y: [f64; 4] = [1., 2., 3., 4.];
474 println!("y = {:?}", y);
475 println!("y = {:?}", median::<_, f64>(y.iter()));
476 assert_relative_eq!(2.5, median::<_, f64>(y.iter()).0);
477 let y: [f64; 5] = [1., 2., 3., 4., 5.];
478 println!("y = {:?}", y);
479 println!("y = {:?}", median::<_, f64>(y.iter()));
480 assert_relative_eq!(3.0, median::<_, f64>(y.iter()).0);
481 }
482
483 #[cfg(feature = "std")]
484 #[test]
485 fn can_autocorrelate() {
486 // sin wave w/ multiple periods
487 let periods = 1.;
488 let points = 100;
489 let radians_per_pt = (periods * 2. * PI) / points as f64;
490 let sin_wave = (0..points)
491 .map(|i| (i as f64 * radians_per_pt).sin())
492 .collect::<Vec<_>>();
493 // println!("sin_wave = {:?}", sin_wave);
494
495 let _correlations: Vec<f64> = (0..points)
496 .map(|i| autocorr(sin_wave.iter(), i))
497 .collect::<Vec<_>>();
498 let correlations: Vec<f32> = (0..points)
499 .map(|i| autocorr(sin_wave.iter().map(|f| *f as f32), i))
500 .collect::<Vec<_>>();
501 println!("correlations = {:?}", correlations);
502 }
503
504 #[test]
505 fn it_works() {
506 let result = 2 + 2;
507 assert_eq!(result, 4);
508 }
509}