lattice_qcd_rs/statistics/
mod.rs

1//! Provide statistical tools
2
3use std::ops::{Div, Mul, Sub};
4
5use num_traits::Zero;
6use rayon::prelude::*;
7
8pub mod distribution;
9
10pub use distribution::*;
11
12/// Compute the mean from a [`rayon::iter::IndexedParallelIterator`].
13/// It uses the power of the parallel iterator to do the computation
14/// and might give better performance than [`mean`].
15///
16/// Alternatively there is [`mean_par_iter_val`] for parallel iterator
17/// with non reference values.
18/// # Example
19/// ```
20/// use lattice_qcd_rs::statistics::mean_par_iter;
21/// use rayon::prelude::*;
22///
23/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64 /* ... */];
24/// let mean = mean_par_iter(vec.par_iter());
25/// ```
26pub fn mean_par_iter<'a, It, T>(data: It) -> T
27where
28    T: Clone
29        + Div<f64, Output = T>
30        + std::iter::Sum<T>
31        + std::iter::Sum<It::Item>
32        + Send
33        + 'a
34        + Sync,
35    It: IndexedParallelIterator<Item = &'a T>,
36{
37    mean_par_iter_val(data.cloned())
38}
39
40/// Compute the mean from a [`rayon::iter::IndexedParallelIterator`]. If you want
41/// to use reference use [`mean_par_iter`].
42/// It uses the power of the parallel iterator to do the computation and is
43/// particularly useful in combination of a map.
44///
45/// # Example
46/// ```
47/// use lattice_qcd_rs::statistics::mean_par_iter_val;
48/// use rayon::prelude::*;
49///
50/// fn expensive_computation(input: &f64) -> f64 {
51///     input + 1_f64
52/// }
53///
54/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64];
55/// let mean = mean_par_iter_val(vec.par_iter().map(|input| expensive_computation(input)));
56/// ```
57pub fn mean_par_iter_val<It, T>(data: It) -> T
58where
59    T: Clone + Div<f64, Output = T> + std::iter::Sum<T> + std::iter::Sum<It::Item> + Send,
60    It: IndexedParallelIterator<Item = T>,
61{
62    let len = data.len();
63    let mean: T = data.sum();
64    mean / len as f64
65}
66
67/// Compute the variance (squared of standard deviation) from
68/// a [`rayon::iter::IndexedParallelIterator`].
69///
70/// The alternative for iterator that yield non reference is [`variance_par_iter_val`].
71/// # Example
72/// ```
73/// use lattice_qcd_rs::statistics::variance_par_iter;
74/// use rayon::prelude::*;
75///
76/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64 /* ... */];
77/// let variance = variance_par_iter(vec.par_iter());
78/// ```
79pub fn variance_par_iter<'a, It, T>(data: It) -> T
80where
81    T: Clone
82        + Div<f64, Output = T>
83        + std::iter::Sum<T>
84        + std::iter::Sum<It::Item>
85        + Send
86        + Sync
87        + 'a
88        + Sub<T, Output = T>
89        + Mul<T, Output = T>
90        + Zero,
91    It: IndexedParallelIterator<Item = &'a T> + Clone,
92{
93    variance_par_iter_val(data.cloned())
94}
95
96/// Compute the variance (squared of standard deviation) from
97/// a [`rayon::iter::IndexedParallelIterator`] by value.
98///
99/// The alternative for the variance from a iterator that yields reference
100/// is [`variance_par_iter`].
101/// # Example
102/// ```
103/// use lattice_qcd_rs::statistics::variance_par_iter_val;
104/// use rayon::prelude::*;
105///
106/// fn expensive_computation(input: &f64) -> f64 {
107///     input * 2_f64
108/// }
109///
110/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64];
111/// let mean = variance_par_iter_val(vec.par_iter().map(|input| expensive_computation(input)));
112/// ```
113pub fn variance_par_iter_val<It, T>(data: It) -> T
114where
115    T: Clone
116        + Div<f64, Output = T>
117        + std::iter::Sum<T>
118        + std::iter::Sum<It::Item>
119        + Send
120        + Sub<T, Output = T>
121        + Mul<T, Output = T>
122        + Zero,
123    It: IndexedParallelIterator<Item = T> + Clone,
124{
125    let [_, variance] = mean_and_variance_par_iter_val(data);
126    variance
127}
128
129/// Compute the mean and variance (squared of standard deviation) from
130/// a [`rayon::iter::IndexedParallelIterator`].
131/// Provides better performance than computing the mean and variation separately
132/// as this method consume the iterator only once.
133///
134/// The alternative for iterators returning non-references
135/// is [`mean_and_variance_par_iter_val`]
136/// # Examples
137/// see the example of [`mean_par_iter`] and [`variance_par_iter`].
138pub fn mean_and_variance_par_iter<'a, It, T>(data: It) -> [T; 2]
139where
140    T: Clone
141        + Div<f64, Output = T>
142        + std::iter::Sum<T>
143        + std::iter::Sum<It::Item>
144        + Send
145        + Sync
146        + 'a
147        + Sub<T, Output = T>
148        + Mul<T, Output = T>
149        + Zero,
150    It: IndexedParallelIterator<Item = &'a T> + Clone,
151{
152    mean_and_variance_par_iter_val(data.cloned())
153}
154
155/// Compute the mean and variance (squared of standard deviation) from
156/// a [`rayon::iter::IndexedParallelIterator`] by value.
157/// Provides better performance than computing the mean and variation separately as
158/// this method consume the iterator only once.
159///
160/// The alternative for iterators returning references is [`mean_and_variance_par_iter`].
161/// # Example
162/// see the example of [`mean_par_iter_val`] and [`variance_par_iter_val`].
163pub fn mean_and_variance_par_iter_val<It, T>(data: It) -> [T; 2]
164where
165    T: Clone
166        + Div<f64, Output = T>
167        + std::iter::Sum<T>
168        + std::iter::Sum<It::Item>
169        + Send
170        + Sub<T, Output = T>
171        + Mul<T, Output = T>
172        + Zero,
173    It: IndexedParallelIterator<Item = T> + Clone,
174{
175    let len = data.len();
176    let (mean, mean_sqrt) = data
177        .map(|el| (el.clone(), el.clone() * el))
178        .reduce(|| (T::zero(), T::zero()), |a, b| (a.0 + b.0, a.1 + b.1));
179    let var = (mean_sqrt - mean.clone() * mean.clone() / (len as f64)) / (len - 1) as f64;
180    [mean / len as f64, var]
181}
182
183/// Computes the mean the statistical error on this value
184/// a [`rayon::iter::IndexedParallelIterator`].
185///
186/// The statistical error is defined by `sqrt(variance / len)`.
187///
188/// The alternative for iterators returning non-references is [`mean_with_error_par_iter_val`].
189pub fn mean_with_error_par_iter<'a, It: IndexedParallelIterator<Item = &'a f64> + Clone>(
190    data: It,
191) -> [f64; 2] {
192    mean_with_error_par_iter_val(data.cloned())
193}
194
195/// Computes the mean the statistical error on this value
196/// a [`rayon::iter::IndexedParallelIterator`] by value.
197///
198/// The statistical error is defined by `sqrt(variance / len)`.
199///
200/// The alternative for iterators returning references is [`mean_with_error_par_iter`]
201pub fn mean_with_error_par_iter_val<It: IndexedParallelIterator<Item = f64> + Clone>(
202    data: It,
203) -> [f64; 2] {
204    let len = data.len();
205    let [mean, variance] = mean_and_variance_par_iter_val(data);
206    [mean, (variance / len as f64).sqrt()]
207}
208
209/// Computes the covariance between two [`rayon::iter::IndexedParallelIterator`].
210/// Returns [`None`] if the par iters are not of the same length.
211///
212/// The alternative for iterators returning references is [`covariance_par_iter_val`].
213/// # Example
214/// ```
215/// use lattice_qcd_rs::statistics::covariance_par_iter;
216/// use rayon::prelude::*;
217///
218/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64];
219/// let vec_2 = vec![1_f64, 2_f64, 3_f64];
220///
221/// let cov = covariance_par_iter(vec.par_iter(), vec_2.par_iter());
222/// assert!(cov.is_none());
223///
224/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64];
225/// let vec_2 = vec![1_f64, 2_f64, 3_f64, 4_f64];
226///
227/// let cov = covariance_par_iter(vec.par_iter(), vec_2.par_iter());
228/// assert_eq!(cov, Some(1.25_f64));
229/// ```
230pub fn covariance_par_iter<'a, It1, It2, T>(data_1: It1, data_2: It2) -> Option<T>
231where
232    T: 'a
233        + Clone
234        + Div<f64, Output = T>
235        + std::iter::Sum<T>
236        + std::iter::Sum<It1::Item>
237        + Send
238        + Sync
239        + Mul<T, Output = T>
240        + Sub<T, Output = T>,
241    It1: IndexedParallelIterator<Item = &'a T> + Clone,
242    It2: IndexedParallelIterator<Item = &'a T> + Clone,
243    T: Zero,
244{
245    covariance_par_iter_val(data_1.cloned(), data_2.cloned())
246}
247
248/// Computes the covariance between two [rayon::iter::IndexedParallelIterator] by value.
249/// Returns `None` if the par iters are not of the same length.
250///
251/// The alternative for iterators returning references is [`covariance_par_iter`].
252/// # Example
253/// ```
254/// use lattice_qcd_rs::statistics::covariance_par_iter_val;
255/// use rayon::prelude::*;
256///
257/// fn expensive_computation(input: &f64) -> f64 {
258///     input + 1_f64
259/// }
260///
261/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64];
262/// let vec_2 = vec![1_f64, 2_f64, 3_f64];
263///
264/// let cov = covariance_par_iter_val(
265///     vec.par_iter().map(|input| expensive_computation(input)),
266///     vec_2.par_iter().map(|input| expensive_computation(input)),
267/// );
268/// assert!(cov.is_none());
269///
270/// let vec = vec![1_f64, 1_f64, 1_f64, 1_f64];
271/// let vec_2 = vec![1_f64, 1_f64, 1_f64, 1_f64];
272///
273/// let cov = covariance_par_iter_val(
274///     vec.par_iter().map(|input| expensive_computation(input)),
275///     vec_2.par_iter().map(|input| expensive_computation(input)),
276/// );
277/// assert_eq!(cov, Some(0_f64));
278/// ```
279pub fn covariance_par_iter_val<It1, It2, T>(data_1: It1, data_2: It2) -> Option<T>
280where
281    T: Clone
282        + Div<f64, Output = T>
283        + std::iter::Sum<T>
284        + std::iter::Sum<It1::Item>
285        + Send
286        + Mul<T, Output = T>
287        + Sub<T, Output = T>,
288    It1: IndexedParallelIterator<Item = T> + Clone,
289    It2: IndexedParallelIterator<Item = T> + Clone,
290    T: Zero,
291{
292    if data_1.len() == data_2.len() {
293        let len = data_1.len() as f64;
294        let r = data_1
295            .zip(data_2)
296            .map(|(el_1, el_2)| (el_1.clone(), el_2.clone(), el_1 * el_2))
297            .reduce(
298                || (T::zero(), T::zero(), T::zero()),
299                |a, b| (a.0 + b.0, a.1 + b.1, a.2 + b.2),
300            );
301        Some((r.2 - r.0 * r.1 / len) / len)
302    }
303    else {
304        None
305    }
306}
307
308/// compute the mean from a collections
309/// # Example
310/// ```
311/// use lattice_qcd_rs::statistics::mean;
312/// use nalgebra::Complex;
313///
314/// mean(&[1_f64, 2_f64, 3_f64, 4_f64]);
315/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64];
316/// mean(&vec);
317/// let vec_complex = vec![Complex::new(1_f64, 2_f64), Complex::new(-7_f64, -9_f64)];
318/// mean(&vec_complex);
319/// ```
320#[allow(clippy::type_repetition_in_bounds)] // false positive
321pub fn mean<'a, T, IntoIter>(data: IntoIter) -> T
322where
323    T: Div<f64, Output = T> + std::iter::Sum<&'a T> + 'a,
324    IntoIter: IntoIterator<Item = &'a T>,
325    IntoIter::IntoIter: ExactSizeIterator,
326{
327    let iter = data.into_iter();
328    let len = iter.len() as f64;
329    let mean: T = iter.sum();
330    mean / len
331}
332
333/// compute the variance (squared of standard deviation) from a collections
334/// # Example
335/// ```
336/// use lattice_qcd_rs::statistics::variance;
337/// use nalgebra::Complex;
338///
339/// variance(&[1_f64, 2_f64, 3_f64, 4_f64]);
340/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64];
341/// variance(&vec);
342/// let vec_complex = vec![Complex::new(1_f64, 2_f64), Complex::new(-7_f64, -9_f64)];
343/// variance(&vec_complex);
344/// ```
345#[allow(clippy::type_repetition_in_bounds)] // false positive
346pub fn variance<'a, T, IntoIter>(data: IntoIter) -> T
347where
348    T: 'a
349        + Div<f64, Output = T>
350        + std::iter::Sum<&'a T>
351        + std::iter::Sum<T>
352        + Mul<T, Output = T>
353        + Clone
354        + Sub<T, Output = T>,
355    IntoIter: IntoIterator<Item = &'a T> + Clone,
356    IntoIter::IntoIter: ExactSizeIterator,
357{
358    let [_, variance] = mean_and_variance(data);
359    variance
360}
361
362/// Compute the mean and variance (squared of standard deviation) from a collection.
363/// # Example
364/// ```
365/// use lattice_qcd_rs::statistics::mean_and_variance;
366/// use nalgebra::Complex;
367///
368/// mean_and_variance(&[1_f64, 2_f64, 3_f64, 4_f64]);
369/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64];
370/// mean_and_variance(&vec);
371/// let vec_complex = vec![Complex::new(1_f64, 2_f64), Complex::new(-7_f64, -9_f64)];
372/// mean_and_variance(&vec_complex);
373/// ```
374#[allow(clippy::type_repetition_in_bounds)] // false positive
375pub fn mean_and_variance<'a, T, IntoIter>(data: IntoIter) -> [T; 2]
376where
377    T: 'a
378        + Div<f64, Output = T>
379        + std::iter::Sum<&'a T>
380        + std::iter::Sum<T>
381        + Mul<T, Output = T>
382        + Clone
383        + Sub<T, Output = T>,
384    IntoIter: IntoIterator<Item = &'a T> + Clone,
385    IntoIter::IntoIter: ExactSizeIterator,
386{
387    // often data is just a reference so cloning it is not a big deal
388    let mean = mean(data.clone());
389    let iter = data.into_iter();
390    let len = iter.len();
391    let variance = iter
392        .map(|el| (el.clone() - mean.clone()) * (el.clone() - mean.clone()))
393        .sum::<T>()
394        / (len - 1) as f64;
395    [mean, variance]
396}
397
398/// compute the mean the statistical error on this value a slice.
399///
400/// The statistical error is defined by `sqrt(variance / len)`.
401pub fn mean_with_error(data: &[f64]) -> [f64; 2] {
402    let len = data.len();
403    let [mean, variance] = mean_and_variance(data);
404    [mean, (variance / len as f64).sqrt()]
405}
406
407/// compute the covariance between two slices.
408/// Return `None` if the slices are not of the same length
409/// # Example
410/// ```
411/// use lattice_qcd_rs::statistics::covariance;
412/// use nalgebra::Complex;
413///
414/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64];
415/// let array = [1_f64, 2_f64, 3_f64, 4_f64];
416/// let cov = covariance(&array, &vec);
417/// assert!(cov.is_some());
418///
419/// let array_complex = [Complex::new(1_f64, 2_f64), Complex::new(-7_f64, -9_f64)];
420/// let vec_complex = vec![Complex::new(1_f64, 2_f64), Complex::new(-7_f64, -9_f64)];
421/// let cov = covariance(&vec_complex, &array_complex);
422/// assert!(cov.is_some());
423///
424/// assert!(covariance(&[], &[1_f64]).is_none());
425/// ```
426#[allow(clippy::type_repetition_in_bounds)] // false positive
427pub fn covariance<'a, 'b, T, IntoIter1, IntoIter2>(
428    data_1: IntoIter1,
429    data_2: IntoIter2,
430) -> Option<T>
431where
432    T: 'a
433        + 'b
434        + Div<f64, Output = T>
435        + for<'c> std::iter::Sum<&'c T>
436        + std::iter::Sum<T>
437        + Mul<T, Output = T>
438        + Clone
439        + Sub<T, Output = T>,
440    IntoIter1: IntoIterator<Item = &'a T> + Clone,
441    IntoIter1::IntoIter: ExactSizeIterator,
442    IntoIter2: IntoIterator<Item = &'b T> + Clone,
443    IntoIter2::IntoIter: ExactSizeIterator,
444{
445    let iter_1 = data_1.clone().into_iter();
446    let iter_2 = data_2.clone().into_iter();
447    if iter_1.len() == iter_2.len() {
448        let len = iter_1.len();
449        let mean_prod = iter_1
450            .zip(iter_2)
451            .map(|(el1, el2)| el1.clone() * el2.clone())
452            .sum::<T>()
453            / len as f64;
454        Some(mean_prod - mean(data_1) * mean(data_2))
455    }
456    else {
457        None
458    }
459}
460
461#[cfg(test)]
462mod test {
463    use rand::SeedableRng;
464    use rand_distr::Distribution;
465
466    use super::*;
467
468    #[test]
469    fn mean_var() {
470        let a = [1_f64; 100];
471        assert_eq!(mean_and_variance_par_iter(a.par_iter()), [1_f64, 0_f64]);
472        assert_eq!(mean_and_variance(&a), [1_f64, 0_f64]);
473        assert_eq!(mean_par_iter(a.par_iter()), 1_f64);
474        assert_eq!(variance_par_iter(a.par_iter()), 0_f64);
475        assert_eq!(variance(&a), 0_f64);
476        assert_eq!(mean_with_error_par_iter(a.par_iter()), [1_f64, 0_f64]);
477        assert_eq!(mean_with_error(&a), [1_f64, 0_f64]);
478
479        let a = [0_f64, 1_f64, 0_f64, 1_f64];
480        assert_eq!(
481            mean_and_variance_par_iter(a.par_iter()),
482            [0.5_f64, 1_f64 / 3_f64]
483        );
484        assert_eq!(mean_and_variance(&a), [0.5_f64, 1_f64 / 3_f64]);
485        assert_eq!(mean_par_iter(a.par_iter()), 0.5_f64);
486        assert_eq!(variance_par_iter(a.par_iter()), 1_f64 / 3_f64);
487        assert_eq!(variance(&a), 1_f64 / 3_f64);
488        assert_eq!(
489            mean_with_error_par_iter(a.par_iter()),
490            [0.5_f64, (1_f64 / 3_f64 / 4_f64).sqrt()]
491        );
492        assert_eq!(
493            mean_with_error(&a),
494            [0.5_f64, (1_f64 / 3_f64 / 4_f64).sqrt()]
495        );
496
497        assert_eq!(covariance(&[1_f64], &[0_f64, 1_f64]), None);
498        assert_eq!(
499            covariance_par_iter([1_f64].par_iter(), [0_f64, 1_f64].par_iter()),
500            None
501        );
502
503        let mut rng = rand::rngs::StdRng::seed_from_u64(0x45_78_93_f4_4a_b0_67_f0);
504        let d = rand::distributions::Uniform::new(-1_f64, 1_f64);
505        for _ in 0_u32..100_u32 {
506            let mut vec = vec![];
507            for _ in 0_u32..100_u32 {
508                vec.push(d.sample(&mut rng));
509            }
510            let mut vec2 = vec![];
511            for _ in 0_u32..100_u32 {
512                vec2.push(d.sample(&mut rng));
513            }
514            assert!(
515                (mean_and_variance(&vec)[0] - mean_and_variance_par_iter(vec.par_iter())[0]).abs()
516                    < 0.000_000_01_f64
517            );
518            assert!(
519                (mean_and_variance(&vec)[1] - mean_and_variance_par_iter(vec.par_iter())[1]).abs()
520                    < 0.000_000_01_f64
521            );
522            assert!(
523                (covariance(&vec, &vec2).unwrap()
524                    - covariance_par_iter(vec.par_iter(), vec2.par_iter()).unwrap())
525                .abs()
526                    < 0.000_000_01_f64
527            );
528        }
529    }
530}