lattice_qcd_rs/statistics/mod.rs
1//! Provide statistical tools
2
3use std::ops::{Div, Mul, Sub};
4
5use num_traits::Zero;
6use rayon::prelude::*;
7
8pub mod distribution;
9
10pub use distribution::*;
11
12/// Compute the mean from a [`rayon::iter::IndexedParallelIterator`].
13/// It uses the power of the parallel iterator to do the computation
14/// and might give better performance than [`mean`].
15///
16/// Alternatively there is [`mean_par_iter_val`] for parallel iterator
17/// with non reference values.
18/// # Example
19/// ```
20/// use lattice_qcd_rs::statistics::mean_par_iter;
21/// use rayon::prelude::*;
22///
23/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64 /* ... */];
24/// let mean = mean_par_iter(vec.par_iter());
25/// ```
26pub fn mean_par_iter<'a, It, T>(data: It) -> T
27where
28 T: Clone
29 + Div<f64, Output = T>
30 + std::iter::Sum<T>
31 + std::iter::Sum<It::Item>
32 + Send
33 + 'a
34 + Sync,
35 It: IndexedParallelIterator<Item = &'a T>,
36{
37 mean_par_iter_val(data.cloned())
38}
39
40/// Compute the mean from a [`rayon::iter::IndexedParallelIterator`]. If you want
41/// to use reference use [`mean_par_iter`].
42/// It uses the power of the parallel iterator to do the computation and is
43/// particularly useful in combination of a map.
44///
45/// # Example
46/// ```
47/// use lattice_qcd_rs::statistics::mean_par_iter_val;
48/// use rayon::prelude::*;
49///
50/// fn expensive_computation(input: &f64) -> f64 {
51/// input + 1_f64
52/// }
53///
54/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64];
55/// let mean = mean_par_iter_val(vec.par_iter().map(|input| expensive_computation(input)));
56/// ```
57pub fn mean_par_iter_val<It, T>(data: It) -> T
58where
59 T: Clone + Div<f64, Output = T> + std::iter::Sum<T> + std::iter::Sum<It::Item> + Send,
60 It: IndexedParallelIterator<Item = T>,
61{
62 let len = data.len();
63 let mean: T = data.sum();
64 mean / len as f64
65}
66
67/// Compute the variance (squared of standard deviation) from
68/// a [`rayon::iter::IndexedParallelIterator`].
69///
70/// The alternative for iterator that yield non reference is [`variance_par_iter_val`].
71/// # Example
72/// ```
73/// use lattice_qcd_rs::statistics::variance_par_iter;
74/// use rayon::prelude::*;
75///
76/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64 /* ... */];
77/// let variance = variance_par_iter(vec.par_iter());
78/// ```
79pub fn variance_par_iter<'a, It, T>(data: It) -> T
80where
81 T: Clone
82 + Div<f64, Output = T>
83 + std::iter::Sum<T>
84 + std::iter::Sum<It::Item>
85 + Send
86 + Sync
87 + 'a
88 + Sub<T, Output = T>
89 + Mul<T, Output = T>
90 + Zero,
91 It: IndexedParallelIterator<Item = &'a T> + Clone,
92{
93 variance_par_iter_val(data.cloned())
94}
95
96/// Compute the variance (squared of standard deviation) from
97/// a [`rayon::iter::IndexedParallelIterator`] by value.
98///
99/// The alternative for the variance from a iterator that yields reference
100/// is [`variance_par_iter`].
101/// # Example
102/// ```
103/// use lattice_qcd_rs::statistics::variance_par_iter_val;
104/// use rayon::prelude::*;
105///
106/// fn expensive_computation(input: &f64) -> f64 {
107/// input * 2_f64
108/// }
109///
110/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64];
111/// let mean = variance_par_iter_val(vec.par_iter().map(|input| expensive_computation(input)));
112/// ```
113pub fn variance_par_iter_val<It, T>(data: It) -> T
114where
115 T: Clone
116 + Div<f64, Output = T>
117 + std::iter::Sum<T>
118 + std::iter::Sum<It::Item>
119 + Send
120 + Sub<T, Output = T>
121 + Mul<T, Output = T>
122 + Zero,
123 It: IndexedParallelIterator<Item = T> + Clone,
124{
125 let [_, variance] = mean_and_variance_par_iter_val(data);
126 variance
127}
128
129/// Compute the mean and variance (squared of standard deviation) from
130/// a [`rayon::iter::IndexedParallelIterator`].
131/// Provides better performance than computing the mean and variation separately
132/// as this method consume the iterator only once.
133///
134/// The alternative for iterators returning non-references
135/// is [`mean_and_variance_par_iter_val`]
136/// # Examples
137/// see the example of [`mean_par_iter`] and [`variance_par_iter`].
138pub fn mean_and_variance_par_iter<'a, It, T>(data: It) -> [T; 2]
139where
140 T: Clone
141 + Div<f64, Output = T>
142 + std::iter::Sum<T>
143 + std::iter::Sum<It::Item>
144 + Send
145 + Sync
146 + 'a
147 + Sub<T, Output = T>
148 + Mul<T, Output = T>
149 + Zero,
150 It: IndexedParallelIterator<Item = &'a T> + Clone,
151{
152 mean_and_variance_par_iter_val(data.cloned())
153}
154
155/// Compute the mean and variance (squared of standard deviation) from
156/// a [`rayon::iter::IndexedParallelIterator`] by value.
157/// Provides better performance than computing the mean and variation separately as
158/// this method consume the iterator only once.
159///
160/// The alternative for iterators returning references is [`mean_and_variance_par_iter`].
161/// # Example
162/// see the example of [`mean_par_iter_val`] and [`variance_par_iter_val`].
163pub fn mean_and_variance_par_iter_val<It, T>(data: It) -> [T; 2]
164where
165 T: Clone
166 + Div<f64, Output = T>
167 + std::iter::Sum<T>
168 + std::iter::Sum<It::Item>
169 + Send
170 + Sub<T, Output = T>
171 + Mul<T, Output = T>
172 + Zero,
173 It: IndexedParallelIterator<Item = T> + Clone,
174{
175 let len = data.len();
176 let (mean, mean_sqrt) = data
177 .map(|el| (el.clone(), el.clone() * el))
178 .reduce(|| (T::zero(), T::zero()), |a, b| (a.0 + b.0, a.1 + b.1));
179 let var = (mean_sqrt - mean.clone() * mean.clone() / (len as f64)) / (len - 1) as f64;
180 [mean / len as f64, var]
181}
182
183/// Computes the mean the statistical error on this value
184/// a [`rayon::iter::IndexedParallelIterator`].
185///
186/// The statistical error is defined by `sqrt(variance / len)`.
187///
188/// The alternative for iterators returning non-references is [`mean_with_error_par_iter_val`].
189pub fn mean_with_error_par_iter<'a, It: IndexedParallelIterator<Item = &'a f64> + Clone>(
190 data: It,
191) -> [f64; 2] {
192 mean_with_error_par_iter_val(data.cloned())
193}
194
195/// Computes the mean the statistical error on this value
196/// a [`rayon::iter::IndexedParallelIterator`] by value.
197///
198/// The statistical error is defined by `sqrt(variance / len)`.
199///
200/// The alternative for iterators returning references is [`mean_with_error_par_iter`]
201pub fn mean_with_error_par_iter_val<It: IndexedParallelIterator<Item = f64> + Clone>(
202 data: It,
203) -> [f64; 2] {
204 let len = data.len();
205 let [mean, variance] = mean_and_variance_par_iter_val(data);
206 [mean, (variance / len as f64).sqrt()]
207}
208
209/// Computes the covariance between two [`rayon::iter::IndexedParallelIterator`].
210/// Returns [`None`] if the par iters are not of the same length.
211///
212/// The alternative for iterators returning references is [`covariance_par_iter_val`].
213/// # Example
214/// ```
215/// use lattice_qcd_rs::statistics::covariance_par_iter;
216/// use rayon::prelude::*;
217///
218/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64];
219/// let vec_2 = vec![1_f64, 2_f64, 3_f64];
220///
221/// let cov = covariance_par_iter(vec.par_iter(), vec_2.par_iter());
222/// assert!(cov.is_none());
223///
224/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64];
225/// let vec_2 = vec![1_f64, 2_f64, 3_f64, 4_f64];
226///
227/// let cov = covariance_par_iter(vec.par_iter(), vec_2.par_iter());
228/// assert_eq!(cov, Some(1.25_f64));
229/// ```
230pub fn covariance_par_iter<'a, It1, It2, T>(data_1: It1, data_2: It2) -> Option<T>
231where
232 T: 'a
233 + Clone
234 + Div<f64, Output = T>
235 + std::iter::Sum<T>
236 + std::iter::Sum<It1::Item>
237 + Send
238 + Sync
239 + Mul<T, Output = T>
240 + Sub<T, Output = T>,
241 It1: IndexedParallelIterator<Item = &'a T> + Clone,
242 It2: IndexedParallelIterator<Item = &'a T> + Clone,
243 T: Zero,
244{
245 covariance_par_iter_val(data_1.cloned(), data_2.cloned())
246}
247
248/// Computes the covariance between two [rayon::iter::IndexedParallelIterator] by value.
249/// Returns `None` if the par iters are not of the same length.
250///
251/// The alternative for iterators returning references is [`covariance_par_iter`].
252/// # Example
253/// ```
254/// use lattice_qcd_rs::statistics::covariance_par_iter_val;
255/// use rayon::prelude::*;
256///
257/// fn expensive_computation(input: &f64) -> f64 {
258/// input + 1_f64
259/// }
260///
261/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64];
262/// let vec_2 = vec![1_f64, 2_f64, 3_f64];
263///
264/// let cov = covariance_par_iter_val(
265/// vec.par_iter().map(|input| expensive_computation(input)),
266/// vec_2.par_iter().map(|input| expensive_computation(input)),
267/// );
268/// assert!(cov.is_none());
269///
270/// let vec = vec![1_f64, 1_f64, 1_f64, 1_f64];
271/// let vec_2 = vec![1_f64, 1_f64, 1_f64, 1_f64];
272///
273/// let cov = covariance_par_iter_val(
274/// vec.par_iter().map(|input| expensive_computation(input)),
275/// vec_2.par_iter().map(|input| expensive_computation(input)),
276/// );
277/// assert_eq!(cov, Some(0_f64));
278/// ```
279pub fn covariance_par_iter_val<It1, It2, T>(data_1: It1, data_2: It2) -> Option<T>
280where
281 T: Clone
282 + Div<f64, Output = T>
283 + std::iter::Sum<T>
284 + std::iter::Sum<It1::Item>
285 + Send
286 + Mul<T, Output = T>
287 + Sub<T, Output = T>,
288 It1: IndexedParallelIterator<Item = T> + Clone,
289 It2: IndexedParallelIterator<Item = T> + Clone,
290 T: Zero,
291{
292 if data_1.len() == data_2.len() {
293 let len = data_1.len() as f64;
294 let r = data_1
295 .zip(data_2)
296 .map(|(el_1, el_2)| (el_1.clone(), el_2.clone(), el_1 * el_2))
297 .reduce(
298 || (T::zero(), T::zero(), T::zero()),
299 |a, b| (a.0 + b.0, a.1 + b.1, a.2 + b.2),
300 );
301 Some((r.2 - r.0 * r.1 / len) / len)
302 }
303 else {
304 None
305 }
306}
307
308/// compute the mean from a collections
309/// # Example
310/// ```
311/// use lattice_qcd_rs::statistics::mean;
312/// use nalgebra::Complex;
313///
314/// mean(&[1_f64, 2_f64, 3_f64, 4_f64]);
315/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64];
316/// mean(&vec);
317/// let vec_complex = vec![Complex::new(1_f64, 2_f64), Complex::new(-7_f64, -9_f64)];
318/// mean(&vec_complex);
319/// ```
320#[allow(clippy::type_repetition_in_bounds)] // false positive
321pub fn mean<'a, T, IntoIter>(data: IntoIter) -> T
322where
323 T: Div<f64, Output = T> + std::iter::Sum<&'a T> + 'a,
324 IntoIter: IntoIterator<Item = &'a T>,
325 IntoIter::IntoIter: ExactSizeIterator,
326{
327 let iter = data.into_iter();
328 let len = iter.len() as f64;
329 let mean: T = iter.sum();
330 mean / len
331}
332
333/// compute the variance (squared of standard deviation) from a collections
334/// # Example
335/// ```
336/// use lattice_qcd_rs::statistics::variance;
337/// use nalgebra::Complex;
338///
339/// variance(&[1_f64, 2_f64, 3_f64, 4_f64]);
340/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64];
341/// variance(&vec);
342/// let vec_complex = vec![Complex::new(1_f64, 2_f64), Complex::new(-7_f64, -9_f64)];
343/// variance(&vec_complex);
344/// ```
345#[allow(clippy::type_repetition_in_bounds)] // false positive
346pub fn variance<'a, T, IntoIter>(data: IntoIter) -> T
347where
348 T: 'a
349 + Div<f64, Output = T>
350 + std::iter::Sum<&'a T>
351 + std::iter::Sum<T>
352 + Mul<T, Output = T>
353 + Clone
354 + Sub<T, Output = T>,
355 IntoIter: IntoIterator<Item = &'a T> + Clone,
356 IntoIter::IntoIter: ExactSizeIterator,
357{
358 let [_, variance] = mean_and_variance(data);
359 variance
360}
361
362/// Compute the mean and variance (squared of standard deviation) from a collection.
363/// # Example
364/// ```
365/// use lattice_qcd_rs::statistics::mean_and_variance;
366/// use nalgebra::Complex;
367///
368/// mean_and_variance(&[1_f64, 2_f64, 3_f64, 4_f64]);
369/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64];
370/// mean_and_variance(&vec);
371/// let vec_complex = vec![Complex::new(1_f64, 2_f64), Complex::new(-7_f64, -9_f64)];
372/// mean_and_variance(&vec_complex);
373/// ```
374#[allow(clippy::type_repetition_in_bounds)] // false positive
375pub fn mean_and_variance<'a, T, IntoIter>(data: IntoIter) -> [T; 2]
376where
377 T: 'a
378 + Div<f64, Output = T>
379 + std::iter::Sum<&'a T>
380 + std::iter::Sum<T>
381 + Mul<T, Output = T>
382 + Clone
383 + Sub<T, Output = T>,
384 IntoIter: IntoIterator<Item = &'a T> + Clone,
385 IntoIter::IntoIter: ExactSizeIterator,
386{
387 // often data is just a reference so cloning it is not a big deal
388 let mean = mean(data.clone());
389 let iter = data.into_iter();
390 let len = iter.len();
391 let variance = iter
392 .map(|el| (el.clone() - mean.clone()) * (el.clone() - mean.clone()))
393 .sum::<T>()
394 / (len - 1) as f64;
395 [mean, variance]
396}
397
398/// compute the mean the statistical error on this value a slice.
399///
400/// The statistical error is defined by `sqrt(variance / len)`.
401pub fn mean_with_error(data: &[f64]) -> [f64; 2] {
402 let len = data.len();
403 let [mean, variance] = mean_and_variance(data);
404 [mean, (variance / len as f64).sqrt()]
405}
406
407/// compute the covariance between two slices.
408/// Return `None` if the slices are not of the same length
409/// # Example
410/// ```
411/// use lattice_qcd_rs::statistics::covariance;
412/// use nalgebra::Complex;
413///
414/// let vec = vec![1_f64, 2_f64, 3_f64, 4_f64];
415/// let array = [1_f64, 2_f64, 3_f64, 4_f64];
416/// let cov = covariance(&array, &vec);
417/// assert!(cov.is_some());
418///
419/// let array_complex = [Complex::new(1_f64, 2_f64), Complex::new(-7_f64, -9_f64)];
420/// let vec_complex = vec![Complex::new(1_f64, 2_f64), Complex::new(-7_f64, -9_f64)];
421/// let cov = covariance(&vec_complex, &array_complex);
422/// assert!(cov.is_some());
423///
424/// assert!(covariance(&[], &[1_f64]).is_none());
425/// ```
426#[allow(clippy::type_repetition_in_bounds)] // false positive
427pub fn covariance<'a, 'b, T, IntoIter1, IntoIter2>(
428 data_1: IntoIter1,
429 data_2: IntoIter2,
430) -> Option<T>
431where
432 T: 'a
433 + 'b
434 + Div<f64, Output = T>
435 + for<'c> std::iter::Sum<&'c T>
436 + std::iter::Sum<T>
437 + Mul<T, Output = T>
438 + Clone
439 + Sub<T, Output = T>,
440 IntoIter1: IntoIterator<Item = &'a T> + Clone,
441 IntoIter1::IntoIter: ExactSizeIterator,
442 IntoIter2: IntoIterator<Item = &'b T> + Clone,
443 IntoIter2::IntoIter: ExactSizeIterator,
444{
445 let iter_1 = data_1.clone().into_iter();
446 let iter_2 = data_2.clone().into_iter();
447 if iter_1.len() == iter_2.len() {
448 let len = iter_1.len();
449 let mean_prod = iter_1
450 .zip(iter_2)
451 .map(|(el1, el2)| el1.clone() * el2.clone())
452 .sum::<T>()
453 / len as f64;
454 Some(mean_prod - mean(data_1) * mean(data_2))
455 }
456 else {
457 None
458 }
459}
460
461#[cfg(test)]
462mod test {
463 use rand::SeedableRng;
464 use rand_distr::Distribution;
465
466 use super::*;
467
468 #[test]
469 fn mean_var() {
470 let a = [1_f64; 100];
471 assert_eq!(mean_and_variance_par_iter(a.par_iter()), [1_f64, 0_f64]);
472 assert_eq!(mean_and_variance(&a), [1_f64, 0_f64]);
473 assert_eq!(mean_par_iter(a.par_iter()), 1_f64);
474 assert_eq!(variance_par_iter(a.par_iter()), 0_f64);
475 assert_eq!(variance(&a), 0_f64);
476 assert_eq!(mean_with_error_par_iter(a.par_iter()), [1_f64, 0_f64]);
477 assert_eq!(mean_with_error(&a), [1_f64, 0_f64]);
478
479 let a = [0_f64, 1_f64, 0_f64, 1_f64];
480 assert_eq!(
481 mean_and_variance_par_iter(a.par_iter()),
482 [0.5_f64, 1_f64 / 3_f64]
483 );
484 assert_eq!(mean_and_variance(&a), [0.5_f64, 1_f64 / 3_f64]);
485 assert_eq!(mean_par_iter(a.par_iter()), 0.5_f64);
486 assert_eq!(variance_par_iter(a.par_iter()), 1_f64 / 3_f64);
487 assert_eq!(variance(&a), 1_f64 / 3_f64);
488 assert_eq!(
489 mean_with_error_par_iter(a.par_iter()),
490 [0.5_f64, (1_f64 / 3_f64 / 4_f64).sqrt()]
491 );
492 assert_eq!(
493 mean_with_error(&a),
494 [0.5_f64, (1_f64 / 3_f64 / 4_f64).sqrt()]
495 );
496
497 assert_eq!(covariance(&[1_f64], &[0_f64, 1_f64]), None);
498 assert_eq!(
499 covariance_par_iter([1_f64].par_iter(), [0_f64, 1_f64].par_iter()),
500 None
501 );
502
503 let mut rng = rand::rngs::StdRng::seed_from_u64(0x45_78_93_f4_4a_b0_67_f0);
504 let d = rand::distributions::Uniform::new(-1_f64, 1_f64);
505 for _ in 0_u32..100_u32 {
506 let mut vec = vec![];
507 for _ in 0_u32..100_u32 {
508 vec.push(d.sample(&mut rng));
509 }
510 let mut vec2 = vec![];
511 for _ in 0_u32..100_u32 {
512 vec2.push(d.sample(&mut rng));
513 }
514 assert!(
515 (mean_and_variance(&vec)[0] - mean_and_variance_par_iter(vec.par_iter())[0]).abs()
516 < 0.000_000_01_f64
517 );
518 assert!(
519 (mean_and_variance(&vec)[1] - mean_and_variance_par_iter(vec.par_iter())[1]).abs()
520 < 0.000_000_01_f64
521 );
522 assert!(
523 (covariance(&vec, &vec2).unwrap()
524 - covariance_par_iter(vec.par_iter(), vec2.par_iter()).unwrap())
525 .abs()
526 < 0.000_000_01_f64
527 );
528 }
529 }
530}