// average/traits.rs

/// Incrementally estimate a statistic of a population of numbers.
///
/// Observations are fed in one at a time with [`add`](Estimate::add); the
/// current value of the statistic is read with
/// [`estimate`](Estimate::estimate).
pub trait Estimate {
    /// Feed one observation `x`, sampled from the population, into the
    /// estimator.
    fn add(&mut self, x: f64);

    /// Return the estimate of the population's statistic.
    fn estimate(&self) -> f64;
}
9
/// Combine two estimators of the same kind into one.
pub trait Merge {
    /// Fold the state of `other` into `self`.
    ///
    /// Both estimators are assumed to have been fed samples from the same
    /// population.
    ///
    /// # Examples
    ///
    /// Merging makes it possible to compute an estimate in parallel: every
    /// thread fills its own estimator and the partial results are combined
    /// at the end.
    ///
    /// ```
    /// use average::{Estimate, Mean, Merge};
    ///
    /// let data = &[1., 2., 3., 4., 5., 6., 7., 8., 9., 10.];
    ///
    /// let thread1 = std::thread::spawn(move || -> Mean {
    ///     let mut avg = Mean::new();
    ///     for &x in &data[..5] {
    ///         avg.add(x);
    ///     }
    ///     avg
    /// });
    /// let thread2 = std::thread::spawn(move || -> Mean {
    ///     let mut avg = Mean::new();
    ///     for &x in &data[5..] {
    ///         avg.add(x);
    ///     }
    ///     avg
    /// });
    ///
    /// let mut avg = thread1.join().unwrap();
    /// avg.merge(&thread2.join().unwrap());
    /// assert_eq!(avg.mean(), 5.5);
    /// ```
    fn merge(&mut self, other: &Self);
}
43
/// Calculate the multinomial variance. Relevant for histograms.
///
/// `n` is the count of one bin and `n_tot_inv` is the reciprocal of the total
/// count over all bins; the result is `n * (1 - n * n_tot_inv)`.
///
/// An empty bin (`n == 0`) always has a variance of zero. This case is
/// returned explicitly because the total of an empty histogram is zero, which
/// makes `n_tot_inv` infinite, and `0 * inf` would otherwise turn the result
/// into NaN.
#[inline]
fn multinomial_variance(n: f64, n_tot_inv: f64) -> f64 {
    if n == 0. {
        return 0.;
    }
    n * (1. - n * n_tot_inv)
}
49
50/// Get the bins and ranges from a histogram.
51pub trait Histogram
52where
53    for<'a> &'a Self: IntoIterator<Item = ((f64, f64), u64)>,
54{
55    /// Return the bins of the histogram.
56    fn bins(&self) -> &[u64];
57
58    /// Estimate the variance for the given bin.
59    ///
60    /// The square root of this estimates the error of the bin count.
61    #[inline]
62    fn variance(&self, bin: usize) -> f64 {
63        let count = self.bins()[bin];
64        let sum: u64 = self.bins().iter().sum();
65        multinomial_variance(count as f64, 1. / (sum as f64))
66    }
67
68    /// Return an iterator over the bins normalized by the bin widths.
69    #[inline]
70    fn normalized_bins(&self) -> IterNormalized<<&Self as IntoIterator>::IntoIter> {
71        IterNormalized {
72            histogram_iter: self.into_iter(),
73        }
74    }
75
76    /// Return an iterator over the bin widths.
77    #[inline]
78    fn widths(&self) -> IterWidths<<&Self as IntoIterator>::IntoIter> {
79        IterWidths {
80            histogram_iter: self.into_iter(),
81        }
82    }
83
84    /// Return an iterator over the bin centers.
85    #[inline]
86    fn centers(&self) -> IterBinCenters<<&Self as IntoIterator>::IntoIter> {
87        IterBinCenters {
88            histogram_iter: self.into_iter(),
89        }
90    }
91
92    /// Return an iterator over the bin variances.
93    ///
94    /// This is more efficient than calling `variance()` for each bin.
95    #[inline]
96    fn variances(&self) -> IterVariances<<&Self as IntoIterator>::IntoIter> {
97        let sum: u64 = self.bins().iter().sum();
98        IterVariances {
99            histogram_iter: self.into_iter(),
100            sum_inv: 1. / (sum as f64),
101        }
102    }
103}
104
/// Iterate over the bins normalized by bin width.
///
/// Wraps an iterator over `((a, b), count)` items and yields
/// `count / (b - a)` for each of them.
#[derive(Debug, Clone)]
pub struct IterNormalized<T>
where
    T: Iterator<Item = ((f64, f64), u64)>,
{
    /// Underlying iterator over the bin ranges and counts.
    histogram_iter: T,
}

impl<T> Iterator for IterNormalized<T>
where
    T: Iterator<Item = ((f64, f64), u64)>,
{
    type Item = f64;

    #[inline]
    fn next(&mut self) -> Option<f64> {
        self.histogram_iter
            .next()
            .map(|((a, b), count)| (count as f64) / (b - a))
    }

    /// Forward the hint of the wrapped iterator: every bin produces exactly
    /// one value, so the remaining length is the same.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.histogram_iter.size_hint()
    }
}
127
/// Iterate over the widths of the bins.
///
/// Wraps an iterator over `((a, b), count)` items and yields `b - a` for each
/// of them.
#[derive(Debug, Clone)]
pub struct IterWidths<T>
where
    T: Iterator<Item = ((f64, f64), u64)>,
{
    /// Underlying iterator over the bin ranges and counts.
    histogram_iter: T,
}

impl<T> Iterator for IterWidths<T>
where
    T: Iterator<Item = ((f64, f64), u64)>,
{
    type Item = f64;

    #[inline]
    fn next(&mut self) -> Option<f64> {
        self.histogram_iter.next().map(|((a, b), _)| b - a)
    }

    /// Forward the hint of the wrapped iterator: every bin produces exactly
    /// one value, so the remaining length is the same.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.histogram_iter.size_hint()
    }
}
148
/// Iterate over the bin centers.
///
/// Wraps an iterator over `((a, b), count)` items and yields the midpoint
/// `0.5 * (a + b)` for each of them.
#[derive(Debug, Clone)]
pub struct IterBinCenters<T>
where
    T: Iterator<Item = ((f64, f64), u64)>,
{
    /// Underlying iterator over the bin ranges and counts.
    histogram_iter: T,
}

impl<T> Iterator for IterBinCenters<T>
where
    T: Iterator<Item = ((f64, f64), u64)>,
{
    type Item = f64;

    #[inline]
    fn next(&mut self) -> Option<f64> {
        self.histogram_iter.next().map(|((a, b), _)| 0.5 * (a + b))
    }

    /// Forward the hint of the wrapped iterator: every bin produces exactly
    /// one value, so the remaining length is the same.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.histogram_iter.size_hint()
    }
}
169
170/// Iterate over the variances.
171#[derive(Debug, Clone)]
172pub struct IterVariances<T>
173where
174    T: Iterator<Item = ((f64, f64), u64)>,
175{
176    histogram_iter: T,
177    sum_inv: f64,
178}
179
180impl<T> Iterator for IterVariances<T>
181where
182    T: Iterator<Item = ((f64, f64), u64)>,
183{
184    type Item = f64;
185
186    #[inline]
187    fn next(&mut self) -> Option<f64> {
188        self.histogram_iter
189            .next()
190            .map(|(_, n)| multinomial_variance(n as f64, self.sum_inv))
191    }
192}