1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
/// Estimate a statistic of a sequence of numbers ("population").
///
/// An implementor is fed observations one at a time through
/// [`add`](Estimate::add) and reports the current value of the statistic
/// through [`estimate`](Estimate::estimate).
pub trait Estimate {
/// Add an observation sampled from the population.
///
/// `x` is the observed value. The method takes `&mut self` so that the
/// implementor can update its internal state.
fn add(&mut self, x: f64);
/// Estimate the statistic of the population.
///
/// Borrows the estimator immutably and returns the estimate as an `f64`.
fn estimate(&self) -> f64;
}
/// Merge with another estimator.
///
/// Implemented by estimators whose state can be combined with the state of
/// another estimator of the same type.
pub trait Merge {
/// Merge the other estimator into this one.
///
/// `self` is updated in place; `other` is only borrowed.
///
/// Both estimators are assumed to be fed samples from the same population.
///
/// This method is useful for parallelizing the calculation of estimates:
/// each worker accumulates its own estimator over a part of the data, and
/// the partial results are merged afterwards.
/// ```
/// use average::{Estimate, Mean, Merge};
///
/// let data = &[1., 2., 3., 4., 5., 6., 7., 8., 9., 10.];
///
/// // Each thread averages one half of the data.
/// let thread1 = std::thread::spawn(move || -> Mean {
///     let mut avg = Mean::new();
///     for &x in &data[..5] {
///         avg.add(x);
///     }
///     avg
/// });
/// let thread2 = std::thread::spawn(move || -> Mean {
///     let mut avg = Mean::new();
///     for &x in &data[5..] {
///         avg.add(x);
///     }
///     avg
/// });
///
/// // Combine the two partial results into one estimate.
/// let mut avg = thread1.join().unwrap();
/// avg.merge(&thread2.join().unwrap());
/// assert_eq!(avg.mean(), 5.5);
/// ```
fn merge(&mut self, other: &Self);
}
/// Multinomial variance of a single bin count, as used for histogram errors.
///
/// Evaluates `n * (1 - n * n_tot_inv)`, where `n` is the count in the bin and
/// `n_tot_inv` is the reciprocal of the total count. The reciprocal is taken
/// as an argument so that callers can compute it once and reuse it for every
/// bin.
#[inline(always)]
fn multinomial_variance(n: f64, n_tot_inv: f64) -> f64 {
    // Fraction of all observations that fell into this bin.
    let fraction = n * n_tot_inv;
    let complement = 1. - fraction;
    n * complement
}
/// Access the bin counts of a histogram and quantities derived from them.
///
/// Implementors supply the raw counts through [`bins`](Histogram::bins).
/// In addition, a reference to the histogram must be iterable and yield
/// `((a, b), count)` items, i.e. the range of a bin paired with its count.
/// All other methods are provided on top of these two building blocks.
pub trait Histogram
where
    for<'a> &'a Self: IntoIterator<Item = ((f64, f64), u64)>,
{
    /// Return the counts of all bins of the histogram.
    fn bins(&self) -> &[u64];

    /// Estimate the variance of the count in the bin with index `bin`.
    ///
    /// The square root of the returned value estimates the error of the bin
    /// count. If the total count over all bins is zero, the result is NaN.
    ///
    /// # Panics
    ///
    /// Panics if `bin` is not a valid index into [`bins`](Histogram::bins).
    #[inline]
    fn variance(&self, bin: usize) -> f64 {
        let n = self.bins()[bin] as f64;
        let total = self.bins().iter().sum::<u64>();
        let total_inv = 1. / (total as f64);
        multinomial_variance(n, total_inv)
    }

    /// Return an iterator over the bin counts divided by the bin widths.
    #[inline]
    fn normalized_bins(&self) -> IterNormalized<<&Self as IntoIterator>::IntoIter> {
        let histogram_iter = self.into_iter();
        IterNormalized { histogram_iter }
    }

    /// Return an iterator over the widths of the bins.
    #[inline]
    fn widths(&self) -> IterWidths<<&Self as IntoIterator>::IntoIter> {
        let histogram_iter = self.into_iter();
        IterWidths { histogram_iter }
    }

    /// Return an iterator over the centers of the bins.
    #[inline]
    fn centers(&self) -> IterBinCenters<<&Self as IntoIterator>::IntoIter> {
        let histogram_iter = self.into_iter();
        IterBinCenters { histogram_iter }
    }

    /// Return an iterator over the variances of all bins.
    ///
    /// The total count is summed only once here, which makes this more
    /// efficient than calling `variance()` separately for each bin.
    #[inline]
    fn variances(&self) -> IterVariances<<&Self as IntoIterator>::IntoIter> {
        let total = self.bins().iter().sum::<u64>();
        let sum_inv = 1. / (total as f64);
        IterVariances {
            histogram_iter: self.into_iter(),
            sum_inv,
        }
    }
}
/// Iterate over the bins normalized by bin width.
///
/// Wraps an iterator over `((a, b), count)` items and yields
/// `count / (b - a)` for each of them.
#[derive(Debug, Clone)]
pub struct IterNormalized<T>
where
    T: Iterator<Item = ((f64, f64), u64)>,
{
    /// Underlying iterator over `((a, b), count)` items.
    histogram_iter: T,
}

impl<T> Iterator for IterNormalized<T>
where
    T: Iterator<Item = ((f64, f64), u64)>,
{
    type Item = f64;

    /// Yield the next count divided by the width `b - a` of its bin.
    #[inline]
    fn next(&mut self) -> Option<f64> {
        self.histogram_iter
            .next()
            .map(|((a, b), count)| (count as f64) / (b - a))
    }

    /// Forward the hint of the wrapped iterator.
    ///
    /// Exactly one value is produced per underlying item, so the inner bounds
    /// hold unchanged and consumers such as `collect()` can pre-allocate.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.histogram_iter.size_hint()
    }
}
/// Iterate over the widths of the bins.
///
/// Wraps an iterator over `((a, b), count)` items and yields `b - a` for each
/// of them; the count is ignored.
#[derive(Debug, Clone)]
pub struct IterWidths<T>
where
    T: Iterator<Item = ((f64, f64), u64)>,
{
    /// Underlying iterator over `((a, b), count)` items.
    histogram_iter: T,
}

impl<T> Iterator for IterWidths<T>
where
    T: Iterator<Item = ((f64, f64), u64)>,
{
    type Item = f64;

    /// Yield the width `b - a` of the next bin.
    #[inline]
    fn next(&mut self) -> Option<f64> {
        self.histogram_iter.next().map(|((a, b), _)| b - a)
    }

    /// Forward the hint of the wrapped iterator.
    ///
    /// Exactly one value is produced per underlying item, so the inner bounds
    /// hold unchanged and consumers such as `collect()` can pre-allocate.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.histogram_iter.size_hint()
    }
}
/// Iterate over the bin centers.
///
/// Wraps an iterator over `((a, b), count)` items and yields the midpoint
/// `0.5 * (a + b)` for each of them; the count is ignored.
#[derive(Debug, Clone)]
pub struct IterBinCenters<T>
where
    T: Iterator<Item = ((f64, f64), u64)>,
{
    /// Underlying iterator over `((a, b), count)` items.
    histogram_iter: T,
}

impl<T> Iterator for IterBinCenters<T>
where
    T: Iterator<Item = ((f64, f64), u64)>,
{
    type Item = f64;

    /// Yield the midpoint of the next bin.
    #[inline]
    fn next(&mut self) -> Option<f64> {
        self.histogram_iter.next().map(|((a, b), _)| 0.5 * (a + b))
    }

    /// Forward the hint of the wrapped iterator.
    ///
    /// Exactly one value is produced per underlying item, so the inner bounds
    /// hold unchanged and consumers such as `collect()` can pre-allocate.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.histogram_iter.size_hint()
    }
}
/// Iterate over the variances.
///
/// Wraps an iterator over `((a, b), count)` items and yields the multinomial
/// variance of each count; the bin range is ignored.
#[derive(Debug, Clone)]
pub struct IterVariances<T>
where
    T: Iterator<Item = ((f64, f64), u64)>,
{
    /// Underlying iterator over `((a, b), count)` items.
    histogram_iter: T,
    /// Reciprocal of the total count, passed to `multinomial_variance` for
    /// every bin.
    sum_inv: f64,
}

impl<T> Iterator for IterVariances<T>
where
    T: Iterator<Item = ((f64, f64), u64)>,
{
    type Item = f64;

    /// Yield the multinomial variance of the next bin count.
    #[inline]
    fn next(&mut self) -> Option<f64> {
        let sum_inv = self.sum_inv;
        self.histogram_iter
            .next()
            .map(|(_, n)| multinomial_variance(n as f64, sum_inv))
    }

    /// Forward the hint of the wrapped iterator.
    ///
    /// Exactly one value is produced per underlying item, so the inner bounds
    /// hold unchanged and consumers such as `collect()` can pre-allocate.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.histogram_iter.size_hint()
    }
}