stats_ci/
mean.rs

1//!
2//! Confidence intervals over the mean (arithmetic, geometric, harmonic) of a given sample.
3//!
4//! The premise on which confidence intervals are computed is that the sample data is a random
5//! sample from a population following some (unknown) distribution. The confidence interval
6//! is computed from the sample data, and is an estimate of the true mean of the population.
7//!
8//! Unlike what is sometimes stated, the population does not need to be normally distributed.
9//! However, it is assumed that the __standard error__ of the sample mean is normally distributed
10//! (or close to it).
11//! This is true for most distributions (especially symmetrical ones), and is guaranteed by
12//! the central limit theorem as the size of the sample data grows large.
13//!
14//! The calculations use Student's t distribution almost regardless of sample size (until
15//! a size of 100'000). This provides more conservative (and accurate intervals) than the
16//! normal distribution when the number of samples is small, and asymptotically approaches
17//! the normal distribution as the number of samples increases. This compensates for the
18//! fact that the central limit theorem applies only asymptotically.
19//!
20//! # Assumptions
21//!
22//! The following assumptions are made:
23//!
24//! * The sample data is a random sample from a population following some (unknown) distribution.
25//! * The sample data is independent and identically distributed (iid).
26//! * The standard approaches a normal distribution.
27//! * For geometric / harmonic means, the sample data is strictly positive.
28//!
29//! # Examples
30//!
31//! Confidence intervals on the arithmetic mean of a sample:
32//! ```
33//! use stats_ci::*;
34//! let data = [
35//!     82., 94., 68., 6., 39., 80., 10., 97., 34., 66., 62., 7., 39., 68., 93., 64., 10., 74.,
36//!     15., 34., 4., 48., 88., 94., 17., 99., 81., 37., 68., 66., 40., 23., 67., 72., 63.,
37//!     71., 18., 51., 65., 87., 12., 44., 89., 67., 28., 86., 62., 22., 90., 18., 50., 25.,
38//!     98., 24., 61., 62., 86., 100., 96., 27., 36., 82., 90., 55., 26., 38., 97., 73., 16.,
39//!     49., 23., 26., 55., 26., 3., 23., 47., 27., 58., 27., 97., 32., 29., 56., 28., 23.,
40//!     37., 72., 62., 77., 63., 100., 40., 84., 77., 39., 71., 61., 17., 77.,
41//! ];
42//! let confidence = Confidence::new_two_sided(0.95);
43//! let stats = mean::Arithmetic::from_iter(&data)?;
44//! // reference values computed in python / numpy
45//! use approx::*;
46//! assert_abs_diff_eq!(stats.sample_mean(), 53.67, epsilon = 1e-6);
47//! assert_abs_diff_eq!(stats.sample_std_dev(), 28.097613040716798, epsilon = 1e-6);
48//! assert_abs_diff_eq!(stats.ci_mean(confidence)?, Interval::new(48.094823990767836, 59.24517600923217)?, epsilon = 1e-6);
49//! # Ok::<(),error::CIError>(())
50//! ```
51//!
52//! Confidence intervals on the geometric mean of a sample:
53//! ```
54//! # use stats_ci::*;
55//! # let data = [
56//! #    82., 94., 68., 6., 39., 80., 10., 97., 34., 66., 62., 7., 39., 68., 93., 64., 10., 74.,
57//! #    15., 34., 4., 48., 88., 94., 17., 99., 81., 37., 68., 66., 40., 23., 67., 72., 63.,
58//! #    71., 18., 51., 65., 87., 12., 44., 89., 67., 28., 86., 62., 22., 90., 18., 50., 25.,
59//! #    98., 24., 61., 62., 86., 100., 96., 27., 36., 82., 90., 55., 26., 38., 97., 73., 16.,
60//! #    49., 23., 26., 55., 26., 3., 23., 47., 27., 58., 27., 97., 32., 29., 56., 28., 23.,
61//! #    37., 72., 62., 77., 63., 100., 40., 84., 77., 39., 71., 61., 17., 77.,
62//! # ];
63//! # let confidence = Confidence::new_two_sided(0.95);
64//! let stats = mean::Geometric::from_iter(&data)?;
65//! // reference values computed in python / numpy
66//! use approx::*;
67//! assert_abs_diff_eq!(stats.sample_mean(), 43.7268032829256, epsilon = 1e-6);
68//! assert_abs_diff_eq!(stats.ci_mean(confidence)?, Interval::new(37.731050052224354, 50.67532768627392)?, epsilon = 1e-6);
69//! # Ok::<(),error::CIError>(())
70//! ```
71//!
72//! Confidence intervals on the harmonic mean of a sample:
73//! ```
74//! # use stats_ci::*;
75//! # let data = [
76//! #    82., 94., 68., 6., 39., 80., 10., 97., 34., 66., 62., 7., 39., 68., 93., 64., 10., 74.,
77//! #    15., 34., 4., 48., 88., 94., 17., 99., 81., 37., 68., 66., 40., 23., 67., 72., 63.,
78//! #    71., 18., 51., 65., 87., 12., 44., 89., 67., 28., 86., 62., 22., 90., 18., 50., 25.,
79//! #    98., 24., 61., 62., 86., 100., 96., 27., 36., 82., 90., 55., 26., 38., 97., 73., 16.,
80//! #    49., 23., 26., 55., 26., 3., 23., 47., 27., 58., 27., 97., 32., 29., 56., 28., 23.,
81//! #    37., 72., 62., 77., 63., 100., 40., 84., 77., 39., 71., 61., 17., 77.,
82//! # ];
83//! # let confidence = Confidence::new_two_sided(0.95);
84//! let stats = mean::Harmonic::from_iter(&data)?;
85//! // reference values computed in python / numpy
86//! use approx::*;
87//! assert_abs_diff_eq!(stats.sample_mean(), 30.031313156339586, epsilon = 1e-6);
88//! assert_abs_diff_eq!(stats.ci_mean(confidence)?, Interval::new(23.614092539657168, 41.237860649168255)?, epsilon = 1e-6);
89//! # Ok::<(),error::CIError>(())
90//! ```
91//!
92use super::*;
93use crate::utils;
94
95use error::*;
96use num_traits::Float;
97
98///
99/// Trait for incremental statistics.
100/// This trait is implemented for the following statistics:
101/// - [`mean::Arithmetic`] for arithmetic calculations
102/// - [`mean::Geometric`] for geometric calculations (logarithmic space)
103/// - [`mean::Harmonic`] for harmonic calculations (reciprocal space)
104///
105/// # Example
106/// ```
107/// use stats_ci::*;
108/// let data = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.];
109/// let stats = mean::Arithmetic::from_iter(&data)?;
110/// assert_eq!(stats.sample_count(), 10);
111/// assert_eq!(stats.sample_mean(), 5.5);
112/// assert_abs_diff_eq!(stats.sample_sem(), 1.0092, epsilon = 1e-4);
113/// let confidence = Confidence::new_two_sided(0.95);
114/// let ci = stats.ci_mean(confidence)?;
115/// # use approx::*;
116/// assert_abs_diff_eq!(ci, Interval::new(3.3341, 7.6659)?, epsilon = 1e-4);
117/// # Ok::<(),error::CIError>(())
118/// ```
119pub trait StatisticsOps<F: Float>: Default {
120    ///
121    /// Create a new state and "populates" it with data from an iterator
122    ///
123    /// Complexity: \\( O(n) \\), where \\( n \\) is the number of elements in `data`
124    ///
125    /// # Arguments
126    ///
127    /// * `data` - The data to populate the state with
128    ///
129    /// # Errors
130    ///
131    /// * [`CIError::NonPositiveValue`] - If the input data contains non-positive values when computing harmonic/geometric means.
132    ///
133    /// # Example
134    /// ```
135    /// # use approx::*;
136    /// use stats_ci::*;
137    /// let data = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.];
138    /// let stats = mean::Arithmetic::from_iter(&data)?;
139    /// assert_eq!(stats.sample_count(), 10);
140    /// assert_eq!(stats.sample_mean(), 5.5);
141    /// assert_abs_diff_eq!(stats.sample_sem(), 1.0092, epsilon = 1e-4);
142    /// # Ok::<(),error::CIError>(())
143    /// ```
144    ///
145    /// # Note
146    ///
147    /// This is simply a shortcut for [`Default::default`] and [`Self::extend`]:
148    /// ```
149    /// # use stats_ci::*;
150    /// # let data = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.];
151    /// let stats = mean::Arithmetic::new().extend(&data)?;
152    /// # Ok::<(),error::CIError>(())
153    /// ```
154    ///
155    fn from_iter<I>(data: &I) -> CIResult<Self>
156    where
157        for<'a> &'a I: IntoIterator<Item = &'a F>,
158    {
159        let mut stats = Self::default();
160        stats.extend(data)?;
161        Ok(stats)
162    }
163
164    ///
165    /// Mean of the sample
166    ///
167    /// Complexity: \\( O(1) \\)
168    ///
169    fn sample_mean(&self) -> F;
170
171    ///
172    /// Standard error of the sample mean
173    ///
174    /// Complexity: \\( O(1) \\)
175    ///
176    fn sample_sem(&self) -> F;
177
178    ///
179    /// Number of samples
180    ///
181    /// Complexity: \\( O(1) \\)
182    ///
183    fn sample_count(&self) -> usize;
184
185    ///
186    /// Confidence interval of the sample mean
187    ///
188    /// Complexity: \\( O(1) \\)
189    ///
190    fn ci_mean(&self, confidence: Confidence) -> CIResult<Interval<F>>;
191
192    ///
193    /// Append a new sample to the data
194    ///
195    /// Complexity: \\( O(1) \\)
196    ///
197    fn append(&mut self, x: F) -> CIResult<()>;
198
199    ///
200    /// Extend the data with additional sample data.
201    ///
202    /// This is equivalent to calling [`Self::append`] for each value in `data`.
203    ///
204    /// Complexity: \\( O(n) \\), where \\( n \\) is the number of elements in `data`
205    ///
206    /// # Arguments
207    ///
208    /// * `data` - The data to append as an array or an iterator
209    ///
210    /// # Output
211    ///
212    /// * `Ok(())` - If the data was successfully appended
213    ///
214    /// # Errors
215    ///
216    /// * [`CIError::NonPositiveValue`] - If the input data is invalid (for harmonic/geometric means).
217    ///
218    fn extend<I>(&mut self, data: &I) -> CIResult<()>
219    where
220        for<'a> &'a I: IntoIterator<Item = &'a F>,
221    {
222        for x_i in data {
223            self.append(*x_i)?;
224        }
225        Ok(())
226    }
227
228    ///
229    /// Compute the confidence interval on the mean of a sample
230    ///
231    /// # Arguments
232    ///
233    /// * `confidence` - The confidence level of the interval
234    /// * `data` - The data to compute the confidence interval on
235    ///
236    /// # Output
237    ///
238    /// * `Ok(interval)` - The confidence interval on the mean of the sample
239    ///
240    /// # Errors
241    ///
242    /// * [`CIError::TooFewSamples`] - If the input data has too few samples to compute the confidence interval
243    /// * [`CIError::NonPositiveValue`] - If the input data contains non-positive values when computing harmonic/geometric means.
244    /// * [`CIError::InvalidInputData`] - If the input data contains invalid values (e.g. NaN)
245    /// * [`CIError::FloatConversionError`] - If some data cannot be converted to a float
246    ///
247    fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<F>>
248    where
249        for<'a> &'a I: IntoIterator<Item = &'a F>;
250}
251
252macro_rules! impl_statistics_ops_for {
253    ( $x:ty ) => {
254        impl<F: Float> StatisticsOps<F> for $x {
255            #[inline]
256            fn append(&mut self, x: F) -> CIResult<()> {
257                self.append(x)
258            }
259            #[inline]
260            fn sample_mean(&self) -> F {
261                self.sample_mean()
262            }
263            #[inline]
264            fn sample_sem(&self) -> F {
265                self.sample_sem()
266            }
267            #[inline]
268            fn ci_mean(&self, confidence: Confidence) -> CIResult<Interval<F>> {
269                self.ci_mean(confidence)
270            }
271            #[inline]
272            fn sample_count(&self) -> usize {
273                self.sample_count()
274            }
275            #[inline]
276            fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<F>>
277            where
278                for<'a> &'a I: IntoIterator<Item = &'a F>,
279            {
280                <$x>::ci(confidence, data)
281            }
282        }
283    };
284}
285
286impl_statistics_ops_for!(Arithmetic<F>);
287impl_statistics_ops_for!(Harmonic<F>);
288impl_statistics_ops_for!(Geometric<F>);
289
290///
291/// Represents the state of the computation of the arithmetic mean.
292/// This is a simple implementation that accumulates information about the samples, such as sum and sum of squares.
293///
294/// It is best used through the [`StatisticsOps`] trait.
295///
296#[derive(Debug, Clone, Copy, PartialEq)]
297#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
298pub struct Arithmetic<F: Float> {
299    sum: utils::KahanSum<F>,
300    sum_sq: utils::KahanSum<F>,
301    count: usize,
302}
303
304impl<F: Float> Default for Arithmetic<F> {
305    fn default() -> Self {
306        Self {
307            sum: utils::KahanSum::default(),
308            sum_sq: utils::KahanSum::default(),
309            count: 0,
310        }
311    }
312}
313
314impl<F: Float> Arithmetic<F> {
315    ///
316    /// Create a new empty state
317    ///
318    /// # Example
319    /// ```
320    /// use stats_ci::*;
321    /// let mut stats = mean::Arithmetic::new();
322    /// stats.append(10.);
323    /// assert_eq!(stats.sample_count(), 1);
324    /// assert_eq!(stats.sample_mean(), 10.);
325    /// ```
326    ///
327    pub fn new() -> Self {
328        Default::default()
329    }
330
331    ///
332    /// Variance of the sample
333    /// \\( \frac{1}{n-1}\left(\sum_{i=1}^n x_i^2 - \frac{1}{n} \left(\sum_{i=1}^n x_i\right)^2 \right) \\)
334    ///
335    /// Complexity: \\( O(1) \\)
336    ///
337    pub fn sample_variance(&self) -> F {
338        let mean = self.sample_mean();
339        (self.sum_sq.value() - mean * self.sum.value()) / F::from(self.count - 1).unwrap()
340    }
341
342    ///
343    /// Standard deviation of the sample
344    ///
345    /// Complexity: \\( O(1) \\)
346    ///
347    pub fn sample_std_dev(&self) -> F {
348        self.sample_variance().sqrt()
349    }
350
351    ///
352    /// Append a new sample to the data
353    ///
354    /// Complexity: \\( O(1) \\)
355    ///
356    fn append(&mut self, x: F) -> CIResult<()> {
357        self.sum += x;
358        self.sum_sq += x * x;
359        self.count += 1;
360        Ok(())
361    }
362
363    ///
364    /// Mean of the sample
365    ///
366    /// Complexity: \\( O(1) \\)
367    ///
368    pub fn sample_mean(&self) -> F {
369        self.sum.value() / F::from(self.count).unwrap()
370    }
371
372    ///
373    /// Standard error of the sample mean
374    ///
375    /// Complexity: \\( O(1) \\)
376    ///
377    pub fn sample_sem(&self) -> F {
378        self.sample_std_dev() / F::from(self.count - 1).unwrap().sqrt()
379    }
380
381    ///
382    /// Confidence interval of the sample mean
383    ///
384    /// Complexity: \\( O(1) \\)
385    ///
386    pub fn ci_mean(&self, confidence: Confidence) -> CIResult<Interval<F>> {
387        let n = self.count as f64;
388        let mean = self.sample_mean().try_f64("stats.mean")?;
389        let std_dev = self.sample_std_dev().try_f64("stats.std_dev")?;
390        let std_err_mean = std_dev / n.sqrt();
391        let degrees_of_freedom = n - 1.;
392        let (lo, hi) = stats::interval_bounds(confidence, mean, std_err_mean, degrees_of_freedom);
393        let (lo, hi) = (F::from(lo).convert("lo")?, F::from(hi).convert("hi")?);
394        match confidence {
395            Confidence::TwoSided(_) => Interval::new(lo, hi).map_err(|e| e.into()),
396            Confidence::UpperOneSided(_) => Ok(Interval::new_upper(lo)),
397            Confidence::LowerOneSided(_) => Ok(Interval::new_lower(hi)),
398        }
399    }
400
401    ///
402    /// Number of samples
403    ///
404    /// Complexity: \\( O(1) \\)
405    ///
406    pub fn sample_count(&self) -> usize {
407        self.count
408    }
409
410    ///
411    /// Combine two states
412    ///
413    /// Complexity: \\( O(1) \\)
414    ///
415    #[allow(clippy::should_implement_trait)]
416    pub fn add(self, rhs: Self) -> Self {
417        let mut sum = self.sum;
418        let mut sum_sq = self.sum_sq;
419        sum += rhs.sum;
420        sum_sq += rhs.sum_sq;
421        let count = self.count + rhs.count;
422        Self { sum, sum_sq, count }
423    }
424
425    ///
426    /// Compute the confidence interval on the mean of a sample
427    ///
428    /// # Arguments
429    ///
430    /// * `confidence` - The confidence level of the interval
431    /// * `data` - The data to compute the confidence interval on
432    ///
433    /// # Output
434    ///
435    /// * `Ok(interval)` - The confidence interval on the mean of the sample
436    ///
437    /// # Errors
438    ///
439    /// * [`CIError::TooFewSamples`] - If the input data has too few samples to compute the confidence interval
440    /// * [`CIError::NonPositiveValue`] - If the input data contains non-positive values when computing harmonic/geometric means.
441    /// * [`CIError::InvalidInputData`] - If the input data contains invalid values (e.g. NaN)
442    /// * [`CIError::FloatConversionError`] - If some data cannot be converted to a float
443    ///
444    pub fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<F>>
445    where
446        for<'a> &'a I: IntoIterator<Item = &'a F>,
447    {
448        Self::from_iter(data)?.ci_mean(confidence)
449    }
450}
451
452impl<F: Float> std::ops::Add for Arithmetic<F> {
453    type Output = Self;
454
455    #[inline]
456    fn add(self, rhs: Self) -> Self::Output {
457        self.add(rhs)
458    }
459}
460
461impl<F: Float> std::ops::AddAssign for Arithmetic<F> {
462    #[inline]
463    fn add_assign(&mut self, rhs: Self) {
464        *self = self.add(rhs);
465    }
466}
467
468///
469/// Represents the state of the computation related to the harmonic mean.
470/// This is a simple implementation that accumulates information about the samples, such as sum and sum of squares.
471/// It is implemented as a wrapper around [`Arithmetic`] to compute the arithmetic mean of the reciprocals of the samples.
472///
473/// It is best used through the [`StatisticsOps`] trait.
474///
475#[derive(Debug, Clone, Copy, PartialEq)]
476#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
477pub struct Harmonic<F: Float> {
478    recip_space: Arithmetic<F>,
479}
480
481impl<F: Float> Harmonic<F> {
482    ///
483    /// Create a new empty state
484    ///
485    /// # Example
486    /// ```
487    /// use stats_ci::*;
488    /// let mut stats = mean::Harmonic::new();
489    /// stats.append(10.);
490    /// assert_eq!(stats.sample_count(), 1);
491    /// assert_eq!(stats.sample_mean(), 10.);
492    /// ```
493    ///
494    pub fn new() -> Self {
495        Default::default()
496    }
497
498    ///
499    /// Append a new sample to the data
500    ///
501    /// Complexity: \\( O(1) \\)
502    ///
503    pub fn append(&mut self, x: F) -> CIResult<()> {
504        if x <= F::zero() {
505            return Err(error::CIError::NonPositiveValue(
506                x.to_f64().unwrap_or(f64::NAN),
507            ));
508        }
509        self.recip_space.append(F::one() / x)?;
510        Ok(())
511    }
512
513    ///
514    /// Harmonic mean of the sample
515    /// \\( H = \left( \frac{1}{n} \sum_i \frac{1}{x_i} \right)^{-1} \\)
516    ///
517    /// Complexity: \\( O(1) \\)
518    ///
519    pub fn sample_mean(&self) -> F {
520        F::one() / self.recip_space.sample_mean()
521    }
522
523    ///
524    /// Standard error of the harmonic mean
525    /// \\( s_H = \frac{1}{\alpha^2} \frac{s_{1/x_i}}{\sqrt{n-1}} \\)
526    ///
527    /// where
528    /// * the estimate of \\( \alpha \\) is given by \\( \alpha = \frac{1}{n} \sum_i 1/x_i \\);
529    /// * \\( s_{1/x_i} \\) is the estimate of the standard deviation of the reciprocals of the samples;
530    /// * and \\( n-1 \\) is the degree of freedom of the sample data.
531    ///
532    /// # Reference
533    ///
534    /// * Nilan Noris. "The standard errors of the geometric and harmonic means and their application to index numbers." Ann. Math. Statist. 11(4): 445-448 (December, 1940). DOI: [10.1214/aoms/1177731830](https://doi.org/10.1214/aoms/1177731830) [JSTOR](https://www.jstor.org/stable/2235727)
535    ///
536    pub fn sample_sem(&self) -> F {
537        let harm_mean = self.sample_mean();
538        let recip_std_dev = self.recip_space.sample_std_dev();
539        harm_mean * harm_mean * recip_std_dev
540            / F::from(self.recip_space.sample_count() - 1).unwrap().sqrt()
541    }
542
543    ///
544    /// Number of samples
545    ///
546    /// Complexity: \\( O(1) \\)
547    ///
548    pub fn sample_count(&self) -> usize {
549        self.recip_space.sample_count()
550    }
551
552    ///
553    /// Confidence interval for the harmonic mean
554    ///
555    pub fn ci_mean(&self, confidence: Confidence) -> CIResult<Interval<F>> {
556        let arith_ci = self.recip_space.ci_mean(confidence.flipped())?;
557        let (lo, hi) = (F::one() / arith_ci.high_f(), F::one() / arith_ci.low_f());
558        match confidence {
559            Confidence::TwoSided(_) => Interval::new(lo, hi).map_err(|e| e.into()),
560            Confidence::UpperOneSided(_) => Ok(Interval::new_upper(lo)),
561            Confidence::LowerOneSided(_) => Ok(Interval::new_lower(hi)),
562        }
563    }
564
565    ///
566    /// Combine two states
567    ///
568    /// Complexity: \\( O(1) \\)
569    ///
570    #[allow(clippy::should_implement_trait)]
571    pub fn add(self, rhs: Self) -> Self {
572        Self {
573            recip_space: self.recip_space + rhs.recip_space,
574        }
575    }
576
577    ///
578    /// Compute the confidence interval on the mean of a sample
579    ///
580    /// # Arguments
581    ///
582    /// * `confidence` - The confidence level of the interval
583    /// * `data` - The data to compute the confidence interval on
584    ///
585    /// # Output
586    ///
587    /// * `Ok(interval)` - The confidence interval on the mean of the sample
588    ///
589    /// # Errors
590    ///
591    /// * [`CIError::TooFewSamples`] - If the input data has too few samples to compute the confidence interval
592    /// * [`CIError::NonPositiveValue`] - If the input data contains non-positive values when computing harmonic/geometric means.
593    /// * [`CIError::InvalidInputData`] - If the input data contains invalid values (e.g. NaN)
594    /// * [`CIError::FloatConversionError`] - If some data cannot be converted to a float
595    ///
596    pub fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<F>>
597    where
598        for<'a> &'a I: IntoIterator<Item = &'a F>,
599    {
600        Self::from_iter(data)?.ci_mean(confidence)
601    }
602}
603
604impl<F: Float> Default for Harmonic<F> {
605    fn default() -> Self {
606        Self {
607            recip_space: Arithmetic::default(),
608        }
609    }
610}
611
612impl<F: Float> std::ops::Add for Harmonic<F> {
613    type Output = Self;
614
615    #[inline]
616    fn add(self, rhs: Self) -> Self::Output {
617        self.add(rhs)
618    }
619}
620
621impl<F: Float> std::ops::AddAssign for Harmonic<F> {
622    #[inline]
623    fn add_assign(&mut self, rhs: Self) {
624        *self = self.add(rhs);
625    }
626}
627
628///
629/// Represents the state of the computation of the geometric mean.
630/// This is a simple implementation that accumulates information about the samples, such as sum and sum of squares.
631/// It is implemented as a wrapper around [`Arithmetic`] to compute the arithmetic mean of the logarithms of the samples.
632///
633/// It is best used through the [`StatisticsOps`] trait.
634///
635#[derive(Debug, Clone, Copy, PartialEq)]
636#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
637pub struct Geometric<F: Float> {
638    log_space: Arithmetic<F>,
639}
640
641impl<F: Float> Geometric<F> {
642    ///
643    /// Create a new empty state
644    ///
645    /// # Example
646    /// ```
647    /// # use stats_ci::*;
648    /// # use approx::*;
649    /// let mut stats = mean::Geometric::new();
650    /// stats.append(10.)?;
651    /// assert_eq!(stats.sample_count(), 1);
652    /// assert_abs_diff_eq!(stats.sample_mean(), 10., epsilon = 1e-10);
653    /// # Ok::<(),error::CIError>(())
654    /// ```
655    ///
656    pub fn new() -> Self {
657        Default::default()
658    }
659
660    ///
661    /// Append a new sample to the data
662    ///
663    /// Complexity: \\( O(1) \\)
664    ///
665    pub fn append(&mut self, x: F) -> CIResult<()> {
666        if x <= F::zero() {
667            return Err(error::CIError::NonPositiveValue(
668                x.to_f64().unwrap_or(f64::NAN),
669            ));
670        }
671        self.log_space.append(x.ln())?;
672        Ok(())
673    }
674
675    ///
676    /// Geometric mean of the sample
677    ///
678    pub fn sample_mean(&self) -> F {
679        self.log_space.sample_mean().exp()
680    }
681
682    ///
683    /// Standard error of the geometric mean
684    ///
685    /// Computed as: \\( G \frac{s_{\log x_i}}{\sqrt{n-1}} \\)
686    /// where \\( G \\) is the geometric mean of the sample;
687    /// \\( s_{\log x_i} \\) is the estimate of the standard deviation of the logarithms of the samples;
688    /// and \\( n-1 \\) is the degree of freedom of the sample data.
689    ///
690    ///  # Reference
691    ///
692    /// * Nilan Noris. "The standard errors of the geometric and harmonic means and their application to index numbers." Ann. Math. Statist. 11(4): 445-448 (December, 1940). DOI: [10.1214/aoms/1177731830](https://doi.org/10.1214/aoms/1177731830) [JSTOR](https://www.jstor.org/stable/2235727)
693    ///
694    pub fn sample_sem(&self) -> F {
695        let geom_mean = self.sample_mean();
696        let log_std_dev = self.log_space.sample_std_dev();
697        geom_mean * log_std_dev / F::from(self.log_space.sample_count() - 1).unwrap().sqrt()
698    }
699
700    ///
701    /// Number of samples
702    ///
703    /// Complexity: \\( O(1) \\)
704    ///
705    pub fn sample_count(&self) -> usize {
706        self.log_space.sample_count()
707    }
708
709    ///
710    /// Confidence interval for the geometric mean
711    ///
712    pub fn ci_mean(&self, confidence: Confidence) -> CIResult<Interval<F>> {
713        let arith_ci = self.log_space.ci_mean(confidence)?;
714        let (lo, hi) = (arith_ci.low_f().exp(), arith_ci.high_f().exp());
715        match confidence {
716            Confidence::TwoSided(_) => Interval::new(lo, hi).map_err(|e| e.into()),
717            Confidence::UpperOneSided(_) => Ok(Interval::new_upper(lo)),
718            Confidence::LowerOneSided(_) => Ok(Interval::new_lower(hi)),
719        }
720    }
721
722    ///
723    /// Combine two states
724    ///
725    /// Complexity: \\( O(1) \\)
726    ///
727    #[allow(clippy::should_implement_trait)]
728    pub fn add(self, rhs: Self) -> Self {
729        Self {
730            log_space: self.log_space + rhs.log_space,
731        }
732    }
733
734    ///
735    /// Compute the confidence interval on the mean of a sample
736    ///
737    /// # Arguments
738    ///
739    /// * `confidence` - The confidence level of the interval
740    /// * `data` - The data to compute the confidence interval on
741    ///
742    /// # Output
743    ///
744    /// * `Ok(interval)` - The confidence interval on the mean of the sample
745    ///
746    /// # Errors
747    ///
748    /// * [`CIError::TooFewSamples`] - If the input data has too few samples to compute the confidence interval
749    /// * [`CIError::NonPositiveValue`] - If the input data contains non-positive values when computing harmonic/geometric means.
750    /// * [`CIError::InvalidInputData`] - If the input data contains invalid values (e.g. NaN)
751    /// * [`CIError::FloatConversionError`] - If some data cannot be converted to a float
752    ///
753    pub fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<F>>
754    where
755        for<'a> &'a I: IntoIterator<Item = &'a F>,
756    {
757        Self::from_iter(data)?.ci_mean(confidence)
758    }
759}
760
761impl<F: Float> Default for Geometric<F> {
762    fn default() -> Self {
763        Self {
764            log_space: Arithmetic::default(),
765        }
766    }
767}
768
769impl<F: Float> std::ops::Add for Geometric<F> {
770    type Output = Self;
771
772    #[inline]
773    fn add(self, rhs: Self) -> Self::Output {
774        self.add(rhs)
775    }
776}
777
778impl<F: Float> std::ops::AddAssign for Geometric<F> {
779    #[inline]
780    fn add_assign(&mut self, rhs: Self) {
781        *self = self.add(rhs);
782    }
783}
784
785///
786/// Trait for computing confidence intervals on the mean of a sample.
787///
788/// It is superseded by the [`StatisticsOps`] trait which allows incremental statistics.
789/// It is retained for backwards compatibility and will be deprecated in the future, as
790/// it brings no advantage over [`StatisticsOps`] and is less flexible.
791///
792/// # Examples
793///
794/// ```
795/// use stats_ci::*;
796/// let data = [
797///    82., 94., 68., 6., 39., 80., 10., 97., 34., 66., 62., 7., 39., 68., 93., 64., 10., 74.,
798///    15., 34., 4., 48., 88., 94., 17., 99., 81., 37., 68., 66., 40., 23., 67., 72., 63.,
799///    71., 18., 51., 65., 87., 12., 44., 89., 67., 28., 86., 62., 22., 90., 18., 50., 25.,
800///    98., 24., 61., 62., 86., 100., 96., 27., 36., 82., 90., 55., 26., 38., 97., 73., 16.,
801///    49., 23., 26., 55., 26., 3., 23., 47., 27., 58., 27., 97., 32., 29., 56., 28., 23.,
802///    37., 72., 62., 77., 63., 100., 40., 84., 77., 39., 71., 61., 17., 77.,
803/// ];
804/// let confidence = Confidence::new_two_sided(0.95);
805/// let ci = mean::Arithmetic::ci(confidence, &data)?;
806/// // arithmetic mean: 52.5
807///
808/// use num_traits::Float;
809/// use approx::*;
810/// assert_abs_diff_eq!(ci, Interval::new(48.094823990767836, 59.24517600923217)?, epsilon = 1e-3);
811/// # Ok::<(),error::CIError>(())
812/// ```
813///
814pub trait MeanCI<T: PartialOrd> {
815    ///
816    /// Compute the confidence interval on the mean of a sample
817    ///
818    /// # Arguments
819    ///
820    /// * `confidence` - The confidence level of the interval
821    /// * `data` - The data to compute the confidence interval on
822    ///
823    /// # Output
824    ///
825    /// * `Ok(interval)` - The confidence interval on the mean of the sample
826    ///
827    /// # Errors
828    ///
829    /// * [`CIError::TooFewSamples`] - If the input data has too few samples to compute the confidence interval
830    /// * [`CIError::NonPositiveValue`] - If the input data contains non-positive values when computing harmonic/geometric means.
831    /// * [`CIError::InvalidInputData`] - If the input data contains invalid values (e.g. NaN)
832    /// * [`CIError::FloatConversionError`] - If some data cannot be converted to a float
833    ///
834    fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<T>>
835    where
836        for<'a> &'a I: IntoIterator<Item = &'a T>;
837}
838
839macro_rules! impl_mean_ci_for {
840    ( $x:ty ) => {
841        impl<F: Float> MeanCI<F> for $x {
842            fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<F>>
843            where
844                for<'a> &'a I: IntoIterator<Item = &'a F>,
845            {
846                <$x>::ci(confidence, data)
847            }
848        }
849    };
850}
851
852impl_mean_ci_for!(Arithmetic<F>);
853impl_mean_ci_for!(Harmonic<F>);
854impl_mean_ci_for!(Geometric<F>);
855
856#[cfg(test)]
857mod tests {
858    use super::*;
859    use approx::*;
860
861    #[test]
862    fn test_mean_ci() -> CIResult<()> {
863        let data = [
864            82., 94., 68., 6., 39., 80., 10., 97., 34., 66., 62., 7., 39., 68., 93., 64., 10., 74.,
865            15., 34., 4., 48., 88., 94., 17., 99., 81., 37., 68., 66., 40., 23., 67., 72., 63.,
866            71., 18., 51., 65., 87., 12., 44., 89., 67., 28., 86., 62., 22., 90., 18., 50., 25.,
867            98., 24., 61., 62., 86., 100., 96., 27., 36., 82., 90., 55., 26., 38., 97., 73., 16.,
868            49., 23., 26., 55., 26., 3., 23., 47., 27., 58., 27., 97., 32., 29., 56., 28., 23.,
869            37., 72., 62., 77., 63., 100., 40., 84., 77., 39., 71., 61., 17., 77.,
870        ];
871
872        let confidence = Confidence::new_two_sided(0.95);
873        let ci = Arithmetic::ci(confidence, &data)?;
874        // mean: 53.67
875        // stddev: 28.097613040716798
876        // reference values computed in python
877        // [48.094823990767836, 59.24517600923217]
878        // ```python
879        // import numpy as np
880        // import scipy.stats as st
881        // st.t.interval(confidence=0.95, df=len(data)-1, loc=np.mean(data), scale=st.sem(data))
882        // ```
883        assert_abs_diff_eq!(ci.low_f(), 48.094823990767836, epsilon = 1e-8);
884        assert_abs_diff_eq!(ci.high_f(), 59.24517600923217, epsilon = 1e-8);
885        assert_abs_diff_eq!(ci.low_f() + ci.high_f(), 2. * 53.67);
886
887        let one_sided_ci = Arithmetic::ci(Confidence::UpperOneSided(0.975), &data)?;
888        assert_abs_diff_eq!(one_sided_ci.low_f(), ci.low_f());
889        assert_eq!(one_sided_ci.high_f(), f64::INFINITY);
890
891        let one_sided_ci = Arithmetic::ci(Confidence::LowerOneSided(0.975), &data)?;
892        assert_abs_diff_eq!(one_sided_ci.high_f(), ci.high_f());
893        assert_eq!(one_sided_ci.low_f(), f64::NEG_INFINITY);
894
895        let mut stats = Arithmetic::default();
896        stats.extend(&data)?;
897        let ci = stats.ci_mean(confidence)?;
898
899        assert_abs_diff_eq!(ci.low_f(), 48.094823990767836, epsilon = 1e-8);
900        assert_abs_diff_eq!(ci.high_f(), 59.24517600923217, epsilon = 1e-8);
901        assert_abs_diff_eq!(ci.low_f() + ci.high_f(), 2. * 53.67, epsilon = 1e-8);
902
903        let one_sided_ci = stats.ci_mean(Confidence::UpperOneSided(0.975))?;
904        assert_abs_diff_eq!(one_sided_ci.low_f(), ci.low_f());
905        assert_eq!(one_sided_ci.high_f(), f64::INFINITY);
906
907        let one_sided_ci = stats.ci_mean(Confidence::LowerOneSided(0.975))?;
908        assert_abs_diff_eq!(one_sided_ci.high_f(), ci.high_f());
909        assert_eq!(one_sided_ci.low_f(), f64::NEG_INFINITY);
910
911        Ok(())
912    }
913
914    #[test]
915    fn test_geometric_ci() -> CIResult<()> {
916        let data = [
917            82., 94., 68., 6., 39., 80., 10., 97., 34., 66., 62., 7., 39., 68., 93., 64., 10., 74.,
918            15., 34., 4., 48., 88., 94., 17., 99., 81., 37., 68., 66., 40., 23., 67., 72., 63.,
919            71., 18., 51., 65., 87., 12., 44., 89., 67., 28., 86., 62., 22., 90., 18., 50., 25.,
920            98., 24., 61., 62., 86., 100., 96., 27., 36., 82., 90., 55., 26., 38., 97., 73., 16.,
921            49., 23., 26., 55., 26., 3., 23., 47., 27., 58., 27., 97., 32., 29., 56., 28., 23.,
922            37., 72., 62., 77., 63., 100., 40., 84., 77., 39., 71., 61., 17., 77.,
923        ];
924
925        let confidence = Confidence::new_two_sided(0.95);
926        let ci = Geometric::ci(confidence, &data)?;
927        // geometric mean: 43.7268032829256
928        //
929        // reference values computed in python:
930        // in log space: (3.630483364286656, 3.9254391587458475)
931        // [37.731050052224354, 50.67532768627392]
932        assert_abs_diff_eq!(ci.low_f(), 37.731050052224354, epsilon = 1e-8);
933        assert_abs_diff_eq!(ci.high_f(), 50.67532768627392, epsilon = 1e-8);
934
935        let one_sided_ci = Geometric::ci(Confidence::UpperOneSided(0.975), &data)?;
936        assert_abs_diff_eq!(one_sided_ci.low_f(), ci.low_f());
937        assert_eq!(one_sided_ci.high_f(), f64::INFINITY);
938
939        let one_sided_ci = Geometric::ci(Confidence::LowerOneSided(0.975), &data)?;
940        assert_abs_diff_eq!(one_sided_ci.high_f(), ci.high_f());
941        assert_eq!(one_sided_ci.low_f(), f64::NEG_INFINITY);
942
943        let mut stats = Geometric::default();
944        stats.extend(&data)?;
945        let ci = stats.ci_mean(confidence)?;
946        assert_abs_diff_eq!(stats.sample_mean(), 43.7268032829256, epsilon = 1e-8);
947        assert_abs_diff_eq!(ci.low_f(), 37.731050052224354, epsilon = 1e-8);
948        assert_abs_diff_eq!(ci.high_f(), 50.67532768627392, epsilon = 1e-8);
949
950        let one_sided_ci = stats.ci_mean(Confidence::UpperOneSided(0.975))?;
951        assert_abs_diff_eq!(one_sided_ci.low_f(), ci.low_f());
952        assert_eq!(one_sided_ci.high_f(), f64::INFINITY);
953
954        let one_sided_ci = stats.ci_mean(Confidence::LowerOneSided(0.975))?;
955        assert_abs_diff_eq!(one_sided_ci.high_f(), ci.high_f());
956        assert_eq!(one_sided_ci.low_f(), f64::NEG_INFINITY);
957
958        Ok(())
959    }
960
961    #[test]
962    fn test_harmonic_ci() -> CIResult<()> {
963        let data = [
964            82., 94., 68., 6., 39., 80., 10., 97., 34., 66., 62., 7., 39., 68., 93., 64., 10., 74.,
965            15., 34., 4., 48., 88., 94., 17., 99., 81., 37., 68., 66., 40., 23., 67., 72., 63.,
966            71., 18., 51., 65., 87., 12., 44., 89., 67., 28., 86., 62., 22., 90., 18., 50., 25.,
967            98., 24., 61., 62., 86., 100., 96., 27., 36., 82., 90., 55., 26., 38., 97., 73., 16.,
968            49., 23., 26., 55., 26., 3., 23., 47., 27., 58., 27., 97., 32., 29., 56., 28., 23.,
969            37., 72., 62., 77., 63., 100., 40., 84., 77., 39., 71., 61., 17., 77.,
970        ];
971
972        let confidence = Confidence::new_two_sided(0.95);
973        let ci = Harmonic::ci(confidence, &data)?;
974        // harmonic mean: 30.031313156339586
975        //
976        // reference values computed in python:
977        // in reciprocal space: (0.02424956057996111, 0.042347593849757906)
978        // [41.237860649168255, 23.614092539657168]  (reversed by conversion from reciprocal space)
979        assert_abs_diff_eq!(ci.low_f(), 23.614092539657168, epsilon = 1e-8);
980        assert_abs_diff_eq!(ci.high_f(), 41.237860649168255, epsilon = 1e-8);
981
982        let one_sided_ci = Harmonic::ci(Confidence::UpperOneSided(0.975), &data)?;
983        assert_abs_diff_eq!(one_sided_ci.low_f(), ci.low_f());
984        assert_eq!(one_sided_ci.high_f(), f64::INFINITY);
985
986        let one_sided_ci = Harmonic::ci(Confidence::LowerOneSided(0.975), &data)?;
987        assert_abs_diff_eq!(one_sided_ci.high_f(), ci.high_f());
988        assert_eq!(one_sided_ci.low_f(), f64::NEG_INFINITY);
989
990        let confidence = Confidence::new_two_sided(0.95);
991        let data = [
992            1.81600583, 0.07498389, 1.29092744, 0.62023863, 0.09345327, 1.94670997, 2.27687339,
993            0.9251231, 1.78173864, 0.4391542, 1.36948099, 1.5191194, 0.42286756, 1.48463176,
994            0.17621009, 2.31810064, 0.15633061, 2.55137878, 1.11043948, 1.35923319, 1.58385561,
995            0.63431437, 0.49993148, 0.49168534, 0.11533354,
996        ];
997        let ci = Harmonic::ci(confidence, &data)?;
998        // harmonic mean: 0.38041820166550844
999        //
1000        // reference values computed in python:
1001        // in reciprocal space: (1.1735238063066096, 4.083848080632111)
1002        // [0.8521343961033607, 0.2448670911003175]
1003        assert_abs_diff_eq!(ci.low_f(), 0.2448670911003175, epsilon = 1e-6);
1004        assert_abs_diff_eq!(ci.high_f(), 0.8521343961033607, epsilon = 1e-6);
1005
1006        let mut stats = Harmonic::default();
1007        stats.extend(&data)?;
1008        let ci = stats.ci_mean(confidence)?;
1009        assert_abs_diff_eq!(stats.sample_mean(), 0.38041820166550844, epsilon = 1e-8);
1010        assert_abs_diff_eq!(ci.low_f(), 0.2448670911003175, epsilon = 1e-6);
1011        assert_abs_diff_eq!(ci.high_f(), 0.8521343961033607, epsilon = 1e-6);
1012
1013        Ok(())
1014    }
1015
1016    #[test]
1017    fn test_arithmetic_add() {
1018        const VALUE: f32 = 0.1;
1019        let size = 1_000_000;
1020
1021        let mut stats_ref = Arithmetic::default();
1022        let mut stats_summed = Arithmetic::default();
1023        let mut stats_summed_in_place = Arithmetic::default();
1024        for _ in 0..size {
1025            stats_ref.append(VALUE).unwrap();
1026
1027            let mut new_stat = Arithmetic::default();
1028            new_stat.append(VALUE).unwrap();
1029            stats_summed = stats_summed + new_stat;
1030            stats_summed_in_place += new_stat;
1031        }
1032
1033        assert_eq!(stats_ref.sample_count(), size);
1034        assert_eq!(stats_summed.sample_count(), size);
1035        assert_eq!(stats_summed_in_place.sample_count(), size);
1036
1037        assert_eq!(stats_ref.sample_mean(), stats_summed.sample_mean());
1038        assert_eq!(stats_ref.sample_variance(), stats_summed.sample_variance());
1039        assert_eq!(stats_ref.sample_std_dev(), stats_summed.sample_std_dev());
1040        assert_eq!(stats_ref.sample_sem(), stats_summed.sample_sem());
1041
1042        assert_eq!(stats_ref.sample_mean(), stats_summed_in_place.sample_mean());
1043        assert_eq!(
1044            stats_ref.sample_variance(),
1045            stats_summed_in_place.sample_variance()
1046        );
1047        assert_eq!(
1048            stats_ref.sample_std_dev(),
1049            stats_summed_in_place.sample_std_dev()
1050        );
1051        assert_eq!(stats_ref.sample_sem(), stats_summed_in_place.sample_sem());
1052    }
1053
1054    #[test]
1055    fn test_geometric_add() {
1056        const VALUE: f32 = 0.1;
1057        let size = 1_000_000;
1058
1059        let mut stats_ref = Geometric::default();
1060        let mut stats_summed = Geometric::default();
1061        let mut stats_summed_in_place = Geometric::default();
1062        for _ in 0..size {
1063            stats_ref.append(VALUE).unwrap();
1064
1065            let mut new_stat = Geometric::default();
1066            new_stat.append(VALUE).unwrap();
1067            stats_summed = stats_summed + new_stat;
1068            stats_summed_in_place += new_stat;
1069        }
1070
1071        assert_eq!(stats_ref.sample_count(), size);
1072        assert_eq!(stats_summed.sample_count(), size);
1073        assert_eq!(stats_summed_in_place.sample_count(), size);
1074
1075        assert_eq!(stats_ref.sample_mean(), stats_summed.sample_mean());
1076        assert_eq!(stats_ref.sample_sem(), stats_summed.sample_sem());
1077
1078        assert_eq!(stats_ref.sample_mean(), stats_summed_in_place.sample_mean());
1079        assert_eq!(stats_ref.sample_sem(), stats_summed_in_place.sample_sem());
1080    }
1081
1082    #[test]
1083    fn test_harmonic_add() {
1084        const VALUE: f32 = 0.1;
1085        let size = 1_000_000;
1086
1087        let mut stats_ref = Harmonic::default();
1088        let mut stats_summed = Harmonic::default();
1089        let mut stats_summed_in_place = Harmonic::default();
1090        for _ in 0..size {
1091            stats_ref.append(VALUE).unwrap();
1092
1093            let mut new_stat = Harmonic::default();
1094            new_stat.append(VALUE).unwrap();
1095            stats_summed = stats_summed + new_stat;
1096            stats_summed_in_place += new_stat;
1097        }
1098
1099        assert_eq!(stats_ref.sample_count(), size);
1100        assert_eq!(stats_summed.sample_count(), size);
1101        assert_eq!(stats_summed_in_place.sample_count(), size);
1102
1103        assert_eq!(stats_ref.sample_mean(), stats_summed.sample_mean());
1104        assert_eq!(stats_ref.sample_sem(), stats_summed.sample_sem());
1105
1106        assert_eq!(stats_ref.sample_mean(), stats_summed_in_place.sample_mean());
1107        assert_eq!(stats_ref.sample_sem(), stats_summed_in_place.sample_sem());
1108    }
1109
1110    #[test]
1111    fn test_misc() -> CIResult<()> {
1112        let data = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.];
1113        let stats = mean::Arithmetic::from_iter(&data)?;
1114        assert_eq!(stats.sample_count(), 10);
1115        assert_eq!(stats.sample_mean(), 5.5);
1116        assert_abs_diff_eq!(stats.sample_sem(), 1.0092, epsilon = 1e-4);
1117        let confidence = Confidence::new_two_sided(0.95);
1118        let ci = stats.ci_mean(confidence)?;
1119        assert_abs_diff_eq!(ci, Interval::new(3.3341, 7.6659)?, epsilon = 1e-4);
1120        Ok(())
1121    }
1122}
stats_ci/mean.rs

stats_ci/
mean.rs