stats_ci/mean.rs
1//!
2//! Confidence intervals over the mean (arithmetic, geometric, harmonic) of a given sample.
3//!
4//! The premise on which confidence intervals are computed is that the sample data is a random
5//! sample from a population following some (unknown) distribution. The confidence interval
6//! is computed from the sample data, and is an estimate of the true mean of the population.
7//!
8//! Unlike what is sometimes stated, the population does not need to be normally distributed.
9//! However, it is assumed that the __standard error__ of the sample mean is normally distributed
10//! (or close to it).
11//! This is true for most distributions (especially symmetrical ones), and is guaranteed by
12//! the central limit theorem as the size of the sample data grows large.
13//!
14//! The calculations use Student's t distribution almost regardless of sample size (until
15//! a size of 100'000). This provides more conservative (and accurate intervals) than the
16//! normal distribution when the number of samples is small, and asymptotically approaches
17//! the normal distribution as the number of samples increases. This compensates for the
18//! fact that the central limit theorem applies only asymptotically.
19//!
20//! # Assumptions
21//!
22//! The following assumptions are made:
23//!
24//! * The sample data is a random sample from a population following some (unknown) distribution.
25//! * The sample data is independent and identically distributed (iid).
26//! * The standard approaches a normal distribution.
27//! * For geometric / harmonic means, the sample data is strictly positive.
28//!
29//! # Examples
30//!
31//! Confidence intervals on the arithmetic mean of a sample:
32//! ```
33//! use stats_ci::*;
34//! let data = [
35//! 82., 94., 68., 6., 39., 80., 10., 97., 34., 66., 62., 7., 39., 68., 93., 64., 10., 74.,
36//! 15., 34., 4., 48., 88., 94., 17., 99., 81., 37., 68., 66., 40., 23., 67., 72., 63.,
37//! 71., 18., 51., 65., 87., 12., 44., 89., 67., 28., 86., 62., 22., 90., 18., 50., 25.,
38//! 98., 24., 61., 62., 86., 100., 96., 27., 36., 82., 90., 55., 26., 38., 97., 73., 16.,
39//! 49., 23., 26., 55., 26., 3., 23., 47., 27., 58., 27., 97., 32., 29., 56., 28., 23.,
40//! 37., 72., 62., 77., 63., 100., 40., 84., 77., 39., 71., 61., 17., 77.,
41//! ];
42//! let confidence = Confidence::new_two_sided(0.95);
43//! let stats = mean::Arithmetic::from_iter(&data)?;
44//! // reference values computed in python / numpy
45//! use approx::*;
46//! assert_abs_diff_eq!(stats.sample_mean(), 53.67, epsilon = 1e-6);
47//! assert_abs_diff_eq!(stats.sample_std_dev(), 28.097613040716798, epsilon = 1e-6);
48//! assert_abs_diff_eq!(stats.ci_mean(confidence)?, Interval::new(48.094823990767836, 59.24517600923217)?, epsilon = 1e-6);
49//! # Ok::<(),error::CIError>(())
50//! ```
51//!
52//! Confidence intervals on the geometric mean of a sample:
53//! ```
54//! # use stats_ci::*;
55//! # let data = [
56//! # 82., 94., 68., 6., 39., 80., 10., 97., 34., 66., 62., 7., 39., 68., 93., 64., 10., 74.,
57//! # 15., 34., 4., 48., 88., 94., 17., 99., 81., 37., 68., 66., 40., 23., 67., 72., 63.,
58//! # 71., 18., 51., 65., 87., 12., 44., 89., 67., 28., 86., 62., 22., 90., 18., 50., 25.,
59//! # 98., 24., 61., 62., 86., 100., 96., 27., 36., 82., 90., 55., 26., 38., 97., 73., 16.,
60//! # 49., 23., 26., 55., 26., 3., 23., 47., 27., 58., 27., 97., 32., 29., 56., 28., 23.,
61//! # 37., 72., 62., 77., 63., 100., 40., 84., 77., 39., 71., 61., 17., 77.,
62//! # ];
63//! # let confidence = Confidence::new_two_sided(0.95);
64//! let stats = mean::Geometric::from_iter(&data)?;
65//! // reference values computed in python / numpy
66//! use approx::*;
67//! assert_abs_diff_eq!(stats.sample_mean(), 43.7268032829256, epsilon = 1e-6);
68//! assert_abs_diff_eq!(stats.ci_mean(confidence)?, Interval::new(37.731050052224354, 50.67532768627392)?, epsilon = 1e-6);
69//! # Ok::<(),error::CIError>(())
70//! ```
71//!
72//! Confidence intervals on the harmonic mean of a sample:
73//! ```
74//! # use stats_ci::*;
75//! # let data = [
76//! # 82., 94., 68., 6., 39., 80., 10., 97., 34., 66., 62., 7., 39., 68., 93., 64., 10., 74.,
77//! # 15., 34., 4., 48., 88., 94., 17., 99., 81., 37., 68., 66., 40., 23., 67., 72., 63.,
78//! # 71., 18., 51., 65., 87., 12., 44., 89., 67., 28., 86., 62., 22., 90., 18., 50., 25.,
79//! # 98., 24., 61., 62., 86., 100., 96., 27., 36., 82., 90., 55., 26., 38., 97., 73., 16.,
80//! # 49., 23., 26., 55., 26., 3., 23., 47., 27., 58., 27., 97., 32., 29., 56., 28., 23.,
81//! # 37., 72., 62., 77., 63., 100., 40., 84., 77., 39., 71., 61., 17., 77.,
82//! # ];
83//! # let confidence = Confidence::new_two_sided(0.95);
84//! let stats = mean::Harmonic::from_iter(&data)?;
85//! // reference values computed in python / numpy
86//! use approx::*;
87//! assert_abs_diff_eq!(stats.sample_mean(), 30.031313156339586, epsilon = 1e-6);
88//! assert_abs_diff_eq!(stats.ci_mean(confidence)?, Interval::new(23.614092539657168, 41.237860649168255)?, epsilon = 1e-6);
89//! # Ok::<(),error::CIError>(())
90//! ```
91//!
92use super::*;
93use crate::utils;
94
95use error::*;
96use num_traits::Float;
97
98///
99/// Trait for incremental statistics.
100/// This trait is implemented for the following statistics:
101/// - [`mean::Arithmetic`] for arithmetic calculations
102/// - [`mean::Geometric`] for geometric calculations (logarithmic space)
103/// - [`mean::Harmonic`] for harmonic calculations (reciprocal space)
104///
105/// # Example
106/// ```
107/// use stats_ci::*;
108/// let data = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.];
109/// let stats = mean::Arithmetic::from_iter(&data)?;
110/// assert_eq!(stats.sample_count(), 10);
111/// assert_eq!(stats.sample_mean(), 5.5);
112/// assert_abs_diff_eq!(stats.sample_sem(), 1.0092, epsilon = 1e-4);
113/// let confidence = Confidence::new_two_sided(0.95);
114/// let ci = stats.ci_mean(confidence)?;
115/// # use approx::*;
116/// assert_abs_diff_eq!(ci, Interval::new(3.3341, 7.6659)?, epsilon = 1e-4);
117/// # Ok::<(),error::CIError>(())
118/// ```
119pub trait StatisticsOps<F: Float>: Default {
120 ///
121 /// Create a new state and "populates" it with data from an iterator
122 ///
123 /// Complexity: \\( O(n) \\), where \\( n \\) is the number of elements in `data`
124 ///
125 /// # Arguments
126 ///
127 /// * `data` - The data to populate the state with
128 ///
129 /// # Errors
130 ///
131 /// * [`CIError::NonPositiveValue`] - If the input data contains non-positive values when computing harmonic/geometric means.
132 ///
133 /// # Example
134 /// ```
135 /// # use approx::*;
136 /// use stats_ci::*;
137 /// let data = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.];
138 /// let stats = mean::Arithmetic::from_iter(&data)?;
139 /// assert_eq!(stats.sample_count(), 10);
140 /// assert_eq!(stats.sample_mean(), 5.5);
141 /// assert_abs_diff_eq!(stats.sample_sem(), 1.0092, epsilon = 1e-4);
142 /// # Ok::<(),error::CIError>(())
143 /// ```
144 ///
145 /// # Note
146 ///
147 /// This is simply a shortcut for [`Default::default`] and [`Self::extend`]:
148 /// ```
149 /// # use stats_ci::*;
150 /// # let data = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.];
151 /// let stats = mean::Arithmetic::new().extend(&data)?;
152 /// # Ok::<(),error::CIError>(())
153 /// ```
154 ///
155 fn from_iter<I>(data: &I) -> CIResult<Self>
156 where
157 for<'a> &'a I: IntoIterator<Item = &'a F>,
158 {
159 let mut stats = Self::default();
160 stats.extend(data)?;
161 Ok(stats)
162 }
163
164 ///
165 /// Mean of the sample
166 ///
167 /// Complexity: \\( O(1) \\)
168 ///
169 fn sample_mean(&self) -> F;
170
171 ///
172 /// Standard error of the sample mean
173 ///
174 /// Complexity: \\( O(1) \\)
175 ///
176 fn sample_sem(&self) -> F;
177
178 ///
179 /// Number of samples
180 ///
181 /// Complexity: \\( O(1) \\)
182 ///
183 fn sample_count(&self) -> usize;
184
185 ///
186 /// Confidence interval of the sample mean
187 ///
188 /// Complexity: \\( O(1) \\)
189 ///
190 fn ci_mean(&self, confidence: Confidence) -> CIResult<Interval<F>>;
191
192 ///
193 /// Append a new sample to the data
194 ///
195 /// Complexity: \\( O(1) \\)
196 ///
197 fn append(&mut self, x: F) -> CIResult<()>;
198
199 ///
200 /// Extend the data with additional sample data.
201 ///
202 /// This is equivalent to calling [`Self::append`] for each value in `data`.
203 ///
204 /// Complexity: \\( O(n) \\), where \\( n \\) is the number of elements in `data`
205 ///
206 /// # Arguments
207 ///
208 /// * `data` - The data to append as an array or an iterator
209 ///
210 /// # Output
211 ///
212 /// * `Ok(())` - If the data was successfully appended
213 ///
214 /// # Errors
215 ///
216 /// * [`CIError::NonPositiveValue`] - If the input data is invalid (for harmonic/geometric means).
217 ///
218 fn extend<I>(&mut self, data: &I) -> CIResult<()>
219 where
220 for<'a> &'a I: IntoIterator<Item = &'a F>,
221 {
222 for x_i in data {
223 self.append(*x_i)?;
224 }
225 Ok(())
226 }
227
228 ///
229 /// Compute the confidence interval on the mean of a sample
230 ///
231 /// # Arguments
232 ///
233 /// * `confidence` - The confidence level of the interval
234 /// * `data` - The data to compute the confidence interval on
235 ///
236 /// # Output
237 ///
238 /// * `Ok(interval)` - The confidence interval on the mean of the sample
239 ///
240 /// # Errors
241 ///
242 /// * [`CIError::TooFewSamples`] - If the input data has too few samples to compute the confidence interval
243 /// * [`CIError::NonPositiveValue`] - If the input data contains non-positive values when computing harmonic/geometric means.
244 /// * [`CIError::InvalidInputData`] - If the input data contains invalid values (e.g. NaN)
245 /// * [`CIError::FloatConversionError`] - If some data cannot be converted to a float
246 ///
247 fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<F>>
248 where
249 for<'a> &'a I: IntoIterator<Item = &'a F>;
250}
251
252macro_rules! impl_statistics_ops_for {
253 ( $x:ty ) => {
254 impl<F: Float> StatisticsOps<F> for $x {
255 #[inline]
256 fn append(&mut self, x: F) -> CIResult<()> {
257 self.append(x)
258 }
259 #[inline]
260 fn sample_mean(&self) -> F {
261 self.sample_mean()
262 }
263 #[inline]
264 fn sample_sem(&self) -> F {
265 self.sample_sem()
266 }
267 #[inline]
268 fn ci_mean(&self, confidence: Confidence) -> CIResult<Interval<F>> {
269 self.ci_mean(confidence)
270 }
271 #[inline]
272 fn sample_count(&self) -> usize {
273 self.sample_count()
274 }
275 #[inline]
276 fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<F>>
277 where
278 for<'a> &'a I: IntoIterator<Item = &'a F>,
279 {
280 <$x>::ci(confidence, data)
281 }
282 }
283 };
284}
285
286impl_statistics_ops_for!(Arithmetic<F>);
287impl_statistics_ops_for!(Harmonic<F>);
288impl_statistics_ops_for!(Geometric<F>);
289
290///
291/// Represents the state of the computation of the arithmetic mean.
292/// This is a simple implementation that accumulates information about the samples, such as sum and sum of squares.
293///
294/// It is best used through the [`StatisticsOps`] trait.
295///
296#[derive(Debug, Clone, Copy, PartialEq)]
297#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
298pub struct Arithmetic<F: Float> {
299 sum: utils::KahanSum<F>,
300 sum_sq: utils::KahanSum<F>,
301 count: usize,
302}
303
304impl<F: Float> Default for Arithmetic<F> {
305 fn default() -> Self {
306 Self {
307 sum: utils::KahanSum::default(),
308 sum_sq: utils::KahanSum::default(),
309 count: 0,
310 }
311 }
312}
313
314impl<F: Float> Arithmetic<F> {
315 ///
316 /// Create a new empty state
317 ///
318 /// # Example
319 /// ```
320 /// use stats_ci::*;
321 /// let mut stats = mean::Arithmetic::new();
322 /// stats.append(10.);
323 /// assert_eq!(stats.sample_count(), 1);
324 /// assert_eq!(stats.sample_mean(), 10.);
325 /// ```
326 ///
327 pub fn new() -> Self {
328 Default::default()
329 }
330
331 ///
332 /// Variance of the sample
333 /// \\( \frac{1}{n-1}\left(\sum_{i=1}^n x_i^2 - \frac{1}{n} \left(\sum_{i=1}^n x_i\right)^2 \right) \\)
334 ///
335 /// Complexity: \\( O(1) \\)
336 ///
337 pub fn sample_variance(&self) -> F {
338 let mean = self.sample_mean();
339 (self.sum_sq.value() - mean * self.sum.value()) / F::from(self.count - 1).unwrap()
340 }
341
342 ///
343 /// Standard deviation of the sample
344 ///
345 /// Complexity: \\( O(1) \\)
346 ///
347 pub fn sample_std_dev(&self) -> F {
348 self.sample_variance().sqrt()
349 }
350
351 ///
352 /// Append a new sample to the data
353 ///
354 /// Complexity: \\( O(1) \\)
355 ///
356 fn append(&mut self, x: F) -> CIResult<()> {
357 self.sum += x;
358 self.sum_sq += x * x;
359 self.count += 1;
360 Ok(())
361 }
362
363 ///
364 /// Mean of the sample
365 ///
366 /// Complexity: \\( O(1) \\)
367 ///
368 pub fn sample_mean(&self) -> F {
369 self.sum.value() / F::from(self.count).unwrap()
370 }
371
372 ///
373 /// Standard error of the sample mean
374 ///
375 /// Complexity: \\( O(1) \\)
376 ///
377 pub fn sample_sem(&self) -> F {
378 self.sample_std_dev() / F::from(self.count - 1).unwrap().sqrt()
379 }
380
381 ///
382 /// Confidence interval of the sample mean
383 ///
384 /// Complexity: \\( O(1) \\)
385 ///
386 pub fn ci_mean(&self, confidence: Confidence) -> CIResult<Interval<F>> {
387 let n = self.count as f64;
388 let mean = self.sample_mean().try_f64("stats.mean")?;
389 let std_dev = self.sample_std_dev().try_f64("stats.std_dev")?;
390 let std_err_mean = std_dev / n.sqrt();
391 let degrees_of_freedom = n - 1.;
392 let (lo, hi) = stats::interval_bounds(confidence, mean, std_err_mean, degrees_of_freedom);
393 let (lo, hi) = (F::from(lo).convert("lo")?, F::from(hi).convert("hi")?);
394 match confidence {
395 Confidence::TwoSided(_) => Interval::new(lo, hi).map_err(|e| e.into()),
396 Confidence::UpperOneSided(_) => Ok(Interval::new_upper(lo)),
397 Confidence::LowerOneSided(_) => Ok(Interval::new_lower(hi)),
398 }
399 }
400
401 ///
402 /// Number of samples
403 ///
404 /// Complexity: \\( O(1) \\)
405 ///
406 pub fn sample_count(&self) -> usize {
407 self.count
408 }
409
410 ///
411 /// Combine two states
412 ///
413 /// Complexity: \\( O(1) \\)
414 ///
415 #[allow(clippy::should_implement_trait)]
416 pub fn add(self, rhs: Self) -> Self {
417 let mut sum = self.sum;
418 let mut sum_sq = self.sum_sq;
419 sum += rhs.sum;
420 sum_sq += rhs.sum_sq;
421 let count = self.count + rhs.count;
422 Self { sum, sum_sq, count }
423 }
424
425 ///
426 /// Compute the confidence interval on the mean of a sample
427 ///
428 /// # Arguments
429 ///
430 /// * `confidence` - The confidence level of the interval
431 /// * `data` - The data to compute the confidence interval on
432 ///
433 /// # Output
434 ///
435 /// * `Ok(interval)` - The confidence interval on the mean of the sample
436 ///
437 /// # Errors
438 ///
439 /// * [`CIError::TooFewSamples`] - If the input data has too few samples to compute the confidence interval
440 /// * [`CIError::NonPositiveValue`] - If the input data contains non-positive values when computing harmonic/geometric means.
441 /// * [`CIError::InvalidInputData`] - If the input data contains invalid values (e.g. NaN)
442 /// * [`CIError::FloatConversionError`] - If some data cannot be converted to a float
443 ///
444 pub fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<F>>
445 where
446 for<'a> &'a I: IntoIterator<Item = &'a F>,
447 {
448 Self::from_iter(data)?.ci_mean(confidence)
449 }
450}
451
452impl<F: Float> std::ops::Add for Arithmetic<F> {
453 type Output = Self;
454
455 #[inline]
456 fn add(self, rhs: Self) -> Self::Output {
457 self.add(rhs)
458 }
459}
460
461impl<F: Float> std::ops::AddAssign for Arithmetic<F> {
462 #[inline]
463 fn add_assign(&mut self, rhs: Self) {
464 *self = self.add(rhs);
465 }
466}
467
468///
469/// Represents the state of the computation related to the harmonic mean.
470/// This is a simple implementation that accumulates information about the samples, such as sum and sum of squares.
471/// It is implemented as a wrapper around [`Arithmetic`] to compute the arithmetic mean of the reciprocals of the samples.
472///
473/// It is best used through the [`StatisticsOps`] trait.
474///
475#[derive(Debug, Clone, Copy, PartialEq)]
476#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
477pub struct Harmonic<F: Float> {
478 recip_space: Arithmetic<F>,
479}
480
481impl<F: Float> Harmonic<F> {
482 ///
483 /// Create a new empty state
484 ///
485 /// # Example
486 /// ```
487 /// use stats_ci::*;
488 /// let mut stats = mean::Harmonic::new();
489 /// stats.append(10.);
490 /// assert_eq!(stats.sample_count(), 1);
491 /// assert_eq!(stats.sample_mean(), 10.);
492 /// ```
493 ///
494 pub fn new() -> Self {
495 Default::default()
496 }
497
498 ///
499 /// Append a new sample to the data
500 ///
501 /// Complexity: \\( O(1) \\)
502 ///
503 pub fn append(&mut self, x: F) -> CIResult<()> {
504 if x <= F::zero() {
505 return Err(error::CIError::NonPositiveValue(
506 x.to_f64().unwrap_or(f64::NAN),
507 ));
508 }
509 self.recip_space.append(F::one() / x)?;
510 Ok(())
511 }
512
513 ///
514 /// Harmonic mean of the sample
515 /// \\( H = \left( \frac{1}{n} \sum_i \frac{1}{x_i} \right)^{-1} \\)
516 ///
517 /// Complexity: \\( O(1) \\)
518 ///
519 pub fn sample_mean(&self) -> F {
520 F::one() / self.recip_space.sample_mean()
521 }
522
523 ///
524 /// Standard error of the harmonic mean
525 /// \\( s_H = \frac{1}{\alpha^2} \frac{s_{1/x_i}}{\sqrt{n-1}} \\)
526 ///
527 /// where
528 /// * the estimate of \\( \alpha \\) is given by \\( \alpha = \frac{1}{n} \sum_i 1/x_i \\);
529 /// * \\( s_{1/x_i} \\) is the estimate of the standard deviation of the reciprocals of the samples;
530 /// * and \\( n-1 \\) is the degree of freedom of the sample data.
531 ///
532 /// # Reference
533 ///
534 /// * Nilan Noris. "The standard errors of the geometric and harmonic means and their application to index numbers." Ann. Math. Statist. 11(4): 445-448 (December, 1940). DOI: [10.1214/aoms/1177731830](https://doi.org/10.1214/aoms/1177731830) [JSTOR](https://www.jstor.org/stable/2235727)
535 ///
536 pub fn sample_sem(&self) -> F {
537 let harm_mean = self.sample_mean();
538 let recip_std_dev = self.recip_space.sample_std_dev();
539 harm_mean * harm_mean * recip_std_dev
540 / F::from(self.recip_space.sample_count() - 1).unwrap().sqrt()
541 }
542
543 ///
544 /// Number of samples
545 ///
546 /// Complexity: \\( O(1) \\)
547 ///
548 pub fn sample_count(&self) -> usize {
549 self.recip_space.sample_count()
550 }
551
552 ///
553 /// Confidence interval for the harmonic mean
554 ///
555 pub fn ci_mean(&self, confidence: Confidence) -> CIResult<Interval<F>> {
556 let arith_ci = self.recip_space.ci_mean(confidence.flipped())?;
557 let (lo, hi) = (F::one() / arith_ci.high_f(), F::one() / arith_ci.low_f());
558 match confidence {
559 Confidence::TwoSided(_) => Interval::new(lo, hi).map_err(|e| e.into()),
560 Confidence::UpperOneSided(_) => Ok(Interval::new_upper(lo)),
561 Confidence::LowerOneSided(_) => Ok(Interval::new_lower(hi)),
562 }
563 }
564
565 ///
566 /// Combine two states
567 ///
568 /// Complexity: \\( O(1) \\)
569 ///
570 #[allow(clippy::should_implement_trait)]
571 pub fn add(self, rhs: Self) -> Self {
572 Self {
573 recip_space: self.recip_space + rhs.recip_space,
574 }
575 }
576
577 ///
578 /// Compute the confidence interval on the mean of a sample
579 ///
580 /// # Arguments
581 ///
582 /// * `confidence` - The confidence level of the interval
583 /// * `data` - The data to compute the confidence interval on
584 ///
585 /// # Output
586 ///
587 /// * `Ok(interval)` - The confidence interval on the mean of the sample
588 ///
589 /// # Errors
590 ///
591 /// * [`CIError::TooFewSamples`] - If the input data has too few samples to compute the confidence interval
592 /// * [`CIError::NonPositiveValue`] - If the input data contains non-positive values when computing harmonic/geometric means.
593 /// * [`CIError::InvalidInputData`] - If the input data contains invalid values (e.g. NaN)
594 /// * [`CIError::FloatConversionError`] - If some data cannot be converted to a float
595 ///
596 pub fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<F>>
597 where
598 for<'a> &'a I: IntoIterator<Item = &'a F>,
599 {
600 Self::from_iter(data)?.ci_mean(confidence)
601 }
602}
603
604impl<F: Float> Default for Harmonic<F> {
605 fn default() -> Self {
606 Self {
607 recip_space: Arithmetic::default(),
608 }
609 }
610}
611
612impl<F: Float> std::ops::Add for Harmonic<F> {
613 type Output = Self;
614
615 #[inline]
616 fn add(self, rhs: Self) -> Self::Output {
617 self.add(rhs)
618 }
619}
620
621impl<F: Float> std::ops::AddAssign for Harmonic<F> {
622 #[inline]
623 fn add_assign(&mut self, rhs: Self) {
624 *self = self.add(rhs);
625 }
626}
627
628///
629/// Represents the state of the computation of the geometric mean.
630/// This is a simple implementation that accumulates information about the samples, such as sum and sum of squares.
631/// It is implemented as a wrapper around [`Arithmetic`] to compute the arithmetic mean of the logarithms of the samples.
632///
633/// It is best used through the [`StatisticsOps`] trait.
634///
635#[derive(Debug, Clone, Copy, PartialEq)]
636#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
637pub struct Geometric<F: Float> {
638 log_space: Arithmetic<F>,
639}
640
641impl<F: Float> Geometric<F> {
642 ///
643 /// Create a new empty state
644 ///
645 /// # Example
646 /// ```
647 /// # use stats_ci::*;
648 /// # use approx::*;
649 /// let mut stats = mean::Geometric::new();
650 /// stats.append(10.)?;
651 /// assert_eq!(stats.sample_count(), 1);
652 /// assert_abs_diff_eq!(stats.sample_mean(), 10., epsilon = 1e-10);
653 /// # Ok::<(),error::CIError>(())
654 /// ```
655 ///
656 pub fn new() -> Self {
657 Default::default()
658 }
659
660 ///
661 /// Append a new sample to the data
662 ///
663 /// Complexity: \\( O(1) \\)
664 ///
665 pub fn append(&mut self, x: F) -> CIResult<()> {
666 if x <= F::zero() {
667 return Err(error::CIError::NonPositiveValue(
668 x.to_f64().unwrap_or(f64::NAN),
669 ));
670 }
671 self.log_space.append(x.ln())?;
672 Ok(())
673 }
674
675 ///
676 /// Geometric mean of the sample
677 ///
678 pub fn sample_mean(&self) -> F {
679 self.log_space.sample_mean().exp()
680 }
681
682 ///
683 /// Standard error of the geometric mean
684 ///
685 /// Computed as: \\( G \frac{s_{\log x_i}}{\sqrt{n-1}} \\)
686 /// where \\( G \\) is the geometric mean of the sample;
687 /// \\( s_{\log x_i} \\) is the estimate of the standard deviation of the logarithms of the samples;
688 /// and \\( n-1 \\) is the degree of freedom of the sample data.
689 ///
690 /// # Reference
691 ///
692 /// * Nilan Noris. "The standard errors of the geometric and harmonic means and their application to index numbers." Ann. Math. Statist. 11(4): 445-448 (December, 1940). DOI: [10.1214/aoms/1177731830](https://doi.org/10.1214/aoms/1177731830) [JSTOR](https://www.jstor.org/stable/2235727)
693 ///
694 pub fn sample_sem(&self) -> F {
695 let geom_mean = self.sample_mean();
696 let log_std_dev = self.log_space.sample_std_dev();
697 geom_mean * log_std_dev / F::from(self.log_space.sample_count() - 1).unwrap().sqrt()
698 }
699
700 ///
701 /// Number of samples
702 ///
703 /// Complexity: \\( O(1) \\)
704 ///
705 pub fn sample_count(&self) -> usize {
706 self.log_space.sample_count()
707 }
708
709 ///
710 /// Confidence interval for the geometric mean
711 ///
712 pub fn ci_mean(&self, confidence: Confidence) -> CIResult<Interval<F>> {
713 let arith_ci = self.log_space.ci_mean(confidence)?;
714 let (lo, hi) = (arith_ci.low_f().exp(), arith_ci.high_f().exp());
715 match confidence {
716 Confidence::TwoSided(_) => Interval::new(lo, hi).map_err(|e| e.into()),
717 Confidence::UpperOneSided(_) => Ok(Interval::new_upper(lo)),
718 Confidence::LowerOneSided(_) => Ok(Interval::new_lower(hi)),
719 }
720 }
721
722 ///
723 /// Combine two states
724 ///
725 /// Complexity: \\( O(1) \\)
726 ///
727 #[allow(clippy::should_implement_trait)]
728 pub fn add(self, rhs: Self) -> Self {
729 Self {
730 log_space: self.log_space + rhs.log_space,
731 }
732 }
733
734 ///
735 /// Compute the confidence interval on the mean of a sample
736 ///
737 /// # Arguments
738 ///
739 /// * `confidence` - The confidence level of the interval
740 /// * `data` - The data to compute the confidence interval on
741 ///
742 /// # Output
743 ///
744 /// * `Ok(interval)` - The confidence interval on the mean of the sample
745 ///
746 /// # Errors
747 ///
748 /// * [`CIError::TooFewSamples`] - If the input data has too few samples to compute the confidence interval
749 /// * [`CIError::NonPositiveValue`] - If the input data contains non-positive values when computing harmonic/geometric means.
750 /// * [`CIError::InvalidInputData`] - If the input data contains invalid values (e.g. NaN)
751 /// * [`CIError::FloatConversionError`] - If some data cannot be converted to a float
752 ///
753 pub fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<F>>
754 where
755 for<'a> &'a I: IntoIterator<Item = &'a F>,
756 {
757 Self::from_iter(data)?.ci_mean(confidence)
758 }
759}
760
761impl<F: Float> Default for Geometric<F> {
762 fn default() -> Self {
763 Self {
764 log_space: Arithmetic::default(),
765 }
766 }
767}
768
769impl<F: Float> std::ops::Add for Geometric<F> {
770 type Output = Self;
771
772 #[inline]
773 fn add(self, rhs: Self) -> Self::Output {
774 self.add(rhs)
775 }
776}
777
778impl<F: Float> std::ops::AddAssign for Geometric<F> {
779 #[inline]
780 fn add_assign(&mut self, rhs: Self) {
781 *self = self.add(rhs);
782 }
783}
784
785///
786/// Trait for computing confidence intervals on the mean of a sample.
787///
788/// It is superseded by the [`StatisticsOps`] trait which allows incremental statistics.
789/// It is retained for backwards compatibility and will be deprecated in the future, as
790/// it brings no advantage over [`StatisticsOps`] and is less flexible.
791///
792/// # Examples
793///
794/// ```
795/// use stats_ci::*;
796/// let data = [
797/// 82., 94., 68., 6., 39., 80., 10., 97., 34., 66., 62., 7., 39., 68., 93., 64., 10., 74.,
798/// 15., 34., 4., 48., 88., 94., 17., 99., 81., 37., 68., 66., 40., 23., 67., 72., 63.,
799/// 71., 18., 51., 65., 87., 12., 44., 89., 67., 28., 86., 62., 22., 90., 18., 50., 25.,
800/// 98., 24., 61., 62., 86., 100., 96., 27., 36., 82., 90., 55., 26., 38., 97., 73., 16.,
801/// 49., 23., 26., 55., 26., 3., 23., 47., 27., 58., 27., 97., 32., 29., 56., 28., 23.,
802/// 37., 72., 62., 77., 63., 100., 40., 84., 77., 39., 71., 61., 17., 77.,
803/// ];
804/// let confidence = Confidence::new_two_sided(0.95);
805/// let ci = mean::Arithmetic::ci(confidence, &data)?;
806/// // arithmetic mean: 52.5
807///
808/// use num_traits::Float;
809/// use approx::*;
810/// assert_abs_diff_eq!(ci, Interval::new(48.094823990767836, 59.24517600923217)?, epsilon = 1e-3);
811/// # Ok::<(),error::CIError>(())
812/// ```
813///
814pub trait MeanCI<T: PartialOrd> {
815 ///
816 /// Compute the confidence interval on the mean of a sample
817 ///
818 /// # Arguments
819 ///
820 /// * `confidence` - The confidence level of the interval
821 /// * `data` - The data to compute the confidence interval on
822 ///
823 /// # Output
824 ///
825 /// * `Ok(interval)` - The confidence interval on the mean of the sample
826 ///
827 /// # Errors
828 ///
829 /// * [`CIError::TooFewSamples`] - If the input data has too few samples to compute the confidence interval
830 /// * [`CIError::NonPositiveValue`] - If the input data contains non-positive values when computing harmonic/geometric means.
831 /// * [`CIError::InvalidInputData`] - If the input data contains invalid values (e.g. NaN)
832 /// * [`CIError::FloatConversionError`] - If some data cannot be converted to a float
833 ///
834 fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<T>>
835 where
836 for<'a> &'a I: IntoIterator<Item = &'a T>;
837}
838
839macro_rules! impl_mean_ci_for {
840 ( $x:ty ) => {
841 impl<F: Float> MeanCI<F> for $x {
842 fn ci<I>(confidence: Confidence, data: &I) -> CIResult<Interval<F>>
843 where
844 for<'a> &'a I: IntoIterator<Item = &'a F>,
845 {
846 <$x>::ci(confidence, data)
847 }
848 }
849 };
850}
851
852impl_mean_ci_for!(Arithmetic<F>);
853impl_mean_ci_for!(Harmonic<F>);
854impl_mean_ci_for!(Geometric<F>);
855
856#[cfg(test)]
857mod tests {
858 use super::*;
859 use approx::*;
860
861 #[test]
862 fn test_mean_ci() -> CIResult<()> {
863 let data = [
864 82., 94., 68., 6., 39., 80., 10., 97., 34., 66., 62., 7., 39., 68., 93., 64., 10., 74.,
865 15., 34., 4., 48., 88., 94., 17., 99., 81., 37., 68., 66., 40., 23., 67., 72., 63.,
866 71., 18., 51., 65., 87., 12., 44., 89., 67., 28., 86., 62., 22., 90., 18., 50., 25.,
867 98., 24., 61., 62., 86., 100., 96., 27., 36., 82., 90., 55., 26., 38., 97., 73., 16.,
868 49., 23., 26., 55., 26., 3., 23., 47., 27., 58., 27., 97., 32., 29., 56., 28., 23.,
869 37., 72., 62., 77., 63., 100., 40., 84., 77., 39., 71., 61., 17., 77.,
870 ];
871
872 let confidence = Confidence::new_two_sided(0.95);
873 let ci = Arithmetic::ci(confidence, &data)?;
874 // mean: 53.67
875 // stddev: 28.097613040716798
876 // reference values computed in python
877 // [48.094823990767836, 59.24517600923217]
878 // ```python
879 // import numpy as np
880 // import scipy.stats as st
881 // st.t.interval(confidence=0.95, df=len(data)-1, loc=np.mean(data), scale=st.sem(data))
882 // ```
883 assert_abs_diff_eq!(ci.low_f(), 48.094823990767836, epsilon = 1e-8);
884 assert_abs_diff_eq!(ci.high_f(), 59.24517600923217, epsilon = 1e-8);
885 assert_abs_diff_eq!(ci.low_f() + ci.high_f(), 2. * 53.67);
886
887 let one_sided_ci = Arithmetic::ci(Confidence::UpperOneSided(0.975), &data)?;
888 assert_abs_diff_eq!(one_sided_ci.low_f(), ci.low_f());
889 assert_eq!(one_sided_ci.high_f(), f64::INFINITY);
890
891 let one_sided_ci = Arithmetic::ci(Confidence::LowerOneSided(0.975), &data)?;
892 assert_abs_diff_eq!(one_sided_ci.high_f(), ci.high_f());
893 assert_eq!(one_sided_ci.low_f(), f64::NEG_INFINITY);
894
895 let mut stats = Arithmetic::default();
896 stats.extend(&data)?;
897 let ci = stats.ci_mean(confidence)?;
898
899 assert_abs_diff_eq!(ci.low_f(), 48.094823990767836, epsilon = 1e-8);
900 assert_abs_diff_eq!(ci.high_f(), 59.24517600923217, epsilon = 1e-8);
901 assert_abs_diff_eq!(ci.low_f() + ci.high_f(), 2. * 53.67, epsilon = 1e-8);
902
903 let one_sided_ci = stats.ci_mean(Confidence::UpperOneSided(0.975))?;
904 assert_abs_diff_eq!(one_sided_ci.low_f(), ci.low_f());
905 assert_eq!(one_sided_ci.high_f(), f64::INFINITY);
906
907 let one_sided_ci = stats.ci_mean(Confidence::LowerOneSided(0.975))?;
908 assert_abs_diff_eq!(one_sided_ci.high_f(), ci.high_f());
909 assert_eq!(one_sided_ci.low_f(), f64::NEG_INFINITY);
910
911 Ok(())
912 }
913
914 #[test]
915 fn test_geometric_ci() -> CIResult<()> {
916 let data = [
917 82., 94., 68., 6., 39., 80., 10., 97., 34., 66., 62., 7., 39., 68., 93., 64., 10., 74.,
918 15., 34., 4., 48., 88., 94., 17., 99., 81., 37., 68., 66., 40., 23., 67., 72., 63.,
919 71., 18., 51., 65., 87., 12., 44., 89., 67., 28., 86., 62., 22., 90., 18., 50., 25.,
920 98., 24., 61., 62., 86., 100., 96., 27., 36., 82., 90., 55., 26., 38., 97., 73., 16.,
921 49., 23., 26., 55., 26., 3., 23., 47., 27., 58., 27., 97., 32., 29., 56., 28., 23.,
922 37., 72., 62., 77., 63., 100., 40., 84., 77., 39., 71., 61., 17., 77.,
923 ];
924
925 let confidence = Confidence::new_two_sided(0.95);
926 let ci = Geometric::ci(confidence, &data)?;
927 // geometric mean: 43.7268032829256
928 //
929 // reference values computed in python:
930 // in log space: (3.630483364286656, 3.9254391587458475)
931 // [37.731050052224354, 50.67532768627392]
932 assert_abs_diff_eq!(ci.low_f(), 37.731050052224354, epsilon = 1e-8);
933 assert_abs_diff_eq!(ci.high_f(), 50.67532768627392, epsilon = 1e-8);
934
935 let one_sided_ci = Geometric::ci(Confidence::UpperOneSided(0.975), &data)?;
936 assert_abs_diff_eq!(one_sided_ci.low_f(), ci.low_f());
937 assert_eq!(one_sided_ci.high_f(), f64::INFINITY);
938
939 let one_sided_ci = Geometric::ci(Confidence::LowerOneSided(0.975), &data)?;
940 assert_abs_diff_eq!(one_sided_ci.high_f(), ci.high_f());
941 assert_eq!(one_sided_ci.low_f(), f64::NEG_INFINITY);
942
943 let mut stats = Geometric::default();
944 stats.extend(&data)?;
945 let ci = stats.ci_mean(confidence)?;
946 assert_abs_diff_eq!(stats.sample_mean(), 43.7268032829256, epsilon = 1e-8);
947 assert_abs_diff_eq!(ci.low_f(), 37.731050052224354, epsilon = 1e-8);
948 assert_abs_diff_eq!(ci.high_f(), 50.67532768627392, epsilon = 1e-8);
949
950 let one_sided_ci = stats.ci_mean(Confidence::UpperOneSided(0.975))?;
951 assert_abs_diff_eq!(one_sided_ci.low_f(), ci.low_f());
952 assert_eq!(one_sided_ci.high_f(), f64::INFINITY);
953
954 let one_sided_ci = stats.ci_mean(Confidence::LowerOneSided(0.975))?;
955 assert_abs_diff_eq!(one_sided_ci.high_f(), ci.high_f());
956 assert_eq!(one_sided_ci.low_f(), f64::NEG_INFINITY);
957
958 Ok(())
959 }
960
961 #[test]
962 fn test_harmonic_ci() -> CIResult<()> {
963 let data = [
964 82., 94., 68., 6., 39., 80., 10., 97., 34., 66., 62., 7., 39., 68., 93., 64., 10., 74.,
965 15., 34., 4., 48., 88., 94., 17., 99., 81., 37., 68., 66., 40., 23., 67., 72., 63.,
966 71., 18., 51., 65., 87., 12., 44., 89., 67., 28., 86., 62., 22., 90., 18., 50., 25.,
967 98., 24., 61., 62., 86., 100., 96., 27., 36., 82., 90., 55., 26., 38., 97., 73., 16.,
968 49., 23., 26., 55., 26., 3., 23., 47., 27., 58., 27., 97., 32., 29., 56., 28., 23.,
969 37., 72., 62., 77., 63., 100., 40., 84., 77., 39., 71., 61., 17., 77.,
970 ];
971
972 let confidence = Confidence::new_two_sided(0.95);
973 let ci = Harmonic::ci(confidence, &data)?;
974 // harmonic mean: 30.031313156339586
975 //
976 // reference values computed in python:
977 // in reciprocal space: (0.02424956057996111, 0.042347593849757906)
978 // [41.237860649168255, 23.614092539657168] (reversed by conversion from reciprocal space)
979 assert_abs_diff_eq!(ci.low_f(), 23.614092539657168, epsilon = 1e-8);
980 assert_abs_diff_eq!(ci.high_f(), 41.237860649168255, epsilon = 1e-8);
981
982 let one_sided_ci = Harmonic::ci(Confidence::UpperOneSided(0.975), &data)?;
983 assert_abs_diff_eq!(one_sided_ci.low_f(), ci.low_f());
984 assert_eq!(one_sided_ci.high_f(), f64::INFINITY);
985
986 let one_sided_ci = Harmonic::ci(Confidence::LowerOneSided(0.975), &data)?;
987 assert_abs_diff_eq!(one_sided_ci.high_f(), ci.high_f());
988 assert_eq!(one_sided_ci.low_f(), f64::NEG_INFINITY);
989
990 let confidence = Confidence::new_two_sided(0.95);
991 let data = [
992 1.81600583, 0.07498389, 1.29092744, 0.62023863, 0.09345327, 1.94670997, 2.27687339,
993 0.9251231, 1.78173864, 0.4391542, 1.36948099, 1.5191194, 0.42286756, 1.48463176,
994 0.17621009, 2.31810064, 0.15633061, 2.55137878, 1.11043948, 1.35923319, 1.58385561,
995 0.63431437, 0.49993148, 0.49168534, 0.11533354,
996 ];
997 let ci = Harmonic::ci(confidence, &data)?;
998 // harmonic mean: 0.38041820166550844
999 //
1000 // reference values computed in python:
1001 // in reciprocal space: (1.1735238063066096, 4.083848080632111)
1002 // [0.8521343961033607, 0.2448670911003175]
1003 assert_abs_diff_eq!(ci.low_f(), 0.2448670911003175, epsilon = 1e-6);
1004 assert_abs_diff_eq!(ci.high_f(), 0.8521343961033607, epsilon = 1e-6);
1005
1006 let mut stats = Harmonic::default();
1007 stats.extend(&data)?;
1008 let ci = stats.ci_mean(confidence)?;
1009 assert_abs_diff_eq!(stats.sample_mean(), 0.38041820166550844, epsilon = 1e-8);
1010 assert_abs_diff_eq!(ci.low_f(), 0.2448670911003175, epsilon = 1e-6);
1011 assert_abs_diff_eq!(ci.high_f(), 0.8521343961033607, epsilon = 1e-6);
1012
1013 Ok(())
1014 }
1015
1016 #[test]
1017 fn test_arithmetic_add() {
1018 const VALUE: f32 = 0.1;
1019 let size = 1_000_000;
1020
1021 let mut stats_ref = Arithmetic::default();
1022 let mut stats_summed = Arithmetic::default();
1023 let mut stats_summed_in_place = Arithmetic::default();
1024 for _ in 0..size {
1025 stats_ref.append(VALUE).unwrap();
1026
1027 let mut new_stat = Arithmetic::default();
1028 new_stat.append(VALUE).unwrap();
1029 stats_summed = stats_summed + new_stat;
1030 stats_summed_in_place += new_stat;
1031 }
1032
1033 assert_eq!(stats_ref.sample_count(), size);
1034 assert_eq!(stats_summed.sample_count(), size);
1035 assert_eq!(stats_summed_in_place.sample_count(), size);
1036
1037 assert_eq!(stats_ref.sample_mean(), stats_summed.sample_mean());
1038 assert_eq!(stats_ref.sample_variance(), stats_summed.sample_variance());
1039 assert_eq!(stats_ref.sample_std_dev(), stats_summed.sample_std_dev());
1040 assert_eq!(stats_ref.sample_sem(), stats_summed.sample_sem());
1041
1042 assert_eq!(stats_ref.sample_mean(), stats_summed_in_place.sample_mean());
1043 assert_eq!(
1044 stats_ref.sample_variance(),
1045 stats_summed_in_place.sample_variance()
1046 );
1047 assert_eq!(
1048 stats_ref.sample_std_dev(),
1049 stats_summed_in_place.sample_std_dev()
1050 );
1051 assert_eq!(stats_ref.sample_sem(), stats_summed_in_place.sample_sem());
1052 }
1053
1054 #[test]
1055 fn test_geometric_add() {
1056 const VALUE: f32 = 0.1;
1057 let size = 1_000_000;
1058
1059 let mut stats_ref = Geometric::default();
1060 let mut stats_summed = Geometric::default();
1061 let mut stats_summed_in_place = Geometric::default();
1062 for _ in 0..size {
1063 stats_ref.append(VALUE).unwrap();
1064
1065 let mut new_stat = Geometric::default();
1066 new_stat.append(VALUE).unwrap();
1067 stats_summed = stats_summed + new_stat;
1068 stats_summed_in_place += new_stat;
1069 }
1070
1071 assert_eq!(stats_ref.sample_count(), size);
1072 assert_eq!(stats_summed.sample_count(), size);
1073 assert_eq!(stats_summed_in_place.sample_count(), size);
1074
1075 assert_eq!(stats_ref.sample_mean(), stats_summed.sample_mean());
1076 assert_eq!(stats_ref.sample_sem(), stats_summed.sample_sem());
1077
1078 assert_eq!(stats_ref.sample_mean(), stats_summed_in_place.sample_mean());
1079 assert_eq!(stats_ref.sample_sem(), stats_summed_in_place.sample_sem());
1080 }
1081
1082 #[test]
1083 fn test_harmonic_add() {
1084 const VALUE: f32 = 0.1;
1085 let size = 1_000_000;
1086
1087 let mut stats_ref = Harmonic::default();
1088 let mut stats_summed = Harmonic::default();
1089 let mut stats_summed_in_place = Harmonic::default();
1090 for _ in 0..size {
1091 stats_ref.append(VALUE).unwrap();
1092
1093 let mut new_stat = Harmonic::default();
1094 new_stat.append(VALUE).unwrap();
1095 stats_summed = stats_summed + new_stat;
1096 stats_summed_in_place += new_stat;
1097 }
1098
1099 assert_eq!(stats_ref.sample_count(), size);
1100 assert_eq!(stats_summed.sample_count(), size);
1101 assert_eq!(stats_summed_in_place.sample_count(), size);
1102
1103 assert_eq!(stats_ref.sample_mean(), stats_summed.sample_mean());
1104 assert_eq!(stats_ref.sample_sem(), stats_summed.sample_sem());
1105
1106 assert_eq!(stats_ref.sample_mean(), stats_summed_in_place.sample_mean());
1107 assert_eq!(stats_ref.sample_sem(), stats_summed_in_place.sample_sem());
1108 }
1109
1110 #[test]
1111 fn test_misc() -> CIResult<()> {
1112 let data = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.];
1113 let stats = mean::Arithmetic::from_iter(&data)?;
1114 assert_eq!(stats.sample_count(), 10);
1115 assert_eq!(stats.sample_mean(), 5.5);
1116 assert_abs_diff_eq!(stats.sample_sem(), 1.0092, epsilon = 1e-4);
1117 let confidence = Confidence::new_two_sided(0.95);
1118 let ci = stats.ci_mean(confidence)?;
1119 assert_abs_diff_eq!(ci, Interval::new(3.3341, 7.6659)?, epsilon = 1e-4);
1120 Ok(())
1121 }
1122}