stats_ci/
comparison.rs

1//!
2//! This module provides functions to compare two samples for two different cases.
3//!
4//! # Paired observations
5//!
6//! The first case is when the two samples are paired, i.e. each measurement in the first sample is paired with a measurement in the second sample.
7//! For instance, when measuring the performance of two algorithms, the same input data is used for both algorithms to yield a pair of related observations.
8//! Obviously, the number of observations in the two samples must be the same.
9//! When possible, paired observations are preferred because they significantly reduce the variance of the difference between the two means.
10//! This means that fewer observations are needed to achieve the same significance.
11//!
12//! The structure [`Paired`] deals with paired observations and can be used in simple form through the function [`Paired::ci`] or incrementally
13//! with the function [`Paired::ci_mean`].
14//!
15//! # Unpaired observations
16//!
17//! The second case is when the two samples are not paired, i.e. each measurement in the first sample is not paired with a measurement in the second sample.
18//! The number of observations in the two samples may be different.
19//!
20//! The structure [`Unpaired`] deals with the case of unpaired observations and can be used in simple form through the function [`Unpaired::ci`]
21//! or incrementally with the function [`Unpaired::ci_mean`].
22//!
23//! # Examples
24//!
25//! ## Paired observations
26//!
27//! The examples below have the following preamble:
28//! ```
29//! use stats_ci::*;
30//! // Zinc concentration in water samples from a river
31//! let data_bottom_water = [
32//!    0.430, 0.266, 0.567, 0.531, 0.707, 0.716, 0.651, 0.589, 0.469, 0.723,
33//! ];
34//! let data_surface_water = [
35//!   0.415, 0.238, 0.390, 0.410, 0.605, 0.609, 0.632, 0.523, 0.411, 0.612,
36//! ];
37//! let confidence = Confidence::new_two_sided(0.95);
38//! ```
39//!
40//! ### Easy interface (paired)
41//! ```
42//! # use stats_ci::*;
43//! # // Zinc concentration in water samples from a river
44//! # let data_bottom_water = [
45//! #    0.430, 0.266, 0.567, 0.531, 0.707, 0.716, 0.651, 0.589, 0.469, 0.723,
46//! # ];
47//! # let data_surface_water = [
48//! #   0.415, 0.238, 0.390, 0.410, 0.605, 0.609, 0.632, 0.523, 0.411, 0.612,
49//! # ];
50//! # let confidence = Confidence::new_two_sided(0.95);
51//! let ci = comparison::Paired::ci(
52//!     confidence,
53//!     &data_bottom_water,
54//!     &data_surface_water
55//! )?;
56//! # Ok::<(),error::CIError>(())
57//! ```
58//!
59//! ### Incremental interface (paired)
60//! ```
61//! # use stats_ci::*;
62//! # let data_bottom_water = [
63//! #    0.430, 0.266, 0.567, 0.531, 0.707, 0.716, 0.651, 0.589, 0.469, 0.723,
64//! # ];
65//! # let data_surface_water = [
66//! #   0.415, 0.238, 0.390, 0.410, 0.605, 0.609, 0.632, 0.523, 0.411, 0.612,
67//! # ];
68//! # let confidence = Confidence::new_two_sided(0.95);
69//! let mut stats = comparison::Paired::default();
70//! stats.extend(&data_bottom_water, &data_surface_water)?;
71//! let ci = stats.ci_mean(confidence)?;
72//! let mean = stats.sample_mean();
73//! # Ok::<(),error::CIError>(())
74//! ```
75//!
76//! ## Unpaired observations
77//!
78//! The examples below have the following preamble:
79//! ```
80//! # use stats_ci::*;
81//! // Gain in weight of 19 female rats between 28 and 84 days after birth.
82//! // 12 were fed on a high protein diet and 7 on a low protein diet.
83//! let data_high_protein = [
84//!     134., 146., 104., 119., 124., 161., 107., 83., 113., 129., 97., 123.,
85//! ];
86//! let data_low_protein = [70., 118., 101., 85., 107., 132., 94.];
87//! let confidence = Confidence::new_two_sided(0.95);
88//! ```
89//!
90//! ### Easy interface (unpaired)
91//! ```
92//! # use stats_ci::*;
93//! # // Gain in weight of 19 female rats between 28 and 84 days after birth.
94//! # // 12 were fed on a high protein diet and 7 on a low protein diet.
95//! # let data_high_protein = [
96//! #     134., 146., 104., 119., 124., 161., 107., 83., 113., 129., 97., 123.,
97//! # ];
98//! # let data_low_protein = [70., 118., 101., 85., 107., 132., 94.];
99//! let confidence = Confidence::new_two_sided(0.95);
100//! let ci = comparison::Unpaired::ci(
101//!     confidence,
102//!     &data_high_protein,
103//!     &data_low_protein
104//! )?;
105//! # Ok::<(),error::CIError>(())
106//! ```
107//!
108//! ### Incremental interface (unpaired)
109//! ```
110//! # use stats_ci::*;
111//! # let data_high_protein = [
112//! #     134., 146., 104., 119., 124., 161., 107., 83., 113., 129., 97., 123.,
113//! # ];
114//! # let data_low_protein = [70., 118., 101., 85., 107., 132., 94.];
115//! # let confidence = Confidence::new_two_sided(0.95);
116//! let mut stats = comparison::Unpaired::default();
117//! stats.extend(&data_high_protein, &data_low_protein)?;
118//! let ci = stats.ci_mean(confidence)?;
119//! # Ok::<(),error::CIError>(())
120//! ```
121//!
122//! # References
123//!
124//! * R. Jain, The Art of Computer Systems Performance Analysis, Wiley, 1991.
125//! * [Wikipedia article on paired difference test](https://en.wikipedia.org/wiki/Paired_difference_test)
126//! * PennState. Stat 500. Lesson 7: Comparing Two Population Parameters. [Online](https://online.stat.psu.edu/stat500/lesson/7)
127//!
128use crate::*;
129use error::*;
130use mean::StatisticsOps;
131use num_traits::Float;
132
133///
134/// Structure to collect statistics on two paired samples.
135///
136/// Paired observations are when each measurement in the first sample is paired with
137/// a measurement in the second sample.
138/// For instance, when measuring the performance of two algorithms, the same input
139/// data is used for both algorithms to yield a pair of related observations.
140///
141/// When observations cannot naturally be paired, the samples must be compared using
142/// unpaired observations (see [`Unpaired`]). Typically, unpaired observations require
143/// noticeably more observations to achieve the same statistical significance.
144///
145/// # Examples
146///
147/// The example below considers the zinc concentration in water samples from a river.
148/// Each sample is taken at the same location, but one at the bottom of the river and
149/// the other at the surface. Thus, those measurements are paired (bottom and surface).
150/// See <https://online.stat.psu.edu/stat500/lesson/7/7.3/7.3.2> for details on this
151/// example.
152///
153/// ```
154/// # use stats_ci::*;
155/// // Zinc concentration in water samples from a river
156/// let data_bottom_water = [
157///     0.430, 0.266, 0.567, 0.531, 0.707, 0.716, 0.651, 0.589, 0.469, 0.723,
158/// ];
159/// let data_surface_water = [
160///     0.415, 0.238, 0.390, 0.410, 0.605, 0.609, 0.632, 0.523, 0.411, 0.612,
161/// ];
162///
163/// let mut stats = comparison::Paired::default();
164/// stats.extend(&data_bottom_water, &data_surface_water).unwrap();
165/// let ci = stats.ci_mean(Confidence::new_two_sided(0.95)).unwrap();
166/// ```
167///
168/// # References
169///
170/// * R. Jain, The Art of Computer Systems Performance Analysis, Wiley, 1991.
171/// * [Wikipedia article on paired difference test](https://en.wikipedia.org/wiki/Paired_difference_test)
172/// * PennState. Stat 500. Lesson 7: Comparing Two Population Parameters. [Online](https://online.stat.psu.edu/stat500/lesson/7)
173
174#[derive(Debug, Clone, PartialEq)]
175#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
176pub struct Paired<T: Float> {
177    stats: mean::Arithmetic<T>,
178}
179
180impl<T: Float> Paired<T> {
181    ///
182    /// Add a pair of observations to the two samples.
183    ///
184    /// # Arguments
185    ///
186    /// * `data_a` - the observation for the first sample
187    /// * `data_b` - the observation for the second sample
188    ///
189    /// # Errors
190    ///
191    /// * [`CIError::FloatConversionError`] - if the conversion to `T` fails
192    ///
193    /// # Examples
194    ///
195    /// ```
196    /// # use stats_ci::*;
197    /// let mut stats = comparison::Paired::default();
198    /// stats.append_pair(1., 2.)?;
199    /// # assert_eq!(stats.sample_count(), 1);
200    /// # assert_eq!(stats.sample_mean(), -1.);
201    /// # Ok::<(),error::CIError>(())
202    /// ```
203    pub fn append_pair(&mut self, data_a: T, data_b: T) -> CIResult<()> {
204        self.stats.append(data_a - data_b)
205    }
206
207    ///
208    /// Append multiple pairs of observations to the two samples.
209    ///
210    /// # Arguments
211    ///
212    /// * `iter` - an iterable collection of tuples to add to the data
213    ///
214    /// # Errors
215    ///
216    /// * [`CIError::FloatConversionError`] - if the conversion to `T` fails
217    ///
218    /// # Examples
219    ///
220    /// ```
221    /// # use stats_ci::*;
222    /// let mut stats = comparison::Paired::default();
223    /// stats.extend_tuple(&[(1., 2.), (3., 4.)])?;
224    /// # assert_eq!(stats.sample_count(), 2);
225    /// # assert_eq!(stats.sample_mean(), -1.);
226    /// # Ok::<(),error::CIError>(())
227    /// ```
228    pub fn extend_tuple<I>(&mut self, iter: &I) -> CIResult<()>
229    where
230        for<'a> &'a I: IntoIterator<Item = &'a (T, T)>,
231    {
232        for &(x, y) in iter.into_iter() {
233            self.stats.append(x - y)?;
234        }
235        Ok(())
236    }
237
238    ///
239    /// Append multiple observations to the two samples.
240    ///
241    /// # Arguments
242    ///
243    /// * `data_a` - an iterable collection of observations for the first sample
244    /// * `data_b` - an iterable collection of observations for the second sample
245    ///
246    /// # Errors
247    ///
248    /// * [`CIError::DifferentSampleSizes`] - if the two iterables have different lengths
249    /// * [`CIError::FloatConversionError`] - if the conversion to `T` fails
250    ///
251    /// # Examples
252    ///
253    /// ```
254    /// # use stats_ci::*;
255    /// let mut stats = comparison::Paired::default();
256    /// stats.extend(&[1., 3.], &[2., 4.])?;
257    /// # assert_eq!(stats.sample_count(), 2);
258    /// # assert_eq!(stats.sample_mean(), -1.);
259    /// # Ok::<(),error::CIError>(())
260    /// ```
261    pub fn extend<I1, I2>(&mut self, data_a: &I1, data_b: &I2) -> CIResult<()>
262    where
263        for<'a> &'a I1: IntoIterator<Item = &'a T>,
264        for<'b> &'b I2: IntoIterator<Item = &'b T>,
265    {
266        let mut data_a = data_a.into_iter();
267        let mut data_b = data_b.into_iter();
268        let mut count = 0;
269        loop {
270            match (data_a.next(), data_b.next()) {
271                (Some(x), Some(y)) => {
272                    count += 1;
273                    self.stats.append(*x - *y)?
274                }
275                (None, None) => return Ok(()),
276                // returns error if iterables have different lengths
277                (None, _) => {
278                    return Err(CIError::DifferentSampleSizes(
279                        count,
280                        count + 1 + data_b.count(),
281                    ))
282                }
283                (_, None) => {
284                    return Err(CIError::DifferentSampleSizes(
285                        count + 1 + data_a.count(),
286                        count,
287                    ))
288                }
289            }
290        }
291    }
292
293    ///
294    /// Return the sample mean of the difference between the two samples.
295    ///
296    /// # Examples
297    ///
298    /// ```
299    /// # use stats_ci::*;
300    /// let data_a = [1., 2., 3.];
301    /// let data_b = [4., 5., 6.];
302    /// let mut stats = comparison::Paired::default();
303    /// stats.extend(&data_a, &data_b)?;
304    /// let mean = stats.sample_mean();
305    /// assert_eq!(mean, -3.);
306    /// # Ok::<(),error::CIError>(())
307    /// ```
308    pub fn sample_mean(&self) -> T {
309        self.stats.sample_mean()
310    }
311
312    ///
313    /// Return the standard error of the difference between the two samples.
314    ///
315    /// # Examples
316    ///
317    /// ```
318    /// # use stats_ci::*;
319    /// let data_a = [1., 2., 3.];
320    /// let data_b = [4., 5., 6.];
321    /// let mut stats = comparison::Paired::default();
322    /// stats.extend(&data_a, &data_b)?;
323    /// let sem = stats.sample_sem();
324    /// assert_eq!(sem, 0.);
325    /// # Ok::<(),error::CIError>(())
326    /// ```
327    pub fn sample_sem(&self) -> T {
328        self.stats.sample_sem()
329    }
330
331    ///
332    /// Return the number of sample pairs.
333    ///
334    /// # Examples
335    ///
336    /// ```
337    /// # use stats_ci::*;
338    /// let data_a = [1., 2., 3.];
339    /// let data_b = [4., 5., 6.];
340    /// let mut stats = comparison::Paired::default();
341    /// stats.extend(&data_a, &data_b)?;
342    /// let count = stats.sample_count();
343    /// assert_eq!(count, 3);
344    /// # Ok::<(),error::CIError>(())
345    /// ```
346    ///
347    pub fn sample_count(&self) -> usize {
348        self.stats.sample_count()
349    }
350
351    ///
352    /// Return the confidence interval of the difference between the means of the two samples.
353    ///
354    /// # Arguments
355    ///
356    /// * `confidence` - the confidence level
357    ///
358    /// # Returns
359    ///
360    /// The confidence interval of the difference as a result.
361    ///
362    /// # Notes
363    ///
364    /// If the interval includes zero, the difference is not significant.
365    /// If the interval is strictly positive (resp. negative), the mean of the first sample is significantly
366    /// greater (resp. smaller) than the mean of the second sample.
367    ///
368    /// # Examples
369    ///
370    /// ```
371    /// # use stats_ci::*;
372    /// let data_a = [1., 2., 3.];
373    /// let data_b = [4., 5., 6.];
374    /// let mut stats = comparison::Paired::default();
375    /// stats.extend(&data_a, &data_b)?;
376    /// let confidence = Confidence::new_two_sided(0.95);
377    /// let ci = stats.ci_mean(confidence)?;
378    /// assert_eq!(ci, Interval::new(-3., -3.)?);
379    /// # Ok::<(),error::CIError>(())
380    /// ```
381    pub fn ci_mean(&self, confidence: Confidence) -> CIResult<Interval<T>> {
382        self.stats.ci_mean(confidence)
383    }
384
385    ///
386    /// Compute the confidence interval of the difference between the means of the two samples.
387    ///
388    /// # Arguments
389    ///
390    /// * `confidence` - the confidence level
391    /// * `data_a` - the first sample
392    /// * `data_b` - the second sample
393    ///
394    /// # Returns
395    ///
396    /// The confidence interval of the difference as a result.
397    ///
398    /// # Errors
399    ///
400    /// * [`CIError::DifferentSampleSizes`] - if the two samples do not have the same length
401    ///
402    /// # Notes
403    ///
404    /// If the interval includes zero, the difference is not significant.
405    /// If the interval is strictly positive (resp. negative), the mean of the first sample is significantly
406    /// greater (resp. smaller) than the mean of the second sample.
407    ///
408    /// This function provides a simple interface to obtain the confidence interval with a single call, when
409    /// the samples are known a priori and there is no need to include additional observations,
410    /// obtain the confidence intervals for other levels or access the sample statistics. For more refined
411    /// use cases, it is recommended to use [`Paired::ci_mean`] instead.
412    ///
413    /// # References
414    ///
415    /// * R. Jain, The Art of Computer Systems Performance Analysis, Wiley, 1991.
416    /// * [Wikipedia article on paired difference test](https://en.wikipedia.org/wiki/Paired_difference_test)
417    /// * PennState. Stat 500. Lesson 7: Comparing Two Population Parameters. [Online](https://online.stat.psu.edu/stat500/lesson/7)
418    ///
419    /// # Examples
420    ///
421    /// ```
422    /// # use stats_ci::*;
423    /// let data_a = [1., 2., 3.];
424    /// let data_b = [4., 5., 6.];
425    /// let confidence = Confidence::new_two_sided(0.95);
426    /// let ci = comparison::Paired::ci(confidence, &data_a, &data_b)?;
427    /// # Ok::<(),error::CIError>(())
428    /// ```
429    ///
430    pub fn ci<Ia, Ib>(confidence: Confidence, data_a: &Ia, data_b: &Ib) -> CIResult<Interval<T>>
431    where
432        for<'a> &'a Ia: IntoIterator<Item = &'a T>,
433        for<'a> &'a Ib: IntoIterator<Item = &'a T>,
434    {
435        let mut stats = Paired::default();
436        stats.extend(data_a, data_b)?;
437        stats.ci_mean(confidence)
438    }
439}
440
441impl<T: Float> Default for Paired<T> {
442    fn default() -> Self {
443        Self {
444            stats: mean::Arithmetic::default(),
445        }
446    }
447}
448
449impl<F: Float> std::ops::Add for Paired<F> {
450    type Output = Self;
451
452    #[inline]
453    fn add(self, rhs: Self) -> Self::Output {
454        Self {
455            stats: self.stats + rhs.stats,
456        }
457    }
458}
459
460impl<F: Float> std::ops::AddAssign for Paired<F> {
461    #[inline]
462    fn add_assign(&mut self, rhs: Self) {
463        self.stats += rhs.stats;
464    }
465}
466
467///
468/// Structure to collect statistics on two unpaired samples.
469///
470/// Given two independent samples, the goal is to compute the confidence interval
471/// of the difference between their means.
472/// Unlike with paired observations ([`Paired`]), the two samples do not have to
473/// have the same length.
474/// However, comparing with unpaired observations typically requires considerably
475/// more observations to reach the same degree of statistical accuracy. This is
476/// why paired observations are preferred when the circumstances allow.
477///
478/// # Examples
479///
480/// ```
481/// # use stats_ci::*;
482/// // Gain in weight of 19 female rats between 28 and 84 days after birth.
483/// // 12 were fed on a high protein diet and 7 on a low protein diet.
484/// let data_high_protein = [
485///     134., 146., 104., 119., 124., 161., 107., 83., 113., 129., 97., 123.,
486/// ];
487/// let data_low_protein = [70., 118., 101., 85., 107., 132., 94.];
488/// let mut stats = comparison::Unpaired::default();
489/// stats.extend(&data_high_protein, &data_low_protein)?;
490/// let ci = stats.ci_mean(Confidence::new_two_sided(0.95))?;
491/// # Ok::<(),error::CIError>(())
492/// ```
493///
494/// # References
495///
496/// * R. Jain, The Art of Computer Systems Performance Analysis, Wiley, 1991.
497/// * [Wikipedia article on Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test)
498/// * PennState. Stat 500. Lesson 7: Comparing Two Population Parameters. [Online](https://online.stat.psu.edu/stat500/lesson/7)
499///
500#[derive(Debug, Clone, PartialEq)]
501#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
502pub struct Unpaired<T: Float> {
503    stats_a: mean::Arithmetic<T>,
504    stats_b: mean::Arithmetic<T>,
505}
506
507impl<T: Float> Default for Unpaired<T> {
508    fn default() -> Self {
509        Self {
510            stats_a: mean::Arithmetic::default(),
511            stats_b: mean::Arithmetic::default(),
512        }
513    }
514}
515
516impl<T: Float> Unpaired<T> {
517    ///
518    /// Create a new instance of `Unpaired` from two statistics.
519    ///
520    /// # Arguments
521    ///
522    /// * `stats_a` - the statistics of the first sample
523    /// * `stats_b` - the statistics of the second sample
524    ///
525    /// # Examples
526    ///
527    /// ```
528    /// # use stats_ci::*;
529    /// let stats_a = mean::Arithmetic::from_iter(&[1., 2., 3.])?;
530    /// let stats_b = mean::Arithmetic::from_iter(&[4., 5., 6.])?;
531    /// let stats = comparison::Unpaired::new(stats_a, stats_b);
532    /// # Ok::<(),error::CIError>(())
533    /// ```
534    pub fn new(stats_a: mean::Arithmetic<T>, stats_b: mean::Arithmetic<T>) -> Self {
535        Self { stats_a, stats_b }
536    }
537
538    ///
539    /// Create a new instance of `Unpaired` from two samples.
540    ///
541    /// # Arguments
542    ///
543    /// * `data_a` - the first sample
544    /// * `data_b` - the second sample
545    ///
546    /// # Errors
547    ///
548    /// * [`CIError::FloatConversionError`] - if the conversion to `T` fails
549    ///
550    /// # Examples
551    ///
552    /// ```
553    /// # use stats_ci::*;
554    /// let stats = comparison::Unpaired::from_iter(&[1., 2., 3.], &[4., 5., 6.])?;
555    /// # Ok::<(),error::CIError>(())
556    /// ```
557    ///
558    pub fn from_iter<Ia, Ib>(data_a: &Ia, data_b: &Ib) -> CIResult<Self>
559    where
560        for<'a> &'a Ia: IntoIterator<Item = &'a T>,
561        for<'b> &'b Ib: IntoIterator<Item = &'b T>,
562    {
563        let mut stats = Self::default();
564        stats.extend_a(data_a)?;
565        stats.extend_b(data_b)?;
566        Ok(stats)
567    }
568
569    ///
570    /// Return a reference to the statistics of the first sample.
571    ///
572    pub fn stats_a(&self) -> &mean::Arithmetic<T> {
573        &self.stats_a
574    }
575
576    ///
577    /// Return a mutable reference to the statistics of the first sample.
578    ///
579    /// # Examples
580    ///
581    /// ```
582    /// # use stats_ci::*;
583    /// let mut stats = comparison::Unpaired::default();
584    /// stats.stats_a_mut().append(1.)?;
585    /// # Ok::<(),error::CIError>(())
586    /// ```
587    ///
588    pub fn stats_a_mut(&mut self) -> &mut mean::Arithmetic<T> {
589        &mut self.stats_a
590    }
591
592    ///
593    /// Return a reference to the statistics of the second sample.
594    ///
595    /// # Examples
596    ///
597    /// ```
598    /// # use stats_ci::*;
599    /// # let mut stats = comparison::Unpaired::from_iter(&[1., 2. ,3.], &[4., 5., 6.])?;
600    /// let mean_b = stats.stats_b().sample_mean();
601    /// # Ok::<(),error::CIError>(())
602    /// ```
603    ///
604    pub fn stats_b(&self) -> &mean::Arithmetic<T> {
605        &self.stats_b
606    }
607
608    ///
609    /// Return a mutable reference to the statistics of the second sample.
610    ///
611    /// # Examples
612    ///
613    /// ```
614    /// # use stats_ci::*;
615    /// let mut stats = comparison::Unpaired::default();
616    /// stats.stats_b_mut().append(1.)?;
617    /// # Ok::<(),error::CIError>(())
618    /// ```
619    ///
620    pub fn stats_b_mut(&mut self) -> &mut mean::Arithmetic<T> {
621        &mut self.stats_b
622    }
623
624    ///
625    /// Append a pair of observations to the two samples.
626    ///
627    /// # Arguments
628    ///
629    /// * `data_a` - the new data for the first sample
630    /// * `data_b` - the new data for the second sample
631    ///
632    /// # Errors
633    ///
634    /// * [`CIError::FloatConversionError`] - if the conversion to `T` fails
635    ///
636    pub fn append_pair(&mut self, data_a: T, data_b: T) -> CIResult<()> {
637        self.append_a(data_a)?;
638        self.append_b(data_b)?;
639        Ok(())
640    }
641
642    ///
643    /// Append a single observation to the first sample.
644    ///
645    /// # Arguments
646    ///
647    /// * `data_a` - the new data for the first sample
648    ///
649    pub fn append_a(&mut self, data_a: T) -> CIResult<()> {
650        self.stats_a.append(data_a)
651    }
652
653    ///
654    /// Append a single observation to the second sample.
655    ///
656    /// # Arguments
657    ///
658    /// * `data_b` - the new data for the second sample
659    ///
660    pub fn append_b(&mut self, data_b: T) -> CIResult<()> {
661        self.stats_b.append(data_b)
662    }
663
664    ///
665    /// Append observations to the first sample.
666    ///
667    /// # Arguments
668    ///
669    /// * `data_a` - the new data for the first sample
670    ///
671    /// # Errors
672    ///
673    /// * [`CIError::FloatConversionError`] - if the conversion to `T` fails
674    ///
675    /// # Examples
676    ///
677    /// ```
678    /// # use stats_ci::*;
679    /// let mut stats = comparison::Unpaired::default();
680    /// stats.extend_a(&[1., 2., 3.])?;
681    /// # assert_eq!(stats.stats_a().sample_count(), 3);
682    /// # assert_eq!(stats.stats_a().sample_mean(), 2.);
683    /// # Ok::<(),error::CIError>(())
684    /// ```
685    pub fn extend_a<I>(&mut self, data_a: &I) -> CIResult<()>
686    where
687        for<'a> &'a I: IntoIterator<Item = &'a T>,
688    {
689        self.stats_a.extend(data_a)
690    }
691
692    ///
693    /// Append observations to the second sample.
694    ///
695    /// # Arguments
696    ///
697    /// * `data_b` - the new data for the second sample
698    ///
699    /// # Errors
700    ///
701    /// * [`CIError::FloatConversionError`] - if the conversion to `T` fails
702    ///
703    /// # Examples
704    ///
705    /// ```
706    /// # use stats_ci::*;
707    /// let mut stats = comparison::Unpaired::default();
708    /// stats.extend_b(&[1., 2., 3.])?;
709    /// # assert_eq!(stats.stats_b().sample_count(), 3);
710    /// # assert_eq!(stats.stats_b().sample_mean(), 2.);
711    /// # Ok::<(),error::CIError>(())
712    /// ```
713    pub fn extend_b<I>(&mut self, data_b: &I) -> CIResult<()>
714    where
715        for<'a> &'a I: IntoIterator<Item = &'a T>,
716    {
717        self.stats_b.extend(data_b)
718    }
719
720    ///
721    /// Extend the two samples with new data.
722    ///
723    /// # Arguments
724    ///
725    /// * `data_a` - the new data for the first sample
726    /// * `data_b` - the new data for the second sample
727    ///
728    /// # Errors
729    ///
730    /// * [`CIError::FloatConversionError`] - if the conversion to `T` fails
731    ///
732    /// # Examples
733    ///
734    /// ```
735    /// # use stats_ci::*;
736    /// let mut stats = comparison::Unpaired::default();
737    /// stats.extend(&[1., 2., 3.], &[4., 5., 6.])?;
738    /// # assert_eq!(stats.stats_a().sample_count(), 3);
739    /// # assert_eq!(stats.stats_a().sample_mean(), 2.);
740    /// # assert_eq!(stats.stats_b().sample_count(), 3);
741    /// # assert_eq!(stats.stats_b().sample_mean(), 5.);
742    /// # Ok::<(),error::CIError>(())
743    /// ```
744    pub fn extend<Ia, Ib>(&mut self, data_a: &Ia, data_b: &Ib) -> CIResult<()>
745    where
746        for<'a> &'a Ia: IntoIterator<Item = &'a T>,
747        for<'b> &'b Ib: IntoIterator<Item = &'b T>,
748    {
749        self.stats_a.extend(data_a)?;
750        self.stats_b.extend(data_b)?;
751        Ok(())
752    }
753
754    ///
755    /// Compute the confidence interval of the difference between the means of the two samples.
756    ///
757    /// # Arguments
758    ///
759    /// * `confidence` - the confidence level
760    ///
761    /// # Returns
762    ///
763    /// The confidence interval of the difference as a result.
764    ///
765    /// # Errors
766    ///
767    /// * [`CIError::TooFewSamples`] - if one of the two samples has less than 2 observations
768    ///
769    /// # Examples
770    ///
771    /// ```
772    /// # use stats_ci::*;
773    /// let confidence = Confidence::new_two_sided(0.95);
774    /// let mut stats = comparison::Unpaired::default();
775    /// stats.extend(&[1., 2., 3.], &[4., 5., 6.])?;
776    /// let ci = stats.ci_mean(confidence)?;
777    /// # Ok::<(),error::CIError>(())
778    /// ```
779    ///
780    /// # Notes
781    ///
782    /// If the interval includes zero, the difference is not significant.
783    /// If the interval is strictly positive (resp. negative), the mean of the first sample is significantly
784    /// greater (resp. smaller) than the mean of the second sample.
785    ///
786    /// # References
787    ///
788    /// * R. Jain, The Art of Computer Systems Performance Analysis, Wiley, 1991.
789    /// * [Wikipedia article on Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test)
790    /// * PennState. Stat 500. Lesson 7: Comparing Two Population Parameters. [Online](https://online.stat.psu.edu/stat500/lesson/7)
791    ///
792    pub fn ci_mean(&self, confidence: Confidence) -> CIResult<Interval<T>> {
793        let stats_a = self.stats_a;
794        let stats_b = self.stats_b;
795
796        let n_a = T::from(stats_a.sample_count()).convert("stats_a.sample_count")?;
797        let n_b = T::from(stats_b.sample_count()).convert("stats_b.sample_count")?;
798        let mean_a = stats_a.sample_mean();
799        let mean_b = stats_b.sample_mean();
800        let std_dev_a = stats_a.sample_std_dev();
801        let std_dev_b = stats_b.sample_std_dev();
802
803        let mean_difference = mean_a - mean_b;
804        let sa2_na = // $s_a^2 / n_a$
805            std_dev_a * std_dev_a / n_a;
806        let sb2_nb = // $s_b^2 / n_b$
807            std_dev_b * std_dev_b / n_b;
808        let sum_s2_n = // $s_a^2 / n_a + s_b^2 / n_b$
809            sa2_na + sb2_nb;
810        let std_err_mean = // $\sqrt{s_a^2 / n_a + s_b^2 / n_b}$
811            sum_s2_n.sqrt();
812        let effective_dof = // $ \frac{ (s_a^a / n_a + s_b^2 / n_b)^2 }{ \frac{1}{n_a+1} \left(\frac{s_a^2}{n_a}\right)^2 + \frac{1}{n_b+1} \left(\frac{s_b^2}{n_b}\right)^2 } - 2$
813            sum_s2_n * sum_s2_n
814                / (sa2_na * sa2_na / (n_a + T::one())
815                    + sb2_nb * sb2_nb / (n_b + T::one())) - T::one() - T::one();
816
817        let (lo, hi) = stats::interval_bounds(
818            confidence,
819            mean_difference.try_f64("mean_difference")?,
820            std_err_mean.try_f64("std_err_mean")?,
821            effective_dof.try_f64("effective_dof")?,
822        );
823        let lo = T::from(lo).convert("lo")?;
824        let hi = T::from(hi).convert("hi")?;
825        match confidence {
826            Confidence::TwoSided(_) => Interval::new(lo, hi).map_err(|e| e.into()),
827            Confidence::UpperOneSided(_) => Ok(Interval::new_upper(lo)),
828            Confidence::LowerOneSided(_) => Ok(Interval::new_lower(hi)),
829        }
830    }
831
832    ///
833    /// Compute the confidence interval of the difference between the means of the two samples.
834    ///
835    /// # Arguments
836    ///
837    /// * `confidence` - the confidence level
838    /// * `data_a` - the first sample
839    /// * `data_b` - the second sample
840    ///
841    /// # Returns
842    ///
843    /// The confidence interval of the difference as a result.
844    ///
845    /// # Errors
846    ///
847    /// * [`CIError::FloatConversionError`] - if the conversion to `T` fails
848    /// * [`CIError::TooFewSamples`] - if one of the two samples has less than 2 observations
849    ///
850    /// # Notes
851    ///
852    /// If the interval includes zero, the difference is not significant.
853    /// If the interval is strictly positive (resp. negative), the mean of the first sample is significantly
854    /// greater (resp. smaller) than the mean of the second sample.
855    ///
856    /// This function provides a simple interface to obtain the confidence interval with a single call, when
857    /// the samples are known a priori and there is no need to include additional observations,
858    /// obtain the confidence intervals for other levels or access the sample statistics. For more refined
859    /// use cases, it is recommended to use [`Unpaired::ci_mean`] instead.
860    ///
861    /// # Examples
862    ///
863    /// ```
864    /// # use stats_ci::*;
865    /// let data_a = [1., 2., 3.];
866    /// let data_b = [4., 5., 6.];
867    /// let ci = comparison::Unpaired::ci(Confidence::new_two_sided(0.95), &data_a, &data_b)?;
868    /// # Ok::<(),error::CIError>(())
869    /// ```
870    ///
871    /// # References
872    ///
873    /// * R. Jain, The Art of Computer Systems Performance Analysis, Wiley, 1991.
874    /// * [Wikipedia article on Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test)
875    /// * PennState. Stat 500. Lesson 7: Comparing Two Population Parameters. [Online](https://online.stat.psu.edu/stat500/lesson/7)
876    ///
877    pub fn ci<Ia, Ib>(confidence: Confidence, data_a: &Ia, data_b: &Ib) -> CIResult<Interval<T>>
878    where
879        for<'a> &'a Ia: IntoIterator<Item = &'a T>,
880        for<'a> &'a Ib: IntoIterator<Item = &'a T>,
881    {
882        let mut stats = Self::default();
883        stats.extend(data_a, data_b)?;
884        stats.ci_mean(confidence)
885    }
886}
887
888impl<F: Float> std::ops::Add for Unpaired<F> {
889    type Output = Self;
890
891    #[inline]
892    fn add(self, rhs: Self) -> Self::Output {
893        Self {
894            stats_a: self.stats_a + rhs.stats_a,
895            stats_b: self.stats_b + rhs.stats_b,
896        }
897    }
898}
899
900impl<F: Float> std::ops::AddAssign for Unpaired<F> {
901    #[inline]
902    fn add_assign(&mut self, rhs: Self) {
903        self.stats_a += rhs.stats_a;
904        self.stats_b += rhs.stats_b;
905    }
906}
907
908#[cfg(test)]
909mod tests {
910    use super::*;
911    use approx::*;
912
913    #[test]
914    fn test_paired() {
915        {
916            // Case 1
917            // based on example from https://online.stat.psu.edu/stat500/lesson/7/7.3/7.3.2
918
919            // Zinc concentration in water samples from a river
920            let data_bottom_water = [
921                0.430, 0.266, 0.567, 0.531, 0.707, 0.716, 0.651, 0.589, 0.469, 0.723,
922            ];
923            let data_surface_water = [
924                0.415, 0.238, 0.390, 0.410, 0.605, 0.609, 0.632, 0.523, 0.411, 0.612,
925            ];
926
927            let ci = Paired::ci(
928                Confidence::new_two_sided(0.95),
929                &data_bottom_water,
930                &data_surface_water,
931            )
932            .unwrap();
933
934            println!("ci = {} (ref: )", ci);
935            println!("reference: (0.04299, 0.11781)");
936            assert_abs_diff_eq!(ci, Interval::new(0.04299, 0.11781).unwrap(), epsilon = 1e-4);
937        }
938        {
939            // Case 2
940            // based on example from https://www.khanacademy.org/math/ap-statistics/xfb5d8e68:inference-quantitative-means/one-sample-t-interval-mean/a/one-sample-t-interval-paired-data
941
942            let data_watch_a = [9.8, 9.8, 10.1, 10.1, 10.2];
943            let data_watch_b = [10.1, 10., 10.2, 9.9, 10.1];
944            let ci = Paired::ci(
945                Confidence::new_two_sided(0.95),
946                &data_watch_b,
947                &data_watch_a,
948            )
949            .unwrap();
950
951            println!("ci = {}", ci);
952            println!("reference: (-0.20, 0.32)");
953            assert_abs_diff_eq!(ci, Interval::new(-0.20, 0.32).unwrap(), epsilon = 1e-2);
954
955            let data_pre = [140., 152., 153., 159., 150., 146.];
956            let data_post = [150., 159., 170., 164., 148., 166.];
957            let ci = Paired::ci(Confidence::new_two_sided(0.95), &data_post, &data_pre).unwrap();
958
959            println!("ci = {}", ci);
960            println!("reference: (1.03,17.97)");
961            assert_abs_diff_eq!(ci, Interval::new(1.03, 17.97).unwrap(), epsilon = 1e-2);
962        }
963    }
964
965    #[test]
966    fn test_unpaired() {
967        // based on example from https://www.statsdirect.co.uk/help/parametric_methods/utt.htm
968        // itself based on Armitage P, Berry G. Statistical Methods in Medical Research (3rd edition). Blackwell 1994.
969        // Consider the gain in weight of 19 female rats between 28 and 84 days after birth. 12 were fed on a high protein diet and 7 on a low protein diet.
970        let data_high_protein = [
971            134., 146., 104., 119., 124., 161., 107., 83., 113., 129., 97., 123.,
972        ];
973        let data_low_protein = [70., 118., 101., 85., 107., 132., 94.];
974        let ci = Unpaired::ci(
975            Confidence::new_two_sided(0.95),
976            &data_high_protein,
977            &data_low_protein,
978        )
979        .unwrap();
980
981        println!("ci = {}", ci);
982        println!("reference: (-2.193679, 40.193679)");
983        assert_abs_diff_eq!(
984            ci,
985            Interval::new(-2.193679, 40.193679).unwrap(),
986            epsilon = 1e-2
987        );
988    }
989
990    #[test]
991    fn test_paired_diff_length() {
992        let sample_size = 10;
993        let data1 = (0..sample_size)
994            .map(|_| rand::random::<f64>())
995            .collect::<Vec<_>>();
996        let data2 = (0..sample_size + 1)
997            .map(|_| rand::random::<f64>())
998            .collect::<Vec<_>>();
999
1000        let mut stats = comparison::Paired::default();
1001        let res = stats.extend(&data1, &data2);
1002        assert!(res.is_err());
1003        match res.unwrap_err() {
1004            CIError::DifferentSampleSizes(a, b) => {
1005                println!("DifferentSampleSizes({a},{b})");
1006                assert_eq!(a, sample_size);
1007                assert_eq!(b, sample_size + 1);
1008            }
1009            e => panic!("unexpected error: {}", e),
1010        }
1011    }
1012}
stats_ci/comparison.rs

stats_ci/
comparison.rs