stats_ci/comparison.rs
1//!
2//! This module provides functions to compare two samples for two different cases.
3//!
4//! # Paired observations
5//!
6//! The first case is when the two samples are paired, i.e. each measurement in the first sample is paired with a measurement in the second sample.
7//! For instance, when measuring the performance of two algorithms, the same input data is used for both algorithms to yield a pair of related observations.
8//! Obviously, the number of observations in the two samples must be the same.
9//! When possible, paired observations are preferred because they significantly reduce the variance of the difference between the two means.
10//! This means that fewer observations are needed to achieve the same significance.
11//!
12//! The structure [`Paired`] deals with paired observations and can be used in simple form through the function [`Paired::ci`] or incrementally
13//! with the function [`Paired::ci_mean`].
14//!
15//! # Unpaired observations
16//!
17//! The second case is when the two samples are not paired, i.e. each measurement in the first sample is not paired with a measurement in the second sample.
18//! The number of observations in the two samples may be different.
19//!
20//! The structure [`Unpaired`] deals with the case of unpaired observations and can be used in simple form through the function [`Unpaired::ci`]
21//! or incrementally with the function [`Unpaired::ci_mean`].
22//!
23//! # Examples
24//!
25//! ## Paired observations
26//!
27//! The examples below have the following preamble:
28//! ```
29//! use stats_ci::*;
30//! // Zinc concentration in water samples from a river
31//! let data_bottom_water = [
32//! 0.430, 0.266, 0.567, 0.531, 0.707, 0.716, 0.651, 0.589, 0.469, 0.723,
33//! ];
34//! let data_surface_water = [
35//! 0.415, 0.238, 0.390, 0.410, 0.605, 0.609, 0.632, 0.523, 0.411, 0.612,
36//! ];
37//! let confidence = Confidence::new_two_sided(0.95);
38//! ```
39//!
40//! ### Easy interface (paired)
41//! ```
42//! # use stats_ci::*;
43//! # // Zinc concentration in water samples from a river
44//! # let data_bottom_water = [
45//! # 0.430, 0.266, 0.567, 0.531, 0.707, 0.716, 0.651, 0.589, 0.469, 0.723,
46//! # ];
47//! # let data_surface_water = [
48//! # 0.415, 0.238, 0.390, 0.410, 0.605, 0.609, 0.632, 0.523, 0.411, 0.612,
49//! # ];
50//! # let confidence = Confidence::new_two_sided(0.95);
51//! let ci = comparison::Paired::ci(
52//! confidence,
53//! &data_bottom_water,
54//! &data_surface_water
55//! )?;
56//! # Ok::<(),error::CIError>(())
57//! ```
58//!
59//! ### Incremental interface (paired)
60//! ```
61//! # use stats_ci::*;
62//! # let data_bottom_water = [
63//! # 0.430, 0.266, 0.567, 0.531, 0.707, 0.716, 0.651, 0.589, 0.469, 0.723,
64//! # ];
65//! # let data_surface_water = [
66//! # 0.415, 0.238, 0.390, 0.410, 0.605, 0.609, 0.632, 0.523, 0.411, 0.612,
67//! # ];
68//! # let confidence = Confidence::new_two_sided(0.95);
69//! let mut stats = comparison::Paired::default();
70//! stats.extend(&data_bottom_water, &data_surface_water)?;
71//! let ci = stats.ci_mean(confidence)?;
72//! let mean = stats.sample_mean();
73//! # Ok::<(),error::CIError>(())
74//! ```
75//!
76//! ## Unpaired observations
77//!
78//! The examples below have the following preamble:
79//! ```
80//! # use stats_ci::*;
81//! // Gain in weight of 19 female rats between 28 and 84 days after birth.
82//! // 12 were fed on a high protein diet and 7 on a low protein diet.
83//! let data_high_protein = [
84//! 134., 146., 104., 119., 124., 161., 107., 83., 113., 129., 97., 123.,
85//! ];
86//! let data_low_protein = [70., 118., 101., 85., 107., 132., 94.];
87//! let confidence = Confidence::new_two_sided(0.95);
88//! ```
89//!
90//! ### Easy interface (unpaired)
91//! ```
92//! # use stats_ci::*;
93//! # // Gain in weight of 19 female rats between 28 and 84 days after birth.
94//! # // 12 were fed on a high protein diet and 7 on a low protein diet.
95//! # let data_high_protein = [
96//! # 134., 146., 104., 119., 124., 161., 107., 83., 113., 129., 97., 123.,
97//! # ];
98//! # let data_low_protein = [70., 118., 101., 85., 107., 132., 94.];
99//! let confidence = Confidence::new_two_sided(0.95);
100//! let ci = comparison::Unpaired::ci(
101//! confidence,
102//! &data_high_protein,
103//! &data_low_protein
104//! )?;
105//! # Ok::<(),error::CIError>(())
106//! ```
107//!
108//! ### Incremental interface (unpaired)
109//! ```
110//! # use stats_ci::*;
111//! # let data_high_protein = [
112//! # 134., 146., 104., 119., 124., 161., 107., 83., 113., 129., 97., 123.,
113//! # ];
114//! # let data_low_protein = [70., 118., 101., 85., 107., 132., 94.];
115//! # let confidence = Confidence::new_two_sided(0.95);
116//! let mut stats = comparison::Unpaired::default();
117//! stats.extend(&data_high_protein, &data_low_protein)?;
118//! let ci = stats.ci_mean(confidence)?;
119//! # Ok::<(),error::CIError>(())
120//! ```
121//!
122//! # References
123//!
124//! * R. Jain, The Art of Computer Systems Performance Analysis, Wiley, 1991.
125//! * [Wikipedia article on paired difference test](https://en.wikipedia.org/wiki/Paired_difference_test)
126//! * PennState. Stat 500. Lesson 7: Comparing Two Population Parameters. [Online](https://online.stat.psu.edu/stat500/lesson/7)
127//!
128use crate::*;
129use error::*;
130use mean::StatisticsOps;
131use num_traits::Float;
132
133///
134/// Structure to collect statistics on two paired samples.
135///
136/// Paired observations are when each measurement in the first sample is paired with
137/// a measurement in the second sample.
138/// For instance, when measuring the performance of two algorithms, the same input
139/// data is used for both algorithms to yield a pair of related observations.
140///
141/// When observations cannot naturally be paired, the samples must be compared using
142/// unpaired observations (see [`Unpaired`]). Typically, unpaired observations require
143/// noticeably more observations to achieve the same statistical significance.
144///
145/// # Examples
146///
147/// The example below considers the zinc concentration in water samples from a river.
148/// Each sample is taken at the same location, but one at the bottom of the river and
149/// the other at the surface. Thus, those measurements are paired (bottom and surface).
150/// See <https://online.stat.psu.edu/stat500/lesson/7/7.3/7.3.2> for details on this
151/// example.
152///
153/// ```
154/// # use stats_ci::*;
155/// // Zinc concentration in water samples from a river
156/// let data_bottom_water = [
157/// 0.430, 0.266, 0.567, 0.531, 0.707, 0.716, 0.651, 0.589, 0.469, 0.723,
158/// ];
159/// let data_surface_water = [
160/// 0.415, 0.238, 0.390, 0.410, 0.605, 0.609, 0.632, 0.523, 0.411, 0.612,
161/// ];
162///
163/// let mut stats = comparison::Paired::default();
164/// stats.extend(&data_bottom_water, &data_surface_water).unwrap();
165/// let ci = stats.ci_mean(Confidence::new_two_sided(0.95)).unwrap();
166/// ```
167///
168/// # References
169///
170/// * R. Jain, The Art of Computer Systems Performance Analysis, Wiley, 1991.
171/// * [Wikipedia article on paired difference test](https://en.wikipedia.org/wiki/Paired_difference_test)
172/// * PennState. Stat 500. Lesson 7: Comparing Two Population Parameters. [Online](https://online.stat.psu.edu/stat500/lesson/7)
173
174#[derive(Debug, Clone, PartialEq)]
175#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
176pub struct Paired<T: Float> {
177 stats: mean::Arithmetic<T>,
178}
179
180impl<T: Float> Paired<T> {
181 ///
182 /// Add a pair of observations to the two samples.
183 ///
184 /// # Arguments
185 ///
186 /// * `data_a` - the observation for the first sample
187 /// * `data_b` - the observation for the second sample
188 ///
189 /// # Errors
190 ///
191 /// * [`CIError::FloatConversionError`] - if the conversion to `T` fails
192 ///
193 /// # Examples
194 ///
195 /// ```
196 /// # use stats_ci::*;
197 /// let mut stats = comparison::Paired::default();
198 /// stats.append_pair(1., 2.)?;
199 /// # assert_eq!(stats.sample_count(), 1);
200 /// # assert_eq!(stats.sample_mean(), -1.);
201 /// # Ok::<(),error::CIError>(())
202 /// ```
203 pub fn append_pair(&mut self, data_a: T, data_b: T) -> CIResult<()> {
204 self.stats.append(data_a - data_b)
205 }
206
207 ///
208 /// Append multiple pairs of observations to the two samples.
209 ///
210 /// # Arguments
211 ///
212 /// * `iter` - an iterable collection of tuples to add to the data
213 ///
214 /// # Errors
215 ///
216 /// * [`CIError::FloatConversionError`] - if the conversion to `T` fails
217 ///
218 /// # Examples
219 ///
220 /// ```
221 /// # use stats_ci::*;
222 /// let mut stats = comparison::Paired::default();
223 /// stats.extend_tuple(&[(1., 2.), (3., 4.)])?;
224 /// # assert_eq!(stats.sample_count(), 2);
225 /// # assert_eq!(stats.sample_mean(), -1.);
226 /// # Ok::<(),error::CIError>(())
227 /// ```
228 pub fn extend_tuple<I>(&mut self, iter: &I) -> CIResult<()>
229 where
230 for<'a> &'a I: IntoIterator<Item = &'a (T, T)>,
231 {
232 for &(x, y) in iter.into_iter() {
233 self.stats.append(x - y)?;
234 }
235 Ok(())
236 }
237
238 ///
239 /// Append multiple observations to the two samples.
240 ///
241 /// # Arguments
242 ///
243 /// * `data_a` - an iterable collection of observations for the first sample
244 /// * `data_b` - an iterable collection of observations for the second sample
245 ///
246 /// # Errors
247 ///
248 /// * [`CIError::DifferentSampleSizes`] - if the two iterables have different lengths
249 /// * [`CIError::FloatConversionError`] - if the conversion to `T` fails
250 ///
251 /// # Examples
252 ///
253 /// ```
254 /// # use stats_ci::*;
255 /// let mut stats = comparison::Paired::default();
256 /// stats.extend(&[1., 3.], &[2., 4.])?;
257 /// # assert_eq!(stats.sample_count(), 2);
258 /// # assert_eq!(stats.sample_mean(), -1.);
259 /// # Ok::<(),error::CIError>(())
260 /// ```
261 pub fn extend<I1, I2>(&mut self, data_a: &I1, data_b: &I2) -> CIResult<()>
262 where
263 for<'a> &'a I1: IntoIterator<Item = &'a T>,
264 for<'b> &'b I2: IntoIterator<Item = &'b T>,
265 {
266 let mut data_a = data_a.into_iter();
267 let mut data_b = data_b.into_iter();
268 let mut count = 0;
269 loop {
270 match (data_a.next(), data_b.next()) {
271 (Some(x), Some(y)) => {
272 count += 1;
273 self.stats.append(*x - *y)?
274 }
275 (None, None) => return Ok(()),
276 // returns error if iterables have different lengths
277 (None, _) => {
278 return Err(CIError::DifferentSampleSizes(
279 count,
280 count + 1 + data_b.count(),
281 ))
282 }
283 (_, None) => {
284 return Err(CIError::DifferentSampleSizes(
285 count + 1 + data_a.count(),
286 count,
287 ))
288 }
289 }
290 }
291 }
292
293 ///
294 /// Return the sample mean of the difference between the two samples.
295 ///
296 /// # Examples
297 ///
298 /// ```
299 /// # use stats_ci::*;
300 /// let data_a = [1., 2., 3.];
301 /// let data_b = [4., 5., 6.];
302 /// let mut stats = comparison::Paired::default();
303 /// stats.extend(&data_a, &data_b)?;
304 /// let mean = stats.sample_mean();
305 /// assert_eq!(mean, -3.);
306 /// # Ok::<(),error::CIError>(())
307 /// ```
308 pub fn sample_mean(&self) -> T {
309 self.stats.sample_mean()
310 }
311
312 ///
313 /// Return the standard error of the difference between the two samples.
314 ///
315 /// # Examples
316 ///
317 /// ```
318 /// # use stats_ci::*;
319 /// let data_a = [1., 2., 3.];
320 /// let data_b = [4., 5., 6.];
321 /// let mut stats = comparison::Paired::default();
322 /// stats.extend(&data_a, &data_b)?;
323 /// let sem = stats.sample_sem();
324 /// assert_eq!(sem, 0.);
325 /// # Ok::<(),error::CIError>(())
326 /// ```
327 pub fn sample_sem(&self) -> T {
328 self.stats.sample_sem()
329 }
330
331 ///
332 /// Return the number of sample pairs.
333 ///
334 /// # Examples
335 ///
336 /// ```
337 /// # use stats_ci::*;
338 /// let data_a = [1., 2., 3.];
339 /// let data_b = [4., 5., 6.];
340 /// let mut stats = comparison::Paired::default();
341 /// stats.extend(&data_a, &data_b)?;
342 /// let count = stats.sample_count();
343 /// assert_eq!(count, 3);
344 /// # Ok::<(),error::CIError>(())
345 /// ```
346 ///
347 pub fn sample_count(&self) -> usize {
348 self.stats.sample_count()
349 }
350
351 ///
352 /// Return the confidence interval of the difference between the means of the two samples.
353 ///
354 /// # Arguments
355 ///
356 /// * `confidence` - the confidence level
357 ///
358 /// # Returns
359 ///
360 /// The confidence interval of the difference as a result.
361 ///
362 /// # Notes
363 ///
364 /// If the interval includes zero, the difference is not significant.
365 /// If the interval is strictly positive (resp. negative), the mean of the first sample is significantly
366 /// greater (resp. smaller) than the mean of the second sample.
367 ///
368 /// # Examples
369 ///
370 /// ```
371 /// # use stats_ci::*;
372 /// let data_a = [1., 2., 3.];
373 /// let data_b = [4., 5., 6.];
374 /// let mut stats = comparison::Paired::default();
375 /// stats.extend(&data_a, &data_b)?;
376 /// let confidence = Confidence::new_two_sided(0.95);
377 /// let ci = stats.ci_mean(confidence)?;
378 /// assert_eq!(ci, Interval::new(-3., -3.)?);
379 /// # Ok::<(),error::CIError>(())
380 /// ```
381 pub fn ci_mean(&self, confidence: Confidence) -> CIResult<Interval<T>> {
382 self.stats.ci_mean(confidence)
383 }
384
385 ///
386 /// Compute the confidence interval of the difference between the means of the two samples.
387 ///
388 /// # Arguments
389 ///
390 /// * `confidence` - the confidence level
391 /// * `data_a` - the first sample
392 /// * `data_b` - the second sample
393 ///
394 /// # Returns
395 ///
396 /// The confidence interval of the difference as a result.
397 ///
398 /// # Errors
399 ///
400 /// * [`CIError::DifferentSampleSizes`] - if the two samples do not have the same length
401 ///
402 /// # Notes
403 ///
404 /// If the interval includes zero, the difference is not significant.
405 /// If the interval is strictly positive (resp. negative), the mean of the first sample is significantly
406 /// greater (resp. smaller) than the mean of the second sample.
407 ///
408 /// This function provides a simple interface to obtain the confidence interval with a single call, when
409 /// the samples are known a priori and there is no need to include additional observations,
410 /// obtain the confidence intervals for other levels or access the sample statistics. For more refined
411 /// use cases, it is recommended to use [`Paired::ci_mean`] instead.
412 ///
413 /// # References
414 ///
415 /// * R. Jain, The Art of Computer Systems Performance Analysis, Wiley, 1991.
416 /// * [Wikipedia article on paired difference test](https://en.wikipedia.org/wiki/Paired_difference_test)
417 /// * PennState. Stat 500. Lesson 7: Comparing Two Population Parameters. [Online](https://online.stat.psu.edu/stat500/lesson/7)
418 ///
419 /// # Examples
420 ///
421 /// ```
422 /// # use stats_ci::*;
423 /// let data_a = [1., 2., 3.];
424 /// let data_b = [4., 5., 6.];
425 /// let confidence = Confidence::new_two_sided(0.95);
426 /// let ci = comparison::Paired::ci(confidence, &data_a, &data_b)?;
427 /// # Ok::<(),error::CIError>(())
428 /// ```
429 ///
430 pub fn ci<Ia, Ib>(confidence: Confidence, data_a: &Ia, data_b: &Ib) -> CIResult<Interval<T>>
431 where
432 for<'a> &'a Ia: IntoIterator<Item = &'a T>,
433 for<'a> &'a Ib: IntoIterator<Item = &'a T>,
434 {
435 let mut stats = Paired::default();
436 stats.extend(data_a, data_b)?;
437 stats.ci_mean(confidence)
438 }
439}
440
441impl<T: Float> Default for Paired<T> {
442 fn default() -> Self {
443 Self {
444 stats: mean::Arithmetic::default(),
445 }
446 }
447}
448
449impl<F: Float> std::ops::Add for Paired<F> {
450 type Output = Self;
451
452 #[inline]
453 fn add(self, rhs: Self) -> Self::Output {
454 Self {
455 stats: self.stats + rhs.stats,
456 }
457 }
458}
459
460impl<F: Float> std::ops::AddAssign for Paired<F> {
461 #[inline]
462 fn add_assign(&mut self, rhs: Self) {
463 self.stats += rhs.stats;
464 }
465}
466
467///
468/// Structure to collect statistics on two unpaired samples.
469///
470/// Given two independent samples, the goal is to compute the confidence interval
471/// of the difference between their means.
472/// Unlike with paired observations ([`Paired`]), the two samples do not have to
473/// have the same length.
474/// However, comparing with unpaired observations typically requires considerably
475/// more observations to reach the same degree of statistical accuracy. This is
476/// why paired observations are preferred when the circumstances allow.
477///
478/// # Examples
479///
480/// ```
481/// # use stats_ci::*;
482/// // Gain in weight of 19 female rats between 28 and 84 days after birth.
483/// // 12 were fed on a high protein diet and 7 on a low protein diet.
484/// let data_high_protein = [
485/// 134., 146., 104., 119., 124., 161., 107., 83., 113., 129., 97., 123.,
486/// ];
487/// let data_low_protein = [70., 118., 101., 85., 107., 132., 94.];
488/// let mut stats = comparison::Unpaired::default();
489/// stats.extend(&data_high_protein, &data_low_protein)?;
490/// let ci = stats.ci_mean(Confidence::new_two_sided(0.95))?;
491/// # Ok::<(),error::CIError>(())
492/// ```
493///
494/// # References
495///
496/// * R. Jain, The Art of Computer Systems Performance Analysis, Wiley, 1991.
497/// * [Wikipedia article on Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test)
498/// * PennState. Stat 500. Lesson 7: Comparing Two Population Parameters. [Online](https://online.stat.psu.edu/stat500/lesson/7)
499///
500#[derive(Debug, Clone, PartialEq)]
501#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
502pub struct Unpaired<T: Float> {
503 stats_a: mean::Arithmetic<T>,
504 stats_b: mean::Arithmetic<T>,
505}
506
507impl<T: Float> Default for Unpaired<T> {
508 fn default() -> Self {
509 Self {
510 stats_a: mean::Arithmetic::default(),
511 stats_b: mean::Arithmetic::default(),
512 }
513 }
514}
515
516impl<T: Float> Unpaired<T> {
517 ///
518 /// Create a new instance of `Unpaired` from two statistics.
519 ///
520 /// # Arguments
521 ///
522 /// * `stats_a` - the statistics of the first sample
523 /// * `stats_b` - the statistics of the second sample
524 ///
525 /// # Examples
526 ///
527 /// ```
528 /// # use stats_ci::*;
529 /// let stats_a = mean::Arithmetic::from_iter(&[1., 2., 3.])?;
530 /// let stats_b = mean::Arithmetic::from_iter(&[4., 5., 6.])?;
531 /// let stats = comparison::Unpaired::new(stats_a, stats_b);
532 /// # Ok::<(),error::CIError>(())
533 /// ```
534 pub fn new(stats_a: mean::Arithmetic<T>, stats_b: mean::Arithmetic<T>) -> Self {
535 Self { stats_a, stats_b }
536 }
537
538 ///
539 /// Create a new instance of `Unpaired` from two samples.
540 ///
541 /// # Arguments
542 ///
543 /// * `data_a` - the first sample
544 /// * `data_b` - the second sample
545 ///
546 /// # Errors
547 ///
548 /// * [`CIError::FloatConversionError`] - if the conversion to `T` fails
549 ///
550 /// # Examples
551 ///
552 /// ```
553 /// # use stats_ci::*;
554 /// let stats = comparison::Unpaired::from_iter(&[1., 2., 3.], &[4., 5., 6.])?;
555 /// # Ok::<(),error::CIError>(())
556 /// ```
557 ///
558 pub fn from_iter<Ia, Ib>(data_a: &Ia, data_b: &Ib) -> CIResult<Self>
559 where
560 for<'a> &'a Ia: IntoIterator<Item = &'a T>,
561 for<'b> &'b Ib: IntoIterator<Item = &'b T>,
562 {
563 let mut stats = Self::default();
564 stats.extend_a(data_a)?;
565 stats.extend_b(data_b)?;
566 Ok(stats)
567 }
568
569 ///
570 /// Return a reference to the statistics of the first sample.
571 ///
572 pub fn stats_a(&self) -> &mean::Arithmetic<T> {
573 &self.stats_a
574 }
575
576 ///
577 /// Return a mutable reference to the statistics of the first sample.
578 ///
579 /// # Examples
580 ///
581 /// ```
582 /// # use stats_ci::*;
583 /// let mut stats = comparison::Unpaired::default();
584 /// stats.stats_a_mut().append(1.)?;
585 /// # Ok::<(),error::CIError>(())
586 /// ```
587 ///
588 pub fn stats_a_mut(&mut self) -> &mut mean::Arithmetic<T> {
589 &mut self.stats_a
590 }
591
592 ///
593 /// Return a reference to the statistics of the second sample.
594 ///
595 /// # Examples
596 ///
597 /// ```
598 /// # use stats_ci::*;
599 /// # let mut stats = comparison::Unpaired::from_iter(&[1., 2. ,3.], &[4., 5., 6.])?;
600 /// let mean_b = stats.stats_b().sample_mean();
601 /// # Ok::<(),error::CIError>(())
602 /// ```
603 ///
604 pub fn stats_b(&self) -> &mean::Arithmetic<T> {
605 &self.stats_b
606 }
607
608 ///
609 /// Return a mutable reference to the statistics of the second sample.
610 ///
611 /// # Examples
612 ///
613 /// ```
614 /// # use stats_ci::*;
615 /// let mut stats = comparison::Unpaired::default();
616 /// stats.stats_b_mut().append(1.)?;
617 /// # Ok::<(),error::CIError>(())
618 /// ```
619 ///
620 pub fn stats_b_mut(&mut self) -> &mut mean::Arithmetic<T> {
621 &mut self.stats_b
622 }
623
624 ///
625 /// Append a pair of observations to the two samples.
626 ///
627 /// # Arguments
628 ///
629 /// * `data_a` - the new data for the first sample
630 /// * `data_b` - the new data for the second sample
631 ///
632 /// # Errors
633 ///
634 /// * [`CIError::FloatConversionError`] - if the conversion to `T` fails
635 ///
636 pub fn append_pair(&mut self, data_a: T, data_b: T) -> CIResult<()> {
637 self.append_a(data_a)?;
638 self.append_b(data_b)?;
639 Ok(())
640 }
641
642 ///
643 /// Append a single observation to the first sample.
644 ///
645 /// # Arguments
646 ///
647 /// * `data_a` - the new data for the first sample
648 ///
649 pub fn append_a(&mut self, data_a: T) -> CIResult<()> {
650 self.stats_a.append(data_a)
651 }
652
653 ///
654 /// Append a single observation to the second sample.
655 ///
656 /// # Arguments
657 ///
658 /// * `data_b` - the new data for the second sample
659 ///
660 pub fn append_b(&mut self, data_b: T) -> CIResult<()> {
661 self.stats_b.append(data_b)
662 }
663
664 ///
665 /// Append observations to the first sample.
666 ///
667 /// # Arguments
668 ///
669 /// * `data_a` - the new data for the first sample
670 ///
671 /// # Errors
672 ///
673 /// * [`CIError::FloatConversionError`] - if the conversion to `T` fails
674 ///
675 /// # Examples
676 ///
677 /// ```
678 /// # use stats_ci::*;
679 /// let mut stats = comparison::Unpaired::default();
680 /// stats.extend_a(&[1., 2., 3.])?;
681 /// # assert_eq!(stats.stats_a().sample_count(), 3);
682 /// # assert_eq!(stats.stats_a().sample_mean(), 2.);
683 /// # Ok::<(),error::CIError>(())
684 /// ```
685 pub fn extend_a<I>(&mut self, data_a: &I) -> CIResult<()>
686 where
687 for<'a> &'a I: IntoIterator<Item = &'a T>,
688 {
689 self.stats_a.extend(data_a)
690 }
691
692 ///
693 /// Append observations to the second sample.
694 ///
695 /// # Arguments
696 ///
697 /// * `data_b` - the new data for the second sample
698 ///
699 /// # Errors
700 ///
701 /// * [`CIError::FloatConversionError`] - if the conversion to `T` fails
702 ///
703 /// # Examples
704 ///
705 /// ```
706 /// # use stats_ci::*;
707 /// let mut stats = comparison::Unpaired::default();
708 /// stats.extend_b(&[1., 2., 3.])?;
709 /// # assert_eq!(stats.stats_b().sample_count(), 3);
710 /// # assert_eq!(stats.stats_b().sample_mean(), 2.);
711 /// # Ok::<(),error::CIError>(())
712 /// ```
713 pub fn extend_b<I>(&mut self, data_b: &I) -> CIResult<()>
714 where
715 for<'a> &'a I: IntoIterator<Item = &'a T>,
716 {
717 self.stats_b.extend(data_b)
718 }
719
720 ///
721 /// Extend the two samples with new data.
722 ///
723 /// # Arguments
724 ///
725 /// * `data_a` - the new data for the first sample
726 /// * `data_b` - the new data for the second sample
727 ///
728 /// # Errors
729 ///
730 /// * [`CIError::FloatConversionError`] - if the conversion to `T` fails
731 ///
732 /// # Examples
733 ///
734 /// ```
735 /// # use stats_ci::*;
736 /// let mut stats = comparison::Unpaired::default();
737 /// stats.extend(&[1., 2., 3.], &[4., 5., 6.])?;
738 /// # assert_eq!(stats.stats_a().sample_count(), 3);
739 /// # assert_eq!(stats.stats_a().sample_mean(), 2.);
740 /// # assert_eq!(stats.stats_b().sample_count(), 3);
741 /// # assert_eq!(stats.stats_b().sample_mean(), 5.);
742 /// # Ok::<(),error::CIError>(())
743 /// ```
744 pub fn extend<Ia, Ib>(&mut self, data_a: &Ia, data_b: &Ib) -> CIResult<()>
745 where
746 for<'a> &'a Ia: IntoIterator<Item = &'a T>,
747 for<'b> &'b Ib: IntoIterator<Item = &'b T>,
748 {
749 self.stats_a.extend(data_a)?;
750 self.stats_b.extend(data_b)?;
751 Ok(())
752 }
753
754 ///
755 /// Compute the confidence interval of the difference between the means of the two samples.
756 ///
757 /// # Arguments
758 ///
759 /// * `confidence` - the confidence level
760 ///
761 /// # Returns
762 ///
763 /// The confidence interval of the difference as a result.
764 ///
765 /// # Errors
766 ///
767 /// * [`CIError::TooFewSamples`] - if one of the two samples has less than 2 observations
768 ///
769 /// # Examples
770 ///
771 /// ```
772 /// # use stats_ci::*;
773 /// let confidence = Confidence::new_two_sided(0.95);
774 /// let mut stats = comparison::Unpaired::default();
775 /// stats.extend(&[1., 2., 3.], &[4., 5., 6.])?;
776 /// let ci = stats.ci_mean(confidence)?;
777 /// # Ok::<(),error::CIError>(())
778 /// ```
779 ///
780 /// # Notes
781 ///
782 /// If the interval includes zero, the difference is not significant.
783 /// If the interval is strictly positive (resp. negative), the mean of the first sample is significantly
784 /// greater (resp. smaller) than the mean of the second sample.
785 ///
786 /// # References
787 ///
788 /// * R. Jain, The Art of Computer Systems Performance Analysis, Wiley, 1991.
789 /// * [Wikipedia article on Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test)
790 /// * PennState. Stat 500. Lesson 7: Comparing Two Population Parameters. [Online](https://online.stat.psu.edu/stat500/lesson/7)
791 ///
792 pub fn ci_mean(&self, confidence: Confidence) -> CIResult<Interval<T>> {
793 let stats_a = self.stats_a;
794 let stats_b = self.stats_b;
795
796 let n_a = T::from(stats_a.sample_count()).convert("stats_a.sample_count")?;
797 let n_b = T::from(stats_b.sample_count()).convert("stats_b.sample_count")?;
798 let mean_a = stats_a.sample_mean();
799 let mean_b = stats_b.sample_mean();
800 let std_dev_a = stats_a.sample_std_dev();
801 let std_dev_b = stats_b.sample_std_dev();
802
803 let mean_difference = mean_a - mean_b;
804 let sa2_na = // $s_a^2 / n_a$
805 std_dev_a * std_dev_a / n_a;
806 let sb2_nb = // $s_b^2 / n_b$
807 std_dev_b * std_dev_b / n_b;
808 let sum_s2_n = // $s_a^2 / n_a + s_b^2 / n_b$
809 sa2_na + sb2_nb;
810 let std_err_mean = // $\sqrt{s_a^2 / n_a + s_b^2 / n_b}$
811 sum_s2_n.sqrt();
812 let effective_dof = // $ \frac{ (s_a^a / n_a + s_b^2 / n_b)^2 }{ \frac{1}{n_a+1} \left(\frac{s_a^2}{n_a}\right)^2 + \frac{1}{n_b+1} \left(\frac{s_b^2}{n_b}\right)^2 } - 2$
813 sum_s2_n * sum_s2_n
814 / (sa2_na * sa2_na / (n_a + T::one())
815 + sb2_nb * sb2_nb / (n_b + T::one())) - T::one() - T::one();
816
817 let (lo, hi) = stats::interval_bounds(
818 confidence,
819 mean_difference.try_f64("mean_difference")?,
820 std_err_mean.try_f64("std_err_mean")?,
821 effective_dof.try_f64("effective_dof")?,
822 );
823 let lo = T::from(lo).convert("lo")?;
824 let hi = T::from(hi).convert("hi")?;
825 match confidence {
826 Confidence::TwoSided(_) => Interval::new(lo, hi).map_err(|e| e.into()),
827 Confidence::UpperOneSided(_) => Ok(Interval::new_upper(lo)),
828 Confidence::LowerOneSided(_) => Ok(Interval::new_lower(hi)),
829 }
830 }
831
832 ///
833 /// Compute the confidence interval of the difference between the means of the two samples.
834 ///
835 /// # Arguments
836 ///
837 /// * `confidence` - the confidence level
838 /// * `data_a` - the first sample
839 /// * `data_b` - the second sample
840 ///
841 /// # Returns
842 ///
843 /// The confidence interval of the difference as a result.
844 ///
845 /// # Errors
846 ///
847 /// * [`CIError::FloatConversionError`] - if the conversion to `T` fails
848 /// * [`CIError::TooFewSamples`] - if one of the two samples has less than 2 observations
849 ///
850 /// # Notes
851 ///
852 /// If the interval includes zero, the difference is not significant.
853 /// If the interval is strictly positive (resp. negative), the mean of the first sample is significantly
854 /// greater (resp. smaller) than the mean of the second sample.
855 ///
856 /// This function provides a simple interface to obtain the confidence interval with a single call, when
857 /// the samples are known a priori and there is no need to include additional observations,
858 /// obtain the confidence intervals for other levels or access the sample statistics. For more refined
859 /// use cases, it is recommended to use [`Unpaired::ci_mean`] instead.
860 ///
861 /// # Examples
862 ///
863 /// ```
864 /// # use stats_ci::*;
865 /// let data_a = [1., 2., 3.];
866 /// let data_b = [4., 5., 6.];
867 /// let ci = comparison::Unpaired::ci(Confidence::new_two_sided(0.95), &data_a, &data_b)?;
868 /// # Ok::<(),error::CIError>(())
869 /// ```
870 ///
871 /// # References
872 ///
873 /// * R. Jain, The Art of Computer Systems Performance Analysis, Wiley, 1991.
874 /// * [Wikipedia article on Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test)
875 /// * PennState. Stat 500. Lesson 7: Comparing Two Population Parameters. [Online](https://online.stat.psu.edu/stat500/lesson/7)
876 ///
877 pub fn ci<Ia, Ib>(confidence: Confidence, data_a: &Ia, data_b: &Ib) -> CIResult<Interval<T>>
878 where
879 for<'a> &'a Ia: IntoIterator<Item = &'a T>,
880 for<'a> &'a Ib: IntoIterator<Item = &'a T>,
881 {
882 let mut stats = Self::default();
883 stats.extend(data_a, data_b)?;
884 stats.ci_mean(confidence)
885 }
886}
887
888impl<F: Float> std::ops::Add for Unpaired<F> {
889 type Output = Self;
890
891 #[inline]
892 fn add(self, rhs: Self) -> Self::Output {
893 Self {
894 stats_a: self.stats_a + rhs.stats_a,
895 stats_b: self.stats_b + rhs.stats_b,
896 }
897 }
898}
899
900impl<F: Float> std::ops::AddAssign for Unpaired<F> {
901 #[inline]
902 fn add_assign(&mut self, rhs: Self) {
903 self.stats_a += rhs.stats_a;
904 self.stats_b += rhs.stats_b;
905 }
906}
907
908#[cfg(test)]
909mod tests {
910 use super::*;
911 use approx::*;
912
913 #[test]
914 fn test_paired() {
915 {
916 // Case 1
917 // based on example from https://online.stat.psu.edu/stat500/lesson/7/7.3/7.3.2
918
919 // Zinc concentration in water samples from a river
920 let data_bottom_water = [
921 0.430, 0.266, 0.567, 0.531, 0.707, 0.716, 0.651, 0.589, 0.469, 0.723,
922 ];
923 let data_surface_water = [
924 0.415, 0.238, 0.390, 0.410, 0.605, 0.609, 0.632, 0.523, 0.411, 0.612,
925 ];
926
927 let ci = Paired::ci(
928 Confidence::new_two_sided(0.95),
929 &data_bottom_water,
930 &data_surface_water,
931 )
932 .unwrap();
933
934 println!("ci = {} (ref: )", ci);
935 println!("reference: (0.04299, 0.11781)");
936 assert_abs_diff_eq!(ci, Interval::new(0.04299, 0.11781).unwrap(), epsilon = 1e-4);
937 }
938 {
939 // Case 2
940 // based on example from https://www.khanacademy.org/math/ap-statistics/xfb5d8e68:inference-quantitative-means/one-sample-t-interval-mean/a/one-sample-t-interval-paired-data
941
942 let data_watch_a = [9.8, 9.8, 10.1, 10.1, 10.2];
943 let data_watch_b = [10.1, 10., 10.2, 9.9, 10.1];
944 let ci = Paired::ci(
945 Confidence::new_two_sided(0.95),
946 &data_watch_b,
947 &data_watch_a,
948 )
949 .unwrap();
950
951 println!("ci = {}", ci);
952 println!("reference: (-0.20, 0.32)");
953 assert_abs_diff_eq!(ci, Interval::new(-0.20, 0.32).unwrap(), epsilon = 1e-2);
954
955 let data_pre = [140., 152., 153., 159., 150., 146.];
956 let data_post = [150., 159., 170., 164., 148., 166.];
957 let ci = Paired::ci(Confidence::new_two_sided(0.95), &data_post, &data_pre).unwrap();
958
959 println!("ci = {}", ci);
960 println!("reference: (1.03,17.97)");
961 assert_abs_diff_eq!(ci, Interval::new(1.03, 17.97).unwrap(), epsilon = 1e-2);
962 }
963 }
964
965 #[test]
966 fn test_unpaired() {
967 // based on example from https://www.statsdirect.co.uk/help/parametric_methods/utt.htm
968 // itself based on Armitage P, Berry G. Statistical Methods in Medical Research (3rd edition). Blackwell 1994.
969 // Consider the gain in weight of 19 female rats between 28 and 84 days after birth. 12 were fed on a high protein diet and 7 on a low protein diet.
970 let data_high_protein = [
971 134., 146., 104., 119., 124., 161., 107., 83., 113., 129., 97., 123.,
972 ];
973 let data_low_protein = [70., 118., 101., 85., 107., 132., 94.];
974 let ci = Unpaired::ci(
975 Confidence::new_two_sided(0.95),
976 &data_high_protein,
977 &data_low_protein,
978 )
979 .unwrap();
980
981 println!("ci = {}", ci);
982 println!("reference: (-2.193679, 40.193679)");
983 assert_abs_diff_eq!(
984 ci,
985 Interval::new(-2.193679, 40.193679).unwrap(),
986 epsilon = 1e-2
987 );
988 }
989
990 #[test]
991 fn test_paired_diff_length() {
992 let sample_size = 10;
993 let data1 = (0..sample_size)
994 .map(|_| rand::random::<f64>())
995 .collect::<Vec<_>>();
996 let data2 = (0..sample_size + 1)
997 .map(|_| rand::random::<f64>())
998 .collect::<Vec<_>>();
999
1000 let mut stats = comparison::Paired::default();
1001 let res = stats.extend(&data1, &data2);
1002 assert!(res.is_err());
1003 match res.unwrap_err() {
1004 CIError::DifferentSampleSizes(a, b) => {
1005 println!("DifferentSampleSizes({a},{b})");
1006 assert_eq!(a, sample_size);
1007 assert_eq!(b, sample_size + 1);
1008 }
1009 e => panic!("unexpected error: {}", e),
1010 }
1011 }
1012}