stats_ci/
proportion.rs

1//!
2//! Confidence intervals for proportions
3//!
4//! # Examples
5//!
6//! ```
7//! use stats_ci::*;
8//! let data = [
9//!     true, false, true, true, false, true, true, false, true, true,
10//!     false, false, false, true, false, true, false, false, true, false
11//! ];
12//! let confidence = Confidence::new_two_sided(0.95);
13//! let interval = proportion::ci_true(confidence, &data)?;
14//! use approx::*;
15//! assert_abs_diff_eq!(interval, Interval::new(0.299, 0.701)?, epsilon = 1e-2);
16//! # Ok::<(),error::CIError>(())
17//! ```
18//!
19//! The confidence interval can also be computed incrementally, as follows:
20//! ```
21//! # use stats_ci::*;
22//! # let data = [
23//! #     true, false, true, true, false, true, true, false, true, true,
24//! #     false, false, false, true, false, true, false, false, true, false
25//! # ];
26//! # let confidence = Confidence::new_two_sided(0.95);
27//! let mut stats = proportion::Stats::default();
28//! stats.extend(&data);
29//! let interval = stats.ci(confidence)?;
30//! # use approx::*;
31//! assert_abs_diff_eq!(interval, Interval::new(0.299, 0.701)?, epsilon = 1e-2);
32//! # Ok::<(),error::CIError>(())
33//! ```
34//!
35//! # References
36//!
37//! * [Wikipedia - Confidence interval](https://en.wikipedia.org/wiki/Confidence_interval)
38//! * [Wikipedia - Binomial proportion confidence interval](https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval)
39//! * <https://influentialpoints.com/Training/confidence_intervals_of_proportions-principles-properties-assumptions.htm>
40//!
41use super::*;
42use crate::stats::z_value;
43use error::*;
44
///
/// Running tally from which a confidence interval on a proportion is computed.
///
/// Only two counters are stored: the total number of samples seen so far and how many of
/// them were successes. This makes the value cheap to copy, compare, and combine.
///
/// # Examples
///
/// ```
/// # use stats_ci::*;
/// let grades = [40, 59, 73, 44, 82, 44, 58, 74, 94, 79, 40, 52, 100, 57, 76, 93, 68, 96, 92, 98, 58, 64, 76, 40, 89, 65, 63, 90, 66, 89];
/// let stats = proportion::Stats::from_iter(grades.iter().map(|&x| x >= 60));
/// let confidence = Confidence::new_two_sided(0.95);
/// let pass_rate_ci = stats.ci(confidence)?;
/// println!("Pass rate: {}", pass_rate_ci);
/// # use approx::*;
/// assert_abs_diff_eq!(pass_rate_ci, Interval::new(0.4878, 0.8077)?, epsilon = 1e-3);
/// # Ok::<(),error::CIError>(())
/// ```
///
/// # Panics
///
/// * [`Stats::new`] panics if the number of successes is larger than the population size
///
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Stats {
    /// Total number of samples recorded (successes and failures together).
    population: usize,
    /// Number of recorded samples that were successes.
    successes: usize,
}
72
73impl FromIterator<bool> for Stats {
74    ///
75    /// Creates a new statistics object with initial values from a Boolean iterator counting the number of successes.
76    ///
77    /// Complexity: \\( O(n) \\) where \\( n \\) is the number of samples in `iter`.
78    ///
79    /// # Arguments
80    ///
81    /// * `iter` - a Boolean iterator or slice
82    ///
83    fn from_iter<I>(iter: I) -> Self
84    where
85        I: IntoIterator<Item = bool>,
86    {
87        let mut stats = Stats::default();
88        for value in iter {
89            if value {
90                stats.add_success();
91            } else {
92                stats.add_failure();
93            }
94        }
95        stats
96    }
97}
98
99impl Stats {
100    ///
101    /// Creates a new statistics object with initial values for the population size and the number of successes.
102    /// The number of successes must not be larger than the population size.
103    ///
104    /// Complexity: \\( O(1) \\)
105    ///
106    /// # Panics
107    ///
108    /// * if the number of successes is larger than the population size
109    ///
110    pub const fn new(population: usize, successes: usize) -> Self {
111        if population < successes {
112            panic!("Number of successes must not be larger than population size.")
113        }
114        Stats {
115            population,
116            successes,
117        }
118    }
119
120    ///
121    /// Returns the population size (total number of samples).
122    ///
123    /// Complexity: \\( O(1) \\)
124    ///
125    pub fn population(&self) -> usize {
126        self.population
127    }
128
129    ///
130    /// Returns the number of successes (number of `true` values found in the sample).
131    ///
132    /// Complexity: \\( O(1) \\)
133    ///
134    pub fn successes(&self) -> usize {
135        self.successes
136    }
137
138    ///
139    /// Add a success to the statistics and updates the population accordingly.
140    ///
141    /// Complexity: \\( O(1) \\)
142    ///
143    pub fn add_success(&mut self) {
144        self.population += 1;
145        self.successes += 1;
146    }
147
148    ///
149    /// Add a failure to the statistics and updates the population accordingly.
150    ///
151    /// Complexity: \\( O(1) \\)
152    ///
153    pub fn add_failure(&mut self) {
154        self.population += 1;
155    }
156
157    ///
158    /// Tests if the conditions for the validity of the Wilson score interval are met.
159    /// The conditions for the validity of the Wilson score interval are stated as follows:
160    /// <https://www.itl.nist.gov/div898/handbook/prc/section2/prc24.htm>
161    /// 1. The sample size is large enough to ensure that the sampling distribution of the sample proportion is approximately normal (N > 30)
162    /// 2. The number of successes and failures are large enough to ensure that the sampling distribution of the sample proportion is approximately normal (x > 5 and n - x > 5)
163    pub fn is_significant(&self) -> bool {
164        is_significant(self.population, self.successes)
165    }
166
167    /// Computes the confidence interval over the proportion of true values in a given sample.
168    ///
169    /// Complexity: \\( O(1) \\)
170    ///
171    /// # Arguments
172    ///
173    /// * `confidence` - the confidence level (must be in (0, 1))
174    ///
175    /// # Errors
176    ///
177    /// * `TooFewSuccesses` - if the number of successes is too small to compute a confidence interval
178    /// * `TooFewFailures` - if the number of failures is too small to compute a confidence interval
179    /// * `InvalidSuccesses` - if the number of successes is larger than the population size
180    /// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
181    ///
182    /// # Examples
183    ///
184    /// ```
185    /// # use stats_ci::*;
186    /// # use approx::*;
187    /// let data = [
188    ///    true, false, true, true, false, true, true, false, true, true,
189    ///   false, false, false, true, false, true, false, false, true, false
190    /// ];
191    /// let confidence = Confidence::new_two_sided(0.95);
192    /// let stats = proportion::Stats::from_iter(data);
193    /// let interval = stats.ci(confidence)?;
194    /// assert_abs_diff_eq!(interval, Interval::new(0.299, 0.701)?, epsilon = 1e-2);
195    /// # Ok::<(),error::CIError>(())
196    /// ```
197    ///
198    /// # Notes
199    ///
200    /// The confidence interval is computed using the function [`ci_wilson`] (Wilson score interval).
201    ///
202    pub fn ci(&self, confidence: Confidence) -> CIResult<Interval<f64>> {
203        ci(confidence, self.population, self.successes)
204    }
205
206    ///
207    /// Extend the data with additional sample data.
208    ///
209    /// Complexity: \\( O(n) \\) where \\( n \\) is the number of samples in `data`.
210    ///
211    /// # Arguments
212    ///
213    /// * `data` - the sample given as a boolean iterator or slice
214    ///
215    /// # Examples
216    /// ```
217    /// # use stats_ci::*;
218    /// let data = [true, false, true, true, false, true, true, false, true, true];
219    /// let mut stats = proportion::Stats::default();
220    /// stats.extend(&data);
221    /// assert_eq!(stats, proportion::Stats::new(10, 7));
222    /// ```
223    pub fn extend<I>(&mut self, data: &I)
224    where
225        for<'a> &'a I: IntoIterator<Item = &'a bool>,
226    {
227        for &x_i in data {
228            if x_i {
229                self.add_success();
230            } else {
231                self.add_failure();
232            }
233        }
234    }
235
236    ///
237    /// Extend the data with additional sample data and a condition that must be satisfied to be counted as a success.
238    ///
239    /// Complexity: \\( O(n) \\) where \\( n \\) is the number of samples in `data`.
240    ///
241    /// # Arguments
242    ///
243    /// * `data` - the sample given as an iterator or slice
244    /// * `is_success` - a function that returns `true` if a sample value is a success
245    ///
246    /// # Examples
247    /// ```
248    /// # use stats_ci::*;
249    /// let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
250    /// let mut stats = proportion::Stats::default();
251    /// stats.extend_if(&data, |&x| x <= 5);
252    /// assert_eq!(stats, proportion::Stats::new(10, 5));
253    /// ```
254    pub fn extend_if<T, I, F>(&mut self, data: &I, is_success: F)
255    where
256        for<'a> &'a I: IntoIterator<Item = &'a T>,
257        F: Fn(&T) -> bool,
258    {
259        for x_i in data {
260            if is_success(x_i) {
261                self.add_success();
262            } else {
263                self.add_failure();
264            }
265        }
266    }
267}
268
269impl std::ops::Add for Stats {
270    type Output = Self;
271
272    ///
273    /// Combines two statistics objects by adding the number of samples and the number of successes.
274    ///
275    /// Complexity: \\( O(1) \\)
276    ///
277    /// # Examples
278    /// ```
279    /// # use stats_ci::*;
280    /// let stats1 = proportion::Stats::new(100, 50);
281    /// let stats2 = proportion::Stats::new(200, 100);
282    /// let stats = stats1 + stats2;
283    /// assert_eq!(stats, proportion::Stats::new(300, 150));
284    /// ```
285    fn add(self, rhs: Self) -> Self::Output {
286        Stats {
287            population: self.population + rhs.population,
288            successes: self.successes + rhs.successes,
289        }
290    }
291}
292
293impl std::ops::AddAssign for Stats {
294    ///
295    /// Combines two statistics objects by adding the number of samples and the number of successes.
296    ///
297    /// Complexity: \\( O(1) \\)
298    ///
299    /// # Examples
300    /// ```
301    /// # use stats_ci::*;
302    /// let mut stats1 = proportion::Stats::new(100, 50);
303    /// let stats2 = proportion::Stats::new(200, 100);
304    /// stats1 += stats2;
305    /// assert_eq!(stats1, proportion::Stats::new(300, 150));
306    /// ```
307    fn add_assign(&mut self, rhs: Self) {
308        self.population += rhs.population;
309        self.successes += rhs.successes;
310    }
311}
312
313///
314/// Computes the (two sided) confidence interval over the proportion of true values in a given sample.
315///
316/// Complexity: \\( O(n) \\) where \\( n \\) is the number of samples in `data`.
317///
318/// # Arguments
319///
320/// * `confidence` - the confidence level (must be in (0, 1))
321/// * `data` - the sample given as a boolean iterator or slice
322///
323/// # Errors
324///
325/// * `TooFewSuccesses` - if the number of successes is too small to compute a confidence interval
326/// * `TooFewFailures` - if the number of failures is too small to compute a confidence interval
327/// * `InvalidSuccesses` - if the number of successes is larger than the population size
328/// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
329///
330/// # Examples
331///
332/// ```
333/// use stats_ci::*;
334/// # use approx::*;
335/// let data = [
336///     true, false, true, true, false, true, true, false, true, true,
337///     false, false, false, true, false, true, false, false, true, false
338/// ];
339/// let confidence = Confidence::new_two_sided(0.95);
340/// let interval = proportion::ci_true(confidence, &data)?;
341/// assert_abs_diff_eq!(interval, Interval::new(0.299, 0.701)?, epsilon = 1e-2);
342/// # Ok::<(),error::CIError>(())
343/// ```
344///
345/// # Notes
346///
347/// The confidence interval is computed using the function [`ci_wilson`] (Wilson score interval).
348///
349pub fn ci_true<I>(confidence: Confidence, data: &I) -> CIResult<Interval<f64>>
350where
351    for<'a> &'a I: IntoIterator<Item = &'a bool>,
352{
353    let mut stats = Stats::default();
354    stats.extend(data);
355    stats.ci(confidence)
356}
357
358///
359/// Computes the (two sided) confidence interval over the proportion of a given sample that satisfies a given condition.
360///
361/// Complexity: \\( O(n) \\) where \\( n \\) is the number of samples in `data`.
362///
363/// # Arguments
364///
365/// * `confidence` - the confidence level (must be in (0, 1))
366/// * `data` - the sample given as a boolean iterator or slice
367/// * `condition` - the condition that must be satisfied to be counted as a success
368///
369/// # Errors
370///
371/// * `TooFewSuccesses` - if the number of successes is too small to compute a confidence interval
372/// * `TooFewFailures` - if the number of failures is too small to compute a confidence interval
373/// * `InvalidSuccesses` - if the number of successes is larger than the population size
374/// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
375///
376/// # Examples
377///
378/// ```
379/// use stats_ci::*;
380/// # use approx::*;
381/// let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20];
382/// let confidence = Confidence::new_two_sided(0.95);
383/// let interval = proportion::ci_if(confidence, &data, |&x| x <= 10)?;
384/// assert_abs_diff_eq!(interval, Interval::new(0.299, 0.701)?, epsilon = 1e-2);
385/// # Ok::<(),error::CIError>(())
386/// ```
387///
388pub fn ci_if<T, I, F>(confidence: Confidence, data: &I, cond: F) -> CIResult<Interval<f64>>
389where
390    for<'a> &'a I: IntoIterator<Item = &'a T>,
391    F: Fn(&T) -> bool,
392{
393    let mut stats = Stats::default();
394    stats.extend_if(data, cond);
395    stats.ci(confidence)
396}
397
398///
399/// Computes the (two sided) confidence interval over the proportion of successes a given sample.
400///
401/// Complexity: \\( O(1) \\)
402///
403/// # Arguments
404///
405/// * `confidence` - the confidence level (must be in (0, 1))
406/// * `population` - the size of the population
407/// * `successes` - the number of successes in the sample
408///
409/// # Errors
410///
411/// * `TooFewSuccesses` - if the number of successes is too small to compute a confidence interval
412/// * `TooFewFailures` - if the number of failures is too small to compute a confidence interval
413/// * `InvalidSuccesses` - if the number of successes is larger than the population size
414/// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
415///
416/// # Notes
417///
418/// This function is an alias for [`ci_wilson`].
419///
420/// # Examples
421///
422/// ```
423/// use stats_ci::*;
424/// # use approx::*;
425/// let population = 500;
426/// let successes = 421;
427/// let confidence = Confidence::new_two_sided(0.95);
428/// let interval = proportion::ci(confidence, population, successes)?;
429/// assert_abs_diff_eq!(interval, Interval::new(0.81, 0.87)?, epsilon = 1e-2);
430/// # Ok::<(),error::CIError>(())
431/// ```
432///
433pub fn ci(confidence: Confidence, population: usize, successes: usize) -> CIResult<Interval<f64>> {
434    ci_wilson(confidence, population, successes)
435}
436
///
/// Check if the conditions for the validity of the Wilson score interval are met.
/// The conditions for the validity of hypothesis tests (from which the Wilson score is derived) are stated as follows:
/// <https://www.itl.nist.gov/div898/handbook/prc/section2/prc24.htm>
/// 1. The sample size is large enough to ensure that the sampling distribution of the sample proportion is approximately normal (N > 30)
/// 2. The number of successes and failures are large enough to ensure that the sampling distribution of the sample proportion is approximately normal (x > 5 and n - x > 5)
///
/// # Arguments
///
/// * `population` - the size of the population
/// * `successes` - the number of successes in the sample
///
/// # Returns
///
/// `true` if the conditions are met, `false` otherwise.
/// Inconsistent input (`successes` larger than `population`) is never significant and
/// yields `false` rather than panicking.
///
/// # Examples
///
/// ```
/// use stats_ci::*;
/// assert!(proportion::is_significant(500, 10));
/// assert!(! proportion::is_significant(10, 5));
/// assert!(! proportion::is_significant(1000, 1));
/// assert!(! proportion::is_significant(40, 50));
/// ```
pub fn is_significant(population: usize, successes: usize) -> bool {
    // Significance criteria for Wilson score intervals.
    // see https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval
    // and https://www.itl.nist.gov/div898/handbook/prc/section2/prc24.htm
    //
    // The number of failures is obtained with a checked subtraction: a plain
    // `population - successes` underflows when `successes > population`, which panics in
    // debug builds and wraps around to a huge value in release builds.
    match population.checked_sub(successes) {
        // 1. large enough sample (N > 30)
        // 2. enough successes and enough failures (x > 5 and n - x > 5)
        Some(failures) => population > 30 && successes > 5 && failures > 5,
        // More successes than samples: the counts are inconsistent.
        None => false,
    }
}
472
473///
474/// Computes the (two sided) confidence interval over the proportion of successes a given sample using the Wilson score interval.
475/// This is the method used by default when calling the function [`ci`] of this module.
476///
477/// # Arguments
478///
479/// * `confidence` - the confidence level (must be in (0, 1))
480/// * `population` - the size of the population
481/// * `successes` - the number of successes in the sample
482///
483/// # Errors
484///
485/// * `TooFewSuccesses` - if the number of successes is too small to compute a confidence interval
486/// * `TooFewFailures` - if the number of failures is too small to compute a confidence interval
487/// * `InvalidSuccesses` - if the number of successes is larger than the population size
488/// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
489///
490/// # Notes
491///
492/// This method is based on the Wilson score interval, which is a modification of the normal approximation interval.
493/// It is more robust than the normal approximation interval, but it is also more conservative.
494/// In particular, it is more conservative when the sample size is small.
495/// It is also more conservative when the sample size is large and the proportion is close to 0 or 1.
496///
497/// Using the Wilson score interval, the probability of success \\( p \\) is estimated by:
498/// \\[
499/// p \approx  \frac{n_S+\frac{1}{2}z^2}{n+z^2} \pm \frac{z}{n+z^2} \sqrt{\frac{n_S ~ n_F}{n}+\frac{z^2}{4}}
500/// \\]
501/// where
502/// * \\( n_S \\) is the number of successes,
503/// * \\( n_F \\) is the number of failures,
504/// * \\( n = n_S + n_F \\) is the sample size, and
505/// * \\( z \\) is the z-value corresponding to the confidence level.
506///
507/// The conditions for the validity of the Wilson score interval can be checked with the function [`is_significant`].
508/// However, the significance check for this function is much more permissive. It is the caller's responsibility to check for the stricter conditions for statistical significance if necessary.
509/// One advantage of using the Wilson score interval is that it is still reasonably accurate for small sample sizes and when the proportion of successes is close to 0 or 1.
510///
511/// # References
512///
513/// * [Wikipedia article on Wilson score interval](https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval)
514/// * Francis J. DiTraglia. [Blog post: The Wilson Confidence Interval for a Proportion](https://www.econometrics.blog/post/the-wilson-confidence-interval-for-a-proportion/). Feb 2022.
515///
516pub fn ci_wilson(
517    confidence: Confidence,
518    population: usize,
519    successes: usize,
520) -> CIResult<Interval<f64>> {
521    if successes > population {
522        return Err(CIError::InvalidSuccesses(successes, population));
523    }
524
525    let n = population as f64;
526    let n_s = successes as f64;
527    let n_f = n - n_s;
528
529    // conditions for statistical significance:
530    // n p > 5 and n (1 - p) > 5
531    // however, we are more permissive here and rely on the user to check for the stricter conditions for statistical significance.
532    if successes < 2 {
533        // too few successes for statistical significance
534        return Err(CIError::TooFewSuccesses(successes, population, n_s));
535    }
536    if population - successes < 2 {
537        // too few failures for statistical significance
538        return Err(CIError::TooFewFailures(
539            population - successes,
540            population,
541            n_f,
542        ));
543    }
544
545    let z = z_value(confidence);
546    let z_sq = z * z;
547
548    let mean = (n_s + z_sq / 2.) / (n + z_sq);
549    let span = (z / (n + z_sq)) * ((n_s * n_f / n) + (z_sq / 4.)).sqrt();
550
551    match confidence {
552        Confidence::TwoSided(_) => Interval::new(mean - span, mean + span).map_err(|e| e.into()),
553        Confidence::UpperOneSided(_) => Interval::new(mean - span, 1.).map_err(|e| e.into()),
554        Confidence::LowerOneSided(_) => Interval::new(0., mean + span).map_err(|e| e.into()),
555    }
556}
557
558///
559/// Computes the (two sided) confidence interval over the proportion of successes in a given sample using the Wilson score interval.
560/// This is the method used by default when calling the function [`ci`] of this module.
561///
562/// # Arguments
563///
564/// * `confidence` - the confidence level (must be in (0, 1))
565/// * `population` - the size of the population
566/// * `success_rate` - the proportion of successes in the sample
567///
568/// # Errors
569///
570/// * `TooFewSuccesses` - if the number of successes is too small to compute a confidence interval
571/// * `TooFewFailures` - if the number of failures is too small to compute a confidence interval
572/// * `InvalidSuccesses` - if the number of successes is larger than the population size
573/// * `NonPositiveValue` - if the success rate is not in positive or null.
574/// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
575///
576/// # Notes
577///
578/// This method is simply a front for [`ci_wilson`], which takes the number of successes as an argument.
579///
580pub fn ci_wilson_ratio(
581    confidence: Confidence,
582    population: usize,
583    success_rate: f64,
584) -> CIResult<Interval<f64>> {
585    if success_rate <= 0. {
586        return Err(CIError::NonPositiveValue(success_rate));
587    }
588    let successes = (success_rate * population as f64) as usize;
589
590    ci_wilson(confidence, population, successes)
591}
592
593///
594/// Computes the confidence interval over the proportion of successes in a given sample using the normal approximation interval (Wald interval).
595///
596/// Using the normal approximation interval (Wald method), the probability of success \\( p \\) is estimated by:
597/// \\[
598/// p \approx  \frac{n_S}{n} \pm z \sqrt{\frac{n_S ~ n_F}{n^3}} = \hat{p} \pm z \sqrt{\frac{\hat{p} ~ (1 - \hat{p})}{n}}
599/// \\]
600/// where
601/// * \\( n_S \\) is the number of successes,
602/// * \\( n_F \\) is the number of failures,
603/// * \\( n = n_S + n_F \\) is the sample size,
604/// * \\( z \\) is the z-value corresponding to the confidence level, and
605/// * \\( \hat{p} = \frac{n_S}{n} \\) is the estimated probability of success.
606///
607/// # Arguments
608///
609/// * `confidence` - the confidence level (must be in (0, 1))
610/// * `population` - the size of the population
611/// * `successes` - the number of successes in the sample
612///
613/// # Errors
614///
615/// * `TooFewSuccesses` - if the number of successes is too small to compute a confidence interval
616/// * `TooFewFailures` - if the number of failures is too small to compute a confidence interval
617/// * `InvalidSuccesses` - if the number of successes is larger than the population size
618/// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
619///
620/// # Notes
621///
622/// This method is based on the normal approximation interval.
623/// It is less robust than the Wilson score interval, but it is also less conservative.
624///
625/// # References
626///
627/// * [Wikipedia article on normal approximation interval](https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Normal_approximation_interval)
628///
629pub fn ci_z_normal(
630    confidence: Confidence,
631    population: usize,
632    successes: usize,
633) -> CIResult<Interval<f64>> {
634    if successes > population {
635        return Err(CIError::InvalidSuccesses(successes, population));
636    }
637
638    let n = population as f64;
639    let x = successes as f64;
640    let p = x / n;
641    let q = 1. - p;
642
643    if n * p < 10. {
644        // too few successes for statistical significance
645        return Err(CIError::TooFewSuccesses(successes, population, n * p));
646    }
647    if n * q < 10. {
648        // too few failures for statistical significance
649        return Err(CIError::TooFewFailures(
650            population - successes,
651            population,
652            n * q,
653        ));
654    }
655
656    let std_dev = (p * q / n).sqrt();
657    let z = z_value(confidence);
658    let mean = p;
659    let span = z * std_dev;
660    match confidence {
661        Confidence::TwoSided(_) => Interval::new(mean - span, mean + span).map_err(|e| e.into()),
662        Confidence::UpperOneSided(_) => Interval::new(mean - span, 1.).map_err(|e| e.into()),
663        Confidence::LowerOneSided(_) => Interval::new(0., mean + span).map_err(|e| e.into()),
664    }
665}
666
#[cfg(test)]
mod tests {
    use super::*;
    use approx::*;

    /// Two-sided interval on known counts, and agreement of each one-sided interval with
    /// the matching bound of the two-sided one.
    #[test]
    fn test_proportion_ci() -> CIResult<()> {
        let (population, successes) = (500, 421);

        let two_sided = proportion::ci(Confidence::TwoSided(0.95), population, successes)?;
        assert_abs_diff_eq!(two_sided, Interval::new(0.81, 0.87)?, epsilon = 1e-2);

        let upper = proportion::ci(Confidence::UpperOneSided(0.975), population, successes)?;
        assert_eq!(upper.high_f(), 1.);
        assert_abs_diff_eq!(upper.low_f(), two_sided.low_f(), epsilon = 1e-2);

        let lower = proportion::ci(Confidence::LowerOneSided(0.975), population, successes)?;
        assert_eq!(lower.low_f(), 0.);
        assert_abs_diff_eq!(lower.high_f(), two_sided.high_f(), epsilon = 1e-2);

        Ok(())
    }

    /// Half of the values 1..=20 satisfy the predicate.
    #[test]
    fn test_proportion_ci_if() {
        let data = [
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
        ];
        let interval =
            proportion::ci_if(Confidence::TwoSided(0.95), &data, |&x| x <= 10).unwrap();
        let expected = Interval::new(0.299, 0.701).unwrap();
        assert_abs_diff_eq!(interval, expected, epsilon = 1e-2);
    }

    /// `+` and `+=` must both sum the populations and the success counts.
    #[test]
    fn test_proportion_add() {
        let expected = proportion::Stats::new(300, 150);

        let lhs = proportion::Stats::new(100, 50);
        let rhs = proportion::Stats::new(200, 100);
        assert_eq!(lhs + rhs, expected);

        let mut accumulated = proportion::Stats::new(100, 50);
        accumulated += proportion::Stats::new(200, 100);
        assert_eq!(accumulated, expected);
    }

    /// Mirrors the example given in the documentation of `Stats`.
    #[test]
    fn test_main_example() -> CIResult<()> {
        let grades = [
            40, 59, 73, 44, 82, 44, 58, 74, 94, 79, 40, 52, 100, 57, 76, 93, 68, 96, 92, 98, 58,
            64, 76, 40, 89, 65, 63, 90, 66, 89,
        ];
        let stats = proportion::Stats::from_iter(grades.iter().map(|&x| x >= 60));
        let pass_rate_ci = stats.ci(Confidence::new_two_sided(0.95))?;
        println!("Pass rate: {}", pass_rate_ci);
        assert_abs_diff_eq!(pass_rate_ci, Interval::new(0.4878, 0.8077)?, epsilon = 1e-3);
        Ok(())
    }

    /// Smoke test: only checks that both calls succeed; no bounds are asserted.
    #[test]
    fn test_readme_simple() {
        let messages = 10_000;
        let losses = 89;

        let ci = proportion::ci(Confidence::new(0.95), messages, losses).unwrap();
        println!("Loss rate: {}", ci);

        let ci = proportion::ci(Confidence::new_lower(0.95), messages, losses).unwrap();
        println!("Loss rate less than: {}", ci);
    }
}
stats_ci/proportion.rs

stats_ci/
proportion.rs