stats_ci/proportion.rs
1//!
2//! Confidence intervals for proportions
3//!
4//! # Examples
5//!
6//! ```
7//! use stats_ci::*;
8//! let data = [
9//! true, false, true, true, false, true, true, false, true, true,
10//! false, false, false, true, false, true, false, false, true, false
11//! ];
12//! let confidence = Confidence::new_two_sided(0.95);
13//! let interval = proportion::ci_true(confidence, &data)?;
14//! use approx::*;
15//! assert_abs_diff_eq!(interval, Interval::new(0.299, 0.701)?, epsilon = 1e-2);
16//! # Ok::<(),error::CIError>(())
17//! ```
18//!
19//! The confidence interval can also be computed incrementally, as follows:
20//! ```
21//! # use stats_ci::*;
22//! # let data = [
23//! # true, false, true, true, false, true, true, false, true, true,
24//! # false, false, false, true, false, true, false, false, true, false
25//! # ];
26//! # let confidence = Confidence::new_two_sided(0.95);
27//! let mut stats = proportion::Stats::default();
28//! stats.extend(&data);
29//! let interval = stats.ci(confidence)?;
30//! # use approx::*;
31//! assert_abs_diff_eq!(interval, Interval::new(0.299, 0.701)?, epsilon = 1e-2);
32//! # Ok::<(),error::CIError>(())
33//! ```
34//!
35//! # References
36//!
37//! * [Wikipedia - Confidence interval](https://en.wikipedia.org/wiki/Confidence_interval)
38//! * [Wikipedia - Binomial proportion confidence interval](https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval)
39//! * <https://influentialpoints.com/Training/confidence_intervals_of_proportions-principles-properties-assumptions.htm>
40//!
41use super::*;
42use crate::stats::z_value;
43use error::*;
44
///
/// Running tally from which a confidence interval on a proportion is derived.
///
/// The tally is made of two counters: the total number of samples recorded
/// (the population) and the number of those samples that were successes.
///
/// # Examples
///
/// ```
/// # use stats_ci::*;
/// let grades = [40, 59, 73, 44, 82, 44, 58, 74, 94, 79, 40, 52, 100, 57, 76, 93, 68, 96, 92, 98, 58, 64, 76, 40, 89, 65, 63, 90, 66, 89];
/// let stats = proportion::Stats::from_iter(grades.iter().map(|&x| x >= 60));
/// let confidence = Confidence::new_two_sided(0.95);
/// let pass_rate_ci = stats.ci(confidence)?;
/// println!("Pass rate: {}", pass_rate_ci);
/// # use approx::*;
/// assert_abs_diff_eq!(pass_rate_ci, Interval::new(0.4878, 0.8077)?, epsilon = 1e-3);
/// # Ok::<(),error::CIError>(())
/// ```
///
/// # Panics
///
/// * [`Stats::new`] panics if the number of successes is larger than the population size
///
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Stats {
    /// Total number of samples recorded so far.
    population: usize,
    /// Number of recorded samples that were successes.
    successes: usize,
}
72
73impl FromIterator<bool> for Stats {
74 ///
75 /// Creates a new statistics object with initial values from a Boolean iterator counting the number of successes.
76 ///
77 /// Complexity: \\( O(n) \\) where \\( n \\) is the number of samples in `iter`.
78 ///
79 /// # Arguments
80 ///
81 /// * `iter` - a Boolean iterator or slice
82 ///
83 fn from_iter<I>(iter: I) -> Self
84 where
85 I: IntoIterator<Item = bool>,
86 {
87 let mut stats = Stats::default();
88 for value in iter {
89 if value {
90 stats.add_success();
91 } else {
92 stats.add_failure();
93 }
94 }
95 stats
96 }
97}
98
99impl Stats {
100 ///
101 /// Creates a new statistics object with initial values for the population size and the number of successes.
102 /// The number of successes must not be larger than the population size.
103 ///
104 /// Complexity: \\( O(1) \\)
105 ///
106 /// # Panics
107 ///
108 /// * if the number of successes is larger than the population size
109 ///
110 pub const fn new(population: usize, successes: usize) -> Self {
111 if population < successes {
112 panic!("Number of successes must not be larger than population size.")
113 }
114 Stats {
115 population,
116 successes,
117 }
118 }
119
120 ///
121 /// Returns the population size (total number of samples).
122 ///
123 /// Complexity: \\( O(1) \\)
124 ///
125 pub fn population(&self) -> usize {
126 self.population
127 }
128
129 ///
130 /// Returns the number of successes (number of `true` values found in the sample).
131 ///
132 /// Complexity: \\( O(1) \\)
133 ///
134 pub fn successes(&self) -> usize {
135 self.successes
136 }
137
138 ///
139 /// Add a success to the statistics and updates the population accordingly.
140 ///
141 /// Complexity: \\( O(1) \\)
142 ///
143 pub fn add_success(&mut self) {
144 self.population += 1;
145 self.successes += 1;
146 }
147
148 ///
149 /// Add a failure to the statistics and updates the population accordingly.
150 ///
151 /// Complexity: \\( O(1) \\)
152 ///
153 pub fn add_failure(&mut self) {
154 self.population += 1;
155 }
156
157 ///
158 /// Tests if the conditions for the validity of the Wilson score interval are met.
159 /// The conditions for the validity of the Wilson score interval are stated as follows:
160 /// <https://www.itl.nist.gov/div898/handbook/prc/section2/prc24.htm>
161 /// 1. The sample size is large enough to ensure that the sampling distribution of the sample proportion is approximately normal (N > 30)
162 /// 2. The number of successes and failures are large enough to ensure that the sampling distribution of the sample proportion is approximately normal (x > 5 and n - x > 5)
163 pub fn is_significant(&self) -> bool {
164 is_significant(self.population, self.successes)
165 }
166
167 /// Computes the confidence interval over the proportion of true values in a given sample.
168 ///
169 /// Complexity: \\( O(1) \\)
170 ///
171 /// # Arguments
172 ///
173 /// * `confidence` - the confidence level (must be in (0, 1))
174 ///
175 /// # Errors
176 ///
177 /// * `TooFewSuccesses` - if the number of successes is too small to compute a confidence interval
178 /// * `TooFewFailures` - if the number of failures is too small to compute a confidence interval
179 /// * `InvalidSuccesses` - if the number of successes is larger than the population size
180 /// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
181 ///
182 /// # Examples
183 ///
184 /// ```
185 /// # use stats_ci::*;
186 /// # use approx::*;
187 /// let data = [
188 /// true, false, true, true, false, true, true, false, true, true,
189 /// false, false, false, true, false, true, false, false, true, false
190 /// ];
191 /// let confidence = Confidence::new_two_sided(0.95);
192 /// let stats = proportion::Stats::from_iter(data);
193 /// let interval = stats.ci(confidence)?;
194 /// assert_abs_diff_eq!(interval, Interval::new(0.299, 0.701)?, epsilon = 1e-2);
195 /// # Ok::<(),error::CIError>(())
196 /// ```
197 ///
198 /// # Notes
199 ///
200 /// The confidence interval is computed using the function [`ci_wilson`] (Wilson score interval).
201 ///
202 pub fn ci(&self, confidence: Confidence) -> CIResult<Interval<f64>> {
203 ci(confidence, self.population, self.successes)
204 }
205
206 ///
207 /// Extend the data with additional sample data.
208 ///
209 /// Complexity: \\( O(n) \\) where \\( n \\) is the number of samples in `data`.
210 ///
211 /// # Arguments
212 ///
213 /// * `data` - the sample given as a boolean iterator or slice
214 ///
215 /// # Examples
216 /// ```
217 /// # use stats_ci::*;
218 /// let data = [true, false, true, true, false, true, true, false, true, true];
219 /// let mut stats = proportion::Stats::default();
220 /// stats.extend(&data);
221 /// assert_eq!(stats, proportion::Stats::new(10, 7));
222 /// ```
223 pub fn extend<I>(&mut self, data: &I)
224 where
225 for<'a> &'a I: IntoIterator<Item = &'a bool>,
226 {
227 for &x_i in data {
228 if x_i {
229 self.add_success();
230 } else {
231 self.add_failure();
232 }
233 }
234 }
235
236 ///
237 /// Extend the data with additional sample data and a condition that must be satisfied to be counted as a success.
238 ///
239 /// Complexity: \\( O(n) \\) where \\( n \\) is the number of samples in `data`.
240 ///
241 /// # Arguments
242 ///
243 /// * `data` - the sample given as an iterator or slice
244 /// * `is_success` - a function that returns `true` if a sample value is a success
245 ///
246 /// # Examples
247 /// ```
248 /// # use stats_ci::*;
249 /// let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
250 /// let mut stats = proportion::Stats::default();
251 /// stats.extend_if(&data, |&x| x <= 5);
252 /// assert_eq!(stats, proportion::Stats::new(10, 5));
253 /// ```
254 pub fn extend_if<T, I, F>(&mut self, data: &I, is_success: F)
255 where
256 for<'a> &'a I: IntoIterator<Item = &'a T>,
257 F: Fn(&T) -> bool,
258 {
259 for x_i in data {
260 if is_success(x_i) {
261 self.add_success();
262 } else {
263 self.add_failure();
264 }
265 }
266 }
267}
268
269impl std::ops::Add for Stats {
270 type Output = Self;
271
272 ///
273 /// Combines two statistics objects by adding the number of samples and the number of successes.
274 ///
275 /// Complexity: \\( O(1) \\)
276 ///
277 /// # Examples
278 /// ```
279 /// # use stats_ci::*;
280 /// let stats1 = proportion::Stats::new(100, 50);
281 /// let stats2 = proportion::Stats::new(200, 100);
282 /// let stats = stats1 + stats2;
283 /// assert_eq!(stats, proportion::Stats::new(300, 150));
284 /// ```
285 fn add(self, rhs: Self) -> Self::Output {
286 Stats {
287 population: self.population + rhs.population,
288 successes: self.successes + rhs.successes,
289 }
290 }
291}
292
293impl std::ops::AddAssign for Stats {
294 ///
295 /// Combines two statistics objects by adding the number of samples and the number of successes.
296 ///
297 /// Complexity: \\( O(1) \\)
298 ///
299 /// # Examples
300 /// ```
301 /// # use stats_ci::*;
302 /// let mut stats1 = proportion::Stats::new(100, 50);
303 /// let stats2 = proportion::Stats::new(200, 100);
304 /// stats1 += stats2;
305 /// assert_eq!(stats1, proportion::Stats::new(300, 150));
306 /// ```
307 fn add_assign(&mut self, rhs: Self) {
308 self.population += rhs.population;
309 self.successes += rhs.successes;
310 }
311}
312
313///
314/// Computes the (two sided) confidence interval over the proportion of true values in a given sample.
315///
316/// Complexity: \\( O(n) \\) where \\( n \\) is the number of samples in `data`.
317///
318/// # Arguments
319///
320/// * `confidence` - the confidence level (must be in (0, 1))
321/// * `data` - the sample given as a boolean iterator or slice
322///
323/// # Errors
324///
325/// * `TooFewSuccesses` - if the number of successes is too small to compute a confidence interval
326/// * `TooFewFailures` - if the number of failures is too small to compute a confidence interval
327/// * `InvalidSuccesses` - if the number of successes is larger than the population size
328/// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
329///
330/// # Examples
331///
332/// ```
333/// use stats_ci::*;
334/// # use approx::*;
335/// let data = [
336/// true, false, true, true, false, true, true, false, true, true,
337/// false, false, false, true, false, true, false, false, true, false
338/// ];
339/// let confidence = Confidence::new_two_sided(0.95);
340/// let interval = proportion::ci_true(confidence, &data)?;
341/// assert_abs_diff_eq!(interval, Interval::new(0.299, 0.701)?, epsilon = 1e-2);
342/// # Ok::<(),error::CIError>(())
343/// ```
344///
345/// # Notes
346///
347/// The confidence interval is computed using the function [`ci_wilson`] (Wilson score interval).
348///
349pub fn ci_true<I>(confidence: Confidence, data: &I) -> CIResult<Interval<f64>>
350where
351 for<'a> &'a I: IntoIterator<Item = &'a bool>,
352{
353 let mut stats = Stats::default();
354 stats.extend(data);
355 stats.ci(confidence)
356}
357
358///
359/// Computes the (two sided) confidence interval over the proportion of a given sample that satisfies a given condition.
360///
361/// Complexity: \\( O(n) \\) where \\( n \\) is the number of samples in `data`.
362///
363/// # Arguments
364///
365/// * `confidence` - the confidence level (must be in (0, 1))
366/// * `data` - the sample given as a boolean iterator or slice
367/// * `condition` - the condition that must be satisfied to be counted as a success
368///
369/// # Errors
370///
371/// * `TooFewSuccesses` - if the number of successes is too small to compute a confidence interval
372/// * `TooFewFailures` - if the number of failures is too small to compute a confidence interval
373/// * `InvalidSuccesses` - if the number of successes is larger than the population size
374/// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
375///
376/// # Examples
377///
378/// ```
379/// use stats_ci::*;
380/// # use approx::*;
381/// let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20];
382/// let confidence = Confidence::new_two_sided(0.95);
383/// let interval = proportion::ci_if(confidence, &data, |&x| x <= 10)?;
384/// assert_abs_diff_eq!(interval, Interval::new(0.299, 0.701)?, epsilon = 1e-2);
385/// # Ok::<(),error::CIError>(())
386/// ```
387///
388pub fn ci_if<T, I, F>(confidence: Confidence, data: &I, cond: F) -> CIResult<Interval<f64>>
389where
390 for<'a> &'a I: IntoIterator<Item = &'a T>,
391 F: Fn(&T) -> bool,
392{
393 let mut stats = Stats::default();
394 stats.extend_if(data, cond);
395 stats.ci(confidence)
396}
397
398///
399/// Computes the (two sided) confidence interval over the proportion of successes a given sample.
400///
401/// Complexity: \\( O(1) \\)
402///
403/// # Arguments
404///
405/// * `confidence` - the confidence level (must be in (0, 1))
406/// * `population` - the size of the population
407/// * `successes` - the number of successes in the sample
408///
409/// # Errors
410///
411/// * `TooFewSuccesses` - if the number of successes is too small to compute a confidence interval
412/// * `TooFewFailures` - if the number of failures is too small to compute a confidence interval
413/// * `InvalidSuccesses` - if the number of successes is larger than the population size
414/// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
415///
416/// # Notes
417///
418/// This function is an alias for [`ci_wilson`].
419///
420/// # Examples
421///
422/// ```
423/// use stats_ci::*;
424/// # use approx::*;
425/// let population = 500;
426/// let successes = 421;
427/// let confidence = Confidence::new_two_sided(0.95);
428/// let interval = proportion::ci(confidence, population, successes)?;
429/// assert_abs_diff_eq!(interval, Interval::new(0.81, 0.87)?, epsilon = 1e-2);
430/// # Ok::<(),error::CIError>(())
431/// ```
432///
433pub fn ci(confidence: Confidence, population: usize, successes: usize) -> CIResult<Interval<f64>> {
434 ci_wilson(confidence, population, successes)
435}
436
///
/// Check if the conditions for the validity of the Wilson score interval are met.
/// The conditions for the validity of hypothesis tests (from which the Wilson score is derived) are stated as follows:
/// <https://www.itl.nist.gov/div898/handbook/prc/section2/prc24.htm>
/// 1. The sample size is large enough to ensure that the sampling distribution of the sample proportion is approximately normal (N > 30)
/// 2. The number of successes and failures are large enough to ensure that the sampling distribution of the sample proportion is approximately normal (x > 5 and n - x > 5)
///
/// # Arguments
///
/// * `population` - the size of the population
/// * `successes` - the number of successes in the sample
///
/// # Returns
///
/// `true` if the conditions are met, `false` otherwise.
/// Inconsistent counts (more successes than the population size) never meet the conditions,
/// so the function returns `false` for them instead of overflowing.
///
/// # Examples
///
/// ```
/// use stats_ci::*;
/// assert!(proportion::is_significant(500, 10));
/// assert!(! proportion::is_significant(10, 5));
/// assert!(! proportion::is_significant(1000, 1));
/// assert!(! proportion::is_significant(40, 50));
/// ```
pub fn is_significant(population: usize, successes: usize) -> bool {
    // significance criteria for Wilson score intervals.
    // see https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval
    // and https://www.itl.nist.gov/div898/handbook/prc/section2/prc24.htm
    //
    // `checked_sub` yields `None` when `successes > population`. A plain subtraction
    // would overflow in that case (panic in debug builds, wrap-around otherwise).
    population
        .checked_sub(successes)
        .map_or(false, |failures| {
            // 1. large enough sample (N > 30)
            population > 30
                // 2. enough successes and enough failures (x > 5 and n - x > 5)
                && successes > 5
                && failures > 5
        })
}
472
473///
474/// Computes the (two sided) confidence interval over the proportion of successes a given sample using the Wilson score interval.
475/// This is the method used by default when calling the function [`ci`] of this module.
476///
477/// # Arguments
478///
479/// * `confidence` - the confidence level (must be in (0, 1))
480/// * `population` - the size of the population
481/// * `successes` - the number of successes in the sample
482///
483/// # Errors
484///
485/// * `TooFewSuccesses` - if the number of successes is too small to compute a confidence interval
486/// * `TooFewFailures` - if the number of failures is too small to compute a confidence interval
487/// * `InvalidSuccesses` - if the number of successes is larger than the population size
488/// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
489///
490/// # Notes
491///
492/// This method is based on the Wilson score interval, which is a modification of the normal approximation interval.
493/// It is more robust than the normal approximation interval, but it is also more conservative.
494/// In particular, it is more conservative when the sample size is small.
495/// It is also more conservative when the sample size is large and the proportion is close to 0 or 1.
496///
497/// Using the Wilson score interval, the probability of success \\( p \\) is estimated by:
498/// \\[
499/// p \approx \frac{n_S+\frac{1}{2}z^2}{n+z^2} \pm \frac{z}{n+z^2} \sqrt{\frac{n_S ~ n_F}{n}+\frac{z^2}{4}}
500/// \\]
501/// where
502/// * \\( n_S \\) is the number of successes,
503/// * \\( n_F \\) is the number of failures,
504/// * \\( n = n_S + n_F \\) is the sample size, and
505/// * \\( z \\) is the z-value corresponding to the confidence level.
506///
507/// The conditions for the validity of the Wilson score interval can be checked with the function [`is_significant`].
508/// However, the significance check for this function is much more permissive. It is the caller's responsibility to check for the stricter conditions for statistical significance if necessary.
509/// One advantage of using the Wilson score interval is that it is still reasonably accurate for small sample sizes and when the proportion of successes is close to 0 or 1.
510///
511/// # References
512///
513/// * [Wikipedia article on Wilson score interval](https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval)
514/// * Francis J. DiTraglia. [Blog post: The Wilson Confidence Interval for a Proportion](https://www.econometrics.blog/post/the-wilson-confidence-interval-for-a-proportion/). Feb 2022.
515///
516pub fn ci_wilson(
517 confidence: Confidence,
518 population: usize,
519 successes: usize,
520) -> CIResult<Interval<f64>> {
521 if successes > population {
522 return Err(CIError::InvalidSuccesses(successes, population));
523 }
524
525 let n = population as f64;
526 let n_s = successes as f64;
527 let n_f = n - n_s;
528
529 // conditions for statistical significance:
530 // n p > 5 and n (1 - p) > 5
531 // however, we are more permissive here and rely on the user to check for the stricter conditions for statistical significance.
532 if successes < 2 {
533 // too few successes for statistical significance
534 return Err(CIError::TooFewSuccesses(successes, population, n_s));
535 }
536 if population - successes < 2 {
537 // too few failures for statistical significance
538 return Err(CIError::TooFewFailures(
539 population - successes,
540 population,
541 n_f,
542 ));
543 }
544
545 let z = z_value(confidence);
546 let z_sq = z * z;
547
548 let mean = (n_s + z_sq / 2.) / (n + z_sq);
549 let span = (z / (n + z_sq)) * ((n_s * n_f / n) + (z_sq / 4.)).sqrt();
550
551 match confidence {
552 Confidence::TwoSided(_) => Interval::new(mean - span, mean + span).map_err(|e| e.into()),
553 Confidence::UpperOneSided(_) => Interval::new(mean - span, 1.).map_err(|e| e.into()),
554 Confidence::LowerOneSided(_) => Interval::new(0., mean + span).map_err(|e| e.into()),
555 }
556}
557
558///
559/// Computes the (two sided) confidence interval over the proportion of successes in a given sample using the Wilson score interval.
560/// This is the method used by default when calling the function [`ci`] of this module.
561///
562/// # Arguments
563///
564/// * `confidence` - the confidence level (must be in (0, 1))
565/// * `population` - the size of the population
566/// * `success_rate` - the proportion of successes in the sample
567///
568/// # Errors
569///
570/// * `TooFewSuccesses` - if the number of successes is too small to compute a confidence interval
571/// * `TooFewFailures` - if the number of failures is too small to compute a confidence interval
572/// * `InvalidSuccesses` - if the number of successes is larger than the population size
573/// * `NonPositiveValue` - if the success rate is not in positive or null.
574/// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
575///
576/// # Notes
577///
578/// This method is simply a front for [`ci_wilson`], which takes the number of successes as an argument.
579///
580pub fn ci_wilson_ratio(
581 confidence: Confidence,
582 population: usize,
583 success_rate: f64,
584) -> CIResult<Interval<f64>> {
585 if success_rate <= 0. {
586 return Err(CIError::NonPositiveValue(success_rate));
587 }
588 let successes = (success_rate * population as f64) as usize;
589
590 ci_wilson(confidence, population, successes)
591}
592
593///
594/// Computes the confidence interval over the proportion of successes in a given sample using the normal approximation interval (Wald interval).
595///
596/// Using the normal approximation interval (Wald method), the probability of success \\( p \\) is estimated by:
597/// \\[
598/// p \approx \frac{n_S}{n} \pm z \sqrt{\frac{n_S ~ n_F}{n^3}} = \hat{p} \pm z \sqrt{\frac{\hat{p} ~ (1 - \hat{p})}{n}}
599/// \\]
600/// where
601/// * \\( n_S \\) is the number of successes,
602/// * \\( n_F \\) is the number of failures,
603/// * \\( n = n_S + n_F \\) is the sample size,
604/// * \\( z \\) is the z-value corresponding to the confidence level, and
605/// * \\( \hat{p} = \frac{n_S}{n} \\) is the estimated probability of success.
606///
607/// # Arguments
608///
609/// * `confidence` - the confidence level (must be in (0, 1))
610/// * `population` - the size of the population
611/// * `successes` - the number of successes in the sample
612///
613/// # Errors
614///
615/// * `TooFewSuccesses` - if the number of successes is too small to compute a confidence interval
616/// * `TooFewFailures` - if the number of failures is too small to compute a confidence interval
617/// * `InvalidSuccesses` - if the number of successes is larger than the population size
618/// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
619///
620/// # Notes
621///
622/// This method is based on the normal approximation interval.
623/// It is less robust than the Wilson score interval, but it is also less conservative.
624///
625/// # References
626///
627/// * [Wikipedia article on normal approximation interval](https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Normal_approximation_interval)
628///
629pub fn ci_z_normal(
630 confidence: Confidence,
631 population: usize,
632 successes: usize,
633) -> CIResult<Interval<f64>> {
634 if successes > population {
635 return Err(CIError::InvalidSuccesses(successes, population));
636 }
637
638 let n = population as f64;
639 let x = successes as f64;
640 let p = x / n;
641 let q = 1. - p;
642
643 if n * p < 10. {
644 // too few successes for statistical significance
645 return Err(CIError::TooFewSuccesses(successes, population, n * p));
646 }
647 if n * q < 10. {
648 // too few failures for statistical significance
649 return Err(CIError::TooFewFailures(
650 population - successes,
651 population,
652 n * q,
653 ));
654 }
655
656 let std_dev = (p * q / n).sqrt();
657 let z = z_value(confidence);
658 let mean = p;
659 let span = z * std_dev;
660 match confidence {
661 Confidence::TwoSided(_) => Interval::new(mean - span, mean + span).map_err(|e| e.into()),
662 Confidence::UpperOneSided(_) => Interval::new(mean - span, 1.).map_err(|e| e.into()),
663 Confidence::LowerOneSided(_) => Interval::new(0., mean + span).map_err(|e| e.into()),
664 }
665}
666
#[cfg(test)]
mod tests {
    use super::*;
    use approx::*;

    /// Checks `proportion::ci` on 421 successes out of 500 samples, for a two-sided
    /// confidence and for both one-sided confidences.
    #[test]
    fn test_proportion_ci() -> CIResult<()> {
        let population = 500;
        let successes = 421;
        let confidence = Confidence::TwoSided(0.95);
        let ci = proportion::ci(confidence, population, successes)?;
        assert_abs_diff_eq!(ci, Interval::new(0.81, 0.87)?, epsilon = 1e-2);

        // Upper one-sided at 0.975: the high bound is pinned to 1 and the low bound
        // is expected to match the low bound of the two-sided 0.95 interval.
        let ci2 = proportion::ci(Confidence::UpperOneSided(0.975), population, successes)?;
        assert_eq!(ci2.high_f(), 1.);
        assert_abs_diff_eq!(ci2.low_f(), ci.low_f(), epsilon = 1e-2);

        // Lower one-sided at 0.975: the low bound is pinned to 0 and the high bound
        // is expected to match the high bound of the two-sided 0.95 interval.
        let ci2 = proportion::ci(Confidence::LowerOneSided(0.975), population, successes)?;
        assert_eq!(ci2.low_f(), 0.);
        assert_abs_diff_eq!(ci2.high_f(), ci.high_f(), epsilon = 1e-2);

        Ok(())
    }

    /// Checks `proportion::ci_if` with a predicate that selects 10 of the 20 samples.
    #[test]
    fn test_proportion_ci_if() {
        let data = [
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
        ];
        let confidence = Confidence::TwoSided(0.95);
        let ci = proportion::ci_if(confidence, &data, |&x| x <= 10).unwrap();
        assert_abs_diff_eq!(ci, Interval::new(0.299, 0.701).unwrap(), epsilon = 1e-2);
    }

    /// Checks that `+` and `+=` on `Stats` sum both the populations and the successes.
    #[test]
    fn test_proportion_add() {
        let stats1 = proportion::Stats::new(100, 50);
        let stats2 = proportion::Stats::new(200, 100);
        let stats = stats1 + stats2;
        assert_eq!(stats, proportion::Stats::new(300, 150));

        let mut stats = proportion::Stats::new(100, 50);
        stats += proportion::Stats::new(200, 100);
        assert_eq!(stats, proportion::Stats::new(300, 150));
    }

    /// Mirrors the documentation example of `Stats`: pass rate among 30 grades,
    /// a grade of 60 or more counting as a success.
    #[test]
    fn test_main_example() -> CIResult<()> {
        let grades = [
            40, 59, 73, 44, 82, 44, 58, 74, 94, 79, 40, 52, 100, 57, 76, 93, 68, 96, 92, 98, 58,
            64, 76, 40, 89, 65, 63, 90, 66, 89,
        ];
        let stats = proportion::Stats::from_iter(grades.iter().map(|&x| x >= 60));
        let confidence = Confidence::new_two_sided(0.95);
        let pass_rate_ci = stats.ci(confidence)?;
        println!("Pass rate: {}", pass_rate_ci);
        use approx::*;
        assert_abs_diff_eq!(pass_rate_ci, Interval::new(0.4878, 0.8077)?, epsilon = 1e-3);
        Ok(())
    }

    /// Smoke test: 89 losses out of 10 000 messages. Only checks that both calls
    /// succeed (through `unwrap`); the resulting intervals are printed, not asserted.
    #[test]
    fn test_readme_simple() {
        let confidence = Confidence::new(0.95);
        let messages = 10_000;
        let losses = 89;
        let ci = proportion::ci(confidence, messages, losses).unwrap();
        println!("Loss rate: {}", ci);

        let confidence = Confidence::new_lower(0.95);
        let ci = proportion::ci(confidence, messages, losses).unwrap();
        println!("Loss rate less than: {}", ci);
    }
}
740}