stats_ci/quantile.rs
1//!
2//! Confidence intervals for quantiles
3//!
4//! # Examples
5//!
6//! ```
7//! # use stats_ci::error;
8//! use stats_ci::{quantile,Confidence,Interval};
9//! let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
10//! let confidence = Confidence::new_two_sided(0.95);
11//! let quantile = 0.5; // median
12//! let interval = quantile::ci(confidence, &data, quantile)?;
13//! assert_eq!(interval, Interval::new(5, 12)?);
14//!
15//! let confidence = Confidence::new_two_sided(0.8);
16//! let interval = quantile::ci(confidence, &data, quantile)?;
17//! assert_eq!(interval, Interval::new(6, 11)?);
18//!
19//! let confidence = Confidence::new_two_sided(0.5);
20//! let quantile = 0.4; // 40th percentile
21//! let interval = quantile::ci(confidence, &data, quantile)?;
22//! assert_eq!(interval, Interval::new(5, 8)?);
23//! # Ok::<(),error::CIError>(())
24//! ```
25use super::*;
26
27///
28/// Running statistics for quantiles
29///
30#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
31pub struct Stats {
32 population: usize,
33}
34
35impl Stats {
36 ///
37 /// Create a new instance with an initial population
38 ///
39 pub fn new(population: usize) -> Self {
40 Self { population }
41 }
42
43 ///
44 /// Return the confidence interval on the indices for a given quantile.
45 ///
46 /// # Arguments
47 ///
48 /// * `confidence` - the confidence level
49 /// * `quantile` - the quantile (must be in the range [0, 1])
50 ///
51 /// # Returns
52 ///
53 /// A confidence interval containing indices on the corresponding data.
54 ///
55 /// # Errors
56 ///
57 /// * `TooFewSamples` - if the number of samples is too small to compute a confidence interval
58 /// * `InvalidQuantile` - if the quantile is not in the range [0, 1]
59 /// * `IndexError` - if the confidence interval falls outside the range of the data
60 ///
61 /// # Examples
62 ///
63 /// ```
64 /// # use stats_ci::*;
65 /// let data = [1, 2, 3, 4, 5, 6, 7, 8, 9];
66 /// let confidence = Confidence::new_two_sided(0.8);
67 /// let quantile = 0.5; // median
68 /// let stats = quantile::Stats::new(data.len());
69 /// let interval = stats.ci(confidence, quantile)?;
70 /// assert_eq!(interval, Interval::new(3, 6)?);
71 /// # Ok::<(),error::CIError>(())
72 /// ```
73 pub fn ci(&self, confidence: Confidence, quantile: f64) -> CIResult<Interval<usize>> {
74 if quantile <= 0. || 1. <= quantile {
75 return Err(error::CIError::InvalidQuantile(quantile));
76 }
77
78 if self.population < 4 {
79 // too few samples to compute
80 return Err(error::CIError::TooFewSamples(self.population));
81 }
82
83 let successes = (quantile * self.population as f64).round() as usize;
84 let proportion_ci = proportion::ci_wilson(confidence, self.population, successes)?;
85
86 let (low, high): (f64, f64) = proportion_ci.into();
87
88 if low < 0. {
89 // interval falls outside the range of the data
90 return Err(error::CIError::IndexError(low, self.population));
91 }
92
93 if high > 1. {
94 // interval falls outside the range of the data
95 return Err(error::CIError::IndexError(high, self.population));
96 }
97
98 let lo_index = self.index(low)?;
99 let hi_index = self.index(high)?;
100
101 match confidence {
102 Confidence::TwoSided(_) => Interval::new(lo_index, hi_index).map_err(|e| e.into()),
103 Confidence::UpperOneSided(_) => Ok(Interval::new_upper(lo_index)),
104 Confidence::LowerOneSided(_) => Ok(Interval::new_lower(hi_index)),
105 }
106 }
107
108 ///
109 /// Return the index for a given quantile.
110 ///
111 /// # Arguments
112 ///
113 /// * `quantile` - the quantile (must be in the range [0, 1])
114 ///
115 /// # Returns
116 ///
117 /// The index corresponding to the quantile.
118 ///
119 /// # Errors
120 ///
121 /// * `TooFewSamples` - if the number of samples is too small to compute a confidence interval
122 /// * `InvalidQuantile` - if the quantile is not in (0, 1)
123 ///
124 /// # Examples
125 ///
126 /// ```
127 /// # use stats_ci::*;
128 /// let data = ['a', 'b', 'c', 'd', 'e'];
129 /// let stats = quantile::Stats::new(data.len());
130 /// assert_eq!(stats.index(0.).unwrap(), 0);
131 /// assert_eq!(stats.index(0.5).unwrap(), 2);
132 /// assert_eq!(stats.index(1.).unwrap(), 4);
133 /// assert_eq!(data[stats.index(0.25).unwrap()], 'b');
134 /// assert_eq!(data[stats.index(0.75).unwrap()], 'd');
135 /// ```
136 pub fn index(&self, quantile: f64) -> CIResult<usize> {
137 if self.population == 0 {
138 return Err(error::CIError::TooFewSamples(self.population));
139 }
140 #[allow(clippy::manual_range_contains)]
141 if quantile < 0. || 1. < quantile {
142 return Err(error::CIError::InvalidQuantile(quantile));
143 }
144 let index = (quantile * self.population as f64).floor() as usize;
145 let index = index.min(self.population - 1);
146 Ok(index)
147 }
148}
149
150impl std::ops::Add for Stats {
151 type Output = Self;
152
153 #[inline]
154 fn add(self, rhs: Self) -> Self::Output {
155 Self {
156 population: self.population + rhs.population,
157 }
158 }
159}
160
161impl std::ops::AddAssign for Stats {
162 #[inline]
163 fn add_assign(&mut self, rhs: Self) {
164 self.population += rhs.population;
165 }
166}
167
168///
169/// Compute the confidence interval for a given quantile, assuming that the data is __already sorted__.
170/// This is the function to call if the data is known to be sorted,
171/// or if the order of elements is meant to be their position in the slice (e.g., order of arrival).
172///
173/// Complexity: \\( O(1) \\)
174///
175/// # Arguments
176///
177/// * `confidence` - the confidence level (must be in (0, 1))
178/// * `sorted` - the sorted sample
179/// * `quantile` - the quantile to compute the confidence interval for (must be in (0, 1))
180///
181/// # Output
182///
183/// * `Interval` - the confidence interval for the quantile
184/// * `None` - if the number of samples is too small to compute a confidence interval, or if the interval falls outside the range of the data.
185///
186/// # Errors
187///
188/// * `TooFewSamples` - if the number of samples is too small to compute a confidence interval
189/// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
190/// * `InvalidQuantile` - if the quantile is not in (0, 1)
191///
192/// # Examples
193///
194/// ```
195/// # use stats_ci::*;
196/// let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
197/// let confidence = Confidence::new_two_sided(0.95);
198/// let quantile = 0.5; // median
199/// let interval = quantile::ci_sorted_unchecked(confidence, &data, quantile)?;
200/// assert_eq!(interval, Interval::new(5, 12)?);
201///
202/// let confidence = Confidence::new_two_sided(0.8);
203/// let interval = quantile::ci_sorted_unchecked(confidence, &data, quantile)?;
204/// assert_eq!(interval, Interval::new(6, 11)?);
205///
206/// let confidence = Confidence::new_two_sided(0.5);
207/// let quantile = 0.4; // 40th percentile
208/// let interval = quantile::ci_sorted_unchecked(confidence, &data, quantile)?;
209/// assert_eq!(interval, Interval::new(5, 8)?);
210/// # Ok::<(),error::CIError>(())
211/// ```
212pub fn ci_sorted_unchecked<T>(
213 confidence: Confidence,
214 sorted: &[T],
215 quantile: f64,
216) -> CIResult<Interval<T>>
217where
218 T: PartialOrd + Clone,
219{
220 assert!(quantile > 0. && quantile < 1.);
221
222 ci_indices(confidence, sorted.len(), quantile).and_then(|indices| match indices.into() {
223 (Some(lo), Some(hi)) => {
224 Interval::new(sorted[lo].clone(), sorted[hi].clone()).map_err(|e| e.into())
225 }
226 (Some(lo), None) => Ok(Interval::new_upper(sorted[lo].clone())),
227 (None, Some(hi)) => Ok(Interval::new_lower(sorted[hi].clone())),
228 _ => Err(error::CIError::IntervalError(
229 interval::IntervalError::EmptyInterval,
230 )),
231 })
232}
233
234///
235/// Compute the confidence interval for a given quantile.
236/// Use [`ci_sorted_unchecked`] instead if the data is already sorted.
237///
238/// Complexity: \\( O(n \log n) \\) where \\( n \\) is the number of samples.
239///
240/// # Arguments
241///
242/// * `confidence` - the confidence level (must be in (0, 1))
243/// * `data` - the sample data
244/// * `quantile` - the quantile to compute the confidence interval for (must be in (0, 1))
245///
246/// # Errors
247///
248/// * `TooFewSamples` - if the number of samples is too small to compute a confidence interval
249/// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
250/// * `InvalidQuantile` - if the quantile is not in (0, 1)
251///
252/// # Panics
253///
254/// * if the data contains elements that are not comparable (with their partial ordering).
255///
256/// # Examples
257///
258/// ```
259/// # use stats_ci::*;
260/// let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
261/// let confidence = Confidence::new_two_sided(0.95);
262/// let quantile = 0.5; // median
263/// let interval = quantile::ci(confidence, &data, quantile)?;
264/// assert_eq!(interval, Interval::new(5, 12)?);
265///
266/// let data2 = [2, 14, 13, 6, 8, 4, 15, 9, 3, 11, 10, 7, 1, 12, 5];
267/// let interval2 = quantile::ci(confidence, &data2, quantile)?;
268/// assert_eq!(interval, interval2);
269///
270/// let confidence = Confidence::new_two_sided(0.8);
271/// let interval = quantile::ci(confidence, &data, quantile)?;
272/// assert_eq!(interval, Interval::new(6, 11)?);
273///
274/// let confidence = Confidence::new_two_sided(0.5);
275/// let quantile = 0.4; // 40th percentile
276/// let interval = quantile::ci(confidence, &data, quantile)?;
277/// assert_eq!(interval, Interval::new(5, 8)?);
278/// # Ok::<(),error::CIError>(())
279/// ```
280pub fn ci<T, I>(confidence: Confidence, data: &I, quantile: f64) -> CIResult<Interval<T>>
281where
282 T: PartialOrd + Copy,
283 for<'a> &'a I: IntoIterator<Item = &'a T>,
284{
285 let mut sorted: Vec<T> = data.into_iter().copied().collect();
286 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
287 ci_sorted_unchecked(confidence, &sorted, quantile)
288}
289
290///
291/// Compute the indices of the confidence interval for a given quantile.
292/// The function returns the indices of the lower and upper bounds of the interval.
293///
294/// Complexity: \\( O(1) \\)
295///
296/// # Arguments
297///
298/// * `confidence` - the confidence level (must be in (0, 1))
299/// * `data_len` - the number of samples
300/// * `quantile` - the quantile to compute the confidence interval for (must be in (0, 1))
301///
302/// # Output
303///
304/// * `Interval` - the confidence interval for the quantile
305/// * `None` - if the number of samples is too small to compute a confidence interval, or if the interval falls outside the range of the data.
306///
307/// # Errors
308///
309/// * `TooFewSamples` - if the number of samples is too small to compute a confidence interval
310/// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
311/// * `InvalidQuantile` - if the quantile is not in (0, 1)
312///
313/// # Examples
314///
315/// ```
316/// # use stats_ci::*;
317/// let data = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O"];
318/// let confidence = Confidence::new_two_sided(0.95);
319/// let quantile = 0.5; // median
320/// let interval = quantile::ci_indices(confidence, data.len(), quantile)?;
321/// assert_eq!(interval, Interval::new(4, 11)?);
322///
323/// let confidence = Confidence::new_two_sided(0.8);
324/// let interval = quantile::ci_indices(confidence, data.len(), quantile)?;
325/// assert_eq!(interval, Interval::new(5, 10)?);
326///
327/// let confidence = Confidence::new_two_sided(0.5);
328/// let quantile = 0.4; // 40th percentile
329/// let interval = quantile::ci_indices(confidence, data.len(), quantile)?;
330/// assert_eq!(interval, Interval::new(4, 7)?);
331/// # Ok::<(),error::CIError>(())
332/// ```
333pub fn ci_indices(
334 confidence: Confidence,
335 data_len: usize,
336 quantile: f64,
337) -> CIResult<Interval<usize>> {
338 let stats = Stats::new(data_len);
339 stats.ci(confidence, quantile)
340}
341
342#[cfg(test)]
343mod tests {
344 use super::*;
345 use rand::thread_rng;
346
347 #[test]
348 fn test_median_ci() -> CIResult<()> {
349 let data = [
350 8., 11., 12., 13., 15., 17., 19., 20., 21., 21., 22., 23., 25., 26., 28.,
351 ];
352 let confidence = Confidence::new_two_sided(0.95);
353 let median_ci = ci_sorted_unchecked(confidence, &data, 0.5)?;
354 assert_eq!(median_ci, Interval::new(15., 23.)?);
355
356 let confidence = Confidence::new_lower(0.975);
357 let median_ci = ci_sorted_unchecked(confidence, &data, 0.5)?;
358 assert_eq!(median_ci, Interval::new_lower(23.));
359
360 let confidence = Confidence::new_upper(0.975);
361 let median_ci = ci_sorted_unchecked(confidence, &data, 0.5)?;
362 assert_eq!(median_ci, Interval::new_upper(15.));
363
364 Ok(())
365 }
366
367 #[test]
368 fn test_quantile_ci() -> CIResult<()> {
369 let data = [
370 8., 11., 12., 13., 15., 17., 19., 20., 21., 21., 22., 23., 25., 26., 28.,
371 ];
372 let confidence = Confidence::new_two_sided(0.95);
373 let quantile_ci = ci_sorted_unchecked(confidence, &data, 0.4).unwrap();
374 assert_eq!(quantile_ci, Interval::new(12., 21.)?);
375
376 let confidence = Confidence::new_two_sided(0.999);
377 let quantile_ci = ci_sorted_unchecked(confidence, &data, 0.867).unwrap();
378 assert_eq!(quantile_ci, Interval::new(19., 28.)?);
379
380 let confidence = Confidence::new_two_sided(0.999);
381 let quantile_ci = ci_sorted_unchecked(confidence, &data, 0.133).unwrap();
382 assert_eq!(quantile_ci, Interval::new(8., 21.)?);
383
384 let data = [
385 "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
386 ];
387 let confidence = Confidence::new_two_sided(0.95);
388 let quantile = 0.5; // median
389 let interval = quantile::ci_indices(confidence, data.len(), quantile).unwrap();
390 assert_eq!(interval, Interval::new(4, 11)?);
391
392 let confidence = Confidence::new_two_sided(0.8);
393 let interval = quantile::ci_indices(confidence, data.len(), quantile).unwrap();
394 assert_eq!(interval, Interval::new(5, 10)?);
395
396 let confidence = Confidence::new_two_sided(0.5);
397 let quantile = 0.4; // 40th percentile
398 let interval = quantile::ci_indices(confidence, data.len(), quantile)?;
399 assert_eq!(interval, Interval::new(4, 7)?);
400
401 let data = [
402 "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
403 ];
404 let confidence = Confidence::new_two_sided(0.95);
405 let quantile = 0.5; // median
406 let interval = quantile::ci_sorted_unchecked(confidence, &data, quantile)?;
407 assert_eq!(interval, Interval::new("E", "L")?);
408
409 let data = [
410 'J', 'E', 'M', 'G', 'K', 'H', 'N', 'A', 'C', 'L', 'F', 'O', 'D', 'B', 'I',
411 ];
412 let confidence = Confidence::new_two_sided(0.95);
413 let quantile = 0.5; // median
414 let interval = quantile::ci(confidence, &data, quantile)?;
415 assert_eq!(interval, Interval::new('E', 'L')?);
416
417 let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
418 let confidence = Confidence::new_two_sided(0.95);
419 let quantile = 0.5; // median
420 let interval = quantile::ci(confidence, &data, quantile)?;
421 assert_eq!(interval, Interval::new(5, 12)?);
422
423 let confidence = Confidence::new_two_sided(0.8);
424 let interval = quantile::ci(confidence, &data, quantile)?;
425 assert_eq!(interval, Interval::new(6, 11)?);
426
427 let confidence = Confidence::new_two_sided(0.5);
428 let quantile = 0.4; // 40th percentile
429 let interval = quantile::ci(confidence, &data, quantile)?;
430 assert_eq!(interval, Interval::new(5, 8)?);
431
432 Ok(())
433 }
434
435 #[test]
436 fn test_one_sided() {
437 let data = [
438 8., 11., 12., 13., 15., 17., 19., 20., 21., 21., 22., 23., 25., 26., 28.,
439 ];
440 let confidence = Confidence::new_upper(0.975);
441 let quantile_ci = ci_sorted_unchecked(confidence, &data, 0.4).unwrap();
442 assert_eq!(quantile_ci, Interval::new_upper(12.));
443
444 let confidence = Confidence::new_lower(0.975);
445 let quantile_ci = ci_sorted_unchecked(confidence, &data, 0.4).unwrap();
446 assert_eq!(quantile_ci, Interval::new_lower(21.));
447
448 let data = [
449 "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
450 ];
451 let confidence = Confidence::new_upper(0.975);
452 let quantile = 0.5; // median
453 let interval = quantile::ci_indices(confidence, data.len(), quantile).unwrap();
454 assert_eq!(interval, Interval::new_upper(4));
455
456 let confidence = Confidence::new_lower(0.975);
457 let interval = quantile::ci_indices(confidence, data.len(), quantile).unwrap();
458 assert_eq!(interval, Interval::new_lower(11));
459 }
460
461 #[test]
462 fn test_ci_indices() -> CIResult<()> {
463 let data = [
464 "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
465 ];
466 let confidence = Confidence::new_two_sided(0.95);
467 let quantile = 0.5; // median
468 let interval = quantile::ci_indices(confidence, data.len(), quantile)?;
469 assert_eq!(interval, Interval::new(4, 11)?);
470
471 let confidence = Confidence::new_two_sided(0.8);
472 let interval = quantile::ci_indices(confidence, data.len(), quantile)?;
473 assert_eq!(interval, Interval::new(5, 10)?);
474
475 let confidence = Confidence::new_two_sided(0.5);
476 let quantile = 0.4; // 40th percentile
477 let interval = quantile::ci_indices(confidence, data.len(), quantile)?;
478 assert_eq!(interval, Interval::new(4, 7)?);
479
480 Ok(())
481 }
482
483 #[derive(Debug, Clone, Copy, PartialEq)]
484 enum Numbers {
485 One,
486 Two,
487 Three,
488 Four,
489 Five,
490 Six,
491 Seven,
492 Eight,
493 Nine,
494 Ten,
495 Eleven,
496 Twelve,
497 Thirteen,
498 Fourteen,
499 Fifteen,
500 }
501
502 #[test]
503 fn test_median_unordered() -> CIResult<()> {
504 use Numbers::*;
505 let data = [
506 One, Two, Three, Four, Five, Six, Seven, Eight, Nine, Ten, Eleven, Twelve, Thirteen,
507 Fourteen, Fifteen,
508 ];
509 let confidence = Confidence::new_two_sided(0.95);
510 let median_ci = ci_indices(confidence, data.len(), 0.5)?;
511 assert_eq!(median_ci, Interval::new(4, 11)?);
512 assert_eq!(median_ci.left(), Some(&4));
513 assert_eq!(median_ci.right(), Some(&11));
514
515 Ok(())
516 }
517
518 #[test]
519 fn test_median_ci_unsorted() -> CIResult<()> {
520 use rand::seq::SliceRandom;
521 let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
522 let confidence = Confidence::new_two_sided(0.95);
523 let quantile = 0.5; // median
524 for _i in 0..100 {
525 let mut shuffled = data.to_vec();
526 shuffled.shuffle(&mut thread_rng());
527 let interval = ci(confidence, &shuffled, quantile)?;
528 assert_eq!(interval, Interval::new(5, 12)?);
529 }
530 Ok(())
531 }
532
533 #[test]
534 fn test_proportion_add() {
535 let stats1 = quantile::Stats::new(100);
536 let stats2 = quantile::Stats::new(250);
537 let stats = stats1 + stats2;
538 assert_eq!(stats, quantile::Stats::new(350));
539
540 let mut stats = quantile::Stats::new(100);
541 stats += quantile::Stats::new(250);
542 assert_eq!(stats, quantile::Stats::new(350));
543 }
544}