stats_ci/
quantile.rs

1//!
2//! Confidence intervals for quantiles
3//!
4//! # Examples
5//!
6//! ```
7//! # use stats_ci::error;
8//! use stats_ci::{quantile,Confidence,Interval};
9//! let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
10//! let confidence = Confidence::new_two_sided(0.95);
11//! let quantile = 0.5; // median
12//! let interval = quantile::ci(confidence, &data, quantile)?;
13//! assert_eq!(interval, Interval::new(5, 12)?);
14//!
15//! let confidence = Confidence::new_two_sided(0.8);
16//! let interval = quantile::ci(confidence, &data, quantile)?;
17//! assert_eq!(interval, Interval::new(6, 11)?);
18//!
19//! let confidence = Confidence::new_two_sided(0.5);
20//! let quantile = 0.4; // 40th percentile
21//! let interval = quantile::ci(confidence, &data, quantile)?;
22//! assert_eq!(interval, Interval::new(5, 8)?);
23//! # Ok::<(),error::CIError>(())
24//! ```
25use super::*;
26
27///
28/// Running statistics for quantiles
29///
30#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
31pub struct Stats {
32    population: usize,
33}
34
35impl Stats {
36    ///
37    /// Create a new instance with an initial population
38    ///
39    pub fn new(population: usize) -> Self {
40        Self { population }
41    }
42
43    ///
44    /// Return the confidence interval on the indices for a given quantile.
45    ///
46    /// # Arguments
47    ///
48    /// * `confidence` - the confidence level
49    /// * `quantile` - the quantile (must be in the range [0, 1])
50    ///
51    /// # Returns
52    ///
53    /// A confidence interval containing indices on the corresponding data.
54    ///
55    /// # Errors
56    ///
57    /// * `TooFewSamples` - if the number of samples is too small to compute a confidence interval
58    /// * `InvalidQuantile` - if the quantile is not in the range [0, 1]
59    /// * `IndexError` - if the confidence interval falls outside the range of the data
60    ///
61    /// # Examples
62    ///
63    /// ```
64    /// # use stats_ci::*;
65    /// let data = [1, 2, 3, 4, 5, 6, 7, 8, 9];
66    /// let confidence = Confidence::new_two_sided(0.8);
67    /// let quantile = 0.5; // median
68    /// let stats = quantile::Stats::new(data.len());
69    /// let interval = stats.ci(confidence, quantile)?;
70    /// assert_eq!(interval, Interval::new(3, 6)?);
71    /// # Ok::<(),error::CIError>(())
72    /// ```
73    pub fn ci(&self, confidence: Confidence, quantile: f64) -> CIResult<Interval<usize>> {
74        if quantile <= 0. || 1. <= quantile {
75            return Err(error::CIError::InvalidQuantile(quantile));
76        }
77
78        if self.population < 4 {
79            // too few samples to compute
80            return Err(error::CIError::TooFewSamples(self.population));
81        }
82
83        let successes = (quantile * self.population as f64).round() as usize;
84        let proportion_ci = proportion::ci_wilson(confidence, self.population, successes)?;
85
86        let (low, high): (f64, f64) = proportion_ci.into();
87
88        if low < 0. {
89            // interval falls outside the range of the data
90            return Err(error::CIError::IndexError(low, self.population));
91        }
92
93        if high > 1. {
94            // interval falls outside the range of the data
95            return Err(error::CIError::IndexError(high, self.population));
96        }
97
98        let lo_index = self.index(low)?;
99        let hi_index = self.index(high)?;
100
101        match confidence {
102            Confidence::TwoSided(_) => Interval::new(lo_index, hi_index).map_err(|e| e.into()),
103            Confidence::UpperOneSided(_) => Ok(Interval::new_upper(lo_index)),
104            Confidence::LowerOneSided(_) => Ok(Interval::new_lower(hi_index)),
105        }
106    }
107
108    ///
109    /// Return the index for a given quantile.
110    ///
111    /// # Arguments
112    ///
113    /// * `quantile` - the quantile (must be in the range [0, 1])
114    ///
115    /// # Returns
116    ///
117    /// The index corresponding to the quantile.
118    ///
119    /// # Errors
120    ///
121    /// * `TooFewSamples` - if the number of samples is too small to compute a confidence interval
122    /// * `InvalidQuantile` - if the quantile is not in (0, 1)
123    ///
124    /// # Examples
125    ///
126    /// ```
127    /// # use stats_ci::*;
128    /// let data = ['a', 'b', 'c', 'd', 'e'];
129    /// let stats = quantile::Stats::new(data.len());
130    /// assert_eq!(stats.index(0.).unwrap(), 0);
131    /// assert_eq!(stats.index(0.5).unwrap(), 2);
132    /// assert_eq!(stats.index(1.).unwrap(), 4);
133    /// assert_eq!(data[stats.index(0.25).unwrap()], 'b');
134    /// assert_eq!(data[stats.index(0.75).unwrap()], 'd');
135    /// ```
136    pub fn index(&self, quantile: f64) -> CIResult<usize> {
137        if self.population == 0 {
138            return Err(error::CIError::TooFewSamples(self.population));
139        }
140        #[allow(clippy::manual_range_contains)]
141        if quantile < 0. || 1. < quantile {
142            return Err(error::CIError::InvalidQuantile(quantile));
143        }
144        let index = (quantile * self.population as f64).floor() as usize;
145        let index = index.min(self.population - 1);
146        Ok(index)
147    }
148}
149
150impl std::ops::Add for Stats {
151    type Output = Self;
152
153    #[inline]
154    fn add(self, rhs: Self) -> Self::Output {
155        Self {
156            population: self.population + rhs.population,
157        }
158    }
159}
160
161impl std::ops::AddAssign for Stats {
162    #[inline]
163    fn add_assign(&mut self, rhs: Self) {
164        self.population += rhs.population;
165    }
166}
167
168///
169/// Compute the confidence interval for a given quantile, assuming that the data is __already sorted__.
170/// This is the function to call if the data is known to be sorted,
171/// or if the order of elements is meant to be their position in the slice (e.g., order of arrival).
172///
173/// Complexity: \\( O(1) \\)
174///
175/// # Arguments
176///
177/// * `confidence` - the confidence level (must be in (0, 1))
178/// * `sorted` - the sorted sample
179/// * `quantile` - the quantile to compute the confidence interval for (must be in (0, 1))
180///
181/// # Output
182///
183/// * `Interval` - the confidence interval for the quantile
184/// * `None` - if the number of samples is too small to compute a confidence interval, or if the interval falls outside the range of the data.
185///
186/// # Errors
187///
188/// * `TooFewSamples` - if the number of samples is too small to compute a confidence interval
189/// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
190/// * `InvalidQuantile` - if the quantile is not in (0, 1)
191///
192/// # Examples
193///
194/// ```
195/// # use stats_ci::*;
196/// let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
197/// let confidence = Confidence::new_two_sided(0.95);
198/// let quantile = 0.5; // median
199/// let interval = quantile::ci_sorted_unchecked(confidence, &data, quantile)?;
200/// assert_eq!(interval, Interval::new(5, 12)?);
201///
202/// let confidence = Confidence::new_two_sided(0.8);
203/// let interval = quantile::ci_sorted_unchecked(confidence, &data, quantile)?;
204/// assert_eq!(interval, Interval::new(6, 11)?);
205///
206/// let confidence = Confidence::new_two_sided(0.5);
207/// let quantile = 0.4; // 40th percentile
208/// let interval = quantile::ci_sorted_unchecked(confidence, &data, quantile)?;
209/// assert_eq!(interval, Interval::new(5, 8)?);
210/// # Ok::<(),error::CIError>(())
211/// ```
212pub fn ci_sorted_unchecked<T>(
213    confidence: Confidence,
214    sorted: &[T],
215    quantile: f64,
216) -> CIResult<Interval<T>>
217where
218    T: PartialOrd + Clone,
219{
220    assert!(quantile > 0. && quantile < 1.);
221
222    ci_indices(confidence, sorted.len(), quantile).and_then(|indices| match indices.into() {
223        (Some(lo), Some(hi)) => {
224            Interval::new(sorted[lo].clone(), sorted[hi].clone()).map_err(|e| e.into())
225        }
226        (Some(lo), None) => Ok(Interval::new_upper(sorted[lo].clone())),
227        (None, Some(hi)) => Ok(Interval::new_lower(sorted[hi].clone())),
228        _ => Err(error::CIError::IntervalError(
229            interval::IntervalError::EmptyInterval,
230        )),
231    })
232}
233
234///
235/// Compute the confidence interval for a given quantile.
236/// Use [`ci_sorted_unchecked`] instead if the data is already sorted.
237///
238/// Complexity: \\( O(n \log n) \\) where \\( n \\) is the number of samples.
239///
240/// # Arguments
241///
242/// * `confidence` - the confidence level (must be in (0, 1))
243/// * `data` - the sample data
244/// * `quantile` - the quantile to compute the confidence interval for (must be in (0, 1))
245///
246/// # Errors
247///
248/// * `TooFewSamples` - if the number of samples is too small to compute a confidence interval
249/// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
250/// * `InvalidQuantile` - if the quantile is not in (0, 1)
251///
252/// # Panics
253///
254/// * if the data contains elements that are not comparable (with their partial ordering).
255///
256/// # Examples
257///
258/// ```
259/// # use stats_ci::*;
260/// let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
261/// let confidence = Confidence::new_two_sided(0.95);
262/// let quantile = 0.5; // median
263/// let interval = quantile::ci(confidence, &data, quantile)?;
264/// assert_eq!(interval, Interval::new(5, 12)?);
265///
266/// let data2 = [2, 14, 13, 6, 8, 4, 15, 9, 3, 11, 10, 7, 1, 12, 5];
267/// let interval2 = quantile::ci(confidence, &data2, quantile)?;
268/// assert_eq!(interval, interval2);
269///
270/// let confidence = Confidence::new_two_sided(0.8);
271/// let interval = quantile::ci(confidence, &data, quantile)?;
272/// assert_eq!(interval, Interval::new(6, 11)?);
273///
274/// let confidence = Confidence::new_two_sided(0.5);
275/// let quantile = 0.4; // 40th percentile
276/// let interval = quantile::ci(confidence, &data, quantile)?;
277/// assert_eq!(interval, Interval::new(5, 8)?);
278/// # Ok::<(),error::CIError>(())
279/// ```
280pub fn ci<T, I>(confidence: Confidence, data: &I, quantile: f64) -> CIResult<Interval<T>>
281where
282    T: PartialOrd + Copy,
283    for<'a> &'a I: IntoIterator<Item = &'a T>,
284{
285    let mut sorted: Vec<T> = data.into_iter().copied().collect();
286    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
287    ci_sorted_unchecked(confidence, &sorted, quantile)
288}
289
290///
291/// Compute the indices of the confidence interval for a given quantile.
292/// The function returns the indices of the lower and upper bounds of the interval.
293///
294/// Complexity: \\( O(1) \\)
295///
296/// # Arguments
297///
298/// * `confidence` - the confidence level (must be in (0, 1))
299/// * `data_len` - the number of samples
300/// * `quantile` - the quantile to compute the confidence interval for (must be in (0, 1))
301///
302/// # Output
303///
304/// * `Interval` - the confidence interval for the quantile
305/// * `None` - if the number of samples is too small to compute a confidence interval, or if the interval falls outside the range of the data.
306///
307/// # Errors
308///
309/// * `TooFewSamples` - if the number of samples is too small to compute a confidence interval
310/// * `InvalidConfidenceLevel` - if the confidence level is not in (0, 1)
311/// * `InvalidQuantile` - if the quantile is not in (0, 1)
312///
313/// # Examples
314///
315/// ```
316/// # use stats_ci::*;
317/// let data = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O"];
318/// let confidence = Confidence::new_two_sided(0.95);
319/// let quantile = 0.5; // median
320/// let interval = quantile::ci_indices(confidence, data.len(), quantile)?;
321/// assert_eq!(interval, Interval::new(4, 11)?);
322///
323/// let confidence = Confidence::new_two_sided(0.8);
324/// let interval = quantile::ci_indices(confidence, data.len(), quantile)?;
325/// assert_eq!(interval, Interval::new(5, 10)?);
326///
327/// let confidence = Confidence::new_two_sided(0.5);
328/// let quantile = 0.4; // 40th percentile
329/// let interval = quantile::ci_indices(confidence, data.len(), quantile)?;
330/// assert_eq!(interval, Interval::new(4, 7)?);
331/// # Ok::<(),error::CIError>(())
332/// ```
333pub fn ci_indices(
334    confidence: Confidence,
335    data_len: usize,
336    quantile: f64,
337) -> CIResult<Interval<usize>> {
338    let stats = Stats::new(data_len);
339    stats.ci(confidence, quantile)
340}
341
342#[cfg(test)]
343mod tests {
344    use super::*;
345    use rand::thread_rng;
346
347    #[test]
348    fn test_median_ci() -> CIResult<()> {
349        let data = [
350            8., 11., 12., 13., 15., 17., 19., 20., 21., 21., 22., 23., 25., 26., 28.,
351        ];
352        let confidence = Confidence::new_two_sided(0.95);
353        let median_ci = ci_sorted_unchecked(confidence, &data, 0.5)?;
354        assert_eq!(median_ci, Interval::new(15., 23.)?);
355
356        let confidence = Confidence::new_lower(0.975);
357        let median_ci = ci_sorted_unchecked(confidence, &data, 0.5)?;
358        assert_eq!(median_ci, Interval::new_lower(23.));
359
360        let confidence = Confidence::new_upper(0.975);
361        let median_ci = ci_sorted_unchecked(confidence, &data, 0.5)?;
362        assert_eq!(median_ci, Interval::new_upper(15.));
363
364        Ok(())
365    }
366
367    #[test]
368    fn test_quantile_ci() -> CIResult<()> {
369        let data = [
370            8., 11., 12., 13., 15., 17., 19., 20., 21., 21., 22., 23., 25., 26., 28.,
371        ];
372        let confidence = Confidence::new_two_sided(0.95);
373        let quantile_ci = ci_sorted_unchecked(confidence, &data, 0.4).unwrap();
374        assert_eq!(quantile_ci, Interval::new(12., 21.)?);
375
376        let confidence = Confidence::new_two_sided(0.999);
377        let quantile_ci = ci_sorted_unchecked(confidence, &data, 0.867).unwrap();
378        assert_eq!(quantile_ci, Interval::new(19., 28.)?);
379
380        let confidence = Confidence::new_two_sided(0.999);
381        let quantile_ci = ci_sorted_unchecked(confidence, &data, 0.133).unwrap();
382        assert_eq!(quantile_ci, Interval::new(8., 21.)?);
383
384        let data = [
385            "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
386        ];
387        let confidence = Confidence::new_two_sided(0.95);
388        let quantile = 0.5; // median
389        let interval = quantile::ci_indices(confidence, data.len(), quantile).unwrap();
390        assert_eq!(interval, Interval::new(4, 11)?);
391
392        let confidence = Confidence::new_two_sided(0.8);
393        let interval = quantile::ci_indices(confidence, data.len(), quantile).unwrap();
394        assert_eq!(interval, Interval::new(5, 10)?);
395
396        let confidence = Confidence::new_two_sided(0.5);
397        let quantile = 0.4; // 40th percentile
398        let interval = quantile::ci_indices(confidence, data.len(), quantile)?;
399        assert_eq!(interval, Interval::new(4, 7)?);
400
401        let data = [
402            "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
403        ];
404        let confidence = Confidence::new_two_sided(0.95);
405        let quantile = 0.5; // median
406        let interval = quantile::ci_sorted_unchecked(confidence, &data, quantile)?;
407        assert_eq!(interval, Interval::new("E", "L")?);
408
409        let data = [
410            'J', 'E', 'M', 'G', 'K', 'H', 'N', 'A', 'C', 'L', 'F', 'O', 'D', 'B', 'I',
411        ];
412        let confidence = Confidence::new_two_sided(0.95);
413        let quantile = 0.5; // median
414        let interval = quantile::ci(confidence, &data, quantile)?;
415        assert_eq!(interval, Interval::new('E', 'L')?);
416
417        let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
418        let confidence = Confidence::new_two_sided(0.95);
419        let quantile = 0.5; // median
420        let interval = quantile::ci(confidence, &data, quantile)?;
421        assert_eq!(interval, Interval::new(5, 12)?);
422
423        let confidence = Confidence::new_two_sided(0.8);
424        let interval = quantile::ci(confidence, &data, quantile)?;
425        assert_eq!(interval, Interval::new(6, 11)?);
426
427        let confidence = Confidence::new_two_sided(0.5);
428        let quantile = 0.4; // 40th percentile
429        let interval = quantile::ci(confidence, &data, quantile)?;
430        assert_eq!(interval, Interval::new(5, 8)?);
431
432        Ok(())
433    }
434
435    #[test]
436    fn test_one_sided() {
437        let data = [
438            8., 11., 12., 13., 15., 17., 19., 20., 21., 21., 22., 23., 25., 26., 28.,
439        ];
440        let confidence = Confidence::new_upper(0.975);
441        let quantile_ci = ci_sorted_unchecked(confidence, &data, 0.4).unwrap();
442        assert_eq!(quantile_ci, Interval::new_upper(12.));
443
444        let confidence = Confidence::new_lower(0.975);
445        let quantile_ci = ci_sorted_unchecked(confidence, &data, 0.4).unwrap();
446        assert_eq!(quantile_ci, Interval::new_lower(21.));
447
448        let data = [
449            "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
450        ];
451        let confidence = Confidence::new_upper(0.975);
452        let quantile = 0.5; // median
453        let interval = quantile::ci_indices(confidence, data.len(), quantile).unwrap();
454        assert_eq!(interval, Interval::new_upper(4));
455
456        let confidence = Confidence::new_lower(0.975);
457        let interval = quantile::ci_indices(confidence, data.len(), quantile).unwrap();
458        assert_eq!(interval, Interval::new_lower(11));
459    }
460
461    #[test]
462    fn test_ci_indices() -> CIResult<()> {
463        let data = [
464            "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
465        ];
466        let confidence = Confidence::new_two_sided(0.95);
467        let quantile = 0.5; // median
468        let interval = quantile::ci_indices(confidence, data.len(), quantile)?;
469        assert_eq!(interval, Interval::new(4, 11)?);
470
471        let confidence = Confidence::new_two_sided(0.8);
472        let interval = quantile::ci_indices(confidence, data.len(), quantile)?;
473        assert_eq!(interval, Interval::new(5, 10)?);
474
475        let confidence = Confidence::new_two_sided(0.5);
476        let quantile = 0.4; // 40th percentile
477        let interval = quantile::ci_indices(confidence, data.len(), quantile)?;
478        assert_eq!(interval, Interval::new(4, 7)?);
479
480        Ok(())
481    }
482
483    #[derive(Debug, Clone, Copy, PartialEq)]
484    enum Numbers {
485        One,
486        Two,
487        Three,
488        Four,
489        Five,
490        Six,
491        Seven,
492        Eight,
493        Nine,
494        Ten,
495        Eleven,
496        Twelve,
497        Thirteen,
498        Fourteen,
499        Fifteen,
500    }
501
502    #[test]
503    fn test_median_unordered() -> CIResult<()> {
504        use Numbers::*;
505        let data = [
506            One, Two, Three, Four, Five, Six, Seven, Eight, Nine, Ten, Eleven, Twelve, Thirteen,
507            Fourteen, Fifteen,
508        ];
509        let confidence = Confidence::new_two_sided(0.95);
510        let median_ci = ci_indices(confidence, data.len(), 0.5)?;
511        assert_eq!(median_ci, Interval::new(4, 11)?);
512        assert_eq!(median_ci.left(), Some(&4));
513        assert_eq!(median_ci.right(), Some(&11));
514
515        Ok(())
516    }
517
518    #[test]
519    fn test_median_ci_unsorted() -> CIResult<()> {
520        use rand::seq::SliceRandom;
521        let data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
522        let confidence = Confidence::new_two_sided(0.95);
523        let quantile = 0.5; // median
524        for _i in 0..100 {
525            let mut shuffled = data.to_vec();
526            shuffled.shuffle(&mut thread_rng());
527            let interval = ci(confidence, &shuffled, quantile)?;
528            assert_eq!(interval, Interval::new(5, 12)?);
529        }
530        Ok(())
531    }
532
533    #[test]
534    fn test_proportion_add() {
535        let stats1 = quantile::Stats::new(100);
536        let stats2 = quantile::Stats::new(250);
537        let stats = stats1 + stats2;
538        assert_eq!(stats, quantile::Stats::new(350));
539
540        let mut stats = quantile::Stats::new(100);
541        stats += quantile::Stats::new(250);
542        assert_eq!(stats, quantile::Stats::new(350));
543    }
544}