std_dev/
percentile.rs

1//! Percentile / median calculations.
2//!
3//! - `O(n log n)` [`naive_percentile`] (simple to understand)
4//! - probabilistic `O(n)` [`percentile`] (recommended, fastest, and also quite simple to understand)
5//! - deterministic `O(n)` [`median_of_medians`] (harder to understand, probably slower than the
6//!     probabilistic version. However guarantees linear time, so useful in critical applications.)
7//!
8//! You should probably use [`percentile_rand`].
9//!
10//! The linear time algoritms are implementations following [this blogpost](https://rcoh.me/posts/linear-time-median-finding/).
11#[cfg(feature = "percentile-rand")]
12use rand::Rng;
13use std::borrow::Cow;
14use std::cmp;
15
16/// The result of a percentile (e.g. median) lookup.
17#[derive(Debug, Clone, Copy, PartialEq, Eq)]
18pub enum MeanValue<T> {
19    /// A single value was found.
20    Single(T),
21    /// The percentile lies between two values. Take the mean of these to get the percentile.
22    Mean(T, T),
23}
24impl MeanValue<crate::F64OrdHash> {
25    #[inline]
26    pub fn resolve(self) -> f64 {
27        self.map(|v| v.0).resolve()
28    }
29}
30impl<T: PercentileResolve> MeanValue<T> {
31    #[inline]
32    pub fn resolve(self) -> T {
33        PercentileResolve::compute(self)
34    }
35}
36impl<T> MeanValue<T> {
37    #[inline]
38    pub fn into_single(self) -> Option<T> {
39        if let Self::Single(t) = self {
40            Some(t)
41        } else {
42            None
43        }
44    }
45    #[inline]
46    pub fn map<O>(self, mut f: impl FnMut(T) -> O) -> MeanValue<O> {
47        match self {
48            Self::Single(v) => MeanValue::Single(f(v)),
49            Self::Mean(a, b) => MeanValue::Mean(f(a), f(b)),
50        }
51    }
52}
53impl<T: Clone> MeanValue<&T> {
54    #[inline]
55    pub fn clone_inner(&self) -> MeanValue<T> {
56        match self {
57            Self::Single(t) => MeanValue::Single((*t).clone()),
58            Self::Mean(a, b) => MeanValue::Mean((*a).clone(), (*b).clone()),
59        }
60    }
61}
62/// Resolves the mean function to return a concrete value.
63/// Accessible through [`MeanValue::resolve`].
64pub trait PercentileResolve
65where
66    Self: Sized,
67{
68    fn mean(a: Self, b: Self) -> Self;
69    #[inline]
70    fn compute(percentile: MeanValue<Self>) -> Self {
71        match percentile {
72            MeanValue::Single(me) => me,
73            MeanValue::Mean(a, b) => PercentileResolve::mean(a, b),
74        }
75    }
76}
77
78#[cfg(feature = "generic-impls")]
79impl<T: num_traits::identities::One + std::ops::Add<Output = T> + std::ops::Div<Output = T>>
80    PercentileResolve for T
81{
82    #[inline]
83    fn mean(a: Self, b: Self) -> Self {
84        (a + b) / (T::one() + T::one())
85    }
86}
87#[cfg(not(feature = "generic-impls"))]
88macro_rules! impl_resolve {
89    ($($t:ty:$two:expr, )+) => {
90        $(
91        impl PercentileResolve for $t {
92            #[inline]
93            fn mean(a: Self, b: Self) -> Self {
94                (a + b) / $two
95            }
96        }
97        )+
98    };
99}
100#[cfg(not(feature = "generic-impls"))]
101macro_rules! impl_resolve_integer {
102    ($($t:ty, )+) => {
103        impl_resolve!($($t:2, )+);
104    };
105}
106#[cfg(not(feature = "generic-impls"))]
107macro_rules! impl_resolve_float {
108    ($($t:ty, )+) => {
109        impl_resolve!($($t:2.0, )+);
110    };
111}
112#[cfg(not(feature = "generic-impls"))]
113impl_resolve_integer!(u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize,);
114#[cfg(not(feature = "generic-impls"))]
115impl_resolve_float!(f32, f64,);
116
117/// Trait to get the index of a sorted list. Implemented by [`Fraction`], [`KthSmallest`], and
118/// [`KthLargest`].
119///
120/// The target list does not need to be a list, but indexable, and does not need to be sorted, as
121/// long as the k-th smallest element is accessible (which it is for all lists, see [`percentile_rand`]).
122pub trait OrderedListIndex {
123    /// Returns the index this object is targeting.
124    /// Could be either a single value or the mean of two.
125    fn index(&self, len: usize) -> MeanValue<usize>;
126}
127
128/// A simplified fraction.
129/// This is used to get a percentile from any function in this module.
130///
131/// We use two methods of getting the index in a list.
132/// The first one can give a mean of two values it the [`Self::denominator`]
133/// [`usize::is_power_of_two`]. This enables `3/8` to give a correct result.
134/// If the above isn't applicable, the fallback `idx = ceil(len * fraction) - 1`.
135///
136/// Please contribute if you need a more solid system.
137///
138/// # Eq & Ord
139///
140/// These are implemented to follow the expected ordering.
141#[derive(Debug, Clone, Copy)]
142pub struct Fraction {
143    pub numerator: u64,
144    pub denominator: u64,
145}
146impl Fraction {
147    pub const HALF: Self = Self {
148        numerator: 1,
149        denominator: 2,
150    };
151    pub const ONE_QUARTER: Self = Self {
152        numerator: 1,
153        denominator: 4,
154    };
155    pub const THREE_QUARTERS: Self = Self {
156        numerator: 3,
157        denominator: 4,
158    };
159    /// Create a new fraction. This function also simplifies the fraction.
160    ///
161    /// # Panics
162    ///
163    /// Panics if `numerator > denominator`.
164    #[inline]
165    pub fn new(numerator: u64, denominator: u64) -> Self {
166        {
167            Self {
168                numerator,
169                denominator,
170            }
171            .simplify()
172        }
173    }
174
175    /// Euklides' algorithm
176    #[inline]
177    fn simplify(self) -> Self {
178        let mut x = self.numerator;
179        let mut remainder = self.denominator;
180        let gcd = loop {
181            let tmp = x % remainder;
182            if tmp == 0 {
183                break remainder;
184            }
185            x = remainder;
186            remainder = tmp;
187        };
188
189        Self {
190            numerator: self.numerator / gcd,
191            denominator: self.denominator / gcd,
192        }
193    }
194}
195impl OrderedListIndex for Fraction {
196    fn index(&self, len: usize) -> MeanValue<usize> {
197        assert!(self.numerator <= self.denominator);
198        fn assert_not_zero(denominator: u64) {
199            assert_ne!(denominator, 0);
200        }
201        assert_not_zero(self.denominator);
202        fn power_of_two(me: Fraction, len: usize) -> MeanValue<usize> {
203            if me.denominator == 2 {
204                if len % 2 == 0 {
205                    MeanValue::Mean(len / 2 - 1, len / 2)
206                } else {
207                    MeanValue::Single(len / 2)
208                }
209            } else {
210                // if say `me == 4/8`, we'd get `0/4`. That's no good! But we don't have to worry
211                // about that, as we require (for now) fractions to be in their most simplified
212                // form.
213                let new = Fraction::new(me.numerator % (me.denominator / 2), me.denominator / 2);
214                let mut value = power_of_two(new, len / 2);
215                if me.numerator > me.denominator / 2 {
216                    // len % 2 because if the value is odd, we add one (the middle term is removed).
217                    value = value.map(|v| v + len / 2 + len % 2);
218                }
219
220                value
221            }
222        }
223        // `TODO`: implement https://en.wikipedia.org/wiki/Percentile#The_linear_interpolation_between_closest_ranks_method
224
225        // exception for when self.denominator.is_power_of_two(), as we want quartiles and median
226        // to be the mean of two values sometimes.
227        if self.denominator == 1 {
228            MeanValue::Single(self.numerator as _)
229        } else if self.denominator.is_power_of_two() {
230            power_of_two(*self, len)
231        } else {
232            // ceil(len * percentile) - 1
233            let m = len * self.numerator as usize;
234            let rem = m % self.denominator as usize;
235            let rem = usize::from(rem > 1);
236            MeanValue::Single((m / self.denominator as usize + rem - 1).min(len))
237        }
238    }
239}
240
241impl PartialEq for Fraction {
242    #[inline]
243    fn eq(&self, other: &Self) -> bool {
244        self.cmp(other).is_eq()
245    }
246}
247impl Eq for Fraction {}
248impl Ord for Fraction {
249    #[inline]
250    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
251        // we don't need to simplify, as [`Self::new`] always does it, there's no way to not get a
252        // simplified `Fraction`.
253
254        // If we multiply `me` with `other.denominator`, out denominators are the same.
255        // We don't need `me.denominators`, so we don't calculate it.
256        let my_numerator_with_same_denominator = self.numerator * other.denominator;
257        // Same reasoning as above.
258        let other_numerator_with_same_denominator = other.numerator * self.denominator;
259        my_numerator_with_same_denominator.cmp(&other_numerator_with_same_denominator)
260    }
261}
262impl PartialOrd for Fraction {
263    #[inline]
264    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
265        Some(self.cmp(other))
266    }
267}
268
269/// Get the k-th smallest value.
270/// Implements [`OrderedListIndex`].
271pub struct KthSmallest {
272    pub k: usize,
273}
274impl KthSmallest {
275    pub fn new(k: usize) -> Self {
276        Self { k }
277    }
278}
279impl OrderedListIndex for KthSmallest {
280    #[inline]
281    fn index(&self, len: usize) -> MeanValue<usize> {
282        assert!(self.k < len);
283        MeanValue::Single(self.k)
284    }
285}
286/// Get the k-th largest value.
287/// Implements [`OrderedListIndex`].
288pub struct KthLargest {
289    pub k: usize,
290}
291impl KthLargest {
292    pub fn new(k: usize) -> Self {
293        Self { k }
294    }
295}
296impl OrderedListIndex for KthLargest {
297    #[inline]
298    fn index(&self, len: usize) -> MeanValue<usize> {
299        assert!(self.k < len);
300        // `-1` because if len == 2 and k==0, we want 1, as that's the second index.
301        MeanValue::Single(len - 1 - self.k)
302    }
303}
304
305#[inline(always)]
306fn a_cmp_b<T: Ord>(a: &T, b: &T) -> cmp::Ordering {
307    a.cmp(b)
308}
309
310/// Percentile by sorting.
311///
312/// See [`naive_percentile_by`] for support for a custom comparator function.
313///
314/// # Performance & scalability
315///
316/// This will be very quick for small sets.
317/// O(n) performance when `values.len() < 5`, else O(n log n).
318#[inline]
319pub fn naive_percentile<T: Ord>(values: &mut [T], target: impl OrderedListIndex) -> MeanValue<&T> {
320    naive_percentile_by(values, target, &mut a_cmp_b)
321}
322/// Same as [`naive_percentile`] but with a custom comparator function.
323#[inline]
324pub fn naive_percentile_by<'a, T>(
325    values: &'a mut [T],
326    target: impl OrderedListIndex,
327    compare: &mut impl FnMut(&T, &T) -> cmp::Ordering,
328) -> MeanValue<&'a T> {
329    debug_assert!(!values.is_empty(), "we must have more than 0 values!");
330    values.sort_by(compare);
331    target.index(values.len()).map(|idx| &values[idx])
332}
333/// quickselect algorithm
334///
335/// Consider using [`percentile_rand`] or [`median`].
336/// See [`percentile_by`] for support for a custom comparator function.
337///
338/// `pivot_fn` must return a value from the supplied slice.
339#[inline]
340pub fn percentile<T: Clone + Ord>(
341    values: &mut [T],
342    target: impl OrderedListIndex,
343    pivot_fn: &mut impl FnMut(&mut [T]) -> Cow<'_, T>,
344) -> MeanValue<T> {
345    percentile_by(values, target, pivot_fn, &mut a_cmp_b)
346}
347/// Same as [`percentile`] but with a custom comparator function.
348#[inline]
349pub fn percentile_by<T: Clone>(
350    values: &mut [T],
351    target: impl OrderedListIndex,
352    mut pivot_fn: &mut impl FnMut(&mut [T]) -> Cow<'_, T>,
353    mut compare: &mut impl FnMut(&T, &T) -> cmp::Ordering,
354) -> MeanValue<T> {
355    target
356        .index(values.len())
357        .map(|v| quickselect(values, v, &mut pivot_fn, &mut compare).clone())
358}
359/// Convenience function for [`percentile`] with [`pivot_fn::rand`].
360#[cfg(feature = "percentile-rand")]
361#[inline]
362pub fn percentile_rand<T: Ord + Clone>(
363    values: &mut [T],
364    target: impl OrderedListIndex,
365) -> MeanValue<T> {
366    percentile(values, target, &mut pivot_fn::rand())
367}
368/// Get the value at `target` in `values`.
369/// Uses the best method available ([`percentile_rand`] if feature `percentile-rand` is enabled,
370/// else [`pivot_fn::middle`])
371#[inline]
372pub fn percentile_default_pivot<T: Ord + Clone>(
373    values: &mut [T],
374    target: impl OrderedListIndex,
375) -> MeanValue<T> {
376    percentile_default_pivot_by(values, target, &mut a_cmp_b)
377}
378/// Same as [`percentile_default_pivot`] but with a custom comparator function.
379#[inline]
380pub fn percentile_default_pivot_by<T: Clone>(
381    values: &mut [T],
382    target: impl OrderedListIndex,
383    compare: &mut impl FnMut(&T, &T) -> cmp::Ordering,
384) -> MeanValue<T> {
385    #[cfg(feature = "percentile-rand")]
386    {
387        percentile_by(values, target, &mut pivot_fn::rand(), compare)
388    }
389    #[cfg(not(feature = "percentile-rand"))]
390    {
391        percentile_by(values, target, &mut pivot_fn::middle(), compare)
392    }
393}
394
395/// Convenience function for [`percentile`] with the 50% mark as the target and [`pivot_fn::rand`]
396/// (if the `percentile-rand` feature is enabled, else [`pivot_fn::middle`]).
397///
398/// See [`percentile_default_pivot_by`] for supplying a custom comparator function.
399/// This is critical for types which does not implement [`Ord`] (e.g. f64).
400#[inline]
401pub fn median<T: Ord + Clone>(values: &mut [T]) -> MeanValue<T> {
402    percentile_default_pivot(values, Fraction::HALF)
403}
404/// Low level function used by this module.
405fn quickselect<T: Clone>(
406    values: &mut [T],
407    mut k: usize,
408    mut pivot_fn: impl FnMut(&mut [T]) -> Cow<'_, T>,
409    mut compare: impl FnMut(&T, &T) -> cmp::Ordering,
410) -> &T {
411    if k >= values.len() {
412        k = values.len() - 1;
413    }
414    if values.len() == 1 {
415        assert_eq!(k, 0);
416        return &values[0];
417    }
418    if values.len() <= 5 {
419        values.sort_unstable_by(compare);
420        return &values[k];
421    }
422
423    let pivot = pivot_fn(values);
424    let pivot = pivot.into_owned();
425
426    let (lows, highs_inclusive) =
427        split_include(values, |v| compare(v, &pivot) == cmp::Ordering::Less);
428    let (highs, pivots) = split_include(highs_inclusive, |v| {
429        compare(v, &pivot) == cmp::Ordering::Greater
430    });
431
432    if k < lows.len() {
433        quickselect(lows, k, pivot_fn, compare)
434    } else if k < lows.len() + pivots.len() {
435        &pivots[0]
436    } else {
437        quickselect(highs, k - lows.len() - pivots.len(), pivot_fn, compare)
438    }
439}
440/// Moves items in the slice and splits it so the first returned slice contains all elements where
441/// `predicate` is true. The second contains all other.
442#[inline]
443pub fn split_include<T>(
444    slice: &mut [T],
445    mut predicate: impl FnMut(&T) -> bool,
446) -> (&mut [T], &mut [T]) {
447    let mut add_index = 0;
448    let mut index = 0;
449    let len = slice.len();
450    while index < len {
451        let value = &mut slice[index];
452        if predicate(value) {
453            slice.swap(index, add_index);
454            add_index += 1;
455        }
456        index += 1;
457    }
458
459    slice.split_at_mut(add_index)
460}
461/// Same result as [`percentile_rand`] but in deterministic linear time.
462/// But probabilistically way slower.
463pub fn median_of_medians<T: Ord + Clone + PercentileResolve>(
464    values: &mut [T],
465    target: impl OrderedListIndex,
466) -> MeanValue<T> {
467    median_of_medians_by(values, target, &|a, b| a.cmp(b))
468}
469/// Same as [`median_of_medians`] but with a custom comparator function.
470pub fn median_of_medians_by<T: Clone + PercentileResolve>(
471    values: &mut [T],
472    target: impl OrderedListIndex,
473    mut compare: &impl Fn(&T, &T) -> cmp::Ordering,
474) -> MeanValue<T> {
475    percentile_by(
476        values,
477        target,
478        &mut pivot_fn::median_of_medians(compare),
479        &mut compare,
480    )
481}
482
483pub mod pivot_fn {
484    use super::*;
485
486    pub trait SliceSubset<T> {
487        fn len(&self) -> usize;
488        fn is_empty(&self) -> bool {
489            self.len() == 0
490        }
491        /// Returns [`None`] if `idx >= Self::len`.
492        fn get(&self, idx: usize) -> Option<&T>;
493    }
494    impl<T> SliceSubset<T> for [T] {
495        #[inline]
496        fn len(&self) -> usize {
497            <[T]>::len(self)
498        }
499        #[inline]
500        fn get(&self, idx: usize) -> Option<&T> {
501            <[T]>::get(self, idx)
502        }
503    }
504    impl<T> SliceSubset<T> for &[T] {
505        #[inline]
506        fn len(&self) -> usize {
507            <[T]>::len(self)
508        }
509        #[inline]
510        fn get(&self, idx: usize) -> Option<&T> {
511            <[T]>::get(self, idx)
512        }
513    }
514    impl<T> SliceSubset<T> for &mut [T] {
515        #[inline]
516        fn len(&self) -> usize {
517            <[T]>::len(self)
518        }
519        #[inline]
520        fn get(&self, idx: usize) -> Option<&T> {
521            <[T]>::get(self, idx)
522        }
523    }
524    //// See todo note under `clusters`.
525    //
526    // impl<'a> SliceSubset<f64> for ClusterList<'a> {
527    // #[inline]
528    // fn len(&self) -> usize {
529    // self.len()
530    // }
531    // #[inline]
532    // fn get(&self, idx: usize) -> Option<&f64> {
533    // if idx < self.len() {
534    // Some(self.index(idx))
535    // } else {
536    // None
537    // }
538    // }
539    // }
540
541    #[cfg(feature = "percentile-rand")]
542    #[inline]
543    pub fn rand<T: Clone, S: SliceSubset<T> + ?Sized>() -> impl FnMut(&mut S) -> Cow<'_, T> {
544        let mut rng = rand::thread_rng();
545        move |slice| {
546            let idx = rng.sample(rand::distributions::Uniform::new(0_usize, slice.len()));
547            // UNWRAP: it's less than `slice.len`.
548            // We assume `!slice.is_empty()`.
549            Cow::Borrowed(slice.get(idx).unwrap())
550        }
551    }
552    #[inline]
553    pub fn middle<T: Clone, S: SliceSubset<T> + ?Sized>() -> impl FnMut(&mut S) -> Cow<'_, T> {
554        #[inline(always)]
555        fn inner<T: Clone, S: SliceSubset<T> + ?Sized>(slice: &mut S) -> Cow<'_, T> {
556            // UNWRAP: it's less than `slice.len`.
557            // We assume `!slice.is_empty()`.
558            Cow::Borrowed(slice.get(slice.len() / 2).unwrap())
559        }
560        inner
561    }
562    /// Slice the list using the median of medians method.
563    /// It's not recommended to use this.
564    /// See the [module-level documentation](super) for more info.
565    ///
566    /// Picks a good pivot within l, a list of numbers.
567    /// This algorithm runs in O(n) time.
568    pub fn median_of_medians<T: Clone + PercentileResolve>(
569        mut compare: &impl Fn(&T, &T) -> cmp::Ordering,
570    ) -> impl FnMut(&mut [T]) -> Cow<'_, T> + '_ {
571        move |l| {
572            let len = l.len();
573            assert!(len > 0);
574
575            // / 5 * 5 truncates to the lower 5-multiple.
576            // We only want full chunks.
577            let chunks = l[..(len / 5) * 5].chunks_mut(5);
578
579            // Next, we sort each chunk. Each group is a fixed length, so each sort
580            // takes constant time. Since we have n/5 chunks, this operation
581            // is also O(n)
582            let sorted_chunks = chunks.map(|c| {
583                c.sort_unstable_by(compare);
584                c
585            });
586
587            let medians = sorted_chunks.map(|chunk| chunk[2].clone());
588            let mut medians: Vec<_> = medians.collect();
589            let median_of_medians = percentile_by(
590                &mut medians,
591                Fraction::new(1, 2),
592                &mut median_of_medians(compare),
593                &mut compare,
594            );
595            Cow::Owned(median_of_medians.resolve())
596        }
597    }
598}
599
600/// Operations on [`crate::Cluster`]s.
601///
602/// This attempts to implement all functionality of this module but using clusters.
603/// This turned out to be quite difficult.
604pub mod cluster {
605    use super::*;
606    use crate::{Cluster, ClusterList, OwnedClusterList};
607    use std::ops::{Deref, DerefMut};
608
609    // `TODO`: use `super::pivot_fn` instead. That doesn't however seem to work, due to idiotic
610    // lifetime requirements.
611    pub mod pivot_fn {
612        use super::*;
613        #[cfg(feature = "percentile-rand")]
614        #[inline]
615        pub fn rand() -> impl FnMut(&ClusterList) -> f64 {
616            let mut rng = rand::thread_rng();
617            move |slice| {
618                let idx = rng.sample(rand::distributions::Uniform::new(0_usize, slice.len()));
619                // Panic (index call): it's less than `slice.len`.
620                // We assume `!slice.is_empty()`.
621                *slice.index(idx)
622            }
623        }
624        #[inline]
625        pub fn middle() -> impl FnMut(&ClusterList) -> f64 {
626            #[inline(always)]
627            fn inner(slice: &ClusterList) -> f64 {
628                // Panic (index call): it's less than `slice.len`.
629                // We assume `!slice.is_empty()`.
630                *slice.index(slice.len() / 2)
631            }
632            inner
633        }
634    }
635
636    /// Percentile by sorting.
637    ///
638    /// See [`naive_percentile_by`] for support for a custom comparator function.
639    ///
640    /// # Performance & scalability
641    ///
642    /// This will be very quick for small sets.
643    /// O(n) performance when `values.len() < 5`, else O(n log n).
644    #[inline]
645    pub fn naive_percentile(
646        values: &mut OwnedClusterList,
647        target: impl OrderedListIndex,
648    ) -> MeanValue<f64> {
649        naive_percentile_by(values, target, &mut crate::F64OrdHash::f64_cmp)
650    }
651    /// Same as [`naive_percentile`] but with a custom comparator function.
652    #[inline]
653    pub fn naive_percentile_by(
654        values: &mut OwnedClusterList,
655        target: impl OrderedListIndex,
656        compare: &mut impl FnMut(f64, f64) -> cmp::Ordering,
657    ) -> MeanValue<f64> {
658        values.list.sort_unstable_by(|a, b| compare(a.0, b.0));
659        let values = values.borrow();
660        let len = values.len();
661        target.index(len).map(|idx| *values.index(idx))
662    }
663    /// quickselect algorithm
664    ///
665    /// Consider using [`percentile_rand`] or [`median`].
666    /// See [`percentile_by`] for support for a custom comparator function.
667    ///
668    /// `pivot_fn` must return a value from the supplied slice.
669    #[inline]
670    pub fn percentile(
671        values: &mut OwnedClusterList,
672        target: impl OrderedListIndex,
673        pivot_fn: &mut impl FnMut(&ClusterList) -> f64,
674    ) -> MeanValue<f64> {
675        percentile_by(values, target, pivot_fn, &mut crate::F64OrdHash::f64_cmp)
676    }
677    /// Same as [`percentile`] but with a custom comparator function.
678    #[inline]
679    pub fn percentile_by(
680        values: &mut OwnedClusterList,
681        target: impl OrderedListIndex,
682        mut pivot_fn: &mut impl FnMut(&ClusterList) -> f64,
683        mut compare: &mut impl FnMut(f64, f64) -> cmp::Ordering,
684    ) -> MeanValue<f64> {
685        target
686            .index(values.borrow().len())
687            .map(|idx| quickselect(&mut values.into(), idx, &mut pivot_fn, &mut compare))
688    }
689    /// Convenience function for [`percentile`] with [`pivot_fn::rand`].
690    #[cfg(feature = "percentile-rand")]
691    #[inline]
692    pub fn percentile_rand(
693        values: &mut OwnedClusterList,
694        target: impl OrderedListIndex,
695    ) -> MeanValue<f64> {
696        percentile(values, target, &mut pivot_fn::rand())
697    }
698    /// Get the value at `target` in `values`.
699    /// Uses the best method available ([`percentile_rand`] if feature `percentile-rand` is enabled,
700    /// else [`pivot_fn::middle`])
701    #[inline]
702    pub fn percentile_default_pivot(
703        values: &mut OwnedClusterList,
704        target: impl OrderedListIndex,
705    ) -> MeanValue<f64> {
706        percentile_default_pivot_by(values, target, &mut crate::F64OrdHash::f64_cmp)
707    }
708    /// Same as [`percentile_default_pivot`] but with a custom comparator function.
709    #[inline]
710    pub fn percentile_default_pivot_by(
711        values: &mut OwnedClusterList,
712        target: impl OrderedListIndex,
713        compare: &mut impl FnMut(f64, f64) -> cmp::Ordering,
714    ) -> MeanValue<f64> {
715        #[cfg(feature = "percentile-rand")]
716        {
717            percentile_by(values, target, &mut pivot_fn::rand(), compare)
718        }
719        #[cfg(not(feature = "percentile-rand"))]
720        {
721            percentile_by(values, target, &mut pivot_fn::middle(), compare)
722        }
723    }
724
725    /// Convenience function for [`percentile`] with the 50% mark as the target and [`pivot_fn::rand`]
726    /// (if the `percentile-rand` feature is enabled, else [`pivot_fn::middle`]).
727    ///
728    /// See [`percentile_default_pivot_by`] for supplying a custom comparator function.
729    /// This is critical for types which does not implement [`Ord`] (e.g. f64).
730    #[inline]
731    pub fn median(values: &mut OwnedClusterList) -> MeanValue<f64> {
732        percentile_default_pivot(values, Fraction::HALF)
733    }
734
735    struct ClusterMut<'a> {
736        list: &'a mut [Cluster],
737        len: usize,
738    }
739    impl<'a> Deref for ClusterMut<'a> {
740        type Target = [Cluster];
741        #[inline]
742        fn deref(&self) -> &Self::Target {
743            self.list
744        }
745    }
746    impl<'a> DerefMut for ClusterMut<'a> {
747        #[inline]
748        fn deref_mut(&mut self) -> &mut Self::Target {
749            self.list
750        }
751    }
752    impl<'a> From<&'a ClusterMut<'a>> for ClusterList<'a> {
753        #[inline]
754        fn from(c: &'a ClusterMut<'a>) -> Self {
755            ClusterList {
756                list: c.list,
757                len: c.len,
758            }
759        }
760    }
761    impl<'a> From<&'a mut OwnedClusterList> for ClusterMut<'a> {
762        #[inline]
763        fn from(l: &'a mut OwnedClusterList) -> Self {
764            Self {
765                list: &mut l.list,
766                len: l.len,
767            }
768        }
769    }
770    impl<'a> ClusterMut<'a> {
771        #[inline]
772        fn list(&self) -> ClusterList {
773            ClusterList::from(self)
774        }
775    }
776    fn quickselect<'a>(
777        values: &'a mut ClusterMut<'a>,
778        k: usize,
779        mut pivot_fn: impl FnMut(&ClusterList) -> f64,
780        mut compare: impl FnMut(f64, f64) -> cmp::Ordering,
781    ) -> f64 {
782        if values.len() == 1 {
783            debug_assert!(k < values.list().len());
784            return values[0].0;
785        }
786
787        let pivot = pivot_fn(&values.list());
788
789        let (mut lows, mut highs_inclusive) =
790            split_include(values, |v| compare(v, pivot) == cmp::Ordering::Less);
791        let (mut highs, pivots) = split_include(&mut highs_inclusive, |v| {
792            compare(v, pivot) == cmp::Ordering::Greater
793        });
794
795        if k < lows.list().len() {
796            quickselect(&mut lows, k, pivot_fn, compare)
797        } else if k < lows.list().len() + pivots.list().len() {
798            pivots[0].0
799        } else if highs.is_empty() {
800            quickselect(&mut lows, k, pivot_fn, compare)
801        } else {
802            quickselect(
803                &mut highs,
804                k - lows.list().len() - pivots.list().len(),
805                pivot_fn,
806                compare,
807            )
808        }
809    }
810    #[inline]
811    fn split_include<'a>(
812        slice: &'a mut ClusterMut<'a>,
813        mut predicate: impl FnMut(f64) -> bool,
814    ) -> (ClusterMut<'a>, ClusterMut<'a>) {
815        let mut add_index = 0;
816        let mut index = 0;
817        let len = slice.len();
818        let mut total_len = 0;
819        let cluser_len = slice.list().len();
820        while index < len {
821            let value = &mut slice[index];
822            if predicate(value.0) {
823                total_len += value.1;
824                slice.swap(index, add_index);
825                add_index += 1;
826            }
827            index += 1;
828        }
829
830        let (a, b) = slice.split_at_mut(add_index);
831        (
832            ClusterMut {
833                list: a,
834                len: total_len,
835            },
836            ClusterMut {
837                list: b,
838                len: cluser_len - total_len,
839            },
840        )
841    }
842}
843
844#[cfg(test)]
845mod tests {
846    use crate::Fraction;
847
848    fn raw_fraction(n: u64, d: u64) -> Fraction {
849        Fraction {
850            numerator: n,
851            denominator: d,
852        }
853    }
854    #[test]
855    fn fraction_1() {
856        assert_eq!(raw_fraction(8316, 2940).simplify(), Fraction::new(99, 35));
857    }
858    #[test]
859    fn fraction_2() {
860        assert_eq!(raw_fraction(2940, 8316).simplify(), Fraction::new(35, 99));
861    }
862    #[test]
863    fn fraction_3() {
864        assert_eq!(raw_fraction(29, 41).simplify(), Fraction::new(29, 41));
865    }
866}
std_dev/percentile.rs

std_dev/
percentile.rs