1#[cfg(feature = "percentile-rand")]
12use rand::Rng;
13use std::borrow::Cow;
14use std::cmp;
15
16#[derive(Debug, Clone, Copy, PartialEq, Eq)]
18pub enum MeanValue<T> {
19 Single(T),
21 Mean(T, T),
23}
24impl MeanValue<crate::F64OrdHash> {
25 #[inline]
26 pub fn resolve(self) -> f64 {
27 self.map(|v| v.0).resolve()
28 }
29}
30impl<T: PercentileResolve> MeanValue<T> {
31 #[inline]
32 pub fn resolve(self) -> T {
33 PercentileResolve::compute(self)
34 }
35}
36impl<T> MeanValue<T> {
37 #[inline]
38 pub fn into_single(self) -> Option<T> {
39 if let Self::Single(t) = self {
40 Some(t)
41 } else {
42 None
43 }
44 }
45 #[inline]
46 pub fn map<O>(self, mut f: impl FnMut(T) -> O) -> MeanValue<O> {
47 match self {
48 Self::Single(v) => MeanValue::Single(f(v)),
49 Self::Mean(a, b) => MeanValue::Mean(f(a), f(b)),
50 }
51 }
52}
53impl<T: Clone> MeanValue<&T> {
54 #[inline]
55 pub fn clone_inner(&self) -> MeanValue<T> {
56 match self {
57 Self::Single(t) => MeanValue::Single((*t).clone()),
58 Self::Mean(a, b) => MeanValue::Mean((*a).clone(), (*b).clone()),
59 }
60 }
61}
62pub trait PercentileResolve
65where
66 Self: Sized,
67{
68 fn mean(a: Self, b: Self) -> Self;
69 #[inline]
70 fn compute(percentile: MeanValue<Self>) -> Self {
71 match percentile {
72 MeanValue::Single(me) => me,
73 MeanValue::Mean(a, b) => PercentileResolve::mean(a, b),
74 }
75 }
76}
77
78#[cfg(feature = "generic-impls")]
79impl<T: num_traits::identities::One + std::ops::Add<Output = T> + std::ops::Div<Output = T>>
80 PercentileResolve for T
81{
82 #[inline]
83 fn mean(a: Self, b: Self) -> Self {
84 (a + b) / (T::one() + T::one())
85 }
86}
87#[cfg(not(feature = "generic-impls"))]
88macro_rules! impl_resolve {
89 ($($t:ty:$two:expr, )+) => {
90 $(
91 impl PercentileResolve for $t {
92 #[inline]
93 fn mean(a: Self, b: Self) -> Self {
94 (a + b) / $two
95 }
96 }
97 )+
98 };
99}
100#[cfg(not(feature = "generic-impls"))]
101macro_rules! impl_resolve_integer {
102 ($($t:ty, )+) => {
103 impl_resolve!($($t:2, )+);
104 };
105}
106#[cfg(not(feature = "generic-impls"))]
107macro_rules! impl_resolve_float {
108 ($($t:ty, )+) => {
109 impl_resolve!($($t:2.0, )+);
110 };
111}
112#[cfg(not(feature = "generic-impls"))]
113impl_resolve_integer!(u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize,);
114#[cfg(not(feature = "generic-impls"))]
115impl_resolve_float!(f32, f64,);
116
117pub trait OrderedListIndex {
123 fn index(&self, len: usize) -> MeanValue<usize>;
126}
127
128#[derive(Debug, Clone, Copy)]
142pub struct Fraction {
143 pub numerator: u64,
144 pub denominator: u64,
145}
146impl Fraction {
147 pub const HALF: Self = Self {
148 numerator: 1,
149 denominator: 2,
150 };
151 pub const ONE_QUARTER: Self = Self {
152 numerator: 1,
153 denominator: 4,
154 };
155 pub const THREE_QUARTERS: Self = Self {
156 numerator: 3,
157 denominator: 4,
158 };
159 #[inline]
165 pub fn new(numerator: u64, denominator: u64) -> Self {
166 {
167 Self {
168 numerator,
169 denominator,
170 }
171 .simplify()
172 }
173 }
174
175 #[inline]
177 fn simplify(self) -> Self {
178 let mut x = self.numerator;
179 let mut remainder = self.denominator;
180 let gcd = loop {
181 let tmp = x % remainder;
182 if tmp == 0 {
183 break remainder;
184 }
185 x = remainder;
186 remainder = tmp;
187 };
188
189 Self {
190 numerator: self.numerator / gcd,
191 denominator: self.denominator / gcd,
192 }
193 }
194}
195impl OrderedListIndex for Fraction {
196 fn index(&self, len: usize) -> MeanValue<usize> {
197 assert!(self.numerator <= self.denominator);
198 fn assert_not_zero(denominator: u64) {
199 assert_ne!(denominator, 0);
200 }
201 assert_not_zero(self.denominator);
202 fn power_of_two(me: Fraction, len: usize) -> MeanValue<usize> {
203 if me.denominator == 2 {
204 if len % 2 == 0 {
205 MeanValue::Mean(len / 2 - 1, len / 2)
206 } else {
207 MeanValue::Single(len / 2)
208 }
209 } else {
210 let new = Fraction::new(me.numerator % (me.denominator / 2), me.denominator / 2);
214 let mut value = power_of_two(new, len / 2);
215 if me.numerator > me.denominator / 2 {
216 value = value.map(|v| v + len / 2 + len % 2);
218 }
219
220 value
221 }
222 }
223 if self.denominator == 1 {
228 MeanValue::Single(self.numerator as _)
229 } else if self.denominator.is_power_of_two() {
230 power_of_two(*self, len)
231 } else {
232 let m = len * self.numerator as usize;
234 let rem = m % self.denominator as usize;
235 let rem = usize::from(rem > 1);
236 MeanValue::Single((m / self.denominator as usize + rem - 1).min(len))
237 }
238 }
239}
240
241impl PartialEq for Fraction {
242 #[inline]
243 fn eq(&self, other: &Self) -> bool {
244 self.cmp(other).is_eq()
245 }
246}
247impl Eq for Fraction {}
248impl Ord for Fraction {
249 #[inline]
250 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
251 let my_numerator_with_same_denominator = self.numerator * other.denominator;
257 let other_numerator_with_same_denominator = other.numerator * self.denominator;
259 my_numerator_with_same_denominator.cmp(&other_numerator_with_same_denominator)
260 }
261}
262impl PartialOrd for Fraction {
263 #[inline]
264 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
265 Some(self.cmp(other))
266 }
267}
268
269pub struct KthSmallest {
272 pub k: usize,
273}
274impl KthSmallest {
275 pub fn new(k: usize) -> Self {
276 Self { k }
277 }
278}
279impl OrderedListIndex for KthSmallest {
280 #[inline]
281 fn index(&self, len: usize) -> MeanValue<usize> {
282 assert!(self.k < len);
283 MeanValue::Single(self.k)
284 }
285}
286pub struct KthLargest {
289 pub k: usize,
290}
291impl KthLargest {
292 pub fn new(k: usize) -> Self {
293 Self { k }
294 }
295}
296impl OrderedListIndex for KthLargest {
297 #[inline]
298 fn index(&self, len: usize) -> MeanValue<usize> {
299 assert!(self.k < len);
300 MeanValue::Single(len - 1 - self.k)
302 }
303}
304
305#[inline(always)]
306fn a_cmp_b<T: Ord>(a: &T, b: &T) -> cmp::Ordering {
307 a.cmp(b)
308}
309
310#[inline]
319pub fn naive_percentile<T: Ord>(values: &mut [T], target: impl OrderedListIndex) -> MeanValue<&T> {
320 naive_percentile_by(values, target, &mut a_cmp_b)
321}
322#[inline]
324pub fn naive_percentile_by<'a, T>(
325 values: &'a mut [T],
326 target: impl OrderedListIndex,
327 compare: &mut impl FnMut(&T, &T) -> cmp::Ordering,
328) -> MeanValue<&'a T> {
329 debug_assert!(!values.is_empty(), "we must have more than 0 values!");
330 values.sort_by(compare);
331 target.index(values.len()).map(|idx| &values[idx])
332}
333#[inline]
340pub fn percentile<T: Clone + Ord>(
341 values: &mut [T],
342 target: impl OrderedListIndex,
343 pivot_fn: &mut impl FnMut(&mut [T]) -> Cow<'_, T>,
344) -> MeanValue<T> {
345 percentile_by(values, target, pivot_fn, &mut a_cmp_b)
346}
347#[inline]
349pub fn percentile_by<T: Clone>(
350 values: &mut [T],
351 target: impl OrderedListIndex,
352 mut pivot_fn: &mut impl FnMut(&mut [T]) -> Cow<'_, T>,
353 mut compare: &mut impl FnMut(&T, &T) -> cmp::Ordering,
354) -> MeanValue<T> {
355 target
356 .index(values.len())
357 .map(|v| quickselect(values, v, &mut pivot_fn, &mut compare).clone())
358}
359#[cfg(feature = "percentile-rand")]
361#[inline]
362pub fn percentile_rand<T: Ord + Clone>(
363 values: &mut [T],
364 target: impl OrderedListIndex,
365) -> MeanValue<T> {
366 percentile(values, target, &mut pivot_fn::rand())
367}
368#[inline]
372pub fn percentile_default_pivot<T: Ord + Clone>(
373 values: &mut [T],
374 target: impl OrderedListIndex,
375) -> MeanValue<T> {
376 percentile_default_pivot_by(values, target, &mut a_cmp_b)
377}
378#[inline]
380pub fn percentile_default_pivot_by<T: Clone>(
381 values: &mut [T],
382 target: impl OrderedListIndex,
383 compare: &mut impl FnMut(&T, &T) -> cmp::Ordering,
384) -> MeanValue<T> {
385 #[cfg(feature = "percentile-rand")]
386 {
387 percentile_by(values, target, &mut pivot_fn::rand(), compare)
388 }
389 #[cfg(not(feature = "percentile-rand"))]
390 {
391 percentile_by(values, target, &mut pivot_fn::middle(), compare)
392 }
393}
394
395#[inline]
401pub fn median<T: Ord + Clone>(values: &mut [T]) -> MeanValue<T> {
402 percentile_default_pivot(values, Fraction::HALF)
403}
404fn quickselect<T: Clone>(
406 values: &mut [T],
407 mut k: usize,
408 mut pivot_fn: impl FnMut(&mut [T]) -> Cow<'_, T>,
409 mut compare: impl FnMut(&T, &T) -> cmp::Ordering,
410) -> &T {
411 if k >= values.len() {
412 k = values.len() - 1;
413 }
414 if values.len() == 1 {
415 assert_eq!(k, 0);
416 return &values[0];
417 }
418 if values.len() <= 5 {
419 values.sort_unstable_by(compare);
420 return &values[k];
421 }
422
423 let pivot = pivot_fn(values);
424 let pivot = pivot.into_owned();
425
426 let (lows, highs_inclusive) =
427 split_include(values, |v| compare(v, &pivot) == cmp::Ordering::Less);
428 let (highs, pivots) = split_include(highs_inclusive, |v| {
429 compare(v, &pivot) == cmp::Ordering::Greater
430 });
431
432 if k < lows.len() {
433 quickselect(lows, k, pivot_fn, compare)
434 } else if k < lows.len() + pivots.len() {
435 &pivots[0]
436 } else {
437 quickselect(highs, k - lows.len() - pivots.len(), pivot_fn, compare)
438 }
439}
440#[inline]
443pub fn split_include<T>(
444 slice: &mut [T],
445 mut predicate: impl FnMut(&T) -> bool,
446) -> (&mut [T], &mut [T]) {
447 let mut add_index = 0;
448 let mut index = 0;
449 let len = slice.len();
450 while index < len {
451 let value = &mut slice[index];
452 if predicate(value) {
453 slice.swap(index, add_index);
454 add_index += 1;
455 }
456 index += 1;
457 }
458
459 slice.split_at_mut(add_index)
460}
461pub fn median_of_medians<T: Ord + Clone + PercentileResolve>(
464 values: &mut [T],
465 target: impl OrderedListIndex,
466) -> MeanValue<T> {
467 median_of_medians_by(values, target, &|a, b| a.cmp(b))
468}
469pub fn median_of_medians_by<T: Clone + PercentileResolve>(
471 values: &mut [T],
472 target: impl OrderedListIndex,
473 mut compare: &impl Fn(&T, &T) -> cmp::Ordering,
474) -> MeanValue<T> {
475 percentile_by(
476 values,
477 target,
478 &mut pivot_fn::median_of_medians(compare),
479 &mut compare,
480 )
481}
482
483pub mod pivot_fn {
484 use super::*;
485
486 pub trait SliceSubset<T> {
487 fn len(&self) -> usize;
488 fn is_empty(&self) -> bool {
489 self.len() == 0
490 }
491 fn get(&self, idx: usize) -> Option<&T>;
493 }
494 impl<T> SliceSubset<T> for [T] {
495 #[inline]
496 fn len(&self) -> usize {
497 <[T]>::len(self)
498 }
499 #[inline]
500 fn get(&self, idx: usize) -> Option<&T> {
501 <[T]>::get(self, idx)
502 }
503 }
504 impl<T> SliceSubset<T> for &[T] {
505 #[inline]
506 fn len(&self) -> usize {
507 <[T]>::len(self)
508 }
509 #[inline]
510 fn get(&self, idx: usize) -> Option<&T> {
511 <[T]>::get(self, idx)
512 }
513 }
514 impl<T> SliceSubset<T> for &mut [T] {
515 #[inline]
516 fn len(&self) -> usize {
517 <[T]>::len(self)
518 }
519 #[inline]
520 fn get(&self, idx: usize) -> Option<&T> {
521 <[T]>::get(self, idx)
522 }
523 }
524 #[cfg(feature = "percentile-rand")]
542 #[inline]
543 pub fn rand<T: Clone, S: SliceSubset<T> + ?Sized>() -> impl FnMut(&mut S) -> Cow<'_, T> {
544 let mut rng = rand::thread_rng();
545 move |slice| {
546 let idx = rng.sample(rand::distributions::Uniform::new(0_usize, slice.len()));
547 Cow::Borrowed(slice.get(idx).unwrap())
550 }
551 }
552 #[inline]
553 pub fn middle<T: Clone, S: SliceSubset<T> + ?Sized>() -> impl FnMut(&mut S) -> Cow<'_, T> {
554 #[inline(always)]
555 fn inner<T: Clone, S: SliceSubset<T> + ?Sized>(slice: &mut S) -> Cow<'_, T> {
556 Cow::Borrowed(slice.get(slice.len() / 2).unwrap())
559 }
560 inner
561 }
562 pub fn median_of_medians<T: Clone + PercentileResolve>(
569 mut compare: &impl Fn(&T, &T) -> cmp::Ordering,
570 ) -> impl FnMut(&mut [T]) -> Cow<'_, T> + '_ {
571 move |l| {
572 let len = l.len();
573 assert!(len > 0);
574
575 let chunks = l[..(len / 5) * 5].chunks_mut(5);
578
579 let sorted_chunks = chunks.map(|c| {
583 c.sort_unstable_by(compare);
584 c
585 });
586
587 let medians = sorted_chunks.map(|chunk| chunk[2].clone());
588 let mut medians: Vec<_> = medians.collect();
589 let median_of_medians = percentile_by(
590 &mut medians,
591 Fraction::new(1, 2),
592 &mut median_of_medians(compare),
593 &mut compare,
594 );
595 Cow::Owned(median_of_medians.resolve())
596 }
597 }
598}
599
600pub mod cluster {
605 use super::*;
606 use crate::{Cluster, ClusterList, OwnedClusterList};
607 use std::ops::{Deref, DerefMut};
608
609 pub mod pivot_fn {
612 use super::*;
613 #[cfg(feature = "percentile-rand")]
614 #[inline]
615 pub fn rand() -> impl FnMut(&ClusterList) -> f64 {
616 let mut rng = rand::thread_rng();
617 move |slice| {
618 let idx = rng.sample(rand::distributions::Uniform::new(0_usize, slice.len()));
619 *slice.index(idx)
622 }
623 }
624 #[inline]
625 pub fn middle() -> impl FnMut(&ClusterList) -> f64 {
626 #[inline(always)]
627 fn inner(slice: &ClusterList) -> f64 {
628 *slice.index(slice.len() / 2)
631 }
632 inner
633 }
634 }
635
636 #[inline]
645 pub fn naive_percentile(
646 values: &mut OwnedClusterList,
647 target: impl OrderedListIndex,
648 ) -> MeanValue<f64> {
649 naive_percentile_by(values, target, &mut crate::F64OrdHash::f64_cmp)
650 }
651 #[inline]
653 pub fn naive_percentile_by(
654 values: &mut OwnedClusterList,
655 target: impl OrderedListIndex,
656 compare: &mut impl FnMut(f64, f64) -> cmp::Ordering,
657 ) -> MeanValue<f64> {
658 values.list.sort_unstable_by(|a, b| compare(a.0, b.0));
659 let values = values.borrow();
660 let len = values.len();
661 target.index(len).map(|idx| *values.index(idx))
662 }
663 #[inline]
670 pub fn percentile(
671 values: &mut OwnedClusterList,
672 target: impl OrderedListIndex,
673 pivot_fn: &mut impl FnMut(&ClusterList) -> f64,
674 ) -> MeanValue<f64> {
675 percentile_by(values, target, pivot_fn, &mut crate::F64OrdHash::f64_cmp)
676 }
677 #[inline]
679 pub fn percentile_by(
680 values: &mut OwnedClusterList,
681 target: impl OrderedListIndex,
682 mut pivot_fn: &mut impl FnMut(&ClusterList) -> f64,
683 mut compare: &mut impl FnMut(f64, f64) -> cmp::Ordering,
684 ) -> MeanValue<f64> {
685 target
686 .index(values.borrow().len())
687 .map(|idx| quickselect(&mut values.into(), idx, &mut pivot_fn, &mut compare))
688 }
689 #[cfg(feature = "percentile-rand")]
691 #[inline]
692 pub fn percentile_rand(
693 values: &mut OwnedClusterList,
694 target: impl OrderedListIndex,
695 ) -> MeanValue<f64> {
696 percentile(values, target, &mut pivot_fn::rand())
697 }
698 #[inline]
702 pub fn percentile_default_pivot(
703 values: &mut OwnedClusterList,
704 target: impl OrderedListIndex,
705 ) -> MeanValue<f64> {
706 percentile_default_pivot_by(values, target, &mut crate::F64OrdHash::f64_cmp)
707 }
708 #[inline]
710 pub fn percentile_default_pivot_by(
711 values: &mut OwnedClusterList,
712 target: impl OrderedListIndex,
713 compare: &mut impl FnMut(f64, f64) -> cmp::Ordering,
714 ) -> MeanValue<f64> {
715 #[cfg(feature = "percentile-rand")]
716 {
717 percentile_by(values, target, &mut pivot_fn::rand(), compare)
718 }
719 #[cfg(not(feature = "percentile-rand"))]
720 {
721 percentile_by(values, target, &mut pivot_fn::middle(), compare)
722 }
723 }
724
725 #[inline]
731 pub fn median(values: &mut OwnedClusterList) -> MeanValue<f64> {
732 percentile_default_pivot(values, Fraction::HALF)
733 }
734
735 struct ClusterMut<'a> {
736 list: &'a mut [Cluster],
737 len: usize,
738 }
739 impl<'a> Deref for ClusterMut<'a> {
740 type Target = [Cluster];
741 #[inline]
742 fn deref(&self) -> &Self::Target {
743 self.list
744 }
745 }
746 impl<'a> DerefMut for ClusterMut<'a> {
747 #[inline]
748 fn deref_mut(&mut self) -> &mut Self::Target {
749 self.list
750 }
751 }
752 impl<'a> From<&'a ClusterMut<'a>> for ClusterList<'a> {
753 #[inline]
754 fn from(c: &'a ClusterMut<'a>) -> Self {
755 ClusterList {
756 list: c.list,
757 len: c.len,
758 }
759 }
760 }
761 impl<'a> From<&'a mut OwnedClusterList> for ClusterMut<'a> {
762 #[inline]
763 fn from(l: &'a mut OwnedClusterList) -> Self {
764 Self {
765 list: &mut l.list,
766 len: l.len,
767 }
768 }
769 }
770 impl<'a> ClusterMut<'a> {
771 #[inline]
772 fn list(&self) -> ClusterList {
773 ClusterList::from(self)
774 }
775 }
776 fn quickselect<'a>(
777 values: &'a mut ClusterMut<'a>,
778 k: usize,
779 mut pivot_fn: impl FnMut(&ClusterList) -> f64,
780 mut compare: impl FnMut(f64, f64) -> cmp::Ordering,
781 ) -> f64 {
782 if values.len() == 1 {
783 debug_assert!(k < values.list().len());
784 return values[0].0;
785 }
786
787 let pivot = pivot_fn(&values.list());
788
789 let (mut lows, mut highs_inclusive) =
790 split_include(values, |v| compare(v, pivot) == cmp::Ordering::Less);
791 let (mut highs, pivots) = split_include(&mut highs_inclusive, |v| {
792 compare(v, pivot) == cmp::Ordering::Greater
793 });
794
795 if k < lows.list().len() {
796 quickselect(&mut lows, k, pivot_fn, compare)
797 } else if k < lows.list().len() + pivots.list().len() {
798 pivots[0].0
799 } else if highs.is_empty() {
800 quickselect(&mut lows, k, pivot_fn, compare)
801 } else {
802 quickselect(
803 &mut highs,
804 k - lows.list().len() - pivots.list().len(),
805 pivot_fn,
806 compare,
807 )
808 }
809 }
810 #[inline]
811 fn split_include<'a>(
812 slice: &'a mut ClusterMut<'a>,
813 mut predicate: impl FnMut(f64) -> bool,
814 ) -> (ClusterMut<'a>, ClusterMut<'a>) {
815 let mut add_index = 0;
816 let mut index = 0;
817 let len = slice.len();
818 let mut total_len = 0;
819 let cluser_len = slice.list().len();
820 while index < len {
821 let value = &mut slice[index];
822 if predicate(value.0) {
823 total_len += value.1;
824 slice.swap(index, add_index);
825 add_index += 1;
826 }
827 index += 1;
828 }
829
830 let (a, b) = slice.split_at_mut(add_index);
831 (
832 ClusterMut {
833 list: a,
834 len: total_len,
835 },
836 ClusterMut {
837 list: b,
838 len: cluser_len - total_len,
839 },
840 )
841 }
842}
843
844#[cfg(test)]
845mod tests {
846 use crate::Fraction;
847
848 fn raw_fraction(n: u64, d: u64) -> Fraction {
849 Fraction {
850 numerator: n,
851 denominator: d,
852 }
853 }
854 #[test]
855 fn fraction_1() {
856 assert_eq!(raw_fraction(8316, 2940).simplify(), Fraction::new(99, 35));
857 }
858 #[test]
859 fn fraction_2() {
860 assert_eq!(raw_fraction(2940, 8316).simplify(), Fraction::new(35, 99));
861 }
862 #[test]
863 fn fraction_3() {
864 assert_eq!(raw_fraction(29, 41).simplify(), Fraction::new(29, 41));
865 }
866}