#[cfg(feature = "percentile-rand")]
use rand::Rng;
use std::borrow::Cow;
use std::cmp;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MeanValue<T> {
Single(T),
Mean(T, T),
}
impl MeanValue<crate::F64OrdHash> {
#[inline]
pub fn resolve(self) -> f64 {
self.map(|v| v.0).resolve()
}
}
impl<T: PercentileResolve> MeanValue<T> {
#[inline]
pub fn resolve(self) -> T {
PercentileResolve::compute(self)
}
}
impl<T> MeanValue<T> {
#[inline]
pub fn into_single(self) -> Option<T> {
if let Self::Single(t) = self {
Some(t)
} else {
None
}
}
#[inline]
pub fn map<O>(self, mut f: impl FnMut(T) -> O) -> MeanValue<O> {
match self {
Self::Single(v) => MeanValue::Single(f(v)),
Self::Mean(a, b) => MeanValue::Mean(f(a), f(b)),
}
}
}
impl<T: Clone> MeanValue<&T> {
#[inline]
pub fn clone_inner(&self) -> MeanValue<T> {
match self {
Self::Single(t) => MeanValue::Single((*t).clone()),
Self::Mean(a, b) => MeanValue::Mean((*a).clone(), (*b).clone()),
}
}
}
pub trait PercentileResolve
where
Self: Sized,
{
fn mean(a: Self, b: Self) -> Self;
#[inline]
fn compute(percentile: MeanValue<Self>) -> Self {
match percentile {
MeanValue::Single(me) => me,
MeanValue::Mean(a, b) => PercentileResolve::mean(a, b),
}
}
}
#[cfg(feature = "generic-impls")]
impl<T: num_traits::identities::One + std::ops::Add<Output = T> + std::ops::Div<Output = T>>
PercentileResolve for T
{
#[inline]
fn mean(a: Self, b: Self) -> Self {
(a + b) / (T::one() + T::one())
}
}
#[cfg(not(feature = "generic-impls"))]
macro_rules! impl_resolve {
($($t:ty:$two:expr, )+) => {
$(
impl PercentileResolve for $t {
#[inline]
fn mean(a: Self, b: Self) -> Self {
(a + b) / $two
}
}
)+
};
}
#[cfg(not(feature = "generic-impls"))]
macro_rules! impl_resolve_integer {
($($t:ty, )+) => {
impl_resolve!($($t:2, )+);
};
}
#[cfg(not(feature = "generic-impls"))]
macro_rules! impl_resolve_float {
($($t:ty, )+) => {
impl_resolve!($($t:2.0, )+);
};
}
#[cfg(not(feature = "generic-impls"))]
impl_resolve_integer!(u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize,);
#[cfg(not(feature = "generic-impls"))]
impl_resolve_float!(f32, f64,);
pub trait OrderedListIndex {
fn index(&self, len: usize) -> MeanValue<usize>;
}
#[derive(Debug, Clone, Copy)]
pub struct Fraction {
pub numerator: u64,
pub denominator: u64,
}
impl Fraction {
pub const HALF: Self = Self {
numerator: 1,
denominator: 2,
};
pub const ONE_QUARTER: Self = Self {
numerator: 1,
denominator: 4,
};
pub const THREE_QUARTERS: Self = Self {
numerator: 3,
denominator: 4,
};
#[inline]
pub fn new(numerator: u64, denominator: u64) -> Self {
{
Self {
numerator,
denominator,
}
.simplify()
}
}
#[inline]
fn simplify(self) -> Self {
let mut x = self.numerator;
let mut remainder = self.denominator;
let gcd = loop {
let tmp = x % remainder;
if tmp == 0 {
break remainder;
}
x = remainder;
remainder = tmp;
};
Self {
numerator: self.numerator / gcd,
denominator: self.denominator / gcd,
}
}
}
impl OrderedListIndex for Fraction {
fn index(&self, len: usize) -> MeanValue<usize> {
assert!(self.numerator <= self.denominator);
fn assert_not_zero(denominator: u64) {
assert_ne!(denominator, 0);
}
assert_not_zero(self.denominator);
fn power_of_two(me: Fraction, len: usize) -> MeanValue<usize> {
if me.denominator == 2 {
if len % 2 == 0 {
MeanValue::Mean(len / 2 - 1, len / 2)
} else {
MeanValue::Single(len / 2)
}
} else {
let new = Fraction::new(me.numerator % (me.denominator / 2), me.denominator / 2);
let mut value = power_of_two(new, len / 2);
if me.numerator > me.denominator / 2 {
value = value.map(|v| v + len / 2 + len % 2);
}
value
}
}
if self.denominator == 1 {
MeanValue::Single(self.numerator as _)
} else if self.denominator.is_power_of_two() {
power_of_two(*self, len)
} else {
let m = len * self.numerator as usize;
let rem = m % self.denominator as usize;
let rem = usize::from(rem > 1);
MeanValue::Single((m / self.denominator as usize + rem - 1).min(len))
}
}
}
impl PartialEq for Fraction {
#[inline]
fn eq(&self, other: &Self) -> bool {
self.cmp(other).is_eq()
}
}
impl Eq for Fraction {}
impl Ord for Fraction {
#[inline]
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
let my_numerator_with_same_denominator = self.numerator * other.denominator;
let other_numerator_with_same_denominator = other.numerator * self.denominator;
my_numerator_with_same_denominator.cmp(&other_numerator_with_same_denominator)
}
}
impl PartialOrd for Fraction {
#[inline]
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
pub struct KthSmallest {
pub k: usize,
}
impl KthSmallest {
pub fn new(k: usize) -> Self {
Self { k }
}
}
impl OrderedListIndex for KthSmallest {
#[inline]
fn index(&self, len: usize) -> MeanValue<usize> {
assert!(self.k < len);
MeanValue::Single(self.k)
}
}
pub struct KthLargest {
pub k: usize,
}
impl KthLargest {
pub fn new(k: usize) -> Self {
Self { k }
}
}
impl OrderedListIndex for KthLargest {
#[inline]
fn index(&self, len: usize) -> MeanValue<usize> {
assert!(self.k < len);
MeanValue::Single(len - 1 - self.k)
}
}
#[inline(always)]
fn a_cmp_b<T: Ord>(a: &T, b: &T) -> cmp::Ordering {
a.cmp(b)
}
#[inline]
pub fn naive_percentile<T: Ord>(values: &mut [T], target: impl OrderedListIndex) -> MeanValue<&T> {
naive_percentile_by(values, target, &mut a_cmp_b)
}
#[inline]
pub fn naive_percentile_by<'a, T>(
values: &'a mut [T],
target: impl OrderedListIndex,
compare: &mut impl FnMut(&T, &T) -> cmp::Ordering,
) -> MeanValue<&'a T> {
debug_assert!(!values.is_empty(), "we must have more than 0 values!");
values.sort_by(compare);
target.index(values.len()).map(|idx| &values[idx])
}
#[inline]
pub fn percentile<T: Clone + Ord>(
values: &mut [T],
target: impl OrderedListIndex,
pivot_fn: &mut impl FnMut(&mut [T]) -> Cow<'_, T>,
) -> MeanValue<T> {
percentile_by(values, target, pivot_fn, &mut a_cmp_b)
}
#[inline]
pub fn percentile_by<T: Clone>(
values: &mut [T],
target: impl OrderedListIndex,
mut pivot_fn: &mut impl FnMut(&mut [T]) -> Cow<'_, T>,
mut compare: &mut impl FnMut(&T, &T) -> cmp::Ordering,
) -> MeanValue<T> {
target
.index(values.len())
.map(|v| quickselect(values, v, &mut pivot_fn, &mut compare).clone())
}
#[cfg(feature = "percentile-rand")]
#[inline]
pub fn percentile_rand<T: Ord + Clone>(
values: &mut [T],
target: impl OrderedListIndex,
) -> MeanValue<T> {
percentile(values, target, &mut pivot_fn::rand())
}
#[inline]
pub fn percentile_default_pivot<T: Ord + Clone>(
values: &mut [T],
target: impl OrderedListIndex,
) -> MeanValue<T> {
percentile_default_pivot_by(values, target, &mut a_cmp_b)
}
#[inline]
pub fn percentile_default_pivot_by<T: Clone>(
values: &mut [T],
target: impl OrderedListIndex,
compare: &mut impl FnMut(&T, &T) -> cmp::Ordering,
) -> MeanValue<T> {
#[cfg(feature = "percentile-rand")]
{
percentile_by(values, target, &mut pivot_fn::rand(), compare)
}
#[cfg(not(feature = "percentile-rand"))]
{
percentile_by(values, target, &mut pivot_fn::middle(), compare)
}
}
#[inline]
pub fn median<T: Ord + Clone>(values: &mut [T]) -> MeanValue<T> {
percentile_default_pivot(values, Fraction::HALF)
}
fn quickselect<T: Clone>(
values: &mut [T],
mut k: usize,
mut pivot_fn: impl FnMut(&mut [T]) -> Cow<'_, T>,
mut compare: impl FnMut(&T, &T) -> cmp::Ordering,
) -> &T {
if k >= values.len() {
k = values.len() - 1;
}
if values.len() == 1 {
assert_eq!(k, 0);
return &values[0];
}
if values.len() <= 5 {
values.sort_unstable_by(compare);
return &values[k];
}
let pivot = pivot_fn(values);
let pivot = pivot.into_owned();
let (lows, highs_inclusive) =
split_include(values, |v| compare(v, &pivot) == cmp::Ordering::Less);
let (highs, pivots) = split_include(highs_inclusive, |v| {
compare(v, &pivot) == cmp::Ordering::Greater
});
if k < lows.len() {
quickselect(lows, k, pivot_fn, compare)
} else if k < lows.len() + pivots.len() {
&pivots[0]
} else {
quickselect(highs, k - lows.len() - pivots.len(), pivot_fn, compare)
}
}
#[inline]
pub fn split_include<T>(
slice: &mut [T],
mut predicate: impl FnMut(&T) -> bool,
) -> (&mut [T], &mut [T]) {
let mut add_index = 0;
let mut index = 0;
let len = slice.len();
while index < len {
let value = &mut slice[index];
if predicate(value) {
slice.swap(index, add_index);
add_index += 1;
}
index += 1;
}
slice.split_at_mut(add_index)
}
pub fn median_of_medians<T: Ord + Clone + PercentileResolve>(
values: &mut [T],
target: impl OrderedListIndex,
) -> MeanValue<T> {
median_of_medians_by(values, target, &|a, b| a.cmp(b))
}
pub fn median_of_medians_by<T: Clone + PercentileResolve>(
values: &mut [T],
target: impl OrderedListIndex,
mut compare: &impl Fn(&T, &T) -> cmp::Ordering,
) -> MeanValue<T> {
percentile_by(
values,
target,
&mut pivot_fn::median_of_medians(compare),
&mut compare,
)
}
pub mod pivot_fn {
use super::*;
pub trait SliceSubset<T> {
fn len(&self) -> usize;
fn is_empty(&self) -> bool {
self.len() == 0
}
fn get(&self, idx: usize) -> Option<&T>;
}
impl<T> SliceSubset<T> for [T] {
#[inline]
fn len(&self) -> usize {
<[T]>::len(self)
}
#[inline]
fn get(&self, idx: usize) -> Option<&T> {
<[T]>::get(self, idx)
}
}
impl<T> SliceSubset<T> for &[T] {
#[inline]
fn len(&self) -> usize {
<[T]>::len(self)
}
#[inline]
fn get(&self, idx: usize) -> Option<&T> {
<[T]>::get(self, idx)
}
}
impl<T> SliceSubset<T> for &mut [T] {
#[inline]
fn len(&self) -> usize {
<[T]>::len(self)
}
#[inline]
fn get(&self, idx: usize) -> Option<&T> {
<[T]>::get(self, idx)
}
}
#[cfg(feature = "percentile-rand")]
#[inline]
pub fn rand<T: Clone, S: SliceSubset<T> + ?Sized>() -> impl FnMut(&mut S) -> Cow<'_, T> {
let mut rng = rand::thread_rng();
move |slice| {
let idx = rng.sample(rand::distributions::Uniform::new(0_usize, slice.len()));
Cow::Borrowed(slice.get(idx).unwrap())
}
}
#[inline]
pub fn middle<T: Clone, S: SliceSubset<T> + ?Sized>() -> impl FnMut(&mut S) -> Cow<'_, T> {
#[inline(always)]
fn inner<T: Clone, S: SliceSubset<T> + ?Sized>(slice: &mut S) -> Cow<'_, T> {
Cow::Borrowed(slice.get(slice.len() / 2).unwrap())
}
inner
}
pub fn median_of_medians<T: Clone + PercentileResolve>(
mut compare: &impl Fn(&T, &T) -> cmp::Ordering,
) -> impl FnMut(&mut [T]) -> Cow<'_, T> + '_ {
move |l| {
let len = l.len();
assert!(len > 0);
let chunks = l[..(len / 5) * 5].chunks_mut(5);
let sorted_chunks = chunks.map(|c| {
c.sort_unstable_by(compare);
c
});
let medians = sorted_chunks.map(|chunk| chunk[2].clone());
let mut medians: Vec<_> = medians.collect();
let median_of_medians = percentile_by(
&mut medians,
Fraction::new(1, 2),
&mut median_of_medians(compare),
&mut compare,
);
Cow::Owned(median_of_medians.resolve())
}
}
}
pub mod cluster {
use super::*;
use crate::{Cluster, ClusterList, OwnedClusterList};
use std::ops::{Deref, DerefMut};
pub mod pivot_fn {
use super::*;
#[cfg(feature = "percentile-rand")]
#[inline]
pub fn rand() -> impl FnMut(&ClusterList) -> f64 {
let mut rng = rand::thread_rng();
move |slice| {
let idx = rng.sample(rand::distributions::Uniform::new(0_usize, slice.len()));
*slice.index(idx)
}
}
#[inline]
pub fn middle() -> impl FnMut(&ClusterList) -> f64 {
#[inline(always)]
fn inner(slice: &ClusterList) -> f64 {
*slice.index(slice.len() / 2)
}
inner
}
}
#[inline]
pub fn naive_percentile(
values: &mut OwnedClusterList,
target: impl OrderedListIndex,
) -> MeanValue<f64> {
naive_percentile_by(values, target, &mut crate::F64OrdHash::f64_cmp)
}
#[inline]
pub fn naive_percentile_by(
values: &mut OwnedClusterList,
target: impl OrderedListIndex,
compare: &mut impl FnMut(f64, f64) -> cmp::Ordering,
) -> MeanValue<f64> {
values.list.sort_unstable_by(|a, b| compare(a.0, b.0));
let values = values.borrow();
let len = values.len();
target.index(len).map(|idx| *values.index(idx))
}
#[inline]
pub fn percentile(
values: &mut OwnedClusterList,
target: impl OrderedListIndex,
pivot_fn: &mut impl FnMut(&ClusterList) -> f64,
) -> MeanValue<f64> {
percentile_by(values, target, pivot_fn, &mut crate::F64OrdHash::f64_cmp)
}
#[inline]
pub fn percentile_by(
values: &mut OwnedClusterList,
target: impl OrderedListIndex,
mut pivot_fn: &mut impl FnMut(&ClusterList) -> f64,
mut compare: &mut impl FnMut(f64, f64) -> cmp::Ordering,
) -> MeanValue<f64> {
target
.index(values.borrow().len())
.map(|idx| quickselect(&mut values.into(), idx, &mut pivot_fn, &mut compare))
}
#[cfg(feature = "percentile-rand")]
#[inline]
pub fn percentile_rand(
values: &mut OwnedClusterList,
target: impl OrderedListIndex,
) -> MeanValue<f64> {
percentile(values, target, &mut pivot_fn::rand())
}
#[inline]
pub fn percentile_default_pivot(
values: &mut OwnedClusterList,
target: impl OrderedListIndex,
) -> MeanValue<f64> {
percentile_default_pivot_by(values, target, &mut crate::F64OrdHash::f64_cmp)
}
#[inline]
pub fn percentile_default_pivot_by(
values: &mut OwnedClusterList,
target: impl OrderedListIndex,
compare: &mut impl FnMut(f64, f64) -> cmp::Ordering,
) -> MeanValue<f64> {
#[cfg(feature = "percentile-rand")]
{
percentile_by(values, target, &mut pivot_fn::rand(), compare)
}
#[cfg(not(feature = "percentile-rand"))]
{
percentile_by(values, target, &mut pivot_fn::middle(), compare)
}
}
#[inline]
pub fn median(values: &mut OwnedClusterList) -> MeanValue<f64> {
percentile_default_pivot(values, Fraction::HALF)
}
struct ClusterMut<'a> {
list: &'a mut [Cluster],
len: usize,
}
impl<'a> Deref for ClusterMut<'a> {
type Target = [Cluster];
#[inline]
fn deref(&self) -> &Self::Target {
self.list
}
}
impl<'a> DerefMut for ClusterMut<'a> {
#[inline]
fn deref_mut(&mut self) -> &mut Self::Target {
self.list
}
}
impl<'a> From<&'a ClusterMut<'a>> for ClusterList<'a> {
#[inline]
fn from(c: &'a ClusterMut<'a>) -> Self {
ClusterList {
list: c.list,
len: c.len,
}
}
}
impl<'a> From<&'a mut OwnedClusterList> for ClusterMut<'a> {
#[inline]
fn from(l: &'a mut OwnedClusterList) -> Self {
Self {
list: &mut l.list,
len: l.len,
}
}
}
impl<'a> ClusterMut<'a> {
#[inline]
fn list(&self) -> ClusterList {
ClusterList::from(self)
}
}
fn quickselect<'a>(
values: &'a mut ClusterMut<'a>,
k: usize,
mut pivot_fn: impl FnMut(&ClusterList) -> f64,
mut compare: impl FnMut(f64, f64) -> cmp::Ordering,
) -> f64 {
if values.len() == 1 {
debug_assert!(k < values.list().len());
return values[0].0;
}
let pivot = pivot_fn(&values.list());
let (mut lows, mut highs_inclusive) =
split_include(values, |v| compare(v, pivot) == cmp::Ordering::Less);
let (mut highs, pivots) = split_include(&mut highs_inclusive, |v| {
compare(v, pivot) == cmp::Ordering::Greater
});
if k < lows.list().len() {
quickselect(&mut lows, k, pivot_fn, compare)
} else if k < lows.list().len() + pivots.list().len() {
pivots[0].0
} else if highs.is_empty() {
quickselect(&mut lows, k, pivot_fn, compare)
} else {
quickselect(
&mut highs,
k - lows.list().len() - pivots.list().len(),
pivot_fn,
compare,
)
}
}
#[inline]
fn split_include<'a>(
slice: &'a mut ClusterMut<'a>,
mut predicate: impl FnMut(f64) -> bool,
) -> (ClusterMut<'a>, ClusterMut<'a>) {
let mut add_index = 0;
let mut index = 0;
let len = slice.len();
let mut total_len = 0;
let cluser_len = slice.list().len();
while index < len {
let value = &mut slice[index];
if predicate(value.0) {
total_len += value.1;
slice.swap(index, add_index);
add_index += 1;
}
index += 1;
}
let (a, b) = slice.split_at_mut(add_index);
(
ClusterMut {
list: a,
len: total_len,
},
ClusterMut {
list: b,
len: cluser_len - total_len,
},
)
}
}
#[cfg(test)]
mod tests {
use crate::Fraction;
fn raw_fraction(n: u64, d: u64) -> Fraction {
Fraction {
numerator: n,
denominator: d,
}
}
#[test]
fn fraction_1() {
assert_eq!(raw_fraction(8316, 2940).simplify(), Fraction::new(99, 35));
}
#[test]
fn fraction_2() {
assert_eq!(raw_fraction(2940, 8316).simplify(), Fraction::new(35, 99));
}
#[test]
fn fraction_3() {
assert_eq!(raw_fraction(29, 41).simplify(), Fraction::new(29, 41));
}
}