use super::Statistic;
use num_traits::Float;
use std::cmp::Ordering;
use std::fmt::Debug;
#[derive(Debug, Clone)]
pub struct EmpiricalCDF<T> {
sorted: Vec<T>,
}
impl<T: Clone> EmpiricalCDF<T> {
#[inline]
pub fn n(&self) -> usize {
self.sorted.len()
}
#[inline]
pub fn points(&self) -> &[T] {
&self.sorted
}
pub fn len(&self) -> usize {
self.sorted.len()
}
pub fn is_empty(&self) -> bool {
self.sorted.is_empty()
}
}
impl<T> EmpiricalCDF<T>
where
T: Float + Copy,
{
pub fn from_float_slice(data: &[T]) -> Self {
let mut sorted: Vec<T> = data.iter()
.copied()
.filter(|&x| !x.is_nan())
.collect();
sorted.sort_by(|a, b| a.partial_cmp(b).expect("NaNs already filtered"));
Self { sorted }
}
#[inline]
pub fn eval_float(&self, x: &T) -> f64 {
if x.is_nan() {
return f64::NAN;
}
if x.is_infinite() {
return if x.is_sign_positive() { 1.0 } else { 0.0 };
}
let n = self.sorted.len();
if n == 0 {
return f64::NAN;
}
let idx = self.sorted.partition_point(|v| {
v.partial_cmp(x).map_or(false, |ord| ord != Ordering::Greater)
});
idx as f64 / n as f64
}
#[inline]
fn count_leq(&self, x: &T) -> usize {
self.sorted.partition_point(|v| {
v.partial_cmp(x).map_or(false, |ord| ord != Ordering::Greater)
})
}
}
impl<T> PartialOrd for EmpiricalCDF<T>
where
T: Float + Copy + Debug,
{
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
if self.sorted.is_empty() || other.sorted.is_empty() {
return None;
}
let mut points: Vec<T> = Vec::with_capacity(self.sorted.len() + other.sorted.len());
points.extend_from_slice(&self.sorted);
points.extend_from_slice(&other.sorted);
points.sort_by(|a, b| a.partial_cmp(b).expect("No NaNs in ECDF points"));
points.dedup_by(|a, b| a == b);
let n_self = self.n() as u64;
let n_other = other.n() as u64;
let mut self_dominates = true; let mut other_dominates = true;
for &x in &points {
let k_self = self.count_leq(&x) as u64;
let k_other = other.count_leq(&x) as u64;
let self_le_other = k_self * n_other <= k_other * n_self;
let other_le_self = k_other * n_self <= k_self * n_other;
if !self_le_other {
self_dominates = false;
}
if !other_le_self {
other_dominates = false;
}
if !self_dominates && !other_dominates {
return None;
}
}
match (self_dominates, other_dominates) {
(true, true) => Some(Ordering::Equal),
(true, false) => Some(Ordering::Less), (false, true) => Some(Ordering::Greater), (false, false) => None,
}
}
}
impl<T> PartialEq for EmpiricalCDF<T>
where
T: Float + Copy + Debug,
{
fn eq(&self, other: &Self) -> bool {
self.partial_cmp(other) == Some(Ordering::Equal)
}
}
#[derive(Debug, Clone, Copy, Default)]
pub struct CDF;
impl<D, T> Statistic<D, EmpiricalCDF<T>> for CDF
where
D: AsRef<[T]>,
T: Float + Copy,
{
#[inline]
fn compute(&self, data: &D) -> EmpiricalCDF<T> {
EmpiricalCDF::from_float_slice(data.as_ref())
}
}