use std::iter::IntoIterator;
#[cfg(feature = "parallel")]
use rayon::prelude::*;
use statrs::distribution::{ContinuousCDF, Normal};
use crate::{Computation, Error, Float};
pub fn lilliefors<T: Float, I: IntoIterator<Item = T>>(data: I) -> Result<Computation<T>, Error> {
let data: Vec<T> = data.into_iter().collect();
let n = data.len();
if n < 5 {
return Err(Error::InsufficientSampleSize {
given: n,
needed: 5,
});
}
if data.iter().any(|&v| v.is_nan()) {
return Err(Error::ContainsNaN);
}
let mut sorted_data = data;
sort_if_parallel!(sorted_data.as_mut_slice(), |a, b| a.partial_cmp(b).unwrap());
let n_t = T::from(n).unwrap();
let mean = iter_if_parallel!(&sorted_data).copied().sum::<T>() / n_t;
let variance = iter_if_parallel!(&sorted_data).map(|&x| (x - mean).powi(2)).sum::<T>()
/ T::from(n - 1).unwrap();
let std_dev = variance.sqrt();
if std_dev < T::epsilon() {
return Err(Error::ZeroRange);
}
let standard_normal = Normal::new(0.0, 1.0)?;
#[cfg(feature = "parallel")]
let (d_plus, d_minus) = (0..n)
.into_par_iter()
.map(|i| {
let x = sorted_data[i];
let z = (x - mean) / std_dev;
let p = T::from(standard_normal.cdf(z.to_f64().unwrap())).unwrap();
let dp = T::from((i + 1) as f64 / n as f64).unwrap() - p;
let dm = p - T::from(i as f64 / n as f64).unwrap();
(dp, dm)
})
.reduce(|| (T::neg_infinity(), T::neg_infinity()), |a, b| (a.0.max(b.0), a.1.max(b.1)));
#[cfg(not(feature = "parallel"))]
let (d_plus, d_minus) = (0..n)
.map(|i| {
let x = sorted_data[i];
let z = (x - mean) / std_dev;
let p = T::from(standard_normal.cdf(z.to_f64().unwrap())).unwrap();
let dp = T::from((i + 1) as f64 / n as f64).unwrap() - p;
let dm = p - T::from(i as f64 / n as f64).unwrap();
(dp, dm)
})
.fold((T::neg_infinity(), T::neg_infinity()), |a, b| (a.0.max(b.0), a.1.max(b.1)));
let k = d_plus.max(d_minus);
let (kd, nd) = if n <= 100 {
(k, T::from(n).unwrap())
} else {
(k * (n_t / T::from(100.0).unwrap()).powf(T::from(0.49).unwrap()), T::from(100.0).unwrap())
};
let mut p_value = (-T::from(7.01256).unwrap() * kd.powi(2) * (nd + T::from(2.78019).unwrap())
+ T::from(2.99587).unwrap() * kd * (nd + T::from(2.78019).unwrap()).sqrt()
- T::from(0.122_119).unwrap()
+ T::from(0.974_598).unwrap() / nd.sqrt()
+ T::from(1.67997).unwrap() / nd)
.exp();
if p_value > T::from(0.1).unwrap() {
let kk = (n_t.sqrt() - T::from(0.01).unwrap() + T::from(0.85).unwrap() / n_t.sqrt()) * k;
p_value = if kk <= T::from(0.302).unwrap() {
T::one()
} else if kk <= T::from(0.5).unwrap() {
T::from(2.76773).unwrap() - T::from(19.828_315).unwrap() * kk
+ T::from(80.709_644).unwrap() * kk.powi(2)
- T::from(138.55152).unwrap() * kk.powi(3)
+ T::from(81.218_052).unwrap() * kk.powi(4)
} else if kk <= T::from(0.9).unwrap() {
-T::from(4.901_232).unwrap() + T::from(40.662_806).unwrap() * kk
- T::from(97.490_286).unwrap() * kk.powi(2)
+ T::from(94.029_866).unwrap() * kk.powi(3)
- T::from(32.355_711).unwrap() * kk.powi(4)
} else if kk <= T::from(1.31).unwrap() {
T::from(6.198_765).unwrap() - T::from(19.558_097).unwrap() * kk
+ T::from(23.186_922).unwrap() * kk.powi(2)
- T::from(12.234_627).unwrap() * kk.powi(3)
+ T::from(2.423_045).unwrap() * kk.powi(4)
} else {
T::zero()
};
}
Ok(Computation {
statistic: k,
p_value: p_value.max(T::zero()).min(T::one()), })
}