use crate::error::{StatsError, StatsResult};
use scirs2_core::ndarray::{Array1, ArrayView1};
use scirs2_core::numeric::{Float, NumCast};
#[inline(always)]
fn const_f64<F: Float + NumCast>(value: f64) -> F {
F::from(value).expect("Failed to convert constant to target float type")
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum QuantileInterpolation {
InvertedCdf,
AveragedInvertedCdf,
ClosestObservation,
InterpolatedInvertedCdf,
Hazen,
Weibull,
#[default]
Linear,
MedianUnbiased,
NormalUnbiased,
Midpoint,
Nearest,
Lower,
Higher,
}
#[allow(dead_code)]
pub fn quantile<F>(x: &ArrayView1<F>, q: F, method: QuantileInterpolation) -> StatsResult<F>
where
F: Float + NumCast + std::fmt::Display,
{
if x.is_empty() {
return Err(StatsError::InvalidArgument(
"Empty array provided".to_string(),
));
}
if q < F::zero() || q > F::one() {
return Err(StatsError::InvalidArgument(
"Quantile must be between 0 and 1".to_string(),
));
}
let mut sorteddata: Vec<F> = x.iter().cloned().collect();
sorteddata.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = F::from(sorteddata.len()).expect("Test/example failed");
match method {
QuantileInterpolation::Lower => {
let index = (q * (n - F::one()))
.floor()
.to_usize()
.expect("Failed to convert to usize");
Ok(sorteddata[index])
}
QuantileInterpolation::Higher => {
let index = (q * (n - F::one()))
.ceil()
.to_usize()
.expect("Failed to convert to usize")
.min(sorteddata.len() - 1);
Ok(sorteddata[index])
}
QuantileInterpolation::Nearest => {
let index = (q * (n - F::one()))
.round()
.to_usize()
.expect("Failed to convert to usize")
.min(sorteddata.len() - 1);
Ok(sorteddata[index])
}
QuantileInterpolation::Midpoint => {
let i_lower = (q * (n - F::one()))
.floor()
.to_usize()
.expect("Failed to convert to usize");
let i_upper = (q * (n - F::one()))
.ceil()
.to_usize()
.expect("Failed to convert to usize")
.min(sorteddata.len() - 1);
Ok((sorteddata[i_lower] + sorteddata[i_upper]) / const_f64::<F>(2.0))
}
QuantileInterpolation::InvertedCdf => {
let jg = q * n;
let j = jg.floor().to_usize().expect("Failed to convert to usize");
let g = if jg % F::one() > F::zero() {
F::one()
} else {
F::zero()
};
let j = j.min(sorteddata.len() - 1);
let jp1 = (j + 1).min(sorteddata.len() - 1);
if g <= F::epsilon() {
Ok(sorteddata[j])
} else {
Ok(sorteddata[jp1])
}
}
QuantileInterpolation::AveragedInvertedCdf => {
let jg = q * n;
let j = jg.floor().to_usize().expect("Failed to convert to usize");
let g = if jg % F::one() > F::zero() {
const_f64::<F>(0.5)
} else {
F::zero()
};
let j = j.min(sorteddata.len() - 1);
let jp1 = (j + 1).min(sorteddata.len() - 1);
if g <= F::epsilon() {
Ok(sorteddata[j])
} else {
Ok(sorteddata[j] * (F::one() - g) + sorteddata[jp1] * g)
}
}
QuantileInterpolation::ClosestObservation => {
let jg = q * n - const_f64::<F>(0.5);
let j = jg.floor().to_usize().expect("Failed to convert to usize");
let g = if jg % F::one() == F::zero() && j % 2 == 1 {
F::zero()
} else {
F::one()
};
let j = j.min(sorteddata.len() - 1);
let jp1 = (j + 1).min(sorteddata.len() - 1);
if g <= F::epsilon() {
Ok(sorteddata[j])
} else {
Ok(sorteddata[jp1])
}
}
_ => {
let m = match method {
QuantileInterpolation::InterpolatedInvertedCdf => F::zero(),
QuantileInterpolation::Hazen => const_f64::<F>(0.5),
QuantileInterpolation::Weibull => q,
QuantileInterpolation::Linear => F::one() - q,
QuantileInterpolation::MedianUnbiased => {
q / const_f64::<F>(3.0) + const_f64::<F>(1.0 / 3.0)
}
QuantileInterpolation::NormalUnbiased => {
q / const_f64::<F>(4.0) + const_f64::<F>(3.0 / 8.0)
}
_ => unreachable!(),
};
let jg = q * n + m - F::one();
let j = jg.floor().to_usize().expect("Failed to convert to usize");
let g = jg % F::one();
let j = if jg < F::zero() {
0
} else {
j.min(sorteddata.len() - 1)
};
let jp1 = (j + 1).min(sorteddata.len() - 1);
let g = if jg < F::zero() { F::zero() } else { g };
Ok((F::one() - g) * sorteddata[j] + g * sorteddata[jp1])
}
}
}
#[allow(dead_code)]
pub fn percentile<F>(x: &ArrayView1<F>, p: F, method: QuantileInterpolation) -> StatsResult<F>
where
F: Float + NumCast + std::fmt::Display,
{
if x.is_empty() {
return Err(StatsError::InvalidArgument(
"Empty array provided".to_string(),
));
}
if p < F::zero() || p > const_f64::<F>(100.0) {
return Err(StatsError::InvalidArgument(
"Percentile must be between 0 and 100".to_string(),
));
}
let q = p / const_f64::<F>(100.0);
quantile(x, q, method)
}
#[allow(dead_code)]
pub fn quartiles<F>(x: &ArrayView1<F>, method: QuantileInterpolation) -> StatsResult<Array1<F>>
where
F: Float + NumCast + std::fmt::Display,
{
if x.is_empty() {
return Err(StatsError::InvalidArgument(
"Empty array provided".to_string(),
));
}
let q1 = quantile(x, const_f64::<F>(0.25), method)?;
let q2 = quantile(x, const_f64::<F>(0.5), method)?;
let q3 = quantile(x, const_f64::<F>(0.75), method)?;
Ok(Array1::from(vec![q1, q2, q3]))
}
#[allow(dead_code)]
pub fn quintiles<F>(x: &ArrayView1<F>, method: QuantileInterpolation) -> StatsResult<Array1<F>>
where
F: Float + NumCast + std::fmt::Display,
{
if x.is_empty() {
return Err(StatsError::InvalidArgument(
"Empty array provided".to_string(),
));
}
let q1 = quantile(x, const_f64::<F>(0.2), method)?;
let q2 = quantile(x, const_f64::<F>(0.4), method)?;
let q3 = quantile(x, const_f64::<F>(0.6), method)?;
let q4 = quantile(x, const_f64::<F>(0.8), method)?;
Ok(Array1::from(vec![q1, q2, q3, q4]))
}
#[allow(dead_code)]
pub fn deciles<F>(x: &ArrayView1<F>, method: QuantileInterpolation) -> StatsResult<Array1<F>>
where
F: Float + NumCast + std::fmt::Display,
{
if x.is_empty() {
return Err(StatsError::InvalidArgument(
"Empty array provided".to_string(),
));
}
let mut result = Vec::with_capacity(9);
for i in 1..=9 {
let p = F::from(i * 10).expect("Test/example failed");
let decile = percentile(x, p, method)?;
result.push(decile);
}
Ok(Array1::from(result))
}
#[allow(dead_code)]
pub fn boxplot_stats<F>(
x: &ArrayView1<F>,
whis: Option<F>,
method: QuantileInterpolation,
) -> StatsResult<(F, F, F, F, F, Vec<F>)>
where
F: Float + NumCast + std::fmt::Debug + std::fmt::Display,
{
if x.is_empty() {
return Err(StatsError::InvalidArgument(
"Empty array provided".to_string(),
));
}
let whis_factor = whis.unwrap_or(const_f64::<F>(1.5));
if whis_factor < F::zero() {
return Err(StatsError::InvalidArgument(
"Whisker range must be non-negative".to_string(),
));
}
let q1 = quantile(x, const_f64::<F>(0.25), method)?;
let q2 = quantile(x, const_f64::<F>(0.5), method)?;
let q3 = quantile(x, const_f64::<F>(0.75), method)?;
let iqr = q3 - q1;
let whislo_limit = q1 - whis_factor * iqr;
let whishi_limit = q3 + whis_factor * iqr;
let mut sorteddata: Vec<F> = x.iter().cloned().collect();
sorteddata.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let whislo = *sorteddata
.iter()
.find(|&&val| val >= whislo_limit)
.unwrap_or(&sorteddata[0]);
let whishi = *sorteddata
.iter()
.rev()
.find(|&&val| val <= whishi_limit)
.unwrap_or(&sorteddata[sorteddata.len() - 1]);
let outliers: Vec<F> = sorteddata
.iter()
.filter(|&&val| val < whislo || val > whishi)
.cloned()
.collect();
Ok((q1, q2, q3, whislo, whishi, outliers))
}
#[allow(dead_code)]
pub fn winsorized_mean<F>(x: &ArrayView1<F>, limits: F) -> StatsResult<F>
where
F: Float + NumCast + std::iter::Sum + std::fmt::Display,
{
if x.is_empty() {
return Err(StatsError::InvalidArgument(
"Empty array provided".to_string(),
));
}
if limits < F::zero() || limits > const_f64::<F>(0.5) {
return Err(StatsError::InvalidArgument(
"Limits must be between 0 and 0.5".to_string(),
));
}
if limits == F::zero() {
return Ok(x.iter().cloned().sum::<F>()
/ F::from(x.len()).expect("Failed to convert length to float"));
}
let mut data: Vec<F> = x.iter().cloned().collect();
data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = data.len();
let n_replace = (F::from(n).expect("Failed to convert to float") * limits)
.to_usize()
.expect("Failed to convert to usize")
.max(1);
let low_val = data[n_replace];
let high_val = data[n - n_replace - 1];
for i in 0..n_replace {
data[i] = low_val;
data[n - i - 1] = high_val;
}
let mean = data.iter().cloned().sum::<F>() / F::from(n).expect("Test/example failed");
Ok(mean)
}
#[allow(dead_code)]
pub fn winsorized_variance<F>(x: &ArrayView1<F>, limits: F, ddof: usize) -> StatsResult<F>
where
F: Float + NumCast + std::iter::Sum + std::fmt::Display,
{
if x.is_empty() {
return Err(StatsError::InvalidArgument(
"Empty array provided".to_string(),
));
}
if limits < F::zero() || limits > const_f64::<F>(0.5) {
return Err(StatsError::InvalidArgument(
"Limits must be between 0 and 0.5".to_string(),
));
}
if ddof >= x.len() {
return Err(StatsError::InvalidArgument(
"Degrees of freedom must be less than the number of observations".to_string(),
));
}
let mut data: Vec<F> = x.iter().cloned().collect();
data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = data.len();
let n_replace = (F::from(n).expect("Failed to convert to float") * limits)
.to_usize()
.expect("Failed to convert to usize")
.max(1);
let low_val = data[n_replace];
let high_val = data[n - n_replace - 1];
for i in 0..n_replace {
data[i] = low_val;
data[n - i - 1] = high_val;
}
let mean = data.iter().cloned().sum::<F>() / F::from(n).expect("Test/example failed");
let sum_sq_dev = data.iter().map(|&x| (x - mean).powi(2)).sum::<F>();
let denom = F::from(n - ddof).expect("Test/example failed");
Ok(sum_sq_dev / denom)
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_abs_diff_eq;
use scirs2_core::ndarray::array;
#[test]
fn test_quantile() {
let data = array![1.0, 3.0, 5.0, 7.0, 9.0];
let median = quantile(&data.view(), 0.5, QuantileInterpolation::Linear)
.expect("Test/example failed");
assert_abs_diff_eq!(median, 5.0, epsilon = 1e-10);
let q1 = quantile(&data.view(), 0.25, QuantileInterpolation::Linear)
.expect("Test/example failed");
assert_abs_diff_eq!(q1, 3.0, epsilon = 1e-10);
let q3 = quantile(&data.view(), 0.75, QuantileInterpolation::Linear)
.expect("Test/example failed");
assert_abs_diff_eq!(q3, 7.0, epsilon = 1e-10);
let q_lower =
quantile(&data.view(), 0.4, QuantileInterpolation::Lower).expect("Test/example failed");
assert_abs_diff_eq!(q_lower, 3.0, epsilon = 1e-10);
let q_higher = quantile(&data.view(), 0.4, QuantileInterpolation::Higher)
.expect("Test/example failed");
assert_abs_diff_eq!(q_higher, 5.0, epsilon = 1e-10);
let q_midpoint = quantile(&data.view(), 0.4, QuantileInterpolation::Midpoint)
.expect("Test/example failed");
assert_abs_diff_eq!(q_midpoint, 4.0, epsilon = 1e-10);
}
#[test]
fn test_quantile_r_methods() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0];
let q_type1 = quantile(&data.view(), 0.4, QuantileInterpolation::InvertedCdf)
.expect("Test/example failed");
assert_abs_diff_eq!(q_type1, 5.0, epsilon = 1e-10);
let q_type2 = quantile(
&data.view(),
0.4,
QuantileInterpolation::AveragedInvertedCdf,
)
.expect("Test/example failed");
assert_abs_diff_eq!(q_type2, 4.5, epsilon = 1e-10);
let q_type3 = quantile(&data.view(), 0.4, QuantileInterpolation::ClosestObservation)
.expect("Test/example failed");
assert_abs_diff_eq!(q_type3, 5.0, epsilon = 1e-10);
let q_type4 = quantile(
&data.view(),
0.4,
QuantileInterpolation::InterpolatedInvertedCdf,
)
.expect("Test/example failed");
assert_abs_diff_eq!(q_type4, 3.6, epsilon = 1e-10);
let q_type5 =
quantile(&data.view(), 0.4, QuantileInterpolation::Hazen).expect("Test/example failed");
assert_abs_diff_eq!(q_type5, 4.1, epsilon = 1e-10);
let q_type6 = quantile(&data.view(), 0.4, QuantileInterpolation::Weibull)
.expect("Test/example failed");
assert_abs_diff_eq!(q_type6, 4.0, epsilon = 1e-10);
let q_type7 = quantile(&data.view(), 0.4, QuantileInterpolation::Linear)
.expect("Test/example failed");
assert_abs_diff_eq!(q_type7, 4.2, epsilon = 1e-10);
let q_type8 = quantile(&data.view(), 0.4, QuantileInterpolation::MedianUnbiased)
.expect("Test/example failed");
assert_abs_diff_eq!(q_type8, 4.066666666666666, epsilon = 1e-10);
let q_type9 = quantile(&data.view(), 0.4, QuantileInterpolation::NormalUnbiased)
.expect("Test/example failed");
assert_abs_diff_eq!(q_type9, 4.075, epsilon = 1e-10);
}
#[test]
fn test_percentile() {
let data = array![1.0, 3.0, 5.0, 7.0, 9.0];
let p50 = percentile(&data.view(), 50.0, QuantileInterpolation::Linear)
.expect("Test/example failed");
assert_abs_diff_eq!(p50, 5.0, epsilon = 1e-10);
let p25 = percentile(&data.view(), 25.0, QuantileInterpolation::Linear)
.expect("Test/example failed");
assert_abs_diff_eq!(p25, 3.0, epsilon = 1e-10);
let p75 = percentile(&data.view(), 75.0, QuantileInterpolation::Linear)
.expect("Test/example failed");
assert_abs_diff_eq!(p75, 7.0, epsilon = 1e-10);
assert!(percentile(&data.view(), -1.0, QuantileInterpolation::Linear).is_err());
assert!(percentile(&data.view(), 101.0, QuantileInterpolation::Linear).is_err());
}
#[test]
fn test_quartiles() {
let data = array![1.0, 3.0, 5.0, 7.0, 9.0];
let q =
quartiles(&data.view(), QuantileInterpolation::Linear).expect("Test/example failed");
assert_abs_diff_eq!(q[0], 3.0, epsilon = 1e-10); assert_abs_diff_eq!(q[1], 5.0, epsilon = 1e-10); assert_abs_diff_eq!(q[2], 7.0, epsilon = 1e-10); }
#[test]
fn test_quintiles() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
let q =
quintiles(&data.view(), QuantileInterpolation::Linear).expect("Test/example failed");
assert_abs_diff_eq!(q[0], 2.8, epsilon = 1e-10); assert_abs_diff_eq!(q[1], 4.6, epsilon = 1e-10); assert_abs_diff_eq!(q[2], 6.4, epsilon = 1e-10); assert_abs_diff_eq!(q[3], 8.2, epsilon = 1e-10); }
#[test]
fn test_deciles() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
let d = deciles(&data.view(), QuantileInterpolation::Linear).expect("Test/example failed");
assert_abs_diff_eq!(d[0], 1.9, epsilon = 1e-10); assert_abs_diff_eq!(d[4], 5.5, epsilon = 1e-10); assert_abs_diff_eq!(d[8], 9.1, epsilon = 1e-10); }
#[test]
fn test_boxplot_stats() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 20.0];
let (q1, q2, q3, whislo, whishi, outliers) =
boxplot_stats(&data.view(), Some(1.5), QuantileInterpolation::Linear)
.expect("Test/example failed");
assert_abs_diff_eq!(q1, 3.25, epsilon = 1e-10); assert_abs_diff_eq!(q2, 5.5, epsilon = 1e-10); assert_abs_diff_eq!(q3, 7.75, epsilon = 1e-10);
assert_abs_diff_eq!(whislo, 1.0, epsilon = 1e-10);
assert_abs_diff_eq!(whishi, 9.0, epsilon = 1e-10);
assert_eq!(outliers.len(), 1);
assert_abs_diff_eq!(outliers[0], 20.0, epsilon = 1e-10);
}
#[test]
fn test_winsorized_mean() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 100.0];
let mean_10 = winsorized_mean(&data.view(), 0.1).expect("Test/example failed");
assert_abs_diff_eq!(mean_10, 5.5, epsilon = 1e-10);
let mean_20 = winsorized_mean(&data.view(), 0.2).expect("Test/example failed");
assert_abs_diff_eq!(mean_20, 5.5, epsilon = 1e-10);
let mean_0 = winsorized_mean(&data.view(), 0.0).expect("Test/example failed");
let expected_mean = data.iter().sum::<f64>() / data.len() as f64;
assert_abs_diff_eq!(mean_0, expected_mean, epsilon = 1e-10);
}
#[test]
fn test_winsorized_variance() {
let data = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 100.0];
let var_10 = winsorized_variance(&data.view(), 0.1, 1).expect("Test/example failed");
assert_abs_diff_eq!(var_10, 7.388888888888889, epsilon = 1e-10);
let var_20 = winsorized_variance(&data.view(), 0.2, 0).expect("Test/example failed");
assert_abs_diff_eq!(var_20, 4.25, epsilon = 1e-10);
}
}