pub fn quantile(values: &[f64], p: f64) -> f64 {
if values.is_empty() {
return 0.0;
}
if values.len() == 1 {
return values[0];
}
let mut sorted: Vec<f64> = values.iter().copied().filter(|v| !v.is_nan()).collect();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
quantile_sorted(&sorted, p)
}
pub fn quantile_sorted(sorted: &[f64], p: f64) -> f64 {
let n = sorted.len();
if n == 0 {
return 0.0;
}
if n == 1 {
return sorted[0];
}
let rank = p * (n - 1) as f64;
let lo = rank.trunc() as usize;
let hi = (lo + 1).min(n - 1);
let frac = rank - lo as f64;
sorted[lo] + frac * (sorted[hi] - sorted[lo])
}
#[derive(Debug, Clone, PartialEq)]
pub struct BoxStats {
pub min: f64,
pub q1: f64,
pub median: f64,
pub q3: f64,
pub max: f64,
pub outliers: Vec<f64>,
}
pub fn boxplot_stats(values: &[f64], whisker_iqr: f64) -> BoxStats {
let mut sorted: Vec<f64> = values.iter().copied().filter(|v| !v.is_nan()).collect();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
if sorted.is_empty() {
return BoxStats {
min: 0.0,
q1: 0.0,
median: 0.0,
q3: 0.0,
max: 0.0,
outliers: Vec::new(),
};
}
let q1 = quantile_sorted(&sorted, 0.25);
let median = quantile_sorted(&sorted, 0.5);
let q3 = quantile_sorted(&sorted, 0.75);
let iqr = q3 - q1;
let lower = q1 - whisker_iqr * iqr;
let upper = q3 + whisker_iqr * iqr;
let inliers: Vec<f64> = sorted.iter().copied().filter(|v| *v >= lower && *v <= upper).collect();
let outliers: Vec<f64> = sorted.iter().copied().filter(|v| *v < lower || *v > upper).collect();
let min = inliers.first().copied().unwrap_or(q1);
let max = inliers.last().copied().unwrap_or(q3);
BoxStats {
min,
q1,
median,
q3,
max,
outliers,
}
}
pub fn normal_quantile(p: f64) -> f64 {
if p <= 0.0 || p >= 1.0 {
return f64::NAN;
}
const A: [f64; 6] = [
-3.969683028665376e+01,
2.209460984245205e+02,
-2.759285104469687e+02,
1.383577518672690e+02,
-3.066479806614716e+01,
2.506628277459239e+00,
];
const B: [f64; 5] = [
-5.447609879822406e+01,
1.615858368580409e+02,
-1.556989798598866e+02,
6.680131188771972e+01,
-1.328068155288572e+01,
];
const C: [f64; 6] = [
-7.784894002430293e-03,
-3.223964580411365e-01,
-2.400758277161838e+00,
-2.549732539343734e+00,
4.374664141464968e+00,
2.938163982698783e+00,
];
const D: [f64; 4] = [
7.784695709041462e-03,
3.224671290700398e-01,
2.445134137142996e+00,
3.754408661907416e+00,
];
let plow = 0.02425;
let phigh = 1.0 - plow;
if p < plow {
let q = (-2.0 * p.ln()).sqrt();
(((((C[0] * q + C[1]) * q + C[2]) * q + C[3]) * q + C[4]) * q + C[5])
/ ((((D[0] * q + D[1]) * q + D[2]) * q + D[3]) * q + 1.0)
} else if p > phigh {
let q = (-2.0 * (1.0 - p).ln()).sqrt();
-(((((C[0] * q + C[1]) * q + C[2]) * q + C[3]) * q + C[4]) * q + C[5])
/ ((((D[0] * q + D[1]) * q + D[2]) * q + D[3]) * q + 1.0)
} else {
let q = p - 0.5;
let r = q * q;
((((((A[0] * r + A[1]) * r + A[2]) * r + A[3]) * r + A[4]) * r + A[5]) * q)
/ (((((B[0] * r + B[1]) * r + B[2]) * r + B[3]) * r + B[4]) * r + 1.0)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn quantile_median_of_simple_list() {
assert_eq!(quantile(&[1.0, 2.0, 3.0, 4.0, 5.0], 0.5), 3.0);
}
#[test]
fn boxplot_flags_outliers() {
let s = boxplot_stats(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 100.0], 1.5);
assert_eq!(s.median, 4.5);
assert_eq!(s.outliers.len(), 1);
assert_eq!(s.outliers[0], 100.0);
}
#[test]
fn probit_of_half_is_zero() {
assert!(normal_quantile(0.5).abs() < 1e-9);
}
#[test]
fn probit_975_is_196() {
assert!((normal_quantile(0.975) - 1.96).abs() < 1e-3);
}
}