Skip to main content

entrenar/eval/drift/
statistical.rs

1//! Statistical helper functions for drift detection.
2
/// Counts how many samples fall into each bin described by `edges`.
///
/// Bin `i` covers the interval `(edges[i], edges[i + 1]]`: the lower edge is
/// exclusive and the upper edge is inclusive, so a sample equal to a bin's
/// lower edge is not counted in that bin. Each sample is credited to the first
/// bin that contains it; samples contained in no bin (including `NaN`) are
/// ignored.
///
/// Returns one count per bin, i.e. `edges.len() - 1` entries. Fewer than two
/// edges describe no bins at all, so the result is an empty vector.
pub fn bin_counts(data: &[f64], edges: &[f64]) -> Vec<usize> {
    // `saturating_sub` keeps an empty `edges` slice from underflowing `usize`.
    let mut counts = vec![0; edges.len().saturating_sub(1)];
    for &val in data {
        // Each window is one `[lower, upper]` pair of adjacent edges.
        let bin = edges
            .windows(2)
            .position(|pair| val > pair[0] && val <= pair[1]);
        if let Some(i) = bin {
            counts[i] += 1;
        }
    }
    counts
}
16
/// Approximates the p-value of a Kolmogorov-Smirnov statistic.
///
/// Evaluates the asymptotic Kolmogorov survival function
/// `Q(λ) = 2 * Σ_{k≥1} (-1)^{k+1} * exp(-2 * k² * λ²)` at `lambda`.
/// `lambda` is used as given; presumably the caller has already applied any
/// sample-size scaling to the raw statistic.
///
/// Returns a probability in `[0, 1]`. Non-positive and very small `lambda`
/// values yield `1.0`; a `NaN` input propagates as `NaN`.
pub fn ks_p_value(lambda: f64) -> f64 {
    // For small λ the alternating series converges extremely slowly (its terms
    // stay close to 1), so truncating it at `MAX_TERMS` would badly
    // underestimate the p-value. In that region Q(λ) is within 1e-12 of 1, so
    // returning 1 directly is both cheaper and far more accurate.
    const SMALL_LAMBDA: f64 = 0.2;
    const MAX_TERMS: i32 = 100;
    const TOLERANCE: f64 = 1e-10;

    // Also covers zero and negative inputs.
    if lambda <= SMALL_LAMBDA {
        return 1.0;
    }

    let lambda_sq = lambda * lambda;
    let mut sum = 0.0;
    let mut sign = 1.0;
    for k in 1..=MAX_TERMS {
        let magnitude = (-2.0 * f64::from(k).powi(2) * lambda_sq).exp();
        sum += sign * magnitude;
        // Terms shrink monotonically, so the first negligible one ends the sum.
        if magnitude < TOLERANCE {
            break;
        }
        sign = -sign;
    }
    // Rounding can push the result marginally outside the probability range.
    (2.0 * sum).clamp(0.0, 1.0)
}
34
35/// Approximate chi-square p-value using Wilson-Hilferty approximation
36pub fn chi_square_p_value(chi_sq: f64, df: usize) -> f64 {
37    if df == 0 || chi_sq <= 0.0 {
38        return 1.0;
39    }
40    let k = df as f64;
41    // Wilson-Hilferty transformation to normal
42    let z = ((chi_sq / k).powf(1.0 / 3.0) - (1.0 - 2.0 / (9.0 * k))) / (2.0 / (9.0 * k)).sqrt();
43    // Convert z to p-value (upper tail)
44    0.5 * (1.0 - erf(z / std::f64::consts::SQRT_2))
45}
46
/// Approximates the error function `erf(x)`.
///
/// Uses a five-term polynomial in `t = 1 / (1 + P * |x|)` multiplied by
/// `exp(-x²)`; the coefficients match the Abramowitz & Stegun 7.1.26 rational
/// approximation. The function is evaluated on `|x|` and the sign is restored
/// afterwards, so the result is odd in `x`.
pub fn erf(x: f64) -> f64 {
    const P: f64 = 0.3275911;
    // Polynomial coefficients, highest order first, for Horner evaluation.
    const LEADING: f64 = 1.061405429;
    const REMAINING: [f64; 4] = [-1.453152027, 1.421413741, -0.284496736, 0.254829592];

    let sign = if x >= 0.0 { 1.0 } else { -1.0 };
    let magnitude = x.abs();

    let t = 1.0 / (1.0 + P * magnitude);
    let poly = REMAINING.iter().fold(LEADING, |acc, &coeff| acc * t + coeff);
    let tail = poly * t * (-magnitude * magnitude).exp();

    sign * (1.0 - tail)
}