rsomics-gradient-trajectory 0.1.0

Gradient/trajectory ANOVA over ordination coordinates (QIIME-style microbiome trajectory analysis): per-group trajectory vectors plus closed-form one-way ANOVA F/p, selectable algorithm — a Rust reimplementation of scikit-bio's skbio.stats.gradient.
Documentation
/// One-way ANOVA, the closed-form `scipy.stats.f_oneway`. Returns `(F, p)` where
/// the p-value is the F-distribution survival function evaluated through the
/// regularised incomplete beta function — the same path scipy takes.
pub fn f_oneway(groups: &[Vec<f64>]) -> (f64, f64) {
    let k = groups.len();
    let n_total: usize = groups.iter().map(Vec::len).sum();

    let grand: f64 = groups.iter().flatten().sum::<f64>() / n_total as f64;
    let mut ss_between = 0.0;
    let mut ss_within = 0.0;
    for g in groups {
        let m = g.iter().sum::<f64>() / g.len() as f64;
        ss_between += g.len() as f64 * (m - grand).powi(2);
        for &x in g {
            ss_within += (x - m).powi(2);
        }
    }
    let df_between = (k - 1) as f64;
    let df_within = (n_total - k) as f64;
    let f = (ss_between / df_between) / (ss_within / df_within);
    let p = betainc(
        df_within / 2.0,
        df_between / 2.0,
        df_within / (df_within + df_between * f),
    );
    (f, p)
}

/// Regularised incomplete beta function I_x(a, b), matching `scipy.special.betainc`
/// to ~1e-13. Lentz's modified continued fraction (Numerical Recipes `betai`).
pub fn betainc(a: f64, b: f64, x: f64) -> f64 {
    if x <= 0.0 {
        return 0.0;
    }
    if x >= 1.0 {
        return 1.0;
    }
    let ln_beta = ln_gamma(a + b) - ln_gamma(a) - ln_gamma(b) + a * x.ln() + b * (1.0 - x).ln();
    let front = ln_beta.exp();
    if x < (a + 1.0) / (a + b + 2.0) {
        front * betacf(a, b, x) / a
    } else {
        1.0 - front * betacf(b, a, 1.0 - x) / b
    }
}

fn betacf(a: f64, b: f64, x: f64) -> f64 {
    const TINY: f64 = 1e-30;
    const EPS: f64 = 3e-16;
    let qab = a + b;
    let qap = a + 1.0;
    let qam = a - 1.0;
    let mut c = 1.0;
    let mut d = 1.0 - qab * x / qap;
    if d.abs() < TINY {
        d = TINY;
    }
    d = 1.0 / d;
    let mut h = d;
    for m in 1..=300 {
        let m = m as f64;
        let m2 = 2.0 * m;
        let aa = m * (b - m) * x / ((qam + m2) * (a + m2));
        d = 1.0 + aa * d;
        if d.abs() < TINY {
            d = TINY;
        }
        c = 1.0 + aa / c;
        if c.abs() < TINY {
            c = TINY;
        }
        d = 1.0 / d;
        h *= d * c;
        let aa = -(a + m) * (qab + m) * x / ((a + m2) * (qap + m2));
        d = 1.0 + aa * d;
        if d.abs() < TINY {
            d = TINY;
        }
        c = 1.0 + aa / c;
        if c.abs() < TINY {
            c = TINY;
        }
        d = 1.0 / d;
        let del = d * c;
        h *= del;
        if (del - 1.0).abs() < EPS {
            break;
        }
    }
    h
}

/// Lanczos approximation, g = 7, n = 9 (Numerical Recipes coefficients).
fn ln_gamma(x: f64) -> f64 {
    const C: [f64; 9] = [
        0.999_999_999_999_809_9,
        676.520_368_121_885_1,
        -1_259.139_216_722_402_8,
        771.323_428_777_653_1,
        -176.615_029_162_140_6,
        12.507_343_278_686_905,
        -0.138_571_095_265_720_12,
        9.984_369_578_019_572e-6,
        1.505_632_735_149_311_6e-7,
    ];
    if x < 0.5 {
        // reflection
        (std::f64::consts::PI / (std::f64::consts::PI * x).sin()).ln() - ln_gamma(1.0 - x)
    } else {
        let x = x - 1.0;
        let mut a = C[0];
        let t = x + 7.5;
        for (i, &c) in C.iter().enumerate().skip(1) {
            a += c / (x + i as f64);
        }
        0.5 * (2.0 * std::f64::consts::PI).ln() + (x + 0.5) * t.ln() - t + a.ln()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn close(a: f64, b: f64, tol: f64) {
        assert!((a - b).abs() <= tol, "{a} vs {b} (tol {tol})");
    }

    #[test]
    #[allow(clippy::excessive_precision)]
    fn betainc_against_scipy() {
        // scipy.special.betainc reference grid (captured on the 4090 oracle).
        let cases = [
            (0.5, 0.5, 0.3, 0.369_010_119_565_545_36),
            (1.0, 1.0, 0.5, 0.5),
            (2.0, 3.0, 0.4, 0.524_799_999_999_999_93),
            (5.0, 7.5, 0.6, 0.924_235_640_210_277_04),
            (0.5, 10.0, 0.05, 0.682_848_424_534_454_71),
            (50.0, 2.0, 0.999, 0.998_765_909_606_882_94),
            (1.5, 1.5, 0.5, 0.499_999_999_999_999_8),
            (10.0, 1.0, 0.8, 0.107_374_182_400_000_05),
            (7.0, 2.0, 0.5, 0.035_156_25),
            // betainc(0.5, 1, 0.5) = sqrt(0.5) exactly.
            (0.5, 1.0, 0.5, std::f64::consts::FRAC_1_SQRT_2),
            (100.0, 100.0, 0.5, 0.499_999_999_999_999_6),
        ];
        for (a, b, x, want) in cases {
            close(betainc(a, b, x), want, 1e-12);
        }
    }

    #[test]
    fn f_oneway_against_scipy() {
        let g1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let g2 = vec![2.0, 3.0, 4.0, 5.0, 7.0];
        let g3 = vec![1.0, 1.5, 2.0, 2.0, 3.0];
        let (f, p) = f_oneway(&[g1, g2, g3]);
        close(f, 2.940_740_740_740_741_5, 1e-12);
        close(p, 0.091_341_153_689_852_25, 1e-12);
    }

    #[test]
    fn identical_groups_f_zero_p_one() {
        let g = vec![1.0, 2.0, 3.0];
        let (f, p) = f_oneway(&[g.clone(), g.clone(), g]);
        close(f, 0.0, 1e-12);
        close(p, 1.0, 1e-12);
    }
}