limma-rust 0.1.0

//! Mid-p z-score equivalents of hypergeometric deviates.
//!
//! Pure-Rust port of limma's `zscoreHyper.R` ([`zscore_hyper`]): the signed
//! standard-normal deviate matching the mid-p-value of a hypergeometric count.
//! Tail probabilities are accumulated in log space (via [`logsumexp`]) so the
//! result stays accurate deep into either tail, as in R's `log.p = TRUE` path.

use crate::logsumexp::logsumexp;
use crate::special::ln_gamma;
use crate::zscore::zscore_from_log_tails;
use std::f64::consts::LN_2;

/// `ln C(n, k)` (log binomial coefficient); `-inf` when `k > n`.
fn lchoose(n: u64, k: u64) -> f64 {
    if k > n {
        return f64::NEG_INFINITY;
    }
    ln_gamma((n + 1) as f64) - ln_gamma((k + 1) as f64) - ln_gamma((n - k + 1) as f64)
}

/// `ln dhyper(x; m, n, k)`: log hypergeometric pmf for drawing `x` white balls
/// in `k` draws from an urn of `m` white and `n` black. `-inf` outside the
/// support `max(0, k-n) <= x <= min(m, k)`.
fn ln_dhyper(x: i64, m: u64, n: u64, k: u64) -> f64 {
    if x < 0 {
        return f64::NEG_INFINITY;
    }
    let x = x as u64;
    if x > m || x > k || k - x > n {
        return f64::NEG_INFINITY;
    }
    lchoose(m, x) + lchoose(n, k - x) - lchoose(m + n, k)
}

/// `zscoreHyper(q, m, n, k)`: z-score equivalent of a hypergeometric deviate `q`
/// (white balls drawn) for an urn of `m` white and `n` black with `k` draws.
/// Uses the mid-p-value — the full tail plus half the point mass at `q` — and
/// maps the smaller tail through the normal quantile in log space.
pub fn zscore_hyper(q: i64, m: u64, n: u64, k: u64) -> f64 {
    let lo = k.saturating_sub(n) as i64;
    let hi = m.min(k) as i64;

    let ln_d = ln_dhyper(q, m, n, k) - LN_2;

    // ln P(X > q) and ln P(X < q), summed over the support in log space.
    let mut ln_pupper = f64::NEG_INFINITY;
    for x in (q + 1).max(lo)..=hi {
        ln_pupper = logsumexp(ln_pupper, ln_dhyper(x, m, n, k));
    }
    let mut ln_plower = f64::NEG_INFINITY;
    for x in lo..=(q - 1).min(hi) {
        ln_plower = logsumexp(ln_plower, ln_dhyper(x, m, n, k));
    }

    // Add half the point mass to each tail (mid-p), preserving log accuracy.
    let pmidupper = logsumexp(ln_pupper, ln_d);
    let pmidlower = logsumexp(ln_plower, ln_d);

    zscore_from_log_tails(pmidlower, pmidupper)
}

#[cfg(test)]
mod tests {
    use super::*;

    fn close(a: f64, b: f64, tol: f64) -> bool {
        (a - b).abs() <= tol + tol * b.abs()
    }

    #[test]
    fn zscore_hyper_matches_r() {
        // Reference: zscoreHyper(q, m, n, k) in limma 3.68.3.
        let q = [5, 1, 8, 0, 3];
        let want = [
            1.85530391853958,
            -1.35476719565062,
            4.47368039807182,
            -2.29868959056222,
            0.281691411313753,
        ];
        for i in 0..q.len() {
            let got = zscore_hyper(q[i], 10, 20, 8);
            assert!(
                close(got, want[i], 1e-9),
                "q={}: got {got}, want {}",
                q[i],
                want[i]
            );
        }
    }

    #[test]
    fn zscore_hyper_is_symmetric_about_mean() {
        // m=n, k=20: the mean is q=10, where z is ~0, and q=5/15 are mirror
        // images. Reference: zscoreHyper(c(15,10,5), 50, 50, 20).
        let want = [2.45887335894704, 5.56583284934354e-16, -2.45887335894704];
        let q = [15, 10, 5];
        for i in 0..q.len() {
            let got = zscore_hyper(q[i], 50, 50, 20);
            assert!(close(got, want[i], 1e-9), "q={}: got {got}", q[i]);
        }
    }
}