// gsva-rust 0.1.0

//! Kernel cumulative-distribution estimates for GSVA's `kcdf` step.
//!
//! For each gene (row) GSVA estimates a smoothed CDF of that gene's expression
//! across samples and evaluates it at every sample. The Bioconductor C routine
//! `matrix_density_R` (`src/kernel_estimation.c`) computes, for a Gaussian
//! kernel,
//!
//! ```text
//! left_tail[g, j] = (1/n) * Σ_l  Φ_table((x[g,j] − x[g,l]) / bw_g)
//! r[g, j]         = logit(left_tail[g, j]) = −log((1 − left_tail) / left_tail)
//! ```
//!
//! with per-gene bandwidth `bw_g = sd(row_g) / 4` (sample sd, denominator
//! `n − 1`) and `Φ_table` a 10 001-point lookup of the standard-normal CDF.
//! GSVA then takes per-sample ranks of `r`. Because `logit` is strictly
//! increasing, those ranks equal the ranks of `left_tail`, so this module
//! returns the raw `left_tail` values and lets the caller rank them — avoiding
//! the `±∞` that `logit` produces at the saturated `0`/`1` ends.
//!
//! The standard-normal CDF is computed with W. J. Cody's (1969) rational
//! approximation — the same algorithm R uses in `pnorm` — so the lookup table is
//! bit-for-bit identical to GSVA's, and `left_tail` matches `matrix_density_R`
//! to full `f64` precision (verified against R 4.6.0 / GSVA 2.6.2).

// The Cody (1969) `pnorm` coefficients below are transcribed verbatim from R's
// `src/nmath/pnorm.c` so they can be diffed against the original, and the unit
// tests hold R's `%.17g` console output verbatim. Some literals carry digits
// past `f64` resolution — kept intentionally, for provenance — so silence the
// excessive-precision lint for the whole module.
#![allow(clippy::excessive_precision)]

use crate::matrix::ExprMatrix;

/// Bandwidth divisor: `bw = sd / 4` (`SIGMA_FACTOR` in `kernel_estimation.c`).
/// `bandwidth` divides the per-gene sample standard deviation by this value.
const SIGMA_FACTOR: f64 = 4.0;
/// Number of intervals in the precomputed CDF table (`PRECOMPUTE_RESOLUTION`).
/// The table itself holds `PRECOMPUTE_RESOLUTION + 1` entries because both end
/// points (`0` and `MAX_PRECOMPUTE`) are stored.
const PRECOMPUTE_RESOLUTION: usize = 10_000;
/// Largest `|z|` the table covers, in units of `bw` (`MAX_PRECOMPUTE`).
/// Arguments beyond `±MAX_PRECOMPUTE` are clamped to `0`/`1` by the lookup
/// instead of being indexed.
const MAX_PRECOMPUTE: f64 = 10.0;

/// Standard-normal cumulative distribution function `Φ(x)`.
///
/// Port of W. J. Cody's rational Chebyshev approximation (the algorithm in R's
/// `src/nmath/pnorm.c`). On `x ≥ 0` it reproduces R's `pnorm` bit-for-bit across
/// the whole table range `[0, 10]` (verified against R 4.6.0). For `x < 0` it
/// reflects via `Φ(x) = 1 − Φ(−x)`; the GSVA table only ever evaluates `x ≥ 0`.
/// A consequence of the reflection is that the far lower tail is only resolved
/// in absolute (not relative) terms.
///
/// # Saturation
///
/// Like `pnorm.c`, the lower-tail result is returned as exactly `1.0` once
/// `x ≥ 8.2924` (and therefore `0.0` for `x ≤ −8.2924` through the reflection).
/// Without this guard the far-tail branch evaluates `(y − xsq) * (y + xsq)`
/// with infinite operands for `x = +∞` (or any `x` whose `16·x` overflows) and
/// returns `NaN`. At the cutoff the complementary tail is already below
/// `2⁻⁵⁴`, so for in-range arguments `1 − small` rounded to `1.0` anyway and
/// table values are unaffected.
///
/// `NaN` input yields `NaN`.
pub fn std_normal_cdf(x: f64) -> f64 {
    if x < 0.0 {
        return 1.0 - std_normal_cdf(-x);
    }

    // Same lower-tail cutoff as R's pnorm.c (`x < 8.2924` is the last point
    // that is actually evaluated). NaN fails this comparison and falls through
    // to the arithmetic below, where it propagates.
    const UPPER_SATURATION: f64 = 8.2924;
    if x >= UPPER_SATURATION {
        return 1.0;
    }

    // Cody (1969) coefficients, identical to R's pnorm.c.
    const A: [f64; 5] = [
        2.2352520354606839287,
        161.02823106855587881,
        1067.6894854603709582,
        18154.981253343561249,
        0.065682337918207449113,
    ];
    const B: [f64; 4] = [
        47.20258190468824187,
        976.09855173777669322,
        10260.932208618978205,
        45507.789335026729956,
    ];
    const C: [f64; 9] = [
        0.39894151208813466764,
        8.8831497943883759412,
        93.506656132177855979,
        597.27027639480026226,
        2494.5375852903726711,
        6848.1904505362823326,
        11602.651437647350124,
        9842.7148383839780218,
        1.0765576773720192317e-8,
    ];
    const D: [f64; 8] = [
        22.266688044328115691,
        235.38790178262499861,
        1519.377599407554805,
        6485.558298266760755,
        18615.571640885098091,
        34900.952721145977266,
        38912.003286093271411,
        19685.429676859990727,
    ];
    const P: [f64; 6] = [
        0.21589853405795699,
        0.1274011611602473639,
        0.022235277870649807,
        0.001421619193227893466,
        2.9112874951168792e-5,
        0.02307344176494017303,
    ];
    const Q: [f64; 5] = [
        1.28426009614491121,
        0.468238212480865118,
        0.0659881378689285515,
        0.00378239633202758244,
        7.29751555083966205e-5,
    ];
    const M_1_SQRT_2PI: f64 = 0.398942280401432677939946059934;

    let y = x; // x ≥ 0 here, so |x| == x.
    if y <= 0.67448975 {
        // Central region: rational approximation of the area around 0.
        // Squaring a value below eps/2 would only add noise, so treat it as 0.
        let xsq = if y > f64::EPSILON * 0.5 { x * x } else { 0.0 };
        let mut xnum = A[4] * xsq;
        let mut xden = xsq;
        for i in 0..3 {
            xnum = (xnum + A[i]) * xsq;
            xden = (xden + B[i]) * xsq;
        }
        let temp = x * (xnum + A[3]) / (xden + B[3]);
        0.5 + temp
    } else if y <= 32.0_f64.sqrt() {
        // Intermediate region: approximate the small (upper) tail, then 1 − it.
        let mut xnum = C[8] * y;
        let mut xden = y;
        for i in 0..7 {
            xnum = (xnum + C[i]) * y;
            xden = (xden + D[i]) * y;
        }
        let temp = (xnum + C[7]) / (xden + D[7]);
        // Split exp(−y²/2) into exp(−xsq²/2)·exp(−del/2) with xsq = y rounded
        // down to a multiple of 1/16, which keeps the exponent accurate.
        let xsq = (y * 16.0).trunc() / 16.0;
        let del = (y - xsq) * (y + xsq);
        let small = (-xsq * xsq * 0.5).exp() * (-del * 0.5).exp() * temp;
        1.0 - small
    } else {
        // Far upper tail: asymptotic series for the (tiny) complementary area.
        // Only reached for sqrt(32) < x < 8.2924 thanks to the guard above.
        let xsq = 1.0 / (x * x);
        let mut xnum = P[5] * xsq;
        let mut xden = xsq;
        for i in 0..4 {
            xnum = (xnum + P[i]) * xsq;
            xden = (xden + Q[i]) * xsq;
        }
        let mut temp = xsq * (xnum + P[4]) / (xden + Q[4]);
        temp = (M_1_SQRT_2PI - temp) / y;
        let xsq = (y * 16.0).trunc() / 16.0;
        let del = (y - xsq) * (y + xsq);
        let small = (-xsq * xsq * 0.5).exp() * (-del * 0.5).exp() * temp;
        1.0 - small
    }
}

/// Tabulate the standard-normal CDF on GSVA's fixed grid (`initCdfs` in
/// `kernel_estimation.c`).
///
/// Entry `i` holds `Φ(MAX_PRECOMPUTE · i / PRECOMPUTE_RESOLUTION)` for
/// `i = 0..=PRECOMPUTE_RESOLUTION`, i.e. `PRECOMPUTE_RESOLUTION + 1` values
/// covering `[0, MAX_PRECOMPUTE]` inclusive.
fn build_table() -> Vec<f64> {
    let mut grid = Vec::with_capacity(PRECOMPUTE_RESOLUTION + 1);
    for step in 0..=PRECOMPUTE_RESOLUTION {
        // Multiply before dividing, exactly as the original grid is defined.
        let z = MAX_PRECOMPUTE * step as f64 / PRECOMPUTE_RESOLUTION as f64;
        grid.push(std_normal_cdf(z));
    }
    grid
}

/// Table lookup of `Φ(x / sigma)`, mirroring `precomputedCdf`.
///
/// The standardized argument is clamped to `0` below `−MAX_PRECOMPUTE` and to
/// `1` above `+MAX_PRECOMPUTE`. Inside that range the table is indexed by the
/// truncated magnitude of the argument, and a negative argument is reflected
/// as `1 − table[idx]`.
#[inline]
fn precomputed_cdf(table: &[f64], x: f64, sigma: f64) -> f64 {
    let z = x / sigma;
    if z < -MAX_PRECOMPUTE {
        0.0
    } else if z > MAX_PRECOMPUTE {
        1.0
    } else {
        // Keep the divide-then-multiply order and the truncating cast so the
        // chosen slot matches the C implementation.
        let slot = ((z.abs() / MAX_PRECOMPUTE) * PRECOMPUTE_RESOLUTION as f64) as usize;
        let upper_half = table[slot];
        if z < 0.0 {
            1.0 - upper_half
        } else {
            upper_half
        }
    }
}

/// Kernel bandwidth for one gene: the sample standard deviation of `row`
/// (denominator `n − 1`) divided by `SIGMA_FACTOR`.
///
/// Follows GSVA's C `sd` (`src/utils.c`): the mean is computed, corrected once
/// by the average residual, and then the squared deviations from the corrected
/// mean are summed. A bandwidth that comes out as zero or NaN (constant,
/// single-element or empty rows) is replaced by `0.001`, mirroring the C guard;
/// constant rows are dropped upstream, so this is essentially never hit.
fn bandwidth(row: &[f64]) -> f64 {
    let count = row.len() as f64;

    // First pass: naive mean, then one correction step using the residuals.
    let naive_mean = row.iter().sum::<f64>() / count;
    let residual_sum = row.iter().fold(0.0f64, |acc, &v| acc + (v - naive_mean));
    let mean = naive_mean + residual_sum / count;

    // Second pass: sum of squared deviations around the corrected mean.
    let sum_sq = row.iter().fold(0.0f64, |acc, &v| {
        let dev = v - mean;
        acc + dev * dev
    });

    let scaled_sd = (sum_sq / (count - 1.0)).sqrt() / SIGMA_FACTOR;
    if scaled_sd == 0.0 || scaled_sd.is_nan() {
        return 0.001;
    }
    scaled_sd
}

/// Gaussian kernel CDF (`left_tail`) for every gene × sample, stored row-major
/// so that gene `g`, sample `j` lives at `out[g * nsamp + j]`.
///
/// For gene `g` with bandwidth `bw_g`, the value at sample `j` is the mean over
/// all samples `l` of the tabulated `Φ((x[g,j] − x[g,l]) / bw_g)`. Terms are
/// added in sample order with plain `f64` arithmetic, matching `row_d` in
/// `kernel_estimation.c`. The result is rank-equivalent to GSVA's logit output.
pub(crate) fn gaussian_left_tail(expr: &ExprMatrix) -> Vec<f64> {
    let n_genes = expr.nrow();
    let n_samples = expr.ncol();
    let sample_count = n_samples as f64;
    let cdf_table = build_table();
    let mut left_tail = vec![0.0f64; n_genes * n_samples];
    // Genes do not interact: a gene's bandwidth and pairwise CDF terms read only
    // its own row. Work is therefore split per gene, each unit writing its own
    // contiguous output row, with the in-row summation order left untouched.
    crate::par::fill_chunks_mut(&mut left_tail, n_samples, |gene, dest| {
        let values = expr.row(gene);
        let sigma = bandwidth(values);
        for (cell, &here) in dest.iter_mut().zip(values) {
            let total = values.iter().fold(0.0f64, |acc, &other| {
                acc + precomputed_cdf(&cdf_table, here - other, sigma)
            });
            *cell = total / sample_count;
        }
    });
    left_tail
}

// --- Poisson kernel (for integer count data; GSVA `kcdf = "Poisson"`) -------

/// Natural log of the absolute value of the gamma function, `ln|Γ(x)|`, via
/// the Lanczos approximation (g = 7).
/// Accurate to ~1e-15 relative for the `a ≥ 1` arguments used by [`ppois`].
///
/// * `x ≥ 0.5` — evaluated directly with the Lanczos series.
/// * `x < 0.5` — evaluated through the reflection formula
///   `ln|Γ(x)| = ln(π / |sin(πx)|) − ln|Γ(1 − x)|`. The absolute value matters:
///   `sin(πx)` is negative on `(−1, 0)`, `(−3, −2)`, …, and taking the log of
///   the signed ratio would produce `NaN` there.
/// * `x` zero or a negative integer — a pole of `Γ`; returns `+∞`.
/// * `NaN` propagates.
fn ln_gamma(x: f64) -> f64 {
    const G: f64 = 7.0;
    const C: [f64; 9] = [
        0.99999999999980993,
        676.5203681218851,
        -1259.1392167224028,
        771.32342877765313,
        -176.61502916214059,
        12.507343278686905,
        -0.13857109526572012,
        9.9843695780195716e-6,
        1.5056327351493116e-7,
    ];
    // Poles: sin(πx) is not exactly zero in floating point for non-zero
    // integers, so detect them explicitly instead of relying on a division by 0.
    if x <= 0.0 && x == x.floor() {
        return f64::INFINITY;
    }
    if x < 0.5 {
        // Reflection formula, for completeness (not hit by ppois).
        let sin_pi_x = (std::f64::consts::PI * x).sin();
        (std::f64::consts::PI / sin_pi_x.abs()).ln() - ln_gamma(1.0 - x)
    } else {
        let x = x - 1.0;
        let mut a = C[0];
        let t = x + G + 0.5;
        for (i, &c) in C.iter().enumerate().skip(1) {
            a += c / (x + i as f64);
        }
        0.5 * (2.0 * std::f64::consts::PI).ln() + (x + 0.5) * t.ln() - t + a.ln()
    }
}

/// Regularized lower incomplete gamma `P(a, x)` via its series expansion
/// (converges for `x < a + 1`).
///
/// The series is `Σ_n x^n / (a (a+1) … (a+n))`, scaled by
/// `exp(−x + a·ln x − lnΓ(a))`. Terms shrink by the factor `x / (a + n)`, so
/// for large `a` with `x ≈ a` they decay only like `exp(−n² / 2a)` and roughly
/// `9·√a` terms are needed for full precision. A fixed 300-term budget silently
/// truncates the sum for shapes beyond a few thousand (large read counts), so
/// the budget here grows with `√max(a, x)`. Inputs that already converged
/// within 300 terms take exactly the same path and give the same result.
///
/// Non-finite `a` or `x` keep the base budget, and the extra budget is capped,
/// so the loop is always bounded.
fn gamma_p_series(a: f64, x: f64) -> f64 {
    /// Iteration budget that is always granted.
    const BASE_ITERS: usize = 300;
    /// Additional iterations granted per unit of `√max(a, x)`.
    const ITERS_PER_SQRT: f64 = 20.0;
    /// Hard ceiling on the additional budget.
    const MAX_EXTRA_ITERS: f64 = 1.0e7;

    let extra = if a.is_finite() && x.is_finite() {
        (ITERS_PER_SQRT * a.max(x).max(0.0).sqrt()).min(MAX_EXTRA_ITERS) as usize
    } else {
        0
    };
    let max_iter = BASE_ITERS + extra;

    let mut ap = a;
    let mut del = 1.0 / a;
    let mut sum = del;
    for _ in 0..max_iter {
        ap += 1.0;
        del *= x / ap;
        sum += del;
        // Stop once the newest term no longer affects the sum.
        if del.abs() < sum.abs() * 1e-16 {
            break;
        }
    }
    sum * (-x + a * x.ln() - ln_gamma(a)).exp()
}

/// Regularized upper incomplete gamma `Q(a, x)` via the modified Lentz
/// evaluation of its continued fraction (intended for `x ≥ a + 1`).
///
/// The fraction converges quickly when `x` is well above `a`, but for large
/// `a` with `x` just above `a + 1` the number of iterations needed grows with
/// `a`. A fixed budget of 300 then stops early and returns a visibly wrong
/// tail for very large shapes (large read counts). The budget here therefore
/// grows with `√max(a, x)`. Inputs that already converged within the original
/// budget follow exactly the same path and give the same result.
///
/// Non-finite `a` or `x` keep the base budget, and the extra budget is capped,
/// so the loop is always bounded.
fn gamma_q_cf(a: f64, x: f64) -> f64 {
    /// Floor that keeps the Lentz recurrences away from division by zero.
    const FPMIN: f64 = 1e-300;
    /// Iteration bound that is always granted.
    const BASE_ITERS: usize = 300;
    /// Additional iterations granted per unit of `√max(a, x)`.
    const ITERS_PER_SQRT: f64 = 20.0;
    /// Hard ceiling on the additional budget.
    const MAX_EXTRA_ITERS: f64 = 1.0e7;

    let extra = if a.is_finite() && x.is_finite() {
        (ITERS_PER_SQRT * a.max(x).max(0.0).sqrt()).min(MAX_EXTRA_ITERS) as usize
    } else {
        0
    };
    let max_iter = BASE_ITERS + extra;

    let mut b = x + 1.0 - a;
    let mut c = 1.0 / FPMIN;
    let mut d = 1.0 / b;
    let mut h = d;
    for i in 1..max_iter {
        let an = -(i as f64) * (i as f64 - a);
        b += 2.0;
        d = an * d + b;
        if d.abs() < FPMIN {
            d = FPMIN;
        }
        c = b + an / c;
        if c.abs() < FPMIN {
            c = FPMIN;
        }
        d = 1.0 / d;
        let del = d * c;
        h *= del;
        // Converged once the multiplicative update is indistinguishable from 1.
        if (del - 1.0).abs() < 1e-16 {
            break;
        }
    }
    (-x + a * x.ln() - ln_gamma(a)).exp() * h
}

/// Regularized upper incomplete gamma `Q(a, x) = Γ(a, x) / Γ(a)`.
///
/// Dispatches on `x`: non-positive `x` gives `1`; below `a + 1` the result is
/// `1 − P(a, x)` from the series; anything else (including a `NaN` `x`, which
/// fails both comparisons) goes to the continued fraction.
fn gamma_q(a: f64, x: f64) -> f64 {
    match x {
        v if v <= 0.0 => 1.0,
        v if v < a + 1.0 => 1.0 - gamma_p_series(a, v),
        v => gamma_q_cf(a, v),
    }
}

/// Poisson CDF `P(N ≤ ⌊k⌋)` for mean `lambda`, equivalent to R's
/// `ppois(k, lambda)`.
///
/// Uses the identity `F(k; λ) = Q(⌊k⌋ + 1, λ)`. A `k` whose floor is negative
/// has no mass at or below it and yields `0`.
fn ppois(k: f64, lambda: f64) -> f64 {
    let whole = k.floor();
    if whole < 0.0 {
        0.0
    } else {
        gamma_q(whole + 1.0, lambda)
    }
}

/// Poisson kernel CDF (`left_tail`) for every gene × sample, stored row-major.
///
/// Counterpart of the Poisson branch of `row_d` in `kernel_estimation.c`: the
/// bandwidth is the fixed `0.5` offset added to each kernel centre, so every
/// term is `ppois(x[g,j], x[g,l] + 0.5)`. Terms are added in sample order with
/// plain `f64` arithmetic and the total is divided by `n`.
pub(crate) fn poisson_left_tail(expr: &ExprMatrix) -> Vec<f64> {
    let n_genes = expr.nrow();
    let n_samples = expr.ncol();
    let sample_count = n_samples as f64;
    let mut left_tail = vec![0.0f64; n_genes * n_samples];
    // Each gene only reads its own row, so the work is split per gene; the
    // summation order inside a row is left as it was.
    crate::par::fill_chunks_mut(&mut left_tail, n_samples, |gene, dest| {
        let counts = expr.row(gene);
        for (cell, &here) in dest.iter_mut().zip(counts) {
            let total = counts
                .iter()
                .fold(0.0f64, |acc, &centre| acc + ppois(here, centre + 0.5));
            *cell = total / sample_count;
        }
    });
    left_tail
}

#[cfg(test)]
mod tests {
    use super::*;

    /// `std_normal_cdf` against R 4.6.0 `pnorm()` output printed with `%.17g`.
    #[test]
    fn pnorm_matches_known_values() {
        const TOL: f64 = 1e-15;
        let cases: [(f64, f64); 8] = [
            (0.0, 0.5),
            (0.5, 0.69146246127401301),
            (1.0, 0.84134474606854293),
            (1.6166, 0.94701767404030579),
            (2.0, 0.97724986805182079),
            (3.0, 0.9986501019683699),
            (5.0, 0.99999971334842808),
            (8.0, 0.99999999999999933),
        ];
        for &(z, want) in cases.iter() {
            let got = std_normal_cdf(z);
            let err = (got - want).abs();
            assert!(err <= TOL, "pnorm({z}) = {got:.17e}, want {want:.17e}");
        }
        // Negative arguments go through the reflection Φ(x) = 1 − Φ(−x).
        let reflected = std_normal_cdf(-1.0);
        let expected = 1.0 - 0.84134474606854293;
        assert!((reflected - expected).abs() <= TOL);
    }

    /// `ppois` against R 4.6.0 `ppois(k, lambda)` output printed with `%.17g`.
    #[test]
    fn ppois_matches_known_values() {
        const TOL: f64 = 1e-12;
        let cases: [(f64, f64, f64); 6] = [
            (0.0, 0.5, 0.60653065971263342),
            (3.0, 2.5, 0.75757613313306593),
            (5.0, 5.5, 0.52891868652586216),
            (10.0, 4.5, 0.99333132791281809),
            (0.0, 10.5, 2.7536449349747158e-5),
            (20.0, 15.5, 0.89436693722434268),
        ];
        for &(k, lambda, want) in cases.iter() {
            let got = ppois(k, lambda);
            let err = (got - want).abs();
            assert!(
                err <= TOL,
                "ppois({k}, {lambda}) = {got:.17e}, want {want:.17e}"
            );
        }
    }

    /// Within a gene the kernel CDF values add up to `n / 2`: every off-diagonal
    /// pair contributes `Φ(v) + Φ(−v) = 1` and each of the `n` diagonal terms
    /// contributes `0.5`.
    #[test]
    fn left_tail_sums_to_half_n() {
        let genes = vec!["G1".into(), "G2".into()];
        let samples = vec!["S1".into(), "S2".into(), "S3".into(), "S4".into()];
        let values = vec![1.0, 2.0, 5.0, 9.0, 3.0, 3.1, 2.9, 4.0];
        let expr = ExprMatrix::new(genes, samples, values);

        let lt = gaussian_left_tail(&expr);
        for g in 0..2 {
            let row = &lt[g * 4..(g + 1) * 4];
            let s = row.iter().sum::<f64>();
            assert!((s - 2.0).abs() < 1e-12, "row {g} sums to {s}, want 2.0");
        }
    }
}