limma-rust 0.1.0

Pure-Rust port of the Bioconductor limma differential-expression package
Documentation
//! Bit-exact port of R's default random number generator: the Mersenne-Twister
//! uniform generator with R's `set.seed` initialisation, plus the `Inversion`
//! normal generator. This reproduces `set.seed`, `runif` and `rnorm` exactly,
//! which the rotation gene-set tests (`roast`, `romer`) depend on for
//! reproducible p-values.
//!
//! R does **not** seed the Mersenne-Twister with the reference `sgenrand` /
//! `init_by_array`. Instead `set.seed(k)` scrambles `k` through the linear
//! congruential generator `s = 69069 s + 1` (50 warm-up steps, then one value
//! per state word). `unif_rand` is `fixup(MT_genrand() * 2^-32)` and `norm_rand`
//! uses the high-precision inversion `qnorm5((floor(2^27 u1) + u2) / 2^27)`,
//! consuming two uniforms per normal deviate.

const N: usize = 624;
const M: usize = 397;
const MATRIX_A: u32 = 0x9908_b0df;
const UPPER_MASK: u32 = 0x8000_0000;
const LOWER_MASK: u32 = 0x7fff_ffff;
/// `2^-32`, R's `MT_genrand` scaling constant.
const TWO_POW_M32: f64 = 2.328_306_436_538_696_3e-10;
/// `1/(2^32 - 1)`, used by R's `fixup` to nudge values off the open interval.
const I2_32M1: f64 = 2.328_306_437_080_797e-10;
/// `2^27`, the inversion-method precision multiplier (`BIG` in R's `snorm.c`).
const BIG: f64 = 134_217_728.0;

/// R's default RNG state: a Mersenne-Twister (`MT19937`) seeded the R way.
pub struct RRng {
    mt: [u32; N],
    mti: usize,
}

impl RRng {
    /// `set.seed(seed)` with the default `kind = "Mersenne-Twister"`.
    ///
    /// Mirrors R's `RNG_Init`: 50 LCG warm-up steps, then 625 LCG outputs filling
    /// `i_seed[0..624]` (the first, `mti`, is overwritten to `624` by
    /// `FixupSeeds`, leaving the 624-word state in `i_seed[1..624]`).
    pub fn new(seed: i32) -> Self {
        let mut s = seed as u32;
        for _ in 0..50 {
            s = s.wrapping_mul(69069).wrapping_add(1);
        }
        // i_seed[0] (the mti slot) — discarded, FixupSeeds forces it to 624.
        s = s.wrapping_mul(69069).wrapping_add(1);
        let mut mt = [0u32; N];
        for word in mt.iter_mut() {
            s = s.wrapping_mul(69069).wrapping_add(1);
            *word = s;
        }
        RRng { mt, mti: N }
    }

    /// One tempered 32-bit Mersenne-Twister output (`MT_genrand`).
    fn genrand(&mut self) -> u32 {
        const MAG01: [u32; 2] = [0, MATRIX_A];
        if self.mti >= N {
            let mt = &mut self.mt;
            for kk in 0..(N - M) {
                let y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
                mt[kk] = mt[kk + M] ^ (y >> 1) ^ MAG01[(y & 1) as usize];
            }
            for kk in (N - M)..(N - 1) {
                let y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
                mt[kk] = mt[kk + M - N] ^ (y >> 1) ^ MAG01[(y & 1) as usize];
            }
            let y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
            mt[N - 1] = mt[M - 1] ^ (y >> 1) ^ MAG01[(y & 1) as usize];
            self.mti = 0;
        }
        let mut y = self.mt[self.mti];
        self.mti += 1;
        y ^= y >> 11;
        y ^= (y << 7) & 0x9d2c_5680;
        y ^= (y << 15) & 0xefc6_0000;
        y ^= y >> 18;
        y
    }

    /// `unif_rand()`: a uniform deviate on the open interval `(0, 1)`.
    pub fn unif_rand(&mut self) -> f64 {
        fixup(self.genrand() as f64 * TWO_POW_M32)
    }

    /// `norm_rand()` with the default `normal.kind = "Inversion"`. Consumes two
    /// uniforms for full double precision.
    pub fn norm_rand(&mut self) -> f64 {
        let u1 = self.unif_rand();
        let big_u = (BIG * u1).trunc() + self.unif_rand();
        qnorm(big_u / BIG)
    }

    /// `runif(n)`: `n` uniform deviates.
    pub fn runif(&mut self, n: usize) -> Vec<f64> {
        (0..n).map(|_| self.unif_rand()).collect()
    }

    /// `rnorm(n)`: `n` standard normal deviates.
    pub fn rnorm(&mut self, n: usize) -> Vec<f64> {
        (0..n).map(|_| self.norm_rand()).collect()
    }
}

/// R's `fixup`: keep uniforms strictly inside `(0, 1)`.
fn fixup(x: f64) -> f64 {
    if x <= 0.0 {
        0.5 * I2_32M1
    } else if 1.0 - x <= 0.0 {
        1.0 - 0.5 * I2_32M1
    } else {
        x
    }
}

/// R's `qnorm5(p, 0, 1, lower_tail = TRUE, log_p = FALSE)` — the standard-normal
/// quantile via Wichura's AS 241 algorithm. `p` is assumed in the open interval
/// `(0, 1)` (the only range the inversion generator produces).
#[allow(clippy::excessive_precision)]
pub fn qnorm(p: f64) -> f64 {
    let q = p - 0.5;
    if q.abs() <= 0.425 {
        let r = 0.180625 - q * q;
        return q
            * (((((((r * 2509.0809287301226727 + 33430.575583588128105) * r
                + 67265.770927008700853)
                * r
                + 45921.953931549871457)
                * r
                + 13731.693765509461125)
                * r
                + 1971.5909503065514427)
                * r
                + 133.14166789178437745)
                * r
                + 3.387132872796366608)
            / (((((((r * 5226.495278852854561 + 28729.085735721942674) * r
                + 39307.89580009271061)
                * r
                + 21213.794301586595867)
                * r
                + 5394.1960214247511077)
                * r
                + 687.1870074920579083)
                * r
                + 42.313330701600911252)
                * r
                + 1.0);
    }
    let mut r = if q < 0.0 { p } else { 1.0 - p };
    r = (-r.ln()).sqrt();
    let val = if r <= 5.0 {
        r -= 1.6;
        (((((((r * 7.7454501427834140764e-4 + 0.0227238449892691845833) * r
            + 0.24178072517745061177)
            * r
            + 1.27045825245236838258)
            * r
            + 3.64784832476320460504)
            * r
            + 5.7694972214606914055)
            * r
            + 4.6303378461565452959)
            * r
            + 1.42343711074968357734)
            / (((((((r * 1.05075007164441684324e-9 + 5.475938084995344946e-4) * r
                + 0.0151986665636164571966)
                * r
                + 0.14810397642748007459)
                * r
                + 0.68976733498510000455)
                * r
                + 1.6763848301838038494)
                * r
                + 2.05319162663775882187)
                * r
                + 1.0)
    } else {
        r -= 5.0;
        (((((((r * 2.01033439929228813265e-7 + 2.71155556874348757815e-5) * r
            + 0.0012426609473880784386)
            * r
            + 0.026532189526576123093)
            * r
            + 0.29656057182850489123)
            * r
            + 1.7848265399172913358)
            * r
            + 5.4637849111641143699)
            * r
            + 6.6579046435011037772)
            / (((((((r * 2.04426310338993978564e-15 + 1.4215117583164458887e-7) * r
                + 1.8463183175100546818e-5)
                * r
                + 7.868691311456132591e-4)
                * r
                + 0.0148753612908506148525)
                * r
                + 0.13692988092273580531)
                * r
                + 0.59983220655588793769)
                * r
                + 1.0)
    };
    if q < 0.0 {
        -val
    } else {
        val
    }
}

#[cfg(test)]
#[allow(clippy::excessive_precision)]
mod tests {
    use super::*;

    #[test]
    fn seed_state_matches_r() {
        let rng = RRng::new(1);
        // .Random.seed[3..12] (state[0..9]) and state[623] after set.seed(1L).
        let want_head: [u32; 10] = [
            4125696813, 3852956682, 3691408899, 4072619880, 1489374793, 865871222, 1734802815,
            98005428, 268448037, 63650722,
        ];
        for (k, &w) in want_head.iter().enumerate() {
            assert_eq!(rng.mt[k], w, "state[{k}]");
        }
        assert_eq!(rng.mt[623], 3605718188, "state[623]");
        assert_eq!(rng.mti, 624);
    }

    #[test]
    fn runif_matches_r() {
        let mut rng = RRng::new(1);
        let got = rng.runif(10);
        let want = [
            0.26550866314209998,
            0.37212389963679016,
            0.57285336335189641,
            0.90820778999477625,
            0.2016819310374558,
            0.89838968496769667,
            0.94467526860535145,
            0.66079779248684645,
            0.62911404389888048,
            0.061786270467564464,
        ];
        for (g, w) in got.iter().zip(want.iter()) {
            assert_eq!(g, w, "runif seed 1");
        }

        let mut rng = RRng::new(42);
        let got = rng.runif(8);
        let want = [
            0.91480604349635541,
            0.93707541329786181,
            0.28613953478634357,
            0.83044762606732547,
            0.64174551889300346,
            0.51909594913013279,
            0.73658831464126706,
            0.13466659723781049,
        ];
        for (g, w) in got.iter().zip(want.iter()) {
            assert_eq!(g, w, "runif seed 42");
        }
    }

    #[test]
    fn rnorm_matches_r() {
        let mut rng = RRng::new(1);
        let got = rng.rnorm(10);
        let want = [
            -0.62645381074233242,
            0.18364332422208224,
            -0.83562861241004716,
            1.5952808021377916,
            0.32950777181536051,
            -0.82046838411801526,
            0.48742905242848528,
            0.73832470512921733,
            0.57578135165349231,
            -0.30538838715635602,
        ];
        for (g, w) in got.iter().zip(want.iter()) {
            assert_eq!(g, w, "rnorm seed 1");
        }

        let mut rng = RRng::new(42);
        let got = rng.rnorm(8);
        let want = [
            1.3709584471466685,
            -0.56469817139608869,
            0.3631284113373392,
            0.63286260496104041,
            0.40426832314099903,
            -0.10612451609148403,
            1.5115219974389389,
            -0.094659038413097557,
        ];
        for (g, w) in got.iter().zip(want.iter()) {
            assert_eq!(g, w, "rnorm seed 42");
        }
    }

    #[test]
    fn qnorm_matches_r() {
        let probs = [1e-10, 0.0123, 0.25, 0.5, 0.75, 0.9999, 1.0 - 1e-12];
        let want = [
            -6.3613409024040557,
            -2.2476267557951379,
            -0.67448975019608171,
            0.0,
            0.67448975019608171,
            3.7190164854557084,
            7.0344869100478356,
        ];
        for (&p, &w) in probs.iter().zip(want.iter()) {
            assert_eq!(qnorm(p), w, "qnorm({p})");
        }
    }
}