limma-rust 0.1.0

//! Quantile-quantile plot points for Student's t and F distributions, ports of
//! limma's `qqt` and `qqf`. Only the numeric core is ported — the plotting
//! arguments (`plot.it`, `main`, `xlab`, ...) are out of scope — so each
//! function returns `(x, y)`, where `x` are the theoretical quantiles ordered to
//! match the ranks of `y` and `y` is the input with `NaN`s removed.

use crate::special::{f_qf, t_ppf};
use anyhow::{bail, Result};
use statrs::distribution::{ContinuousCDF, Normal};

/// Plotting positions `(i - a) / (n + 1 - 2a)` with `a = 3/8` for `n <= 10` and
/// `a = 1/2` otherwise — R's `ppoints(n)`.
fn ppoints(n: usize) -> Vec<f64> {
    let a = if n <= 10 { 3.0 / 8.0 } else { 0.5 };
    (1..=n)
        .map(|i| (i as f64 - a) / (n as f64 + 1.0 - 2.0 * a))
        .collect()
}

/// Zero-based ranks of `y` (R's `order(order(y))` minus 1), stable on ties so
/// that equal values keep their input order.
fn ranks(y: &[f64]) -> Vec<usize> {
    let mut idx: Vec<usize> = (0..y.len()).collect();
    idx.sort_by(|&a, &b| y[a].partial_cmp(&y[b]).unwrap().then(a.cmp(&b)));
    let mut r = vec![0usize; y.len()];
    for (k, &orig) in idx.iter().enumerate() {
        r[orig] = k;
    }
    r
}

/// Shared core: strip `NaN`s from `y`, evaluate the theoretical quantile at each
/// `ppoints` position, then scatter those (ascending) quantiles onto the ranks
/// of `y`.
fn qq(y: &[f64], theo_at: impl Fn(f64) -> f64) -> Result<(Vec<f64>, Vec<f64>)> {
    let yc: Vec<f64> = y.iter().copied().filter(|v| !v.is_nan()).collect();
    if yc.is_empty() {
        bail!("y is empty");
    }
    let theo: Vec<f64> = ppoints(yc.len()).iter().map(|&p| theo_at(p)).collect();
    let x: Vec<f64> = ranks(&yc).iter().map(|&rk| theo[rk]).collect();
    Ok((x, yc))
}

/// Student's t Q-Q plot points. `df = f64::INFINITY` gives the normal-quantile
/// (Gaussian) reference, matching `qt(p, df = Inf)`.
pub fn qqt(y: &[f64], df: f64) -> Result<(Vec<f64>, Vec<f64>)> {
    if df.is_infinite() {
        let z = Normal::new(0.0, 1.0).unwrap();
        qq(y, |p| z.inverse_cdf(p))
    } else {
        qq(y, |p| t_ppf(p, df))
    }
}

/// F-distribution Q-Q plot points (port of `qqf`).
pub fn qqf(y: &[f64], df1: f64, df2: f64) -> Result<(Vec<f64>, Vec<f64>)> {
    qq(y, |p| f_qf(p, df1, df2))
}

#[cfg(test)]
mod tests {
    use super::*;

    const Y: [f64; 12] = [
        0.3,
        -1.7,
        1.2,
        2.5,
        -0.4,
        1.2,
        f64::NAN,
        0.9,
        -2.1,
        3.3,
        0.1,
        -0.8,
    ];
    // Y with the NaN removed, in input order.
    const YC: [f64; 11] = [0.3, -1.7, 1.2, 2.5, -0.4, 1.2, 0.9, -2.1, 3.3, 0.1, -0.8];

    fn assert_close(got: &[f64], want: &[f64]) {
        assert_eq!(got.len(), want.len());
        for (g, w) in got.iter().zip(want.iter()) {
            assert!((g - w).abs() < 1e-9, "got {g}, want {w}");
        }
    }

    #[test]
    fn qqt_matches_r() {
        // df = 5 (finite t).
        let (x, y) = qqt(&Y, 5.0).unwrap();
        assert_close(
            &x,
            &[
                0.0,
                -1.23198711032414,
                0.502953281687005,
                1.23198711032414,
                -0.502953281687005,
                0.810383997809011,
                0.242287404471427,
                -2.08992002958375,
                2.08992002958375,
                -0.242287404471427,
                -0.810383997809011,
            ],
        );
        assert_close(&y, &YC);

        // df = 20.
        let (x, _) = qqt(&Y, 20.0).unwrap();
        assert_close(
            &x,
            &[
                0.0,
                -1.12786228696882,
                0.480102454500496,
                1.12786228696882,
                -0.480102454500497,
                0.762698567276393,
                0.232931568020726,
                -1.7762457881447,
                1.7762457881447,
                -0.232931568020726,
                -0.762698567276393,
            ],
        );

        // df = Inf -> normal quantiles.
        let (x, _) = qqt(&Y, f64::INFINITY).unwrap();
        assert_close(
            &x,
            &[
                0.0,
                -1.09680356209351,
                0.472789120992267,
                1.09680356209351,
                -0.472789120992267,
                0.747858594763302,
                0.229884117579232,
                -1.6906216295849,
                1.6906216295849,
                -0.229884117579232,
                -0.747858594763302,
            ],
        );
    }

    #[test]
    fn qqf_matches_r() {
        let (x, y) = qqf(&Y, 3.0, 10.0).unwrap();
        assert_close(
            &x,
            &[
                0.845080576591714,
                0.244118830691621,
                1.33260260140614,
                2.32729903285299,
                0.513954579920213,
                1.71221115610176,
                1.05924859191604,
                0.106213291153813,
                3.85329085611296,
                0.667597469205698,
                0.375423754916837,
            ],
        );
        assert_close(&y, &YC);

        let (x, _) = qqf(&Y, 4.0, 8.0).unwrap();
        assert_close(
            &x,
            &[
                0.914645355977072,
                0.309870602043059,
                1.39570462056132,
                2.3949374906501,
                0.586390018113275,
                1.77344818558774,
                1.12570692785527,
                0.156549563940445,
                3.99319919728592,
                0.739322903995197,
                0.446405502584276,
            ],
        );

        let (x, _) = qqf(&Y, 1.0, 50.0).unwrap();
        assert_close(
            &x,
            &[
                0.461622679398699,
                0.0298023566924766,
                1.01659865168114,
                2.29169880496666,
                0.170063583948288,
                1.49435871823477,
                0.693040444198623,
                0.00328172082098011,
                4.20947435594102,
                0.292682224702878,
                0.084319979645453,
            ],
        );
    }

    #[test]
    fn empty_after_nan_strip_errors() {
        assert!(qqt(&[f64::NAN, f64::NAN], 5.0).is_err());
    }
}