limma-rust 0.1.0

Pure-Rust port of the Bioconductor limma differential-expression package
Documentation
//! Detection p-values from negative controls (limma `detectionPValues`).
//!
//! Pure-Rust port of `detectionPValues.default`: for each array, the detection
//! p-value of a probe is the right-tail rank of its intensity within the
//! negative-control distribution. Counting controls `>=` and `>` the observed
//! value and averaging makes ties count as one half.

use ndarray::Array2;

/// Detection p-values for an `nprobes × narrays` intensity matrix `x`, given a
/// per-probe `status` and the label `negctrl` marking negative controls.
///
/// For probe `i` in array `j` the p-value is `(c_ge + c_gt) / (2·nneg)`, where
/// `c_ge` / `c_gt` count negative controls with intensity `>=` / `>` the
/// observed value. Requires at least one negative control and finite
/// intensities.
pub fn detection_p_values<S: AsRef<str>>(
    x: &Array2<f64>,
    status: &[S],
    negctrl: &str,
) -> Array2<f64> {
    let (nrow, ncol) = (x.nrows(), x.ncols());
    let isneg: Vec<i64> = status
        .iter()
        .map(|s| (s.as_ref() == negctrl) as i64)
        .collect();
    let nneg: i64 = isneg.iter().sum();
    let denom = (2 * nneg) as f64;

    let mut out = Array2::<f64>::zeros((nrow, ncol));
    for j in 0..ncol {
        let col: Vec<f64> = (0..nrow).map(|i| x[[i, j]]).collect();
        // o1: value desc, negatives-first on ties; o2: negatives-last on ties.
        // Final tie-break is ascending index, matching R's stable `order`.
        let mut o1: Vec<usize> = (0..nrow).collect();
        o1.sort_by(|&a, &b| {
            col[b]
                .partial_cmp(&col[a])
                .unwrap()
                .then(isneg[b].cmp(&isneg[a]))
                .then(a.cmp(&b))
        });
        let mut o2: Vec<usize> = (0..nrow).collect();
        o2.sort_by(|&a, &b| {
            col[b]
                .partial_cmp(&col[a])
                .unwrap()
                .then(isneg[a].cmp(&isneg[b]))
                .then(a.cmp(&b))
        });
        // cs1[i] = #negatives >= x[i,j] (ties included),
        // cs2[i] = #negatives >  x[i,j] (ties excluded).
        let mut run = 0i64;
        let mut cs = vec![0i64; nrow];
        for &i in &o1 {
            run += isneg[i];
            cs[i] = run;
        }
        run = 0;
        for &i in &o2 {
            run += isneg[i];
            cs[i] += run;
        }
        for i in 0..nrow {
            out[[i, j]] = cs[i] as f64 / denom;
        }
    }
    out
}

#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::array;

    fn close(a: &Array2<f64>, b: &Array2<f64>, tol: f64) -> bool {
        a.shape() == b.shape() && a.iter().zip(b.iter()).all(|(&x, &y)| (x - y).abs() <= tol)
    }

    #[test]
    fn detection_with_ties_matches_r() {
        let x = array![
            [10.0, 12.0, 8.0],
            [10.0, 5.0, 8.0],
            [3.0, 20.0, 15.0],
            [7.0, 7.0, 7.0],
            [3.0, 4.0, 2.0],
            [1.0, 2.0, 9.0],
        ];
        let status = [
            "regular", "regular", "regular", "regular", "negative", "negative",
        ];
        let got = detection_p_values(&x, &status, "negative");
        let want = array![
            [0.0, 0.0, 0.5],
            [0.0, 0.0, 0.5],
            [0.25, 0.0, 0.0],
            [0.0, 0.0, 0.5],
            [0.5, 0.5, 1.0],
            [1.0, 1.0, 0.5],
        ];
        assert!(close(&got, &want, 1e-12));
    }

    #[test]
    fn detection_single_negative_matches_r() {
        let x = array![[9.5, 1.0], [2.0, 8.0], [6.0, 3.0], [0.5, 0.5]];
        let status = ["reg", "reg", "reg", "neg"];
        let got = detection_p_values(&x, &status, "neg");
        let want = array![[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [1.0, 1.0]];
        assert!(close(&got, &want, 1e-12));
    }
}