limma-rust 0.1.0

//! Proportion of expressed probes per array (Shi & Smyth).
//!
//! Pure-Rust port of the numeric core of limma's `propexpr.R` ([`propexpr`]):
//! the branch taking an explicit matrix of regular probes plus a matrix of
//! negative-control probes. The `status`/grep label-extraction wrapper is an
//! annotation-layer concern and is out of scope for the numeric port.

use ndarray::Array2;

fn median(v: &mut [f64]) -> f64 {
    v.sort_by(|a, b| a.partial_cmp(b).unwrap());
    let n = v.len();
    if n % 2 == 1 {
        v[n / 2]
    } else {
        (v[n / 2 - 1] + v[n / 2]) / 2.0
    }
}

/// Exponential CDF `pexp(q, rate) = 1 - exp(-rate*q)` for `q >= 0`, else 0.
fn pexp(q: f64, rate: f64) -> f64 {
    if q < 0.0 {
        0.0
    } else {
        1.0 - (-rate * q).exp()
    }
}

/// `propexpr(x, neg.x)` for explicitly supplied matrices. For each column
/// (array) it estimates the proportion of expressed (regular) probes by
/// comparing the regular-probe values `x` against the negative-control values
/// `neg_x`, fitting an exponential to the negative tail. `NaN` entries are
/// dropped per column. Returns one proportion per column, clamped to `[0, 1]`.
pub fn propexpr(x: &Array2<f64>, neg_x: &Array2<f64>) -> Vec<f64> {
    let narrays = x.ncols();
    assert_eq!(
        neg_x.ncols(),
        narrays,
        "x and neg_x must have equal columns"
    );

    let mut out = vec![0.0_f64; narrays];
    for (i, o) in out.iter_mut().enumerate() {
        let mut b: Vec<f64> = neg_x
            .column(i)
            .iter()
            .copied()
            .filter(|v| !v.is_nan())
            .collect();
        let r: Vec<f64> = x
            .column(i)
            .iter()
            .copied()
            .filter(|v| !v.is_nan())
            .collect();
        let nb = b.len() as f64;
        let nr = r.len() as f64;

        let mu = b.iter().sum::<f64>() / nb;
        let alpha = (r.iter().sum::<f64>() / nr - mu).max(10.0);
        let b1 = median(&mut b);

        let p1 = b.iter().map(|&bj| pexp(b1 - bj, 1.0 / alpha)).sum::<f64>() / nb;
        let pb = (b.iter().filter(|&&v| v < b1).count() as f64
            + b.iter().filter(|&&v| v == b1).count() as f64 / 2.0)
            / nb;
        let p = (r.iter().filter(|&&v| v < b1).count() as f64
            + r.iter().filter(|&&v| v == b1).count() as f64 / 2.0)
            / nr;

        *o = ((pb - p) / (pb - p1)).clamp(0.0, 1.0);
    }
    out
}

#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::array;

    fn close(a: &[f64], b: &[f64], tol: f64) -> bool {
        a.len() == b.len()
            && a.iter()
                .zip(b)
                .all(|(&x, &y)| (x - y).abs() <= tol + tol * y.abs())
    }

    #[test]
    fn two_arrays_match_r() {
        // Reference: propexpr(reg, neg.x = neg) in limma 3.68.3 (columns of
        // overlapping regular vs negative-control intensities).
        let neg = array![
            [4.0, 3.0],
            [5.0, 4.0],
            [5.0, 5.0],
            [6.0, 5.0],
            [6.0, 6.0],
            [7.0, 7.0],
        ];
        let reg = array![
            [5.0, 4.0],
            [5.0, 5.0],
            [6.0, 5.0],
            [7.0, 6.0],
            [8.0, 8.0],
            [9.0, 9.0],
            [10.0, 11.0],
            [12.0, 13.0],
        ];
        let want = [0.54285538831644, 0.550748101666388];
        assert!(close(&propexpr(&reg, &neg), &want, 1e-9));
    }
}