mcpcounter-rust 0.1.0

Pure-Rust port of the MCPcounter cell-population quantification methods: MCP-counter (human) and mMCP-counter (mouse), validated for numeric parity against the original R implementations.
Documentation
//! R-compatible summary statistics.
//!
//! These reproduce the *algorithms* used by R's C internals so that ported
//! methods match the reference closely. Note: R accumulates `mean` in
//! `long double` (80-bit on the GNU toolchains R is built with), whereas Rust
//! uses 64-bit `f64`. The two-pass structure is reproduced exactly, but the
//! extended-precision accumulation is not, so parity is to ~15-16 significant
//! figures rather than always bit-identical.

/// R's `mean(x, na.rm = TRUE)` for a numeric vector.
///
/// Reproduces R's two-pass algorithm: a naive `sum / n`, then a correction pass
/// `s += sum(x - s) / n`. `NaN` (R's `NA`) values are dropped. The mean of an
/// empty vector (or all-`NaN`) is `NaN`, matching R.
pub fn r_mean_narm(values: &[f64]) -> f64 {
    let mut sum = 0.0f64;
    let mut n = 0usize;
    for &v in values {
        if !v.is_nan() {
            sum += v;
            n += 1;
        }
    }
    if n == 0 {
        return f64::NAN;
    }
    let nf = n as f64;
    let mut s = sum / nf;
    if s.is_finite() {
        let mut t = 0.0f64;
        for &v in values {
            if !v.is_nan() {
                t += v - s;
            }
        }
        s += t / nf;
    }
    s
}

/// R's `median(x, na.rm = TRUE)` for a numeric vector.
///
/// Drops `NaN`, sorts ascending; for odd length returns the central element,
/// for even length returns the mean of the two central elements. The median of
/// an empty vector is `NaN`, matching R's `NA`.
pub fn r_median_narm(values: &[f64]) -> f64 {
    let mut v: Vec<f64> = values.iter().copied().filter(|x| !x.is_nan()).collect();
    let n = v.len();
    if n == 0 {
        return f64::NAN;
    }
    v.sort_by(f64::total_cmp);
    let mid = n / 2;
    if n % 2 == 1 {
        v[mid]
    } else {
        // R's `median` averages the two central order statistics with
        // `mean(c(a, b))` (see `median.default`), i.e. the same two-pass
        // algorithm as `r_mean_narm` -- not a naive `(a + b) / 2`, which can
        // differ in the last ULP. Reuse the mean helper to track R exactly.
        r_mean_narm(&[v[mid - 1], v[mid]])
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn mean_basic() {
        assert_eq!(r_mean_narm(&[1.0, 2.0, 3.0, 4.0]), 2.5);
        assert_eq!(r_mean_narm(&[5.0]), 5.0);
    }

    #[test]
    fn mean_narm() {
        // R: mean(c(1,2,NA,4), na.rm=TRUE) == 7/3
        let got = r_mean_narm(&[1.0, 2.0, f64::NAN, 4.0]);
        assert!((got - 7.0 / 3.0).abs() < 1e-15);
    }

    #[test]
    fn mean_empty_is_nan() {
        assert!(r_mean_narm(&[]).is_nan());
        assert!(r_mean_narm(&[f64::NAN, f64::NAN]).is_nan());
    }

    #[test]
    fn median_odd_even_na() {
        assert_eq!(r_median_narm(&[3.0, 1.0, 2.0]), 2.0);
        assert_eq!(r_median_narm(&[1.0, 2.0, 3.0, 4.0]), 2.5);
        // R: median(c(1,NA,3), na.rm=TRUE) == 2
        assert_eq!(r_median_narm(&[1.0, f64::NAN, 3.0]), 2.0);
        assert!(r_median_narm(&[]).is_nan());
    }
}