limma-rust 0.1.0

//! Mixture-model fit by nonlinear least squares.
//!
//! Pure-Rust port of limma's `fitmixture.R` ([`fitmixture`]): for a titration
//! series where each array is a known mixture proportion of two samples, fit
//! the per-probe log-ratio `M`, average `A` and residual standard deviation,
//! using the same Gauss-Newton-style update as limma (in `log(cosh)` space so
//! the per-probe means stay well-conditioned).

use crate::logsumexp::logcosh;
use ndarray::Array2;

/// Output of [`fitmixture`], all per-probe (length `nprobes`) and in `log2`
/// units: `a` is the average expression, `m` the log-ratio, `stdev` the
/// residual standard deviation.
#[derive(Debug, Clone)]
pub struct FitMixture {
    pub a: Vec<f64>,
    pub m: Vec<f64>,
    pub stdev: Vec<f64>,
}

/// `mub[p,a] = logcosh(b_p/2) + ln(1 + tanh(b_p/2)·pm_a)` together with the
/// per-probe offset `a_p = mean_a(z - mub)`.
fn mub_and_offset(b: &[f64], z: &Array2<f64>, pm: &[f64]) -> (Array2<f64>, Vec<f64>) {
    let (nprobes, narrays) = (z.nrows(), z.ncols());
    let mut mub = Array2::<f64>::zeros((nprobes, narrays));
    let mut a = vec![0.0; nprobes];
    for p in 0..nprobes {
        let half = b[p] / 2.0;
        let lc = logcosh(half);
        let th = half.tanh();
        let mut sum = 0.0;
        for j in 0..narrays {
            let m = lc + (1.0 + th * pm[j]).ln();
            mub[[p, j]] = m;
            sum += z[[p, j]] - m;
        }
        a[p] = sum / narrays as f64;
    }
    (mub, a)
}

/// `fitmixture(log2e, mixprop, niter)`: `log2e` is the `nprobes × narrays`
/// matrix of log2 expression, `mixprop` the per-array mixing proportion of
/// sample 1. Returns per-probe `A`, `M` and residual `stdev`.
pub fn fitmixture(log2e: &Array2<f64>, mixprop: &[f64], niter: usize) -> FitMixture {
    let nprobes = log2e.nrows();
    let narrays = log2e.ncols();
    let ln2 = std::f64::consts::LN_2;
    let pm: Vec<f64> = mixprop.iter().map(|&m| 2.0 * m - 1.0).collect();

    // Linear-model starting values: regress intensities y = 2^log2e on the two
    // columns [mixprop, 1-mixprop], floor coefficients at 1, take b = log ratio.
    let mut xtx = [[0.0_f64; 2]; 2];
    for &mp in mixprop {
        let (x0, x1) = (mp, 1.0 - mp);
        xtx[0][0] += x0 * x0;
        xtx[0][1] += x0 * x1;
        xtx[1][1] += x1 * x1;
    }
    xtx[1][0] = xtx[0][1];
    let det = xtx[0][0] * xtx[1][1] - xtx[0][1] * xtx[1][0];
    let inv = [
        [xtx[1][1] / det, -xtx[0][1] / det],
        [-xtx[1][0] / det, xtx[0][0] / det],
    ];

    let mut z = Array2::<f64>::zeros((nprobes, narrays));
    let mut b = vec![0.0; nprobes];
    for p in 0..nprobes {
        let (mut xty0, mut xty1) = (0.0, 0.0);
        for j in 0..narrays {
            let l = log2e[[p, j]];
            z[[p, j]] = l * ln2;
            let y = l.exp2();
            xty0 += mixprop[j] * y;
            xty1 += (1.0 - mixprop[j]) * y;
        }
        let s0 = (inv[0][0] * xty0 + inv[0][1] * xty1).max(1.0);
        let s1 = (inv[1][0] * xty0 + inv[1][1] * xty1).max(1.0);
        b[p] = s0.ln() - s1.ln();
    }

    // Gauss-Newton iterations on the per-probe b (= M·ln2).
    for _ in 0..niter {
        let (mub, a) = mub_and_offset(&b, &z, &pm);
        for p in 0..nprobes {
            let th = (b[p] / 2.0).tanh();
            let mut dmu = vec![0.0; narrays];
            let mut dmu_mean = 0.0;
            for j in 0..narrays {
                dmu[j] = (th + pm[j]) / (1.0 + th * pm[j]) / 2.0;
                dmu_mean += dmu[j];
            }
            dmu_mean /= narrays as f64;
            let (mut num, mut den) = (0.0, 0.0);
            for j in 0..narrays {
                let mu = a[p] + mub[[p, j]];
                num += dmu[j] * (z[[p, j]] - mu);
                let dd = dmu[j] - dmu_mean;
                den += dd * dd;
            }
            b[p] += (num / narrays as f64) / (1e-6 + den / narrays as f64);
        }
    }

    // Final offsets and residual standard deviations.
    let (mub, a) = mub_and_offset(&b, &z, &pm);
    let scale = (narrays as f64 / (narrays as f64 - 2.0) / narrays as f64).sqrt();
    let mut stdev = vec![0.0; nprobes];
    for p in 0..nprobes {
        let mut ss = 0.0;
        for j in 0..narrays {
            let r = z[[p, j]] - (a[p] + mub[[p, j]]);
            ss += r * r;
        }
        stdev[p] = ss.sqrt() * scale / ln2;
    }

    FitMixture {
        a: a.iter().map(|&v| v / ln2).collect(),
        m: b.iter().map(|&v| v / ln2).collect(),
        stdev,
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::array;

    fn close(a: &[f64], b: &[f64], tol: f64) -> bool {
        a.len() == b.len()
            && a.iter()
                .zip(b)
                .all(|(&x, &y)| (x - y).abs() <= tol + tol * y.abs())
    }

    #[test]
    fn fitmixture_matches_r() {
        // Reference: fitmixture(log2e, mixprop, niter=4) in limma 3.68.3, on a
        // model-consistent titration (mixprop 0..1) with mild noise.
        let log2e = array![
            [
                4.3904760172447,
                5.80204869625303,
                6.58880543876819,
                7.0317975199035,
                7.34222007491416,
                7.62233273319441
            ],
            [
                8.61562128120492,
                8.44189831411776,
                8.13670008266351,
                7.55564296226014,
                6.81782517390952,
                5.63099272063628
            ],
            [
                6.34008451545423,
                8.0396611674378,
                8.73791188700199,
                9.17095797713186,
                9.66382947418352,
                9.87762613040235
            ],
            [
                6.03164313024805,
                5.82338721016494,
                5.36457218491286,
                4.80397607212745,
                4.32376814079242,
                3.02300486774156
            ],
            [
                4.92710401176557,
                6.38918171782614,
                7.10185838995849,
                7.65096816800767,
                8.03727417840249,
                8.19681894669787
            ]
        ];
        let mixprop = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0];
        let o = fitmixture(&log2e, &mixprop, 4);
        assert!(close(
            &o.a,
            &[
                6.01927142601979,
                7.13664451612898,
                8.12130966885468,
                4.51486665208882,
                6.58763605034809
            ],
            1e-9
        ));
        assert!(close(
            &o.m,
            &[
                3.25818790500491,
                -3.0629176507105,
                3.54802170625494,
                -2.99367405470867,
                3.33559775914428
            ],
            1e-9
        ));
        assert!(close(
            &o.stdev,
            &[
                0.0360867991962196,
                0.0785007679371339,
                0.0508483212078436,
                0.0857081603616798,
                0.0535213699021616
            ],
            1e-9
        ));
    }
}