limma-rust 0.1.0

//! Two-colour within-array normalization. Port of limma's `MA.RG`
//! (`RGList` -> `MAList`), `RG.MA` (the inverse), and `normalizeWithinArrays`
//! for the deterministic intensity-dependent methods none/median/loess/
//! printtiploess. The composite/control methods (which call R's `stats::loess`
//! restricted to control spots) and robustspline (`normalizeRobustSpline`) are
//! out of scope for this pure-Rust statistical port.

use anyhow::{bail, Result};
use ndarray::Array2;

use crate::lowess::loess_fit;
use crate::norm::median_finite;
use crate::normexp::{background_correct_matrix, BackgroundMethod};
use crate::weightedmedian::weighted_median;

/// `MA.RG`: convert foreground/background red–green intensities to an `(M, A)`
/// pair. Each channel is background-corrected with `method`/`offset` (delegating
/// to [`background_correct_matrix`]; the `rb`/`gb` background matrices may be
/// absent), non-positive corrected intensities become `NaN`, and then
/// `M = log2 R - log2 G`, `A = (log2 R + log2 G) / 2`.
pub fn ma_from_rg(
    r: &Array2<f64>,
    g: &Array2<f64>,
    rb: Option<&Array2<f64>>,
    gb: Option<&Array2<f64>>,
    method: BackgroundMethod,
    offset: f64,
) -> (Array2<f64>, Array2<f64>) {
    let rc = background_correct_matrix(r, rb, method, offset);
    let gc = background_correct_matrix(g, gb, method, offset);
    let log2r = rc.mapv(|v| if v <= 0.0 { f64::NAN } else { v.log2() });
    let log2g = gc.mapv(|v| if v <= 0.0 { f64::NAN } else { v.log2() });
    let m = &log2r - &log2g;
    let a = (&log2r + &log2g).mapv(|v| v / 2.0);
    (m, a)
}

/// `RG.MA`: inverse of [`ma_from_rg`] on already-`(M, A)` data, with
/// `R = 2^(A + M/2)` and `G = 2^(A - M/2)`.
pub fn rg_from_ma(m: &Array2<f64>, a: &Array2<f64>) -> (Array2<f64>, Array2<f64>) {
    let r = Array2::from_shape_fn(m.dim(), |(i, j)| (a[[i, j]] + m[[i, j]] / 2.0).exp2());
    let g = Array2::from_shape_fn(m.dim(), |(i, j)| (a[[i, j]] - m[[i, j]] / 2.0).exp2());
    (r, g)
}

/// Print-tip layout: `ngrid_r * ngrid_c` tip groups, each printing
/// `nspot_r * nspot_c` spots. Spots are laid out as contiguous row blocks, one
/// block per tip, so block `t` occupies rows `[t*nspots, (t+1)*nspots)`.
#[derive(Clone, Copy, Debug)]
pub struct PrinterLayout {
    pub ngrid_r: usize,
    pub ngrid_c: usize,
    pub nspot_r: usize,
    pub nspot_c: usize,
}

/// Intensity-dependent within-array normalization method. Mirrors the
/// deterministic subset of limma's `normalizeWithinArrays` `method` choices.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum WithinArrayMethod {
    None,
    Median,
    Loess,
    PrintTipLoess,
}

/// `normalizeWithinArrays` on the `(M, A)` matrix path; returns the normalized
/// `M` matrix (`A` is unchanged). `weights` is an optional full
/// `n_probes x n_arrays` weight matrix — expand a vector beforehand with
/// [`crate::weights::as_matrix_weights`] to match limma's `asMatrixWeights`.
/// `layout` is required for [`WithinArrayMethod::PrintTipLoess`]. `span` and
/// `iterations` follow limma's defaults of `0.3` and `4`.
///
/// `Median` subtracts each column's median (or weighted median when weights are
/// supplied); `Loess` and `PrintTipLoess` replace each column (or each print-tip
/// block) with the residuals of a [`loess_fit`] of `M` on `A`, using limma's
/// `loessFit` weight clamps (`min.weight = 1e-5`, `max.weight = 1e5`,
/// `equal.weights.as.null = TRUE`).
pub fn normalize_within_arrays(
    m: &Array2<f64>,
    a: &Array2<f64>,
    method: WithinArrayMethod,
    weights: Option<&Array2<f64>>,
    span: f64,
    iterations: usize,
    layout: Option<PrinterLayout>,
) -> Result<Array2<f64>> {
    let (nprobes, narrays) = m.dim();
    if a.dim() != (nprobes, narrays) {
        bail!("dimensions of A do not match M");
    }
    if let Some(w) = weights {
        if w.dim() != (nprobes, narrays) {
            bail!("dimensions of weights do not match M");
        }
    }

    let mut out = m.clone();
    match method {
        WithinArrayMethod::None => {}
        WithinArrayMethod::Median => {
            for j in 0..narrays {
                let col: Vec<f64> = (0..nprobes).map(|i| m[[i, j]]).collect();
                let center = match weights {
                    None => median_finite(&col),
                    Some(w) => {
                        let wc: Vec<f64> = (0..nprobes).map(|i| w[[i, j]]).collect();
                        weighted_median(&col, Some(&wc), true)
                    }
                };
                for i in 0..nprobes {
                    out[[i, j]] -= center;
                }
            }
        }
        WithinArrayMethod::Loess => {
            for j in 0..narrays {
                let y: Vec<f64> = (0..nprobes).map(|i| m[[i, j]]).collect();
                let x: Vec<f64> = (0..nprobes).map(|i| a[[i, j]]).collect();
                let w = weights.map(|w| (0..nprobes).map(|i| w[[i, j]]).collect::<Vec<f64>>());
                let fit = loess_fit(&y, &x, w.as_deref(), span, iterations, 1e-5, 1e5, true);
                for i in 0..nprobes {
                    out[[i, j]] = fit.residuals[i];
                }
            }
        }
        WithinArrayMethod::PrintTipLoess => {
            let layout = match layout {
                Some(l) => l,
                None => bail!("layout argument required for printtiploess"),
            };
            let nspots = layout.nspot_r * layout.nspot_c;
            let ntips = layout.ngrid_r * layout.ngrid_c;
            if ntips * nspots != nprobes {
                bail!("printer layout information does not match M row dimension");
            }
            for j in 0..narrays {
                for t in 0..ntips {
                    let start = t * nspots;
                    let y: Vec<f64> = (start..start + nspots).map(|i| m[[i, j]]).collect();
                    let x: Vec<f64> = (start..start + nspots).map(|i| a[[i, j]]).collect();
                    let w = weights.map(|w| {
                        (start..start + nspots)
                            .map(|i| w[[i, j]])
                            .collect::<Vec<f64>>()
                    });
                    let fit = loess_fit(&y, &x, w.as_deref(), span, iterations, 1e-5, 1e5, true);
                    for (k, i) in (start..start + nspots).enumerate() {
                        out[[i, j]] = fit.residuals[k];
                    }
                }
            }
        }
    }
    Ok(out)
}

#[cfg(test)]
#[allow(clippy::excessive_precision)]
mod tests {
    use super::*;

    fn rclose(a: f64, b: f64) -> bool {
        if a.is_nan() && b.is_nan() {
            return true;
        }
        (a - b).abs() <= 1e-8 * (1.0 + b.abs())
    }

    fn assert_mat(out: &Array2<f64>, expected: &[[f64; 2]], label: &str) {
        assert_eq!(out.nrows(), expected.len(), "{label}: row count");
        for i in 0..expected.len() {
            for j in 0..2 {
                assert!(
                    rclose(out[[i, j]], expected[i][j]),
                    "{label}[{i},{j}]: {} vs {}",
                    out[[i, j]],
                    expected[i][j]
                );
            }
        }
    }

    /// 16x2 RGList of positive integers plus a 16x2 weight matrix, built from
    /// the same rational formulas as `scratch/normwithin_ref.R` so the inputs
    /// reconstruct bit-identically.
    fn rg_fixture() -> (Array2<f64>, Array2<f64>, Array2<f64>) {
        let (nprobe, narray) = (16usize, 2usize);
        let mut r = Array2::zeros((nprobe, narray));
        let mut g = Array2::zeros((nprobe, narray));
        let mut w = Array2::zeros((nprobe, narray));
        for g0 in 0..nprobe {
            for j0 in 0..narray {
                let (gi, ji) = (g0 as i64, j0 as i64);
                r[[g0, j0]] = (120 + gi * 8 + ji * 15 + ((gi * 3 + ji * 5) % 7) * 4) as f64;
                g[[g0, j0]] = (90 + gi * 6 + ji * 11 + ((gi * 7 + ji * 2) % 9) * 5) as f64;
                w[[g0, j0]] = 0.5 + ((gi * 5 + ji * 3) % 6) as f64 * 0.2;
            }
        }
        (r, g, w)
    }

    const MA_RG_M: [[f64; 2]; 16] = [
        [0.41503749927884392, 0.48170853892413135],
        [0.095860015407516208, 0.45820535843521704],
        [0.33324340811519715, 0.17425092684510179],
        [0.30541300810434535, 0.14295795384204357],
        [0.53144699139415419, 0.35453276031929004],
        [0.035623909730721159, 0.54916177929330967],
        [0.23815973719476435, 0.10982327795275104],
        [0.2115041051937121, 0.28647096107046011],
        [0.40525647848625823, 0.26445648090299212],
        [0.58496250072115608, 0.43457768567448873],
        [0.16905825762477988, 0.41727597147484374],
        [0.33304412708153563, 0.2183054638132047],
        [0.31375416344166229, 0.36499681677924833],
        [0.47226236797179411, 0.34845438939755002],
        [0.11651400872642448, 0.49084032335660677],
        [0.26303440583379434, 0.16505924627049762],
    ];

    const MA_RG_A: [[f64; 2]; 16] = [
        [6.6993718459690967, 7.0352701358121719],
        [7.0813530092412087, 6.9705696656187559],
        [7.1553063908297645, 7.2965788290515015],
        [7.0952210093914125, 7.2414039783633335],
        [7.1605412590050204, 7.3065493971046109],
        [7.3397400497527236, 7.3620437308969944],
        [7.4044820874596304, 7.5225171890593732],
        [7.3536795660404408, 7.5778637081719546],
        [7.4120816048720792, 7.53310767673368],
        [7.4624062518028902, 7.5836110570830604],
        [7.6159105893287018, 7.5395548638520387],
        [7.6663679506239735, 7.7673642146583965],
        [7.6244826318038292, 7.8118550284692336],
        [7.6707594116226216, 7.7741400368859033],
        [7.7997239907643596, 7.8152757700092508],
        [7.8457627205830196, 7.9342786645513055],
    ];

    const MEDIAN_W: [[f64; 2]; 16] = [
        [0.10962449117449857, 0.12717577860484131],
        [-0.20955299269682914, 0.10367259811592699],
        [0.027830400010851797, -0.18028183347418825],
        [0.0, -0.21157480647724647],
        [0.22603398328980884, 0.0],
        [-0.26978909837362419, 0.19462901897401963],
        [-0.067253270909580998, -0.244709482366539],
        [-0.093908902910633252, -0.068061799248829935],
        [0.09984347038191288, -0.090076279416297922],
        [0.27954949261681072, 0.080044925355198693],
        [-0.13635475047956547, 0.062743211155553702],
        [0.027631118977190283, -0.13622729650608534],
        [0.0083411553373169411, 0.010464056459958293],
        [0.16684935986744875, -0.0060783709217400173],
        [-0.18889899937792087, 0.13630756303731673],
        [-0.042378602270551013, -0.18947351404879242],
    ];

    const MEDIAN_NW: [[f64; 2]; 16] = [
        [0.1054539135058401, 0.13021496406571131],
        [-0.21372357036548761, 0.106711783576797],
        [0.023659822342193326, -0.17724264801331824],
        [-0.0041705776686584706, -0.20853562101637646],
        [0.22186340562115037, 0.0030391854608700086],
        [-0.27395967604228266, 0.19766820443488964],
        [-0.071423848578239468, -0.24167029690566899],
        [-0.098079480579291722, -0.065022613787959926],
        [0.095672892713254409, -0.087037093955427913],
        [0.27537891494815225, 0.083084110816068701],
        [-0.14052532814822394, 0.065782396616423711],
        [0.023460541308531813, -0.13318811104521533],
        [0.0041705776686584706, 0.013503241920828302],
        [0.16267878219879028, -0.0030391854608700086],
        [-0.19306957704657934, 0.13934674849818673],
        [-0.046549179939209484, -0.18643432858792242],
    ];

    const LOESS_W: [[f64; 2]; 16] = [
        [0.00055153930084905767, 0.040766574671558864],
        [-0.05462147727184874, -0.031536546731143034],
        [-0.083729258405399243, -0.12846041655967877],
        [0.10480902294731109, 0.034911045736998325],
        [0.09627798945378796, 0.016020662298251942],
        [-0.077779198347911027, 0.00038373974104910502],
        [-0.10952844588387034, -0.014499911899852336],
        [0.04658188483750525, -0.053388739684320896],
        [0.027754294544550362, -0.020896786039700466],
        [-0.0010882906757228028, 0.10054069295286538],
        [-0.15531488856191, 0.050255652330072564],
        [-0.00065330016751252717, -0.045214392284930405],
        [0.048617415339603731, 0.068999777369400661],
        [0.13926875256791815, 0.060288430542558835],
        [-0.10202136195850819, 0.19672064993775162],
        [0.080029632718627397, -0.056917140958086421],
    ];

    const LOESS_NW: [[f64; 2]; 16] = [
        [0.00019866839190113561, 0.052089024255749572],
        [-0.0071443390267586226, -0.031013979820145199],
        [-0.12337619317269455, -0.083454141763385392],
        [0.014405249188203129, 0.0061021005317352106],
        [0.058572163128462251, 0.053685820650237748],
        [-0.022654346312755431, -0.0050963209323636738],
        [-0.067806670626851084, 0.0084931005176177021],
        [0.044129881046102576, -0.081071603211831045],
        [0.057408864779656454, -0.023930936344493725],
        [-0.0044569372157868825, 0.052364421661424354],
        [-0.017566722061604867, 0.014048973574872226],
        [-0.068824788806081671, -0.016670189235056643],
        [0.029708142677324356, -0.060772958164841795],
        [0.054009937039857014, 0.026403796859023776],
        [-0.0019611190047384292, 0.051238823650141874],
        [0.0006145261927541501, -0.00028034644366714545],
    ];

    const PTL_W: [[f64; 2]; 16] = [
        [0.057442612862069288, 0.050210159050610603],
        [-0.15367694883732977, -0.042914759848677944],
        [0.10462697482950263, 0.023936987064198948],
        [0.059799135951460455, -0.066727564073601572],
        [0.028994964361376607, -0.087523395525124892],
        [-0.17681878527877404, 0.15695329404950134],
        [0.13049339157281115, -0.13824025525783132],
        [0.021620657322331531, 0.088122324229944468],
        [-0.081404690682113659, -0.12133937127726957],
        [0.14551881405230987, 0.083158932563343516],
        [-0.12635881757690007, 0.035868649106876822],
        [0.084969054640595579, -0.0080345231137678996],
        [-0.13233555553422785, -0.003760362730623472],
        [0.082385589860677996, -0.069634857554475182],
        [-0.11670800020386694, 0.12655755164241353],
        [0.085736175673749124, -0.043565030693951998],
    ];

    #[test]
    fn ma_rg_and_round_trip_match_r() {
        let (r, g, _w) = rg_fixture();
        let (m, a) = ma_from_rg(&r, &g, None, None, BackgroundMethod::Subtract, 0.0);
        assert_mat(&m, &MA_RG_M, "MA.RG M");
        assert_mat(&a, &MA_RG_A, "MA.RG A");

        // RG.MA inverts MA.RG back to the original intensities.
        let (r2, g2) = rg_from_ma(&m, &a);
        for i in 0..16 {
            for j in 0..2 {
                assert!(rclose(r2[[i, j]], r[[i, j]]), "RG.MA R[{i},{j}]");
                assert!(rclose(g2[[i, j]], g[[i, j]]), "RG.MA G[{i},{j}]");
            }
        }
    }

    #[test]
    fn normalize_within_arrays_methods_match_r() {
        let (r, g, w) = rg_fixture();
        let (m, a) = ma_from_rg(&r, &g, None, None, BackgroundMethod::Subtract, 0.0);
        let layout = Some(PrinterLayout {
            ngrid_r: 2,
            ngrid_c: 2,
            nspot_r: 2,
            nspot_c: 2,
        });

        // none: M is returned unchanged.
        let none =
            normalize_within_arrays(&m, &a, WithinArrayMethod::None, None, 0.3, 4, layout).unwrap();
        assert_eq!(none, m);

        let med_w =
            normalize_within_arrays(&m, &a, WithinArrayMethod::Median, Some(&w), 0.3, 4, None)
                .unwrap();
        assert_mat(&med_w, &MEDIAN_W, "median weighted");

        let med_nw =
            normalize_within_arrays(&m, &a, WithinArrayMethod::Median, None, 0.3, 4, None).unwrap();
        assert_mat(&med_nw, &MEDIAN_NW, "median noweights");

        let loess_w =
            normalize_within_arrays(&m, &a, WithinArrayMethod::Loess, Some(&w), 0.3, 4, None)
                .unwrap();
        assert_mat(&loess_w, &LOESS_W, "loess weighted");

        let loess_nw =
            normalize_within_arrays(&m, &a, WithinArrayMethod::Loess, None, 0.3, 4, None).unwrap();
        assert_mat(&loess_nw, &LOESS_NW, "loess noweights");

        let ptl_w = normalize_within_arrays(
            &m,
            &a,
            WithinArrayMethod::PrintTipLoess,
            Some(&w),
            0.3,
            4,
            layout,
        )
        .unwrap();
        assert_mat(&ptl_w, &PTL_W, "printtiploess weighted");

        // printtiploess with no weights uses classic lowess on 4-spot tips,
        // which interpolates exactly -> all-zero residuals.
        let ptl_nw = normalize_within_arrays(
            &m,
            &a,
            WithinArrayMethod::PrintTipLoess,
            None,
            0.3,
            4,
            layout,
        )
        .unwrap();
        for i in 0..16 {
            for j in 0..2 {
                assert!(
                    rclose(ptl_nw[[i, j]], 0.0),
                    "printtiploess noweights[{i},{j}]"
                );
            }
        }
    }
}