limma-rust 0.1.0

Pure-Rust port of the Bioconductor limma differential-expression package
Documentation
//! 3x3 moving-window filters (limma `background.R`: `ma3x3.matrix`,
//! `ma3x3.spottedarray`). These underlie `backgroundCorrect(method =
//! "movingmin")`, replacing each spot's value by a summary (typically `min`)
//! over its 3x3 spatial neighbourhood. Border neighbours are treated as missing
//! and dropped (`na.rm = TRUE`), as is any non-finite neighbour.

use anyhow::{bail, Result};
use ndarray::Array2;

use crate::normwithin::PrinterLayout;

/// Summary function applied over each 3x3 window. limma defaults to `mean`;
/// `backgroundCorrect(method = "movingmin")` uses `min`.
#[derive(Clone, Copy, Debug)]
pub enum Ma3x3Fun {
    /// Minimum of the finite neighbours (an empty window yields `+Inf`, matching
    /// R's `min(numeric(0), na.rm = TRUE)`).
    Min,
    /// Mean of the finite neighbours (an empty window yields `NaN`).
    Mean,
}

fn summarise(vals: &[f64], fun: Ma3x3Fun) -> f64 {
    match fun {
        // Folds reproduce R's empty-window results exactly: min -> +Inf, mean -> NaN.
        Ma3x3Fun::Min => vals.iter().copied().fold(f64::INFINITY, f64::min),
        Ma3x3Fun::Mean => vals.iter().sum::<f64>() / vals.len() as f64,
    }
}

/// `ma3x3.matrix(x, FUN, na.rm = TRUE)`: replace each cell by `FUN` over its
/// 3x3 neighbourhood (the cell plus its up-to-eight neighbours). Off-grid
/// neighbours and non-finite values are dropped before applying `FUN`.
pub fn ma3x3_matrix(x: &Array2<f64>, fun: Ma3x3Fun) -> Array2<f64> {
    let d1 = x.nrows();
    let d2 = x.ncols();
    let mut out = Array2::<f64>::zeros((d1, d2));
    for r in 0..d1 {
        for c in 0..d2 {
            // A 3x3 window holds at most nine finite neighbours; collect them on
            // the stack instead of heap-allocating a Vec for every cell. The
            // dr/dc traversal order is unchanged, so `summarise` sees the same
            // values in the same order.
            let mut vals = [0.0f64; 9];
            let mut count = 0usize;
            for dr in -1i64..=1 {
                for dc in -1i64..=1 {
                    let rr = r as i64 + dr;
                    let cc = c as i64 + dc;
                    if rr >= 0 && rr < d1 as i64 && cc >= 0 && cc < d2 as i64 {
                        let v = x[[rr as usize, cc as usize]];
                        if v.is_finite() {
                            vals[count] = v;
                            count += 1;
                        }
                    }
                }
            }
            out[[r, c]] = summarise(&vals[..count], fun);
        }
    }
    out
}

/// `ma3x3.spottedarray(x, printer, FUN, na.rm = TRUE)`: apply [`ma3x3_matrix`]
/// to each array column after laying the spot vector out in its physical slide
/// geometry, then map the filtered values back to spot order.
///
/// * `x` — `n_spots x n_arrays`; rows in limma's spot order (`nspot.c` fastest,
///   then `nspot.r`, then `ngrid.c`, then `ngrid.r`).
/// * `layout` — print-tip geometry; `ngrid_r*ngrid_c*nspot_r*nspot_c` must equal
///   `n_spots`.
pub fn ma3x3_spottedarray(
    x: &Array2<f64>,
    layout: PrinterLayout,
    fun: Ma3x3Fun,
) -> Result<Array2<f64>> {
    let nspots = x.nrows();
    let narrays = x.ncols();
    let (gr, gc, sr, sc) = (
        layout.ngrid_r,
        layout.ngrid_c,
        layout.nspot_r,
        layout.nspot_c,
    );
    if gr * gc * sr * sc != nspots {
        bail!("printer layout information does not match the number of spots");
    }
    let phys_rows = sr * gr;
    let phys_cols = sc * gc;

    // Spot index -> physical (row, col): row = nspot.r within ngrid.r,
    // col = nspot.c within ngrid.c.
    let coord = |s: usize| -> (usize, usize) {
        let sc_i = s % sc;
        let sr_i = (s / sc) % sr;
        let gc_i = (s / (sc * sr)) % gc;
        let gr_i = (s / (sc * sr * gc)) % gr;
        (sr_i + sr * gr_i, sc_i + sc * gc_i)
    };

    let mut out = Array2::<f64>::zeros((nspots, narrays));
    for j in 0..narrays {
        let mut p = Array2::<f64>::from_elem((phys_rows, phys_cols), f64::NAN);
        for s in 0..nspots {
            let (row, col) = coord(s);
            p[[row, col]] = x[[s, j]];
        }
        let pf = ma3x3_matrix(&p, fun);
        for s in 0..nspots {
            let (row, col) = coord(s);
            out[[s, j]] = pf[[row, col]];
        }
    }
    Ok(out)
}

#[cfg(test)]
#[allow(clippy::excessive_precision)]
mod tests {
    use super::*;
    use ndarray::array;

    fn rclose(a: f64, b: f64) -> bool {
        (a - b).abs() <= 1e-12 * (1.0 + b.abs())
    }

    fn assert_mat(out: &Array2<f64>, exp: &Array2<f64>, label: &str) {
        assert_eq!(out.dim(), exp.dim(), "{label} dim");
        for ((i, j), &e) in exp.indexed_iter() {
            assert!(
                rclose(out[[i, j]], e),
                "{label}[{i},{j}]: {} vs {e}",
                out[[i, j]]
            );
        }
    }

    fn x43() -> Array2<f64> {
        array![
            [1.0, 5.0, 9.0],
            [2.0, 6.0, 10.0],
            [3.0, 7.0, 11.0],
            [4.0, 8.0, 12.0],
        ]
    }

    #[test]
    fn ma3x3_matrix_min_mean() {
        let x = x43();
        let min_exp = array![
            [1.0, 1.0, 5.0],
            [1.0, 1.0, 5.0],
            [2.0, 2.0, 6.0],
            [3.0, 3.0, 7.0],
        ];
        assert_mat(&ma3x3_matrix(&x, Ma3x3Fun::Min), &min_exp, "min");

        let mean_exp = array![
            [3.5, 5.5, 7.5],
            [4.0, 6.0, 8.0],
            [5.0, 7.0, 9.0],
            [5.5, 7.5, 9.5],
        ];
        assert_mat(&ma3x3_matrix(&x, Ma3x3Fun::Mean), &mean_exp, "mean");
    }

    #[test]
    fn ma3x3_matrix_na_dropped() {
        let mut x = x43();
        x[[1, 1]] = f64::NAN;
        let min_exp = array![
            [1.0, 1.0, 5.0],
            [1.0, 1.0, 5.0],
            [2.0, 2.0, 7.0],
            [3.0, 3.0, 7.0],
        ];
        assert_mat(&ma3x3_matrix(&x, Ma3x3Fun::Min), &min_exp, "minNA");

        let mean_exp = array![
            [2.6666666666666665, 5.4000000000000004, 8.0],
            [3.6000000000000001, 6.0, 8.4000000000000004],
            [4.7999999999999998, 7.125, 9.5999999999999996],
            [5.5, 7.5, 9.5],
        ];
        assert_mat(&ma3x3_matrix(&x, Ma3x3Fun::Mean), &mean_exp, "meanNA");
    }

    #[test]
    fn ma3x3_spottedarray_matches_r() {
        let layout = PrinterLayout {
            ngrid_r: 2,
            ngrid_c: 2,
            nspot_r: 2,
            nspot_c: 2,
        };
        let xs = Array2::from_shape_fn((16, 2), |(s, j)| 10.0 + s as f64 + j as f64 * 100.0);

        let min_exp = array![
            [10.0, 110.0],
            [10.0, 110.0],
            [10.0, 110.0],
            [10.0, 110.0],
            [11.0, 111.0],
            [14.0, 114.0],
            [11.0, 111.0],
            [14.0, 114.0],
            [12.0, 112.0],
            [12.0, 112.0],
            [18.0, 118.0],
            [18.0, 118.0],
            [13.0, 113.0],
            [16.0, 116.0],
            [19.0, 119.0],
            [22.0, 122.0],
        ];
        let got = ma3x3_spottedarray(&xs, layout, Ma3x3Fun::Min).unwrap();
        assert_mat(&got, &min_exp, "spotmin");

        let mean_exp = array![
            [11.5, 111.5],
            [12.666666666666666, 112.66666666666667],
            [13.833333333333334, 113.83333333333333],
            [15.0, 115.0],
            [14.333333333333334, 114.33333333333333],
            [15.5, 115.5],
            [16.666666666666668, 116.66666666666667],
            [17.833333333333332, 117.83333333333333],
            [17.166666666666668, 117.16666666666667],
            [18.333333333333332, 118.33333333333333],
            [19.5, 119.5],
            [20.666666666666668, 120.66666666666667],
            [20.0, 120.0],
            [21.166666666666668, 121.16666666666667],
            [22.333333333333332, 122.33333333333333],
            [23.5, 123.5],
        ];
        let got_mean = ma3x3_spottedarray(&xs, layout, Ma3x3Fun::Mean).unwrap();
        assert_mat(&got_mean, &mean_exp, "spotmean");
    }
}