limma-rust 0.1.0

Pure-Rust port of the Bioconductor limma differential-expression package
Documentation
//! Weight utilities (limma `weights.R`).
//!
//! Pure-Rust ports of [`modify_weights`] (limma's `modifyWeights`): scale the
//! observation weights of probes whose status matches given labels by
//! per-label multipliers; and [`as_matrix_weights`] (`asMatrixWeights`):
//! expand probe-, array- or scalar-weights to a full `n_genes x n_arrays`
//! matrix. The two-colour quality-weight helpers (`wtarea`, `wtflags`,
//! `wtIgnore.Filter`) operate on named spot-quality columns and are out of
//! scope for the numeric port.

use anyhow::{bail, Result};
use ndarray::Array2;

/// Multiply the weights of every probe (row) whose `status` equals `values[k]`
/// by `multipliers[k]`. `multipliers` is recycled when it has length 1.
/// Returns a new matrix; `weights` is left unchanged.
///
/// Panics if `status.len()` differs from `weights.nrows()`, or if
/// `multipliers` is neither length 1 nor `values.len()` — mirroring the
/// `stop()` guards in limma.
pub fn modify_weights<S: AsRef<str>>(
    weights: &Array2<f64>,
    status: &[S],
    values: &[S],
    multipliers: &[f64],
) -> Array2<f64> {
    assert_eq!(
        status.len(),
        weights.nrows(),
        "nrows of weights must equal length of status"
    );
    let nvalues = values.len();
    let mult: Vec<f64> = if multipliers.len() == 1 {
        vec![multipliers[0]; nvalues]
    } else {
        assert_eq!(
            multipliers.len(),
            nvalues,
            "no. values doesn't match no. multipliers"
        );
        multipliers.to_vec()
    };

    let mut out = weights.clone();
    for (v, value) in values.iter().enumerate() {
        let target = value.as_ref();
        for (i, s) in status.iter().enumerate() {
            if s.as_ref() == target {
                for j in 0..out.ncols() {
                    out[[i, j]] *= mult[v];
                }
            }
        }
    }
    out
}

/// `asMatrixWeights(weights, dim)`: expand a weight vector or matrix to a full
/// `dim = (n_rows, n_cols)` matrix, recycling exactly as limma does.
///
/// With `dim = None` the input is returned unchanged. Otherwise:
/// * a matrix already of shape `dim` is returned as-is;
/// * a `1 x n_cols` row matrix is recycled across rows (array weights);
/// * a length-1 or length-`n_rows` vector is recycled across columns
///   (probe weights);
/// * a length-`n_cols` vector is recycled across rows (array weights).
///
/// The cosmetic `"arrayweights"` attribute that limma attaches is not
/// represented. Errors mirror limma's `stop()` guards (zero dimensions,
/// unexpected shape or size). A vector argument should be passed as an
/// `n x 1` column (R coerces plain vectors that way).
pub fn as_matrix_weights(
    weights: &Array2<f64>,
    dim: Option<(usize, usize)>,
) -> Result<Array2<f64>> {
    let (nr, nc) = match dim {
        None => return Ok(weights.clone()),
        Some(d) => d,
    };
    if nr == 0 || nc == 0 {
        bail!("zero or negative dimensions not allowed");
    }
    let (dwr, dwc) = weights.dim();
    // Full matrix already.
    if dwr == nr && dwc == nc {
        return Ok(weights.clone());
    }
    if dwr.min(dwc) != 1 {
        bail!("weights is of unexpected shape");
    }
    let flat: Vec<f64> = weights.iter().copied().collect();
    let lw = flat.len();
    // Row matrix (1 x n_cols) of array weights -> recycle by row.
    if dwc > 1 && dwc == nc {
        return Ok(fill_byrow(&flat, nr, nc));
    }
    // Probe weights (length 1 or n_rows) -> recycle column-major.
    if lw == 1 || lw == nr {
        return Ok(fill_colmajor(&flat, nr, nc));
    }
    // Array weights (length n_cols) -> recycle by row.
    if lw == nc {
        return Ok(fill_byrow(&flat, nr, nc));
    }
    bail!("weights is of unexpected size");
}

/// R `matrix(flat, nr, nc)`: column-major fill with recycling.
fn fill_colmajor(flat: &[f64], nr: usize, nc: usize) -> Array2<f64> {
    Array2::from_shape_fn((nr, nc), |(i, j)| flat[(j * nr + i) % flat.len()])
}

/// R `matrix(flat, nr, nc, byrow = TRUE)`: row-major fill with recycling.
fn fill_byrow(flat: &[f64], nr: usize, nc: usize) -> Array2<f64> {
    Array2::from_shape_fn((nr, nc), |(i, j)| flat[(i * nc + j) % flat.len()])
}

#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::array;

    fn weights() -> Array2<f64> {
        array![[1.0, 0.5], [2.0, 2.0], [1.0, 1.0], [0.25, 4.0]]
    }

    #[test]
    fn two_values_matches_r() {
        let status = ["gene", "control", "gene", "spike"];
        let got = modify_weights(&weights(), &status, &["control", "spike"], &[0.0, 3.0]);
        let want = array![[1.0, 0.5], [0.0, 0.0], [1.0, 1.0], [0.75, 12.0]];
        assert_eq!(got, want);
    }

    #[test]
    fn scalar_multiplier_recycles() {
        let status = ["gene", "control", "gene", "spike"];
        let got = modify_weights(&weights(), &status, &["control", "spike"], &[2.0]);
        let want = array![[1.0, 0.5], [4.0, 4.0], [1.0, 1.0], [0.5, 8.0]];
        assert_eq!(got, want);
    }

    #[test]
    fn default_unit_weights() {
        let status = ["gene", "control", "gene", "spike"];
        let w = Array2::<f64>::ones((4, 1));
        let got = modify_weights(&w, &status, &["gene"], &[5.0]);
        let want = array![[5.0], [1.0], [5.0], [1.0]];
        assert_eq!(got, want);
    }

    #[test]
    fn as_matrix_weights_matches_r() {
        // probe weights (length == nrow) recycle across columns
        let pw = array![[1.0], [2.0], [3.0], [4.0]];
        assert_eq!(
            as_matrix_weights(&pw, Some((4, 3))).unwrap(),
            array![
                [1.0, 1.0, 1.0],
                [2.0, 2.0, 2.0],
                [3.0, 3.0, 3.0],
                [4.0, 4.0, 4.0]
            ]
        );
        // array weights (length == ncol) recycle across rows
        let aw = array![[10.0], [20.0], [30.0]];
        assert_eq!(
            as_matrix_weights(&aw, Some((4, 3))).unwrap(),
            array![
                [10.0, 20.0, 30.0],
                [10.0, 20.0, 30.0],
                [10.0, 20.0, 30.0],
                [10.0, 20.0, 30.0]
            ]
        );
        // scalar fills the whole matrix
        let s = array![[0.5]];
        assert_eq!(
            as_matrix_weights(&s, Some((2, 3))).unwrap(),
            Array2::from_elem((2, 3), 0.5)
        );
        // full matrix (column-major like R matrix(1:6,2,3)) returned unchanged
        let fm = array![[1.0, 3.0, 5.0], [2.0, 4.0, 6.0]];
        assert_eq!(as_matrix_weights(&fm, Some((2, 3))).unwrap(), fm);
        // square target: plain length-n vector is treated as probe weights
        let sq = array![[1.0], [2.0], [3.0]];
        assert_eq!(
            as_matrix_weights(&sq, Some((3, 3))).unwrap(),
            array![[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]]
        );
        // 1 x ncol row matrix of array weights recycles by row
        let rm = array![[7.0, 8.0, 9.0]];
        assert_eq!(
            as_matrix_weights(&rm, Some((4, 3))).unwrap(),
            array![
                [7.0, 8.0, 9.0],
                [7.0, 8.0, 9.0],
                [7.0, 8.0, 9.0],
                [7.0, 8.0, 9.0]
            ]
        );
        // dim = None returns the input unchanged
        assert_eq!(as_matrix_weights(&pw, None).unwrap(), pw);
    }

    #[test]
    fn as_matrix_weights_errors() {
        let v = array![[1.0], [2.0], [3.0], [4.0]];
        // length 4 matches neither nrow (2) nor ncol (3)
        assert!(as_matrix_weights(&v, Some((2, 3))).is_err());
        // zero dimension
        assert!(as_matrix_weights(&v, Some((0, 3))).is_err());
        // 2x2 block is neither full nor a vector
        let blk = array![[1.0, 2.0], [3.0, 4.0]];
        assert!(as_matrix_weights(&blk, Some((4, 3))).is_err());
    }
}