limma-rust 0.1.0

Pure-Rust port of the Bioconductor limma differential-expression package
Documentation
//! Moderated t-tests relative to a fold-change threshold. Port of limma's
//! `treat`/`topTreat` (`treat.R`), default path only: `trend=FALSE`,
//! `robust=FALSE`, `upshot=FALSE`. Trend/robust variants depend on the trended
//! and robust `squeezeVar` paths, which are not yet ported.

use anyhow::{bail, Result};
use ndarray::Array2;

use crate::ebayes::squeeze_var;
use crate::fit::MArrayLM;
use crate::special::t_sf;

/// Compute treat statistics in place on a fitted model. `lfc` is the absolute
/// log2 fold-change threshold (R's `treat(fc=)` corresponds to `lfc =
/// log2(fc)`). The fit must come from [`crate::fit::lmfit`] (optionally through
/// [`crate::contrasts_fit`]) and carry `sigma` and `df_residual`.
///
/// Fills `t`, `p.value`, `df.prior`, `s2.prior`, `s2.post` and `df.total`, and
/// clears the B-statistic (`lods`): treat does not produce one.
pub fn treat(fit: &mut MArrayLM, lfc: f64) -> Result<()> {
    let n_genes = fit.n_genes();
    let ncoef = fit.n_coef();

    let df_max = fit.df_residual.iter().cloned().fold(f64::MIN, f64::max);
    if df_max == 0.0 {
        bail!("no residual degrees of freedom in linear model fits");
    }
    if !fit.sigma.iter().any(|v| v.is_finite()) {
        bail!("no finite residual standard deviations");
    }

    let var = fit.sigma.mapv(|s| s * s);
    let sq = squeeze_var(&var, &fit.df_residual, None, false)?;
    let df_prior = sq.df_prior;
    let s2_prior = sq.var_prior;
    let s2_post = sq.var_post;

    // Total degrees of freedom, capped at the pooled residual d.f.
    let df_pooled: f64 = fit.df_residual.iter().filter(|v| v.is_finite()).sum();
    let mut df_total = ndarray::Array1::<f64>::zeros(n_genes);
    for g in 0..n_genes {
        df_total[g] = (fit.df_residual[g] + df_prior[g]).min(df_pooled);
    }

    let lfc = lfc.abs();
    let mut t = Array2::<f64>::zeros((n_genes, ncoef));
    let mut p_value = Array2::<f64>::zeros((n_genes, ncoef));
    for g in 0..n_genes {
        let se_scale = s2_post[g].sqrt();
        let df = df_total[g];
        for j in 0..ncoef {
            let coef = fit.coefficients[[g, j]];
            let acoef = coef.abs();
            let se = fit.stdev_unscaled[[g, j]] * se_scale;
            let tstat_right = (acoef - lfc) / se;
            let tstat_left = (acoef + lfc) / se;
            // Two one-sided upper tails (computed from the un-floored right
            // statistic, matching the order of operations in treat.R).
            p_value[[g, j]] = t_sf(tstat_right, df) + t_sf(tstat_left, df);
            let tr = tstat_right.max(0.0);
            // `t` keeps a sign only when the effect clears the threshold; NaN
            // coefficients fall through to zero (R sets them to 0 first).
            t[[g, j]] = if coef > lfc {
                tr
            } else if coef < -lfc {
                -tr
            } else {
                0.0
            };
        }
    }

    fit.df_prior = Some(df_prior);
    fit.s2_prior = Some(s2_prior);
    fit.s2_post = Some(s2_post);
    fit.df_total = Some(df_total);
    fit.t = Some(t);
    fit.p_value = Some(p_value);
    // treat produces no B-statistic, F-statistic or per-coef prior variance.
    fit.lods = None;
    fit.var_prior = None;
    fit.proportion = None;
    fit.f_stat = None;
    fit.f_p_value = None;
    Ok(())
}