limma-rust 0.1.0

Pure-Rust port of the Bioconductor limma differential-expression package
Documentation
//! Ranked result tables. Port of limma's `topTable`
//! (`topTable` / `topTableF`) plus the Benjamini-Hochberg
//! adjustment provided by R's `p.adjust(method = "BH")`.

use anyhow::{bail, Result};

use crate::fit::MArrayLM;

/// Benjamini-Hochberg step-up FDR adjustment. Matches
/// R `p.adjust(p, method = "BH")`.
pub fn p_adjust_bh(p: &[f64]) -> Vec<f64> {
    let n = p.len();
    let mut order: Vec<usize> = (0..n).collect();
    // Ascending by p-value.
    order.sort_by(|&a, &b| p[a].partial_cmp(&p[b]).unwrap());
    let mut adj = vec![0.0_f64; n];
    let mut cummin = f64::INFINITY;
    // Walk from largest p-value to smallest, enforcing monotonicity.
    for k in (0..n).rev() {
        let rank = (k + 1) as f64;
        // Match R's `cummin(n/i * p[o])` operation order (division first) so the
        // adjusted values are bit-identical to `p.adjust(method = "BH")`.
        let val = n as f64 / rank * p[order[k]];
        cummin = cummin.min(val);
        adj[order[k]] = cummin.min(1.0);
    }
    adj
}

/// Sorting criterion for [`top_table`].
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum SortBy {
    LogFc,
    AveExpr,
    PValue,
    T,
    B,
    None,
}

impl SortBy {
    pub fn parse(s: &str) -> Result<Self> {
        Ok(match s.to_ascii_lowercase().as_str() {
            "logfc" | "log2foldchange" | "m" => SortBy::LogFc,
            "aveexpr" | "a" | "amean" => SortBy::AveExpr,
            "p" | "pvalue" => SortBy::PValue,
            "t" => SortBy::T,
            "b" => SortBy::B,
            "none" => SortBy::None,
            other => bail!("invalid sort_by '{}'", other),
        })
    }
}

/// One row of a single-contrast moderated-t table (`_topTableT`).
#[derive(Clone, Debug)]
pub struct TopRow {
    pub id: String,
    pub log2_fold_change: f64,
    pub lfc_se: f64,
    pub ave_expr: f64,
    pub t: f64,
    pub p_value: f64,
    pub adj_p_value: f64,
    pub b: f64,
}

/// Extract the top-ranked genes for a single coefficient/contrast. Port of
/// `_topTableT`. `number == None` returns all rows.
pub fn top_table(
    fit: &MArrayLM,
    coef: usize,
    sort_by: SortBy,
    number: Option<usize>,
) -> Result<Vec<TopRow>> {
    let t = fit
        .t
        .as_ref()
        .ok_or_else(|| anyhow::anyhow!("need to run eBayes first"))?;
    let p = fit.p_value.as_ref().unwrap();
    // treat fits carry no B-statistic; report NaN (written as NA) for it.
    let lods = fit.lods.as_ref();
    if sort_by == SortBy::B && lods.is_none() {
        bail!("cannot sort by B: this fit has no B-statistic (treat fits don't produce one)");
    }
    let n_genes = fit.n_genes();
    if coef >= fit.n_coef() {
        bail!(
            "coef index {} out of range ({} coefficients)",
            coef,
            fit.n_coef()
        );
    }

    let p_col: Vec<f64> = (0..n_genes).map(|g| p[[g, coef]]).collect();
    let adj = p_adjust_bh(&p_col);

    let mut rows: Vec<TopRow> = (0..n_genes)
        .map(|g| TopRow {
            id: fit.gene_names[g].clone(),
            log2_fold_change: fit.coefficients[[g, coef]],
            lfc_se: fit.stdev_unscaled[[g, coef]],
            ave_expr: fit.amean[g],
            t: t[[g, coef]],
            p_value: p[[g, coef]],
            adj_p_value: adj[g],
            b: lods.map(|m| m[[g, coef]]).unwrap_or(f64::NAN),
        })
        .collect();

    sort_rows(&mut rows, sort_by);

    if let Some(num) = number {
        rows.truncate(num);
    }
    Ok(rows)
}

/// Summary table of top genes from a [`crate::treat`] fit. Port of `topTreat`:
/// `topTable` restricted to a single coefficient, defaulting to sort by
/// p-value, with B-sorting disallowed (treat has no B-statistic).
pub fn top_treat(
    fit: &MArrayLM,
    coef: usize,
    sort_by: SortBy,
    number: Option<usize>,
) -> Result<Vec<TopRow>> {
    if sort_by == SortBy::B {
        bail!("topTreat cannot sort by B: treat doesn't produce a B-statistic");
    }
    top_table(fit, coef, sort_by, number)
}

fn sort_rows(rows: &mut [TopRow], sort_by: SortBy) {
    match sort_by {
        SortBy::LogFc => {
            rows.sort_by(|a, b| {
                b.log2_fold_change
                    .abs()
                    .partial_cmp(&a.log2_fold_change.abs())
                    .unwrap()
            });
        }
        SortBy::AveExpr => {
            rows.sort_by(|a, b| b.ave_expr.partial_cmp(&a.ave_expr).unwrap());
        }
        SortBy::PValue => {
            rows.sort_by(|a, b| a.p_value.partial_cmp(&b.p_value).unwrap());
        }
        SortBy::T => {
            rows.sort_by(|a, b| b.t.abs().partial_cmp(&a.t.abs()).unwrap());
        }
        SortBy::B => {
            rows.sort_by(|a, b| b.b.partial_cmp(&a.b).unwrap());
        }
        SortBy::None => {}
    }
}

/// One row of a multi-contrast moderated-F table (`_topTableF`).
#[derive(Clone, Debug)]
pub struct TopRowF {
    pub id: String,
    pub ave_expr: f64,
    pub f: f64,
    pub p_value: f64,
    pub adj_p_value: f64,
}

/// Extract top-ranked genes by the overall moderated F-statistic. Port of
/// `_topTableF` (sorted by F p-value).
pub fn top_table_f(fit: &MArrayLM, number: Option<usize>) -> Result<Vec<TopRowF>> {
    let f = fit
        .f_stat
        .as_ref()
        .ok_or_else(|| anyhow::anyhow!("need to run eBayes first"))?;
    let fp = fit.f_p_value.as_ref().unwrap();
    let n_genes = fit.n_genes();

    let fp_vec: Vec<f64> = fp.to_vec();
    let adj = p_adjust_bh(&fp_vec);

    let mut rows: Vec<TopRowF> = (0..n_genes)
        .map(|g| TopRowF {
            id: fit.gene_names[g].clone(),
            ave_expr: fit.amean[g],
            f: f[g],
            p_value: fp[g],
            adj_p_value: adj[g],
        })
        .collect();

    rows.sort_by(|a, b| a.p_value.partial_cmp(&b.p_value).unwrap());
    if let Some(num) = number {
        rows.truncate(num);
    }
    Ok(rows)
}