Skip to main content

limma/
toptable.rs

1//! Ranked result tables. Port of limma's `topTable`
2//! (`topTable` / `topTableF`) plus the Benjamini-Hochberg
3//! adjustment provided by R's `p.adjust(method = "BH")`.
4
5use anyhow::{bail, Result};
6
7use crate::fit::MArrayLM;
8
9/// Benjamini-Hochberg step-up FDR adjustment. Matches
10/// R `p.adjust(p, method = "BH")`.
11pub fn p_adjust_bh(p: &[f64]) -> Vec<f64> {
12    let n = p.len();
13    let mut order: Vec<usize> = (0..n).collect();
14    // Ascending by p-value.
15    order.sort_by(|&a, &b| p[a].partial_cmp(&p[b]).unwrap());
16    let mut adj = vec![0.0_f64; n];
17    let mut cummin = f64::INFINITY;
18    // Walk from largest p-value to smallest, enforcing monotonicity.
19    for k in (0..n).rev() {
20        let rank = (k + 1) as f64;
21        // Match R's `cummin(n/i * p[o])` operation order (division first) so the
22        // adjusted values are bit-identical to `p.adjust(method = "BH")`.
23        let val = n as f64 / rank * p[order[k]];
24        cummin = cummin.min(val);
25        adj[order[k]] = cummin.min(1.0);
26    }
27    adj
28}
29
30/// Sorting criterion for [`top_table`].
31#[derive(Clone, Copy, PartialEq, Eq, Debug)]
32pub enum SortBy {
33    LogFc,
34    AveExpr,
35    PValue,
36    T,
37    B,
38    None,
39}
40
41impl SortBy {
42    pub fn parse(s: &str) -> Result<Self> {
43        Ok(match s.to_ascii_lowercase().as_str() {
44            "logfc" | "log2foldchange" | "m" => SortBy::LogFc,
45            "aveexpr" | "a" | "amean" => SortBy::AveExpr,
46            "p" | "pvalue" => SortBy::PValue,
47            "t" => SortBy::T,
48            "b" => SortBy::B,
49            "none" => SortBy::None,
50            other => bail!("invalid sort_by '{}'", other),
51        })
52    }
53}
54
55/// One row of a single-contrast moderated-t table (`_topTableT`).
56#[derive(Clone, Debug)]
57pub struct TopRow {
58    pub id: String,
59    pub log2_fold_change: f64,
60    pub lfc_se: f64,
61    pub ave_expr: f64,
62    pub t: f64,
63    pub p_value: f64,
64    pub adj_p_value: f64,
65    pub b: f64,
66}
67
68/// Extract the top-ranked genes for a single coefficient/contrast. Port of
69/// `_topTableT`. `number == None` returns all rows.
70pub fn top_table(
71    fit: &MArrayLM,
72    coef: usize,
73    sort_by: SortBy,
74    number: Option<usize>,
75) -> Result<Vec<TopRow>> {
76    let t = fit
77        .t
78        .as_ref()
79        .ok_or_else(|| anyhow::anyhow!("need to run eBayes first"))?;
80    let p = fit.p_value.as_ref().unwrap();
81    // treat fits carry no B-statistic; report NaN (written as NA) for it.
82    let lods = fit.lods.as_ref();
83    if sort_by == SortBy::B && lods.is_none() {
84        bail!("cannot sort by B: this fit has no B-statistic (treat fits don't produce one)");
85    }
86    let n_genes = fit.n_genes();
87    if coef >= fit.n_coef() {
88        bail!(
89            "coef index {} out of range ({} coefficients)",
90            coef,
91            fit.n_coef()
92        );
93    }
94
95    let p_col: Vec<f64> = (0..n_genes).map(|g| p[[g, coef]]).collect();
96    let adj = p_adjust_bh(&p_col);
97
98    let mut rows: Vec<TopRow> = (0..n_genes)
99        .map(|g| TopRow {
100            id: fit.gene_names[g].clone(),
101            log2_fold_change: fit.coefficients[[g, coef]],
102            lfc_se: fit.stdev_unscaled[[g, coef]],
103            ave_expr: fit.amean[g],
104            t: t[[g, coef]],
105            p_value: p[[g, coef]],
106            adj_p_value: adj[g],
107            b: lods.map(|m| m[[g, coef]]).unwrap_or(f64::NAN),
108        })
109        .collect();
110
111    sort_rows(&mut rows, sort_by);
112
113    if let Some(num) = number {
114        rows.truncate(num);
115    }
116    Ok(rows)
117}
118
119/// Summary table of top genes from a [`crate::treat`] fit. Port of `topTreat`:
120/// `topTable` restricted to a single coefficient, defaulting to sort by
121/// p-value, with B-sorting disallowed (treat has no B-statistic).
122pub fn top_treat(
123    fit: &MArrayLM,
124    coef: usize,
125    sort_by: SortBy,
126    number: Option<usize>,
127) -> Result<Vec<TopRow>> {
128    if sort_by == SortBy::B {
129        bail!("topTreat cannot sort by B: treat doesn't produce a B-statistic");
130    }
131    top_table(fit, coef, sort_by, number)
132}
133
134fn sort_rows(rows: &mut [TopRow], sort_by: SortBy) {
135    match sort_by {
136        SortBy::LogFc => {
137            rows.sort_by(|a, b| {
138                b.log2_fold_change
139                    .abs()
140                    .partial_cmp(&a.log2_fold_change.abs())
141                    .unwrap()
142            });
143        }
144        SortBy::AveExpr => {
145            rows.sort_by(|a, b| b.ave_expr.partial_cmp(&a.ave_expr).unwrap());
146        }
147        SortBy::PValue => {
148            rows.sort_by(|a, b| a.p_value.partial_cmp(&b.p_value).unwrap());
149        }
150        SortBy::T => {
151            rows.sort_by(|a, b| b.t.abs().partial_cmp(&a.t.abs()).unwrap());
152        }
153        SortBy::B => {
154            rows.sort_by(|a, b| b.b.partial_cmp(&a.b).unwrap());
155        }
156        SortBy::None => {}
157    }
158}
159
160/// One row of a multi-contrast moderated-F table (`_topTableF`).
161#[derive(Clone, Debug)]
162pub struct TopRowF {
163    pub id: String,
164    pub ave_expr: f64,
165    pub f: f64,
166    pub p_value: f64,
167    pub adj_p_value: f64,
168}
169
170/// Extract top-ranked genes by the overall moderated F-statistic. Port of
171/// `_topTableF` (sorted by F p-value).
172pub fn top_table_f(fit: &MArrayLM, number: Option<usize>) -> Result<Vec<TopRowF>> {
173    let f = fit
174        .f_stat
175        .as_ref()
176        .ok_or_else(|| anyhow::anyhow!("need to run eBayes first"))?;
177    let fp = fit.f_p_value.as_ref().unwrap();
178    let n_genes = fit.n_genes();
179
180    let fp_vec: Vec<f64> = fp.to_vec();
181    let adj = p_adjust_bh(&fp_vec);
182
183    let mut rows: Vec<TopRowF> = (0..n_genes)
184        .map(|g| TopRowF {
185            id: fit.gene_names[g].clone(),
186            ave_expr: fit.amean[g],
187            f: f[g],
188            p_value: fp[g],
189            adj_p_value: adj[g],
190        })
191        .collect();
192
193    rows.sort_by(|a, b| a.p_value.partial_cmp(&b.p_value).unwrap());
194    if let Some(num) = number {
195        rows.truncate(num);
196    }
197    Ok(rows)
198}