use anyhow::{bail, Result};
use crate::fit::MArrayLM;
pub fn p_adjust_bh(p: &[f64]) -> Vec<f64> {
let n = p.len();
let mut order: Vec<usize> = (0..n).collect();
order.sort_by(|&a, &b| p[a].partial_cmp(&p[b]).unwrap());
let mut adj = vec![0.0_f64; n];
let mut cummin = f64::INFINITY;
for k in (0..n).rev() {
let rank = (k + 1) as f64;
let val = n as f64 / rank * p[order[k]];
cummin = cummin.min(val);
adj[order[k]] = cummin.min(1.0);
}
adj
}
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum SortBy {
LogFc,
AveExpr,
PValue,
T,
B,
None,
}
impl SortBy {
pub fn parse(s: &str) -> Result<Self> {
Ok(match s.to_ascii_lowercase().as_str() {
"logfc" | "log2foldchange" | "m" => SortBy::LogFc,
"aveexpr" | "a" | "amean" => SortBy::AveExpr,
"p" | "pvalue" => SortBy::PValue,
"t" => SortBy::T,
"b" => SortBy::B,
"none" => SortBy::None,
other => bail!("invalid sort_by '{}'", other),
})
}
}
#[derive(Clone, Debug)]
pub struct TopRow {
pub id: String,
pub log2_fold_change: f64,
pub lfc_se: f64,
pub ave_expr: f64,
pub t: f64,
pub p_value: f64,
pub adj_p_value: f64,
pub b: f64,
}
pub fn top_table(
fit: &MArrayLM,
coef: usize,
sort_by: SortBy,
number: Option<usize>,
) -> Result<Vec<TopRow>> {
let t = fit
.t
.as_ref()
.ok_or_else(|| anyhow::anyhow!("need to run eBayes first"))?;
let p = fit.p_value.as_ref().unwrap();
let lods = fit.lods.as_ref();
if sort_by == SortBy::B && lods.is_none() {
bail!("cannot sort by B: this fit has no B-statistic (treat fits don't produce one)");
}
let n_genes = fit.n_genes();
if coef >= fit.n_coef() {
bail!(
"coef index {} out of range ({} coefficients)",
coef,
fit.n_coef()
);
}
let p_col: Vec<f64> = (0..n_genes).map(|g| p[[g, coef]]).collect();
let adj = p_adjust_bh(&p_col);
let mut rows: Vec<TopRow> = (0..n_genes)
.map(|g| TopRow {
id: fit.gene_names[g].clone(),
log2_fold_change: fit.coefficients[[g, coef]],
lfc_se: fit.stdev_unscaled[[g, coef]],
ave_expr: fit.amean[g],
t: t[[g, coef]],
p_value: p[[g, coef]],
adj_p_value: adj[g],
b: lods.map(|m| m[[g, coef]]).unwrap_or(f64::NAN),
})
.collect();
sort_rows(&mut rows, sort_by);
if let Some(num) = number {
rows.truncate(num);
}
Ok(rows)
}
pub fn top_treat(
fit: &MArrayLM,
coef: usize,
sort_by: SortBy,
number: Option<usize>,
) -> Result<Vec<TopRow>> {
if sort_by == SortBy::B {
bail!("topTreat cannot sort by B: treat doesn't produce a B-statistic");
}
top_table(fit, coef, sort_by, number)
}
fn sort_rows(rows: &mut [TopRow], sort_by: SortBy) {
match sort_by {
SortBy::LogFc => {
rows.sort_by(|a, b| {
b.log2_fold_change
.abs()
.partial_cmp(&a.log2_fold_change.abs())
.unwrap()
});
}
SortBy::AveExpr => {
rows.sort_by(|a, b| b.ave_expr.partial_cmp(&a.ave_expr).unwrap());
}
SortBy::PValue => {
rows.sort_by(|a, b| a.p_value.partial_cmp(&b.p_value).unwrap());
}
SortBy::T => {
rows.sort_by(|a, b| b.t.abs().partial_cmp(&a.t.abs()).unwrap());
}
SortBy::B => {
rows.sort_by(|a, b| b.b.partial_cmp(&a.b).unwrap());
}
SortBy::None => {}
}
}
#[derive(Clone, Debug)]
pub struct TopRowF {
pub id: String,
pub ave_expr: f64,
pub f: f64,
pub p_value: f64,
pub adj_p_value: f64,
}
pub fn top_table_f(fit: &MArrayLM, number: Option<usize>) -> Result<Vec<TopRowF>> {
let f = fit
.f_stat
.as_ref()
.ok_or_else(|| anyhow::anyhow!("need to run eBayes first"))?;
let fp = fit.f_p_value.as_ref().unwrap();
let n_genes = fit.n_genes();
let fp_vec: Vec<f64> = fp.to_vec();
let adj = p_adjust_bh(&fp_vec);
let mut rows: Vec<TopRowF> = (0..n_genes)
.map(|g| TopRowF {
id: fit.gene_names[g].clone(),
ave_expr: fit.amean[g],
f: f[g],
p_value: fp[g],
adj_p_value: adj[g],
})
.collect();
rows.sort_by(|a, b| a.p_value.partial_cmp(&b.p_value).unwrap());
if let Some(num) = number {
rows.truncate(num);
}
Ok(rows)
}