1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
//! Moderated t-tests relative to a fold-change threshold. Port of limma's
//! `treat`/`topTreat` (`treat.R`), default path only: `trend=FALSE`,
//! `robust=FALSE`, `upshot=FALSE`. Trend/robust variants depend on the trended
//! and robust `squeezeVar` paths, which are not yet ported.
use anyhow::{bail, Result};
use ndarray::Array2;
use crate::ebayes::squeeze_var;
use crate::fit::MArrayLM;
use crate::special::t_sf;
/// Compute treat statistics in place on a fitted model. `lfc` is the absolute
/// log2 fold-change threshold (R's `treat(fc=)` corresponds to `lfc =
/// log2(fc)`). The fit must come from [`crate::fit::lmfit`] (optionally through
/// [`crate::contrasts_fit`]) and carry `sigma` and `df_residual`.
///
/// Fills `t`, `p.value`, `df.prior`, `s2.prior`, `s2.post` and `df.total`, and
/// clears the B-statistic (`lods`): treat does not produce one.
pub fn treat(fit: &mut MArrayLM, lfc: f64) -> Result<()> {
let n_genes = fit.n_genes();
let ncoef = fit.n_coef();
let df_max = fit.df_residual.iter().cloned().fold(f64::MIN, f64::max);
if df_max == 0.0 {
bail!("no residual degrees of freedom in linear model fits");
}
if !fit.sigma.iter().any(|v| v.is_finite()) {
bail!("no finite residual standard deviations");
}
let var = fit.sigma.mapv(|s| s * s);
let sq = squeeze_var(&var, &fit.df_residual, None, false)?;
let df_prior = sq.df_prior;
let s2_prior = sq.var_prior;
let s2_post = sq.var_post;
// Total degrees of freedom, capped at the pooled residual d.f.
let df_pooled: f64 = fit.df_residual.iter().filter(|v| v.is_finite()).sum();
let mut df_total = ndarray::Array1::<f64>::zeros(n_genes);
for g in 0..n_genes {
df_total[g] = (fit.df_residual[g] + df_prior[g]).min(df_pooled);
}
let lfc = lfc.abs();
let mut t = Array2::<f64>::zeros((n_genes, ncoef));
let mut p_value = Array2::<f64>::zeros((n_genes, ncoef));
for g in 0..n_genes {
let se_scale = s2_post[g].sqrt();
let df = df_total[g];
for j in 0..ncoef {
let coef = fit.coefficients[[g, j]];
let acoef = coef.abs();
let se = fit.stdev_unscaled[[g, j]] * se_scale;
let tstat_right = (acoef - lfc) / se;
let tstat_left = (acoef + lfc) / se;
// Two one-sided upper tails (computed from the un-floored right
// statistic, matching the order of operations in treat.R).
p_value[[g, j]] = t_sf(tstat_right, df) + t_sf(tstat_left, df);
let tr = tstat_right.max(0.0);
// `t` keeps a sign only when the effect clears the threshold; NaN
// coefficients fall through to zero (R sets them to 0 first).
t[[g, j]] = if coef > lfc {
tr
} else if coef < -lfc {
-tr
} else {
0.0
};
}
}
fit.df_prior = Some(df_prior);
fit.s2_prior = Some(s2_prior);
fit.s2_post = Some(s2_post);
fit.df_total = Some(df_total);
fit.t = Some(t);
fit.p_value = Some(p_value);
// treat produces no B-statistic, F-statistic or per-coef prior variance.
fit.lods = None;
fit.var_prior = None;
fit.proportion = None;
fit.f_stat = None;
fit.f_p_value = None;
Ok(())
}