rsomics_limma_ebayes/
lib.rs1mod ebayes;
9mod fit;
10mod matrix;
11mod special;
12
13use std::io::{BufWriter, Write};
14use std::path::Path;
15
16use rsomics_common::{Result, RsomicsError};
17
18pub use matrix::{read_contrast, read_design, read_expr};
19
20pub struct Options<'a> {
21 pub expr: &'a Path,
22 pub design: &'a Path,
23 pub contrast: Option<&'a Path>,
24 pub coef: Option<usize>,
26 pub proportion: f64,
27}
28
29pub struct Row {
31 pub gene: String,
32 pub logfc: f64,
33 pub ave_expr: f64,
34 pub t: f64,
35 pub p_value: f64,
36 pub adj_p_val: f64,
37 pub b: f64,
38}
39
40pub struct Results {
41 pub coef_name: String,
42 pub rows: Vec<Row>,
43 pub df_total: f64,
44 pub df_prior: f64,
45 pub s2_prior: f64,
46}
47
48pub fn run(opts: &Options) -> Result<Results> {
49 let expr = read_expr(opts.expr)?;
50 let design = read_design(opts.design)?;
51 if design.x.len() != expr.samples.len() {
52 return Err(RsomicsError::InvalidInput(format!(
53 "design has {} rows, expression has {} samples",
54 design.x.len(),
55 expr.samples.len()
56 )));
57 }
58
59 let base = fit::lm_fit(&expr.y, &expr.genes, &design.x, &design.coef_names)?;
60 let xtx_inv = fit::Qr::new(&design.x)?.xtx_inv();
61
62 let fit = if let Some(cpath) = opts.contrast {
63 let contrast = read_contrast(cpath, &design.coef_names)?;
64 fit::contrasts_fit(&base, &contrast, &xtx_inv)
65 } else {
66 base
67 };
68
69 let m = ebayes::ebayes(&fit, &fit.stdev_unscaled, opts.proportion);
70
71 let nc = fit.coef_names.len();
72 let coef = opts.coef.unwrap_or(nc);
73 if coef < 1 || coef > nc {
74 return Err(RsomicsError::InvalidInput(format!(
75 "--coef {coef} out of range 1..={nc}"
76 )));
77 }
78 let k = coef - 1;
79
80 let pvals: Vec<f64> = (0..fit.genes.len()).map(|gi| m.p[gi][k]).collect();
81 let adj = bh_adjust(&pvals);
82
83 let mut rows = Vec::with_capacity(fit.genes.len());
84 for (gi, gene) in fit.genes.iter().enumerate() {
85 rows.push(Row {
86 gene: gene.clone(),
87 logfc: fit.coefficients[gi][k],
88 ave_expr: fit.amean[gi],
89 t: m.t[gi][k],
90 p_value: m.p[gi][k],
91 adj_p_val: adj[gi],
92 b: m.lods[gi][k],
93 });
94 }
95
96 Ok(Results {
97 coef_name: fit.coef_names[k].clone(),
98 rows,
99 df_total: m.df_total,
100 df_prior: m.df_prior,
101 s2_prior: m.s2_prior,
102 })
103}
104
105fn bh_adjust(p: &[f64]) -> Vec<f64> {
107 let n = p.len();
108 let mut idx: Vec<usize> = (0..n).collect();
109 idx.sort_by(|&a, &b| p[b].partial_cmp(&p[a]).unwrap());
110 let mut adj = vec![0.0; n];
111 let mut cummin = f64::INFINITY;
112 for (rank, &i) in idx.iter().enumerate() {
113 let m = (n - rank) as f64;
114 let v = (n as f64 / m * p[i]).min(1.0);
115 cummin = cummin.min(v);
116 adj[i] = cummin;
117 }
118 adj
119}
120
121pub fn write_results(res: &Results, out: &mut dyn Write) -> Result<()> {
122 let mut w = BufWriter::with_capacity(1 << 20, out);
123 writeln!(w, "gene\tlogFC\tAveExpr\tt\tP.Value\tadj.P.Val\tB").map_err(RsomicsError::Io)?;
124 let mut fmt = ryu::Buffer::new();
125 let mut line = String::with_capacity(128);
126 for row in &res.rows {
127 line.clear();
128 line.push_str(&row.gene);
129 for v in [
130 row.logfc,
131 row.ave_expr,
132 row.t,
133 row.p_value,
134 row.adj_p_val,
135 row.b,
136 ] {
137 line.push('\t');
138 line.push_str(fmt.format(v));
139 }
140 line.push('\n');
141 w.write_all(line.as_bytes()).map_err(RsomicsError::Io)?;
142 }
143 w.flush().map_err(RsomicsError::Io)?;
144 Ok(())
145}