1mod special;
10
11use std::fs::File;
12use std::io::{BufRead, BufReader, Write};
13use std::path::Path;
14
15use rsomics_common::{Result, RsomicsError};
16
17const LN2: f64 = std::f64::consts::LN_2;
18const PRIOR_COUNT: f64 = 0.125;
19const AVELOGCPM_PRIOR: f64 = 2.0;
20const AVELOGCPM_DISP: f64 = 0.05;
21
22pub struct Matrix {
23 pub header: String,
24 pub genes: Vec<String>,
25 pub counts: Vec<f64>,
26 pub n_samples: usize,
27}
28
29impl Matrix {
30 pub fn load(path: &Path) -> Result<Self> {
31 let file = File::open(path)
32 .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", path.display())))?;
33 let mut lines = BufReader::new(file).lines();
34 let header = lines
35 .next()
36 .ok_or_else(|| RsomicsError::InvalidInput("empty count matrix".into()))?
37 .map_err(RsomicsError::Io)?;
38 let n_samples = header.split('\t').count() - 1;
39 if n_samples == 0 {
40 return Err(RsomicsError::InvalidInput(
41 "count matrix has no sample columns".into(),
42 ));
43 }
44 let mut genes = Vec::new();
45 let mut counts = Vec::new();
46 for line in lines {
47 let line = line.map_err(RsomicsError::Io)?;
48 if line.is_empty() {
49 continue;
50 }
51 let mut fields = line.split('\t');
52 let gene = fields
53 .next()
54 .ok_or_else(|| RsomicsError::InvalidInput("row without a gene id".into()))?;
55 genes.push(gene.to_string());
56 let before = counts.len();
57 for f in fields {
58 counts.push(f.parse::<f64>().map_err(|_| {
59 RsomicsError::InvalidInput(format!("non-numeric count '{f}' for gene {gene}"))
60 })?);
61 }
62 if counts.len() - before != n_samples {
63 return Err(RsomicsError::InvalidInput(format!(
64 "gene {gene}: {} values, header has {n_samples} samples",
65 counts.len() - before
66 )));
67 }
68 }
69 Ok(Self {
70 header,
71 genes,
72 counts,
73 n_samples,
74 })
75 }
76
77 pub fn n_genes(&self) -> usize {
78 self.genes.len()
79 }
80 fn row(&self, g: usize) -> &[f64] {
81 &self.counts[g * self.n_samples..(g + 1) * self.n_samples]
82 }
83}
84
85pub struct Design {
87 pub data: Vec<f64>,
88 pub n_samples: usize,
89 pub n_coef: usize,
90 pub coef_names: Vec<String>,
91}
92
93impl Design {
94 fn load(path: &Path) -> Result<Self> {
95 let file = File::open(path)
96 .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", path.display())))?;
97 let mut lines = BufReader::new(file).lines();
98 let header = lines
99 .next()
100 .ok_or_else(|| RsomicsError::InvalidInput("empty design matrix".into()))?
101 .map_err(RsomicsError::Io)?;
102 let coef_names: Vec<String> = header.split('\t').map(str::to_string).collect();
103 if coef_names.is_empty() {
104 return Err(RsomicsError::InvalidInput(
105 "design has no coefficient columns".into(),
106 ));
107 }
108 let n_coef = coef_names.len();
109 let mut data = Vec::new();
110 let mut n_samples = 0;
111 for line in lines {
112 let line = line.map_err(RsomicsError::Io)?;
113 if line.is_empty() {
114 continue;
115 }
116 let before = data.len();
117 for f in line.split('\t') {
118 data.push(f.parse::<f64>().map_err(|_| {
119 RsomicsError::InvalidInput(format!("non-numeric design value '{f}'"))
120 })?);
121 }
122 if data.len() - before != n_coef {
123 return Err(RsomicsError::InvalidInput(format!(
124 "design row {n_samples}: {} values, header has {n_coef} columns",
125 data.len() - before
126 )));
127 }
128 n_samples += 1;
129 }
130 Ok(Self {
131 data,
132 n_samples,
133 n_coef,
134 coef_names,
135 })
136 }
137
138 fn row(&self, s: usize) -> &[f64] {
139 &self.data[s * self.n_coef..(s + 1) * self.n_coef]
140 }
141}
142
143fn load_norm_factors(path: &Path, n_samples: usize) -> Result<Vec<f64>> {
144 let file = File::open(path)
145 .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", path.display())))?;
146 let mut factors = Vec::with_capacity(n_samples);
147 for line in BufReader::new(file).lines() {
148 let line = line.map_err(RsomicsError::Io)?;
149 let line = line.trim();
150 if line.is_empty() || line.starts_with('#') {
151 continue;
152 }
153 let val = line.rsplit('\t').next().unwrap_or(line);
154 factors.push(
155 val.parse::<f64>().map_err(|_| {
156 RsomicsError::InvalidInput(format!("non-numeric norm factor '{val}'"))
157 })?,
158 );
159 }
160 if factors.len() != n_samples {
161 return Err(RsomicsError::InvalidInput(format!(
162 "{} norm factors for {n_samples} samples",
163 factors.len()
164 )));
165 }
166 Ok(factors)
167}
168
169fn load_contrast(path: &Path, n_coef: usize) -> Result<Vec<f64>> {
170 let file = File::open(path)
171 .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", path.display())))?;
172 let mut c = Vec::with_capacity(n_coef);
173 for line in BufReader::new(file).lines() {
174 let line = line.map_err(RsomicsError::Io)?;
175 let line = line.trim();
176 if line.is_empty() || line.starts_with('#') {
177 continue;
178 }
179 let val = line.rsplit('\t').next().unwrap_or(line);
180 c.push(
181 val.parse::<f64>()
182 .map_err(|_| RsomicsError::InvalidInput(format!("non-numeric contrast '{val}'")))?,
183 );
184 }
185 if c.len() != n_coef {
186 return Err(RsomicsError::InvalidInput(format!(
187 "contrast has {} entries, design has {n_coef} coefficients",
188 c.len()
189 )));
190 }
191 Ok(c)
192}
193
194const MAXIT: usize = 200;
195const TOL: f64 = 1e-10;
196
197fn nb_deviance(y: &[f64], mu: &[f64], dispersion: f64) -> f64 {
199 let mut dev = 0.0;
200 for (&yi, &mui) in y.iter().zip(mu) {
201 let r = if dispersion > 0.0 {
202 1.0 / dispersion
203 } else {
204 f64::INFINITY
205 };
206 let term_y = if yi > 0.0 { yi * (yi / mui).ln() } else { 0.0 };
207 let term = if dispersion > 0.0 {
208 (yi + r) * ((yi + r) / (mui + r)).ln()
209 } else {
210 mui - yi
211 };
212 dev += if dispersion > 0.0 {
213 term_y - term
214 } else {
215 term_y - (yi - mui)
216 };
217 }
218 2.0 * dev
219}
220
221struct GeneScratch {
224 full: Scratch,
225 reduced: Scratch,
226 row_aug: Vec<f64>,
227}
228
229struct Scratch {
231 beta: Vec<f64>,
232 trial: Vec<f64>,
233 mu: Vec<f64>,
234 mu_t: Vec<f64>,
235 xtwx: Vec<f64>,
236 a: Vec<f64>,
237 xtr: Vec<f64>,
238 rhs: Vec<f64>,
239 step: Vec<f64>,
240}
241
242impl Scratch {
243 fn new(n: usize, p: usize) -> Self {
244 Scratch {
245 beta: vec![0.0; p],
246 trial: vec![0.0; p],
247 mu: vec![0.0; n],
248 mu_t: vec![0.0; n],
249 xtwx: vec![0.0; p * p],
250 a: vec![0.0; p * p],
251 xtr: vec![0.0; p],
252 rhs: vec![0.0; p],
253 step: vec![0.0; p],
254 }
255 }
256}
257
258fn fit_nb_glm(
263 y: &[f64],
264 design: &Design,
265 offset: &[f64],
266 dispersion: f64,
267 tight: bool,
268 start_dir: &[f64],
269 sc: &mut Scratch,
270) -> (Vec<f64>, f64) {
271 let n = design.n_samples;
272 let p = design.n_coef;
273
274 let b0 = mglm_one_group(y, offset, dispersion);
277 for (b, &d) in sc.beta[..p].iter_mut().zip(start_dir) {
278 *b = b0 * d;
279 }
280
281 eta_mu(design, offset, &sc.beta[..p], &mut sc.mu);
282 let mut dev = nb_deviance(y, &sc.mu[..n], dispersion);
283 let mut lambda = 0.0f64;
284
285 for _ in 0..MAXIT {
286 for v in sc.xtwx[..p * p].iter_mut() {
287 *v = 0.0;
288 }
289 for v in sc.xtr[..p].iter_mut() {
290 *v = 0.0;
291 }
292 let rows = design.data[..n * p].chunks_exact(p);
293 for ((&yi, &mui), xr) in y.iter().zip(&sc.mu[..n]).zip(rows) {
294 let denom = 1.0 + dispersion * mui;
295 let w = mui / denom;
296 let resid = (yi - mui) / denom;
297 for (j, &xj) in xr.iter().enumerate() {
298 sc.xtr[j] += xj * resid;
299 let xjw = xj * w;
300 let rowj = &mut sc.xtwx[j * p..j * p + p];
301 for (rk, &xk) in rowj.iter_mut().zip(xr) {
302 *rk += xjw * xk;
303 }
304 }
305 }
306 let mut accepted = false;
307 for _ in 0..20 {
308 sc.a[..p * p].copy_from_slice(&sc.xtwx[..p * p]);
309 for d in 0..p {
310 sc.a[d * p + d] += lambda * sc.xtwx[d * p + d].max(1e-6);
311 }
312 sc.rhs[..p].copy_from_slice(&sc.xtr[..p]);
313 if !solve(&mut sc.a[..p * p], &mut sc.rhs[..p], &mut sc.step[..p], p) {
314 lambda = if lambda == 0.0 { 1.0 } else { lambda * 2.0 };
315 continue;
316 }
317 for (t, (&b, &s)) in sc.trial[..p]
318 .iter_mut()
319 .zip(sc.beta[..p].iter().zip(&sc.step[..p]))
320 {
321 *t = b + s;
322 }
323 eta_mu(design, offset, &sc.trial[..p], &mut sc.mu_t);
324 let dev_t = nb_deviance(y, &sc.mu_t[..n], dispersion);
325 if dev_t <= dev + 1e-8 * (1.0 + dev.abs()) {
326 let max_step = sc.step[..p].iter().fold(0.0f64, |m, s| m.max(s.abs()));
327 let improved = dev - dev_t;
328 sc.beta[..p].copy_from_slice(&sc.trial[..p]);
329 sc.mu[..n].copy_from_slice(&sc.mu_t[..n]);
330 dev = dev_t;
331 lambda *= 0.5;
332 accepted = true;
333 let done = if tight {
334 max_step < TOL
335 } else {
336 improved < 1e-7 * (dev + 1.0)
337 };
338 if done {
339 return (sc.beta[..p].to_vec(), dev);
340 }
341 break;
342 }
343 lambda = if lambda == 0.0 { 1.0 } else { lambda * 4.0 };
344 }
345 if !accepted {
346 break;
347 }
348 }
349 (sc.beta[..p].to_vec(), dev)
350}
351
352fn eta_mu(design: &Design, offset: &[f64], beta: &[f64], mu: &mut [f64]) {
353 for s in 0..design.n_samples {
354 let xr = design.row(s);
355 let mut eta = offset[s];
356 for (&x, &b) in xr.iter().zip(beta) {
357 eta += x * b;
358 }
359 mu[s] = eta.exp();
360 }
361}
362
363fn start_direction(design: &Design) -> Vec<f64> {
366 let p = design.n_coef;
367 let mut xtx = vec![0.0f64; p * p];
368 let mut xt1 = vec![0.0f64; p];
369 for s in 0..design.n_samples {
370 let xr = design.row(s);
371 for (j, &xj) in xr.iter().enumerate() {
372 xt1[j] += xj;
373 let rowj = &mut xtx[j * p..j * p + p];
374 for (rk, &xk) in rowj.iter_mut().zip(xr) {
375 *rk += xj * xk;
376 }
377 }
378 }
379 let mut d = vec![0.0f64; p];
380 if !solve(&mut xtx, &mut xt1, &mut d, p) {
381 return vec![0.0f64; p];
382 }
383 d
384}
385
386fn solve(a: &mut [f64], rhs: &mut [f64], x: &mut [f64], p: usize) -> bool {
389 for col in 0..p {
390 let mut piv = col;
391 let mut best = a[col * p + col].abs();
392 for r in (col + 1)..p {
393 let v = a[r * p + col].abs();
394 if v > best {
395 best = v;
396 piv = r;
397 }
398 }
399 if best < 1e-12 {
400 return false;
401 }
402 if piv != col {
403 for k in 0..p {
404 a.swap(col * p + k, piv * p + k);
405 }
406 rhs.swap(col, piv);
407 }
408 let d = a[col * p + col];
409 for r in (col + 1)..p {
410 let f = a[r * p + col] / d;
411 if f == 0.0 {
412 continue;
413 }
414 for k in col..p {
415 a[r * p + k] -= f * a[col * p + k];
416 }
417 rhs[r] -= f * rhs[col];
418 }
419 }
420 for col in (0..p).rev() {
421 let mut s = rhs[col];
422 for k in (col + 1)..p {
423 s -= a[col * p + k] * x[k];
424 }
425 x[col] = s / a[col * p + col];
426 }
427 true
428}
429
430fn ave_log_cpm_gene(row: &[f64], lib: &[f64]) -> f64 {
433 let mean_lib = lib.iter().sum::<f64>() / lib.len() as f64;
434 let prior: Vec<f64> = lib
435 .iter()
436 .map(|&l| AVELOGCPM_PRIOR * l / mean_lib)
437 .collect();
438 let off_aug: Vec<f64> = lib
439 .iter()
440 .zip(&prior)
441 .map(|(&l, &p)| (l + 2.0 * p).ln())
442 .collect();
443 let aug: Vec<f64> = row.iter().zip(&prior).map(|(&c, &p)| c + p).collect();
444 let beta = mglm_one_group(&aug, &off_aug, AVELOGCPM_DISP);
445 (beta + 1e6f64.ln()) / LN2
446}
447
448fn mglm_one_group(row: &[f64], offset: &[f64], dispersion: f64) -> f64 {
451 let total: f64 = row.iter().sum();
452 if total == 0.0 {
453 return f64::NEG_INFINITY;
454 }
455 let mean_off = offset.iter().sum::<f64>() / offset.len() as f64;
456 let mut beta = (total / row.len() as f64).ln() - mean_off;
457 for _ in 0..MAXIT {
458 let mut dl = 0.0;
459 let mut info = 0.0;
460 for (&y, &off) in row.iter().zip(offset) {
461 let mu = (beta + off).exp();
462 let denom = 1.0 + mu * dispersion;
463 dl += (y - mu) / denom;
464 info += mu / denom;
465 }
466 let s = dl / info;
467 beta += s;
468 if s.abs() < TOL {
469 break;
470 }
471 }
472 beta
473}
474
475fn bh_fdr(pvals: &[f64]) -> Vec<f64> {
476 let n = pvals.len();
477 let mut order: Vec<usize> = (0..n).collect();
478 order.sort_by(|&a, &b| pvals[b].partial_cmp(&pvals[a]).unwrap());
479 let mut adj = vec![0.0f64; n];
480 let mut cummin = f64::INFINITY;
481 for (rank, &i) in order.iter().enumerate() {
482 let m = n - rank;
483 let v = (pvals[i] * n as f64 / m as f64).min(1.0);
484 cummin = cummin.min(v);
485 adj[i] = cummin;
486 }
487 adj
488}
489
490enum Test {
491 Coef(usize),
492 Contrast(Vec<f64>),
493}
494
495enum Dispersion {
496 Common(f64),
497 PerGene(Vec<f64>),
498}
499
500pub struct GlmLrtArgs<'a> {
501 pub counts: &'a Path,
502 pub design: &'a Path,
503 pub norm_factors: Option<&'a Path>,
504 pub coef: Option<usize>,
505 pub contrast: Option<&'a Path>,
506 pub dispersion: f64,
507 pub dispersion_file: Option<&'a Path>,
508 pub fdr: bool,
509}
510
511fn reduced_design_coef(design: &Design, drop: usize) -> Design {
517 let p = design.n_coef;
518 let mut data = Vec::with_capacity(design.n_samples * (p - 1));
519 for s in 0..design.n_samples {
520 let xr = design.row(s);
521 for (k, &v) in xr.iter().enumerate() {
522 if k != drop {
523 data.push(v);
524 }
525 }
526 }
527 let coef_names: Vec<String> = design
528 .coef_names
529 .iter()
530 .enumerate()
531 .filter(|(k, _)| *k != drop)
532 .map(|(_, n)| n.clone())
533 .collect();
534 Design {
535 data,
536 n_samples: design.n_samples,
537 n_coef: p - 1,
538 coef_names,
539 }
540}
541
542fn contrast_designs(design: &Design, contrast: &[f64]) -> (Design, Design) {
548 let p = design.n_coef;
549 let n = design.n_samples;
550 let q = householder_basis(contrast);
554 let mut full = vec![0.0f64; n * p];
556 for s in 0..n {
557 let xr = design.row(s);
558 for j in 0..p {
559 let mut v = 0.0;
560 for k in 0..p {
561 v += xr[k] * q[k * p + j];
562 }
563 full[s * p + j] = v;
564 }
565 }
566 let full_d = Design {
567 data: full.clone(),
568 n_samples: n,
569 n_coef: p,
570 coef_names: (0..p).map(|j| format!("c{j}")).collect(),
571 };
572 let mut reduced = Vec::with_capacity(n * (p - 1));
573 for s in 0..n {
574 for j in 1..p {
575 reduced.push(full[s * p + j]);
576 }
577 }
578 let reduced_d = Design {
579 data: reduced,
580 n_samples: n,
581 n_coef: p - 1,
582 coef_names: (1..p).map(|j| format!("c{j}")).collect(),
583 };
584 (full_d, reduced_d)
585}
586
587fn householder_basis(v: &[f64]) -> Vec<f64> {
591 let p = v.len();
592 let mut cols: Vec<Vec<f64>> = Vec::with_capacity(p);
593 let norm = v.iter().map(|x| x * x).sum::<f64>().sqrt();
594 cols.push(v.iter().map(|x| x / norm).collect());
595 for e in 0..p {
596 let mut cand = vec![0.0f64; p];
597 cand[e] = 1.0;
598 for c in &cols {
599 let d: f64 = cand.iter().zip(c).map(|(a, b)| a * b).sum();
600 for i in 0..p {
601 cand[i] -= d * c[i];
602 }
603 }
604 let nrm = cand.iter().map(|x| x * x).sum::<f64>().sqrt();
605 if nrm > 1e-9 {
606 for x in &mut cand {
607 *x /= nrm;
608 }
609 cols.push(cand);
610 if cols.len() == p {
611 break;
612 }
613 }
614 }
615 let mut q = vec![0.0f64; p * p];
616 for (j, c) in cols.iter().enumerate() {
617 for (i, &val) in c.iter().enumerate() {
618 q[i * p + j] = val;
619 }
620 }
621 q
622}
623
624pub fn glm_lrt(args: &GlmLrtArgs, output: &mut dyn Write) -> Result<u64> {
625 let &GlmLrtArgs {
626 counts: counts_path,
627 design: design_path,
628 norm_factors: norm_factors_path,
629 coef,
630 contrast: contrast_path,
631 dispersion: dispersion_arg,
632 dispersion_file: dispersion_path,
633 fdr,
634 } = args;
635 let m = Matrix::load(counts_path)?;
636 let design = Design::load(design_path)?;
637 if design.n_samples != m.n_samples {
638 return Err(RsomicsError::InvalidInput(format!(
639 "design has {} rows but matrix has {} samples",
640 design.n_samples, m.n_samples
641 )));
642 }
643
644 let norm_factors = match norm_factors_path {
645 Some(p) => load_norm_factors(p, m.n_samples)?,
646 None => vec![1.0; m.n_samples],
647 };
648
649 let mut lib = vec![0.0f64; m.n_samples];
650 for row in m.counts.chunks_exact(m.n_samples) {
651 for (s, &c) in lib.iter_mut().zip(row) {
652 *s += c;
653 }
654 }
655 let eff_lib: Vec<f64> = lib
656 .iter()
657 .zip(&norm_factors)
658 .map(|(&l, &f)| l * f)
659 .collect();
660 let offset: Vec<f64> = eff_lib.iter().map(|&l| l.ln()).collect();
661
662 let mean_eff = eff_lib.iter().sum::<f64>() / eff_lib.len() as f64;
665 let prior: Vec<f64> = eff_lib
666 .iter()
667 .map(|&l| PRIOR_COUNT * l / mean_eff)
668 .collect();
669 let offset_aug: Vec<f64> = eff_lib
670 .iter()
671 .zip(&prior)
672 .map(|(&l, &p)| (l + 2.0 * p).ln())
673 .collect();
674
675 let test = match (coef, contrast_path) {
676 (Some(_), Some(_)) => {
677 return Err(RsomicsError::InvalidInput(
678 "give --coef or --contrast, not both".into(),
679 ));
680 }
681 (Some(c), None) => {
682 if c == 0 || c > design.n_coef {
683 return Err(RsomicsError::InvalidInput(format!(
684 "--coef {c} out of range 1..={}",
685 design.n_coef
686 )));
687 }
688 Test::Coef(c - 1)
689 }
690 (None, Some(p)) => Test::Contrast(load_contrast(p, design.n_coef)?),
691 (None, None) => Test::Coef(design.n_coef - 1),
692 };
693
694 let dispersions = match dispersion_path {
695 Some(p) => {
696 let v = load_dispersions(p, m.n_genes())?;
697 Dispersion::PerGene(v)
698 }
699 None => Dispersion::Common(dispersion_arg),
700 };
701
702 let (full_design, reduced_design, fc_coef, df) = match &test {
706 Test::Coef(c) => (
707 DesignRef::Borrowed(&design),
708 reduced_design_coef(&design, *c),
709 *c,
710 1usize,
711 ),
712 Test::Contrast(c) => {
713 let (full, reduced) = contrast_designs(&design, c);
714 (DesignRef::Owned(full), reduced, 0usize, 1usize)
715 }
716 };
717 let full = full_design.as_ref();
718
719 let contrast_norm = match &test {
722 Test::Contrast(c) => c.iter().map(|x| x * x).sum::<f64>().sqrt(),
723 Test::Coef(_) => 1.0,
724 };
725
726 let n = m.n_samples;
727 let start_full = start_direction(full);
728 let start_reduced = start_direction(&reduced_design);
729
730 let per_gene = |w: &mut GeneScratch, g: usize| -> (f64, f64, f64, f64) {
731 let row = m.row(g);
732 let disp = match &dispersions {
733 Dispersion::Common(d) => *d,
734 Dispersion::PerGene(v) => v[g],
735 };
736 let (_b, dev_full) = fit_nb_glm(row, full, &offset, disp, false, &start_full, &mut w.full);
737 let (_b0, dev_null) = fit_nb_glm(
738 row,
739 &reduced_design,
740 &offset,
741 disp,
742 false,
743 &start_reduced,
744 &mut w.reduced,
745 );
746
747 for (s, (&c, &p)) in row.iter().zip(&prior).enumerate() {
748 w.row_aug[s] = c + p;
749 }
750 let (beta_aug, _) = fit_nb_glm(
751 &w.row_aug,
752 full,
753 &offset_aug,
754 disp,
755 true,
756 &start_full,
757 &mut w.full,
758 );
759 let logfc = beta_aug[fc_coef] * contrast_norm / LN2;
760
761 let logcpm = ave_log_cpm_gene(row, &eff_lib);
762 let stat = (dev_null - dev_full).max(0.0);
763 (logfc, logcpm, stat, special::pchisq_upper(stat, df as f64))
764 };
765
766 let make = || GeneScratch {
767 full: Scratch::new(n, full.n_coef),
768 reduced: Scratch::new(n, reduced_design.n_coef.max(1)),
769 row_aug: vec![0.0; n],
770 };
771
772 let rows: Vec<(f64, f64, f64, f64)> = if rayon::current_num_threads() > 1 {
773 use rayon::prelude::*;
774 (0..m.n_genes())
775 .into_par_iter()
776 .map_init(make, |w, g| per_gene(w, g))
777 .collect()
778 } else {
779 let mut w = make();
780 (0..m.n_genes()).map(|g| per_gene(&mut w, g)).collect()
781 };
782
783 let logfc: Vec<f64> = rows.iter().map(|r| r.0).collect();
784 let logcpm: Vec<f64> = rows.iter().map(|r| r.1).collect();
785 let lr: Vec<f64> = rows.iter().map(|r| r.2).collect();
786 let pvals: Vec<f64> = rows.iter().map(|r| r.3).collect();
787
788 let fdr_vals = if fdr { Some(bh_fdr(&pvals)) } else { None };
789 let gene_col = m.header.split('\t').next().unwrap_or("gene");
790 let mut header = format!("{gene_col}\tlogFC\tlogCPM\tLR\tPValue");
791 if fdr {
792 header.push_str("\tFDR");
793 }
794 writeln!(output, "{header}").map_err(RsomicsError::Io)?;
795 for g in 0..m.n_genes() {
796 write!(
797 output,
798 "{}\t{:.7}\t{:.6}\t{:.6}\t{:.6e}",
799 m.genes[g], logfc[g], logcpm[g], lr[g], pvals[g]
800 )
801 .map_err(RsomicsError::Io)?;
802 if let Some(f) = &fdr_vals {
803 write!(output, "\t{:.6e}", f[g]).map_err(RsomicsError::Io)?;
804 }
805 writeln!(output).map_err(RsomicsError::Io)?;
806 }
807 Ok(m.n_genes() as u64)
808}
809
810fn load_dispersions(path: &Path, n_genes: usize) -> Result<Vec<f64>> {
811 let file = File::open(path)
812 .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", path.display())))?;
813 let mut v = Vec::with_capacity(n_genes);
814 for line in BufReader::new(file).lines() {
815 let line = line.map_err(RsomicsError::Io)?;
816 let line = line.trim();
817 if line.is_empty() || line.starts_with('#') {
818 continue;
819 }
820 let val = line.rsplit('\t').next().unwrap_or(line);
821 v.push(
822 val.parse::<f64>().map_err(|_| {
823 RsomicsError::InvalidInput(format!("non-numeric dispersion '{val}'"))
824 })?,
825 );
826 }
827 if v.len() != n_genes {
828 return Err(RsomicsError::InvalidInput(format!(
829 "{} dispersions for {n_genes} genes",
830 v.len()
831 )));
832 }
833 Ok(v)
834}
835
836enum DesignRef<'a> {
837 Borrowed(&'a Design),
838 Owned(Design),
839}
840impl<'a> DesignRef<'a> {
841 fn as_ref(&self) -> &Design {
842 match self {
843 DesignRef::Borrowed(d) => d,
844 DesignRef::Owned(d) => d,
845 }
846 }
847}
848
849#[cfg(test)]
850mod tests {
851 use super::*;
852
853 #[test]
854 fn deviance_zero_at_mle() {
855 let y = [10.0, 12.0, 8.0];
856 let mu = [10.0, 12.0, 8.0];
857 assert!(nb_deviance(&y, &mu, 0.1).abs() < 1e-12);
858 }
859
860 #[test]
861 fn solve_identity() {
862 let mut a = vec![2.0, 0.0, 0.0, 3.0];
863 let mut b = [4.0, 9.0];
864 let mut x = vec![0.0; 2];
865 assert!(solve(&mut a, &mut b, &mut x, 2));
866 assert!((x[0] - 2.0).abs() < 1e-12 && (x[1] - 3.0).abs() < 1e-12);
867 }
868
869 #[test]
870 fn bh_monotone() {
871 let p = [0.01, 0.04, 0.03, 0.2];
872 let adj = bh_fdr(&p);
873 for w in adj.windows(2) {
874 let _ = w;
875 }
876 assert!(adj.iter().all(|&v| (0.0..=1.0).contains(&v)));
877 }
878}