1use std::collections::HashMap;
8use std::fs::File;
9use std::io::{BufWriter, Write};
10use std::path::Path;
11
12use anyhow::{bail, Context, Result};
13use ndarray::Array2;
14
15use crate::decidetests::{p_adjust, Adjust, DecideMethod, TestResults};
16use crate::fit::MArrayLM;
17use crate::toptable::{TopRow, TopRowF};
18
19pub struct LabeledMatrix {
22 pub data: Array2<f64>,
23 pub row_names: Vec<String>,
24 pub col_names: Vec<String>,
25}
26
27#[cfg(feature = "cli")]
34pub fn read_matrix(path: &Path) -> Result<LabeledMatrix> {
35 read_matrix_with_delimiter(path, delimiter_for_path(path))
36}
37
38#[cfg(feature = "cli")]
41fn delimiter_for_path(path: &Path) -> u8 {
42 match path
43 .extension()
44 .and_then(|e| e.to_str())
45 .map(str::to_ascii_lowercase)
46 .as_deref()
47 {
48 Some("tsv") | Some("tab") => b'\t',
49 _ => b',',
50 }
51}
52
53#[cfg(feature = "cli")]
58pub fn read_matrix_with_delimiter(path: &Path, delimiter: u8) -> Result<LabeledMatrix> {
59 let mut rdr = csv::ReaderBuilder::new()
60 .has_headers(true)
61 .flexible(false)
62 .delimiter(delimiter)
63 .from_path(path)
64 .with_context(|| format!("opening {}", path.display()))?;
65
66 let header = rdr.headers()?.clone();
67 if header.len() < 2 {
68 bail!("{}: expected at least one data column", path.display());
69 }
70 let col_names: Vec<String> = header.iter().skip(1).map(|s| s.to_string()).collect();
71 let ncol = col_names.len();
72
73 let mut row_names = Vec::new();
74 let mut values: Vec<f64> = Vec::new();
75 let mut rec = csv::ByteRecord::new();
80 let mut i = 0usize;
81 while rdr
82 .read_byte_record(&mut rec)
83 .with_context(|| format!("{}: reading row {}", path.display(), i + 1))?
84 {
85 if rec.len() != ncol + 1 {
86 bail!(
87 "{}: row {} has {} fields, expected {}",
88 path.display(),
89 i + 1,
90 rec.len(),
91 ncol + 1
92 );
93 }
94 row_names.push(String::from_utf8_lossy(&rec[0]).into_owned());
95 values.reserve(ncol);
96 for j in 0..ncol {
97 let cell = std::str::from_utf8(&rec[j + 1]).map_or(f64::NAN, parse_cell);
98 values.push(cell);
99 }
100 i += 1;
101 }
102 let nrow = row_names.len();
103 let data = Array2::from_shape_vec((nrow, ncol), values)
104 .with_context(|| format!("{}: assembling matrix", path.display()))?;
105 Ok(LabeledMatrix {
106 data,
107 row_names,
108 col_names,
109 })
110}
111
112#[cfg(feature = "cli")]
113fn parse_cell(s: &str) -> f64 {
114 let t = s.trim();
115 if t.is_empty() || t.eq_ignore_ascii_case("na") || t.eq_ignore_ascii_case("nan") {
116 return f64::NAN;
117 }
118 t.parse::<f64>().unwrap_or(f64::NAN)
119}
120
121pub fn align_design(design: &LabeledMatrix, sample_order: &[String]) -> Result<Array2<f64>> {
125 let index: HashMap<&str, usize> = design
126 .row_names
127 .iter()
128 .enumerate()
129 .map(|(i, n)| (n.as_str(), i))
130 .collect();
131
132 let by_name = sample_order.iter().all(|s| index.contains_key(s.as_str()));
135 let ncoef = design.col_names.len();
136 let mut out = Array2::<f64>::zeros((sample_order.len(), ncoef));
137 if by_name {
138 for (new_i, s) in sample_order.iter().enumerate() {
139 let old_i = index[s.as_str()];
140 for j in 0..ncoef {
141 out[[new_i, j]] = design.data[[old_i, j]];
142 }
143 }
144 } else {
145 if design.row_names.len() != sample_order.len() {
146 bail!(
147 "design has {} rows but expression has {} samples, and sample names do not match",
148 design.row_names.len(),
149 sample_order.len()
150 );
151 }
152 out = design.data.clone();
153 }
154 Ok(out)
155}
156
157pub fn align_contrasts(contrasts: &LabeledMatrix, coef_order: &[String]) -> Result<Array2<f64>> {
160 let index: HashMap<&str, usize> = contrasts
161 .row_names
162 .iter()
163 .enumerate()
164 .map(|(i, n)| (n.as_str(), i))
165 .collect();
166 let by_name = coef_order.iter().all(|c| index.contains_key(c.as_str()));
167 let ncont = contrasts.col_names.len();
168 let mut out = Array2::<f64>::zeros((coef_order.len(), ncont));
169 if by_name {
170 for (new_i, c) in coef_order.iter().enumerate() {
171 let old_i = index[c.as_str()];
172 for j in 0..ncont {
173 out[[new_i, j]] = contrasts.data[[old_i, j]];
174 }
175 }
176 } else {
177 if contrasts.row_names.len() != coef_order.len() {
178 bail!("contrast rows do not match design coefficients by name or position");
179 }
180 out = contrasts.data.clone();
181 }
182 Ok(out)
183}
184
185fn fmt(v: f64) -> String {
186 if v.is_nan() {
187 "NA".to_string()
188 } else {
189 format!("{:.10e}", v)
190 }
191}
192
193fn fmt_hp(v: f64) -> String {
197 if v.is_nan() {
198 "NA".to_string()
199 } else {
200 format!("{:.17e}", v)
201 }
202}
203
204fn csv_quote(field: &str, sep: u8) -> std::borrow::Cow<'_, str> {
212 let needs_quote = field
213 .bytes()
214 .any(|b| b == sep || b == b'"' || b == b'\n' || b == b'\r');
215 if !needs_quote {
216 return std::borrow::Cow::Borrowed(field);
217 }
218 let mut quoted = String::with_capacity(field.len() + 2);
219 quoted.push('"');
220 for ch in field.chars() {
221 if ch == '"' {
222 quoted.push('"');
223 }
224 quoted.push(ch);
225 }
226 quoted.push('"');
227 std::borrow::Cow::Owned(quoted)
228}
229
230pub fn write_top_table(path: &Path, rows: &[TopRow]) -> Result<()> {
232 let mut w = BufWriter::new(File::create(path)?);
233 writeln!(w, "id,log2FoldChange,lfcSE,AveExpr,t,P.Value,adj.P.Val,B")?;
234 for r in rows {
235 writeln!(
236 w,
237 "{},{},{},{},{},{},{},{}",
238 csv_quote(&r.id, b','),
239 fmt(r.log2_fold_change),
240 fmt(r.lfc_se),
241 fmt(r.ave_expr),
242 fmt(r.t),
243 fmt(r.p_value),
244 fmt(r.adj_p_value),
245 fmt(r.b)
246 )?;
247 }
248 Ok(())
249}
250
251pub fn write_top_table_f(path: &Path, rows: &[TopRowF]) -> Result<()> {
253 let mut w = BufWriter::new(File::create(path)?);
254 writeln!(w, "id,AveExpr,F,P.Value,adj.P.Val")?;
255 for r in rows {
256 writeln!(
257 w,
258 "{},{},{},{},{}",
259 csv_quote(&r.id, b','),
260 fmt(r.ave_expr),
261 fmt(r.f),
262 fmt(r.p_value),
263 fmt(r.adj_p_value)
264 )?;
265 }
266 Ok(())
267}
268
269pub fn write_test_results(path: &Path, res: &TestResults) -> Result<()> {
272 let mut w = BufWriter::new(File::create(path)?);
273 write!(w, "id")?;
274 for name in &res.coef_names {
275 write!(w, ",{}", csv_quote(name, b','))?;
276 }
277 writeln!(w)?;
278 for g in 0..res.data.nrows() {
279 write!(w, "{}", csv_quote(&res.gene_names[g], b','))?;
280 for j in 0..res.data.ncols() {
281 write!(w, ",{}", res.data[[g, j]])?;
282 }
283 writeln!(w)?;
284 }
285 Ok(())
286}
287
288pub fn write_fit_dump(path: &Path, fit: &MArrayLM) -> Result<()> {
291 let mut w = BufWriter::new(File::create(path)?);
292 writeln!(
293 w,
294 "id,coef,coefficient,stdev_unscaled,t,p_value,lods,sigma,s2_post,df_total,F,F_p_value"
295 )?;
296 let t = fit.t.as_ref();
297 let p = fit.p_value.as_ref();
298 let lods = fit.lods.as_ref();
299 let s2_post = fit.s2_post.as_ref();
300 let df_total = fit.df_total.as_ref();
301 let f = fit.f_stat.as_ref();
302 let fp = fit.f_p_value.as_ref();
303 for g in 0..fit.n_genes() {
304 for j in 0..fit.n_coef() {
305 writeln!(
306 w,
307 "{},{},{},{},{},{},{},{},{},{},{},{}",
308 csv_quote(&fit.gene_names[g], b','),
309 csv_quote(&fit.coef_names[j], b','),
310 fmt_hp(fit.coefficients[[g, j]]),
311 fmt_hp(fit.stdev_unscaled[[g, j]]),
312 fmt_hp(t.map(|m| m[[g, j]]).unwrap_or(f64::NAN)),
313 fmt_hp(p.map(|m| m[[g, j]]).unwrap_or(f64::NAN)),
314 fmt_hp(lods.map(|m| m[[g, j]]).unwrap_or(f64::NAN)),
315 fmt_hp(fit.sigma[g]),
316 fmt_hp(s2_post.map(|m| m[g]).unwrap_or(f64::NAN)),
317 fmt_hp(df_total.map(|m| m[g]).unwrap_or(f64::NAN)),
318 fmt_hp(f.map(|m| m[g]).unwrap_or(f64::NAN)),
319 fmt_hp(fp.map(|m| m[g]).unwrap_or(f64::NAN)),
320 )?;
321 }
322 }
323 Ok(())
324}
325
326fn fmt_r(v: f64) -> String {
331 if v.is_nan() {
332 return "NA".to_string();
333 }
334 if v.is_infinite() {
335 return if v > 0.0 { "Inf" } else { "-Inf" }.to_string();
336 }
337 if v == 0.0 {
338 return "0".to_string();
339 }
340 let neg = v < 0.0;
341 let a = v.abs();
342
343 let s15 = format!("{:.14e}", a);
346 let mut mant = s15.clone();
347 for d in 1..=15usize {
348 let cand = format!("{:.*e}", d - 1, a);
349 if format!("{:.14e}", cand.parse::<f64>().unwrap()) == s15 {
350 mant = cand;
351 break;
352 }
353 }
354
355 let (mant_part, exp_part) = mant.split_once('e').unwrap();
356 let exp: i32 = exp_part.parse().unwrap();
357 let digits: String = mant_part.chars().filter(|&c| c != '.').collect();
358
359 let fixed = fmt_fixed(&digits, exp);
360 let sci = fmt_sci(&digits, exp);
361 let body = if fixed.len() <= sci.len() { fixed } else { sci };
362 if neg {
363 format!("-{body}")
364 } else {
365 body
366 }
367}
368
369fn fmt_fixed(digits: &str, exp: i32) -> String {
372 let ndig = digits.len() as i32;
373 if exp >= 0 {
374 let ip_len = exp + 1;
375 if ip_len >= ndig {
376 format!("{}{}", digits, "0".repeat((ip_len - ndig) as usize))
377 } else {
378 let (ip, fp) = digits.split_at(ip_len as usize);
379 let fp = fp.trim_end_matches('0');
380 if fp.is_empty() {
381 ip.to_string()
382 } else {
383 format!("{ip}.{fp}")
384 }
385 }
386 } else {
387 let zeros = (-exp - 1) as usize;
388 let frac = format!("{}{}", "0".repeat(zeros), digits);
389 let frac = frac.trim_end_matches('0');
390 format!("0.{frac}")
391 }
392}
393
394fn fmt_sci(digits: &str, exp: i32) -> String {
397 let (first, rest) = digits.split_at(1);
398 let mant = if rest.is_empty() {
399 first.to_string()
400 } else {
401 format!("{first}.{rest}")
402 };
403 let sign = if exp < 0 { '-' } else { '+' };
404 format!("{mant}e{sign}{:02}", exp.abs())
405}
406
407fn round_dec(v: f64, decimals: i32) -> f64 {
410 if !v.is_finite() {
411 return v;
412 }
413 if decimals >= 0 {
414 format!("{:.*}", decimals as usize, v).parse().unwrap_or(v)
415 } else {
416 let f = 10f64.powi(-decimals);
417 (v / f).round() * f
418 }
419}
420
421pub struct WriteFitOptions {
423 pub digits: Option<i32>,
425 pub adjust: Adjust,
427 pub method: DecideMethod,
430 pub f_adjust: Adjust,
432 pub sep: char,
434 pub row_names: bool,
436}
437
438impl Default for WriteFitOptions {
439 fn default() -> Self {
440 Self {
441 digits: None,
442 adjust: Adjust::None,
443 method: DecideMethod::Separate,
444 f_adjust: Adjust::None,
445 sep: '\t',
446 row_names: true,
447 }
448 }
449}
450
451fn adjust_pvalue_matrix(
454 p: &ndarray::Array2<f64>,
455 method: Adjust,
456 decide: DecideMethod,
457) -> ndarray::Array2<f64> {
458 let (ng, nc) = p.dim();
459 let mut out = ndarray::Array2::<f64>::zeros((ng, nc));
460 if let DecideMethod::Global = decide {
463 let flat: Vec<f64> = (0..nc)
465 .flat_map(|j| (0..ng).map(move |g| p[[g, j]]))
466 .collect();
467 let adj = p_adjust(&flat, method);
468 let mut k = 0;
469 for j in 0..nc {
470 for g in 0..ng {
471 out[[g, j]] = adj[k];
472 k += 1;
473 }
474 }
475 } else {
476 for j in 0..nc {
477 let col: Vec<f64> = (0..ng).map(|g| p[[g, j]]).collect();
478 let adj = p_adjust(&col, method);
479 for g in 0..ng {
480 out[[g, j]] = adj[g];
481 }
482 }
483 }
484 out
485}
486
487pub fn write_fit(
496 path: &Path,
497 fit: &MArrayLM,
498 results: Option<&TestResults>,
499 opts: &WriteFitOptions,
500) -> Result<()> {
501 let t = fit
502 .t
503 .as_ref()
504 .context("write_fit needs moderated t-statistics; run eBayes/treat first")?;
505 let p = fit
506 .p_value
507 .as_ref()
508 .context("write_fit needs p-values; run eBayes/treat first")?;
509 let ng = fit.n_genes();
510 let nc = fit.n_coef();
511
512 let padj = match opts.adjust {
513 Adjust::None => None,
514 m => Some(adjust_pvalue_matrix(p, m, opts.method)),
515 };
516 let f = fit.f_stat.as_ref();
517 let fp = fit.f_p_value.as_ref();
518 let fpadj = match (opts.f_adjust, fp) {
519 (Adjust::None, _) | (_, None) => None,
520 (m, Some(fpv)) => Some(p_adjust(fpv.as_slice().unwrap(), m)),
521 };
522
523 let cell = |v: f64, delta: i32| -> String {
525 match opts.digits {
526 Some(d) => fmt_r(round_dec(v, d + delta)),
527 None => fmt_r(v),
528 }
529 };
530 let cname = |base: &str, j: usize| -> String {
531 if nc == 1 {
532 base.to_string()
533 } else {
534 format!("{base}.{}", fit.coef_names[j])
535 }
536 };
537
538 let mut columns: Vec<(String, Vec<String>)> = Vec::new();
539 columns.push((
540 "AveExpr".to_string(),
541 (0..ng).map(|g| cell(fit.amean[g], -1)).collect(),
542 ));
543 for j in 0..nc {
544 columns.push((
545 cname("Coef", j),
546 (0..ng).map(|g| cell(fit.coefficients[[g, j]], 0)).collect(),
547 ));
548 }
549 for j in 0..nc {
550 columns.push((
551 cname("t", j),
552 (0..ng).map(|g| cell(t[[g, j]], -1)).collect(),
553 ));
554 }
555 for j in 0..nc {
556 columns.push((
557 cname("P.value", j),
558 (0..ng).map(|g| cell(p[[g, j]], 2)).collect(),
559 ));
560 }
561 if let Some(pa) = &padj {
562 for j in 0..nc {
563 columns.push((
564 cname("P.value.adj", j),
565 (0..ng).map(|g| cell(pa[[g, j]], 3)).collect(),
566 ));
567 }
568 }
569 if let Some(fv) = f {
570 columns.push(("F".to_string(), (0..ng).map(|g| cell(fv[g], -1)).collect()));
571 }
572 if let Some(fpv) = fp {
573 columns.push((
574 "F.p.value".to_string(),
575 (0..ng).map(|g| cell(fpv[g], 2)).collect(),
576 ));
577 }
578 if let Some(fpa) = &fpadj {
579 columns.push((
580 "F.p.value.adj".to_string(),
581 (0..ng).map(|g| cell(fpa[g], 3)).collect(),
582 ));
583 }
584 if let Some(res) = results {
585 let rc = res.data.ncols();
586 for j in 0..rc {
587 let nm = if rc == 1 {
588 "Results".to_string()
589 } else {
590 format!("Results.{}", res.coef_names[j])
591 };
592 columns.push((nm, (0..ng).map(|g| res.data[[g, j]].to_string()).collect()));
593 }
594 }
595
596 let sep = opts.sep.to_string();
597 let mut w = BufWriter::new(File::create(path)?);
598
599 let header: Vec<&str> = columns.iter().map(|(h, _)| h.as_str()).collect();
601 if opts.row_names {
602 write!(w, "{sep}")?;
603 }
604 writeln!(w, "{}", header.join(&sep))?;
605
606 for g in 0..ng {
607 if opts.row_names {
608 write!(w, "{}{sep}", fit.gene_names[g])?;
609 }
610 let row: Vec<&str> = columns.iter().map(|(_, c)| c[g].as_str()).collect();
611 writeln!(w, "{}", row.join(&sep))?;
612 }
613 Ok(())
614}
615
616#[cfg(test)]
617mod write_fit_tests {
618 use super::*;
619 use crate::ebayes::ebayes;
620 use crate::fit::lmfit;
621 use ndarray::Array2;
622
623 #[test]
626 fn fmt_r_matches_as_character() {
627 let cases: &[(f64, &str)] = &[
628 (0.0, "0"),
629 (0.502, "0.502"),
630 (-1.069, "-1.069"),
631 (6.29433333333333, "6.29433333333333"),
632 (0.744216015086904, "0.744216015086904"),
633 (0.000595501060937467, "0.000595501060937467"),
634 (1.92170677729296e-05, "1.92170677729296e-05"),
635 (7.00632124072959e-06, "7.00632124072959e-06"),
636 (36.1843587501443, "36.1843587501443"),
637 (0.0001, "1e-04"),
638 (100000.0, "1e+05"),
639 (123456.0, "123456"),
640 ];
641 for &(v, want) in cases {
642 assert_eq!(fmt_r(v), want, "fmt_r({v})");
643 }
644 assert_eq!(fmt_r(f64::NAN), "NA");
645 assert_eq!(fmt_r(f64::INFINITY), "Inf");
646 assert_eq!(fmt_r(f64::NEG_INFINITY), "-Inf");
647 }
648
649 fn build_fit() -> MArrayLM {
650 let e = Array2::from_shape_vec(
652 (5, 6),
653 vec![
654 4.747, 4.359, 9.024, 5.91, 7.838, 5.888, 6.367, 6.975, 6.78, 5.968, 7.564, 5.688,
655 4.329, 7.477, 4.758, 7.888, 6.149, 3.058, 9.191, 7.152, 1.571, 7.642, 2.021, 5.044,
656 6.659, 5.389, 8.25, 7.188, 7.24, 6.836,
657 ],
658 )
659 .unwrap();
660 let design = Array2::from_shape_vec(
661 (6, 2),
662 vec![1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
663 )
664 .unwrap();
665 let genes = (1..=5).map(|i| format!("g{i}")).collect();
666 let coefs = vec!["Intercept".to_string(), "grpB".to_string()];
667 let mut fit = lmfit(&e, &design, genes, coefs).unwrap();
668 ebayes(&mut fit, 0.01, (0.1, 4.0), false, false).unwrap();
669 fit
670 }
671
672 fn tmp(tag: &str) -> std::path::PathBuf {
673 let nanos = std::time::SystemTime::now()
674 .duration_since(std::time::UNIX_EPOCH)
675 .unwrap()
676 .as_nanos();
677 std::env::temp_dir().join(format!(
678 "limma_wf_{}_{}_{}.tsv",
679 tag,
680 std::process::id(),
681 nanos
682 ))
683 }
684
685 fn close(a: f64, b: f64) -> bool {
686 (a - b).abs() <= 1e-6 * (1.0 + b.abs())
687 }
688
689 #[test]
690 fn write_fit_matches_r_separate() {
691 let fit = build_fit();
692 let path = tmp("sep");
693 let opts = WriteFitOptions {
694 adjust: Adjust::BH,
695 ..Default::default()
696 };
697 write_fit(&path, &fit, None, &opts).unwrap();
698 let txt = std::fs::read_to_string(&path).unwrap();
699 let _ = std::fs::remove_file(&path);
700
701 let mut lines = txt.lines();
702 assert_eq!(
703 lines.next().unwrap(),
704 "\tAveExpr\tCoef.Intercept\tCoef.grpB\tt.Intercept\tt.grpB\tP.value.Intercept\t\
705 P.value.grpB\tP.value.adj.Intercept\tP.value.adj.grpB\tF\tF.p.value"
706 );
707
708 let expected: [[f64; 11]; 5] = [
710 [
711 6.29433333333333,
712 6.04333333333333,
713 0.502,
714 5.77088235610168,
715 0.338964636041287,
716 0.000595501060937467,
717 0.744216015086904,
718 0.000992501768229112,
719 0.911512695521572,
720 36.1843587501443,
721 0.000166248928492668,
722 ],
723 [
724 6.557,
725 6.70733333333334,
726 -0.300666666666668,
727 9.78282382638691,
728 -0.310087762759466,
729 1.92170677729296e-05,
730 0.765193889323088,
731 8.20450194802642e-05,
732 0.911512695521572,
733 91.5097315793615,
734 7.00632124072959e-06,
735 ],
736 [
737 5.60983333333333,
738 5.52133333333334,
739 0.176999999999999,
740 5.07576159719018,
741 0.115057654632127,
742 0.00128191400310164,
743 0.911512695521572,
744 0.00160239250387704,
745 0.911512695521572,
746 26.6025021648719,
747 0.000452243348899521,
748 ],
749 [
750 5.43683333333333,
751 5.97133333333334,
752 -1.069,
753 3.76176082911824,
754 -0.476192523101982,
755 0.00658339642469066,
756 0.647914700333665,
757 0.00658339642469066,
758 0.911512695521572,
759 11.844291449427,
760 0.00515864337996187,
761 ],
762 [
763 6.927,
764 6.766,
765 0.322000000000001,
766 9.0399993371551,
767 0.304212656857555,
768 3.28180077921057e-05,
769 0.769488416182624,
770 8.20450194802642e-05,
771 0.911512695521572,
772 85.7033369243552,
773 8.80865266401849e-06,
774 ],
775 ];
776 let mut g = 0;
777 for line in lines {
778 let f: Vec<&str> = line.split('\t').collect();
779 assert_eq!(f.len(), 12, "row {g} field count");
780 assert_eq!(f[0], format!("g{}", g + 1));
781 for (c, &want) in expected[g].iter().enumerate() {
782 let got: f64 = f[c + 1].parse().unwrap();
783 assert!(close(got, want), "cell [{g}][{c}] got {got} want {want}");
784 }
785 g += 1;
786 }
787 assert_eq!(g, 5);
788 }
789
790 #[test]
791 fn write_fit_global_and_none_headers() {
792 let fit = build_fit();
793
794 let pg = tmp("glob");
796 write_fit(
797 &pg,
798 &fit,
799 None,
800 &WriteFitOptions {
801 adjust: Adjust::BH,
802 method: DecideMethod::Global,
803 ..Default::default()
804 },
805 )
806 .unwrap();
807 let gtxt = std::fs::read_to_string(&pg).unwrap();
808 let _ = std::fs::remove_file(&pg);
809 let g1: Vec<&str> = gtxt.lines().nth(1).unwrap().split('\t').collect();
810 assert!(close(g1[8].parse().unwrap(), 0.00198500353645822));
812 assert!(close(g1[9].parse().unwrap(), 0.854987129091805));
813
814 let pn = tmp("none");
816 write_fit(&pn, &fit, None, &WriteFitOptions::default()).unwrap();
817 let ntxt = std::fs::read_to_string(&pn).unwrap();
818 let _ = std::fs::remove_file(&pn);
819 assert_eq!(
820 ntxt.lines().next().unwrap(),
821 "\tAveExpr\tCoef.Intercept\tCoef.grpB\tt.Intercept\tt.grpB\t\
822 P.value.Intercept\tP.value.grpB\tF\tF.p.value"
823 );
824 }
825
826 #[test]
827 fn csv_quote_rfc4180() {
828 use std::borrow::Cow;
829 assert!(matches!(csv_quote("NAMPT", b','), Cow::Borrowed(_)));
831 assert_eq!(&*csv_quote("NAMPT", b','), "NAMPT");
832 assert_eq!(
834 &*csv_quote("1,2-Di(4Z,7Z,10Z)-PE", b','),
835 "\"1,2-Di(4Z,7Z,10Z)-PE\""
836 );
837 assert_eq!(&*csv_quote("a\"b", b','), "\"a\"\"b\"");
839 assert_eq!(&*csv_quote("a\nb", b','), "\"a\nb\"");
841 assert!(matches!(csv_quote("a,b", b'\t'), Cow::Borrowed(_)));
844 assert_eq!(&*csv_quote("a\tb", b'\t'), "\"a\tb\"");
845 }
846
847 #[test]
848 fn write_top_table_quotes_comma_ids() {
849 let row = |id: &str| TopRow {
850 id: id.to_string(),
851 log2_fold_change: 1.23,
852 lfc_se: 0.45,
853 ave_expr: 8.1,
854 t: 2.0,
855 p_value: 0.01,
856 adj_p_value: 0.02,
857 b: 0.5,
858 };
859 let rows = vec![
860 row("1,2-Di(4Z,7Z,10Z)-PE"),
861 row("NAMPT"),
862 row("weird\"name"),
863 ];
864 let path = tmp("quote");
865 write_top_table(&path, &rows).unwrap();
866 let txt = std::fs::read_to_string(&path).unwrap();
867 let _ = std::fs::remove_file(&path);
868
869 let mut lines = txt.lines();
870 assert_eq!(
871 lines.next().unwrap(),
872 "id,log2FoldChange,lfcSE,AveExpr,t,P.Value,adj.P.Val,B"
873 );
874 let l1 = lines.next().unwrap();
877 let q = "\"1,2-Di(4Z,7Z,10Z)-PE\"";
878 assert!(l1.starts_with(q), "comma id not quoted: {l1}");
879 assert_eq!(
880 l1[q.len()..].matches(',').count(),
881 7,
882 "expected 7 trailing numeric fields: {l1}"
883 );
884 assert!(lines.next().unwrap().starts_with("NAMPT,"));
886 assert!(lines.next().unwrap().starts_with("\"weird\"\"name\","));
888 }
889}
890
891#[cfg(all(test, feature = "cli"))]
892mod read_matrix_tests {
893 use super::*;
894
895 #[test]
896 fn delimiter_from_extension() {
897 assert_eq!(delimiter_for_path(Path::new("x.tsv")), b'\t');
898 assert_eq!(delimiter_for_path(Path::new("x.tab")), b'\t');
899 assert_eq!(delimiter_for_path(Path::new("X.TSV")), b'\t');
901 assert_eq!(delimiter_for_path(Path::new("dir/y.Tab")), b'\t');
902 assert_eq!(delimiter_for_path(Path::new("x.csv")), b',');
904 assert_eq!(delimiter_for_path(Path::new("x.txt")), b',');
905 assert_eq!(delimiter_for_path(Path::new("noext")), b',');
906 }
907
908 fn tmp(tag: &str, ext: &str) -> std::path::PathBuf {
909 let nanos = std::time::SystemTime::now()
910 .duration_since(std::time::UNIX_EPOCH)
911 .unwrap()
912 .as_nanos();
913 std::env::temp_dir().join(format!(
914 "limma_rm_{}_{}_{}.{}",
915 tag,
916 std::process::id(),
917 nanos,
918 ext
919 ))
920 }
921
922 fn body(sep: char) -> String {
924 format!(
925 "gene{sep}s1{sep}s2{sep}s3\n\
926 g1{sep}1.5{sep}{sep}3.5\n\
927 g2{sep}4{sep}NA{sep}6\n"
928 )
929 }
930
931 fn check(m: &LabeledMatrix) {
932 assert_eq!(m.col_names, ["s1", "s2", "s3"]);
933 assert_eq!(m.row_names, ["g1", "g2"]);
934 assert_eq!(m.data.shape(), [2, 3]);
935 assert_eq!(m.data[[0, 0]], 1.5);
936 assert!(m.data[[0, 1]].is_nan()); assert_eq!(m.data[[0, 2]], 3.5);
938 assert_eq!(m.data[[1, 0]], 4.0);
939 assert!(m.data[[1, 1]].is_nan()); assert_eq!(m.data[[1, 2]], 6.0);
941 }
942
943 #[test]
944 fn auto_detects_tsv_and_csv() {
945 let pt = tmp("auto", "tsv");
947 std::fs::write(&pt, body('\t')).unwrap();
948 let mt = read_matrix(&pt).unwrap();
949 let _ = std::fs::remove_file(&pt);
950 check(&mt);
951
952 let pc = tmp("auto", "csv");
954 std::fs::write(&pc, body(',')).unwrap();
955 let mc = read_matrix(&pc).unwrap();
956 let _ = std::fs::remove_file(&pc);
957 check(&mc);
958 }
959
960 #[test]
961 fn explicit_delimiter_overrides_extension() {
962 let p = tmp("override", "csv");
965 std::fs::write(&p, body(';')).unwrap();
966 let m = read_matrix_with_delimiter(&p, b';').unwrap();
967 let _ = std::fs::remove_file(&p);
968 check(&m);
969 }
970}