rsomics_limma_array_weights/
matrix.rs1use std::fs::File;
2use std::io::{BufRead, BufReader};
3use std::path::Path;
4
5use rsomics_common::{Result, RsomicsError};
6
7fn open(path: &Path) -> Result<BufReader<File>> {
8 let f = File::open(path)
9 .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", path.display())))?;
10 Ok(BufReader::new(f))
11}
12
13fn parse_f64(s: &str) -> Result<f64> {
14 let t = s.trim();
15 t.parse::<f64>()
16 .map_err(|_| RsomicsError::InvalidInput(format!("non-numeric value '{t}'")))
17}
18
/// Expression matrix as loaded by `read_expr`.
///
/// Gene identifiers (the first column of the input) are not retained; only
/// the sample names and the numeric values are kept.
#[derive(Debug, Clone, PartialEq)]
pub struct Expr {
    /// Sample names taken from the header line, excluding its first field.
    pub samples: Vec<String>,
    /// Numeric values, one inner vector per gene row in file order; each
    /// inner vector has exactly `samples.len()` entries.
    pub y: Vec<Vec<f64>>,
}
24
25pub fn read_expr(path: &Path) -> Result<Expr> {
26 let mut lines = open(path)?.lines();
27 let header = lines
28 .next()
29 .ok_or_else(|| RsomicsError::InvalidInput("empty expression matrix".into()))?
30 .map_err(RsomicsError::Io)?;
31 let samples: Vec<String> = header.split('\t').skip(1).map(str::to_string).collect();
32 if samples.is_empty() {
33 return Err(RsomicsError::InvalidInput(
34 "expression matrix needs at least one sample column".into(),
35 ));
36 }
37 let mut y = Vec::new();
38 for line in lines {
39 let line = line.map_err(RsomicsError::Io)?;
40 if line.is_empty() {
41 continue;
42 }
43 let mut f = line.split('\t');
44 let gene = f
45 .next()
46 .ok_or_else(|| RsomicsError::InvalidInput("missing gene id".into()))?;
47 let row: Vec<f64> = f.map(parse_f64).collect::<Result<_>>()?;
48 if row.len() != samples.len() {
49 return Err(RsomicsError::InvalidInput(format!(
50 "gene '{gene}' has {} values, header declares {} samples",
51 row.len(),
52 samples.len()
53 )));
54 }
55 y.push(row);
56 }
57 if y.is_empty() {
58 return Err(RsomicsError::InvalidInput("no genes in matrix".into()));
59 }
60 Ok(Expr { samples, y })
61}
62
/// Design matrix as loaded by `read_design`.
///
/// Row identifiers and coefficient names from the input are not retained;
/// only the numeric values are kept.
#[derive(Debug, Clone, PartialEq)]
pub struct Design {
    /// Numeric values, one inner vector per data row in file order; each
    /// inner vector has one entry per coefficient column of the header.
    pub x: Vec<Vec<f64>>,
}
67
68pub fn read_design(path: &Path) -> Result<Design> {
69 let mut lines = open(path)?.lines();
70 let header = lines
71 .next()
72 .ok_or_else(|| RsomicsError::InvalidInput("empty design matrix".into()))?
73 .map_err(RsomicsError::Io)?;
74 let coef_names: Vec<String> = header.split('\t').skip(1).map(str::to_string).collect();
75 if coef_names.is_empty() {
76 return Err(RsomicsError::InvalidInput(
77 "design matrix needs at least one coefficient column".into(),
78 ));
79 }
80 let mut x = Vec::new();
81 for line in lines {
82 let line = line.map_err(RsomicsError::Io)?;
83 if line.is_empty() {
84 continue;
85 }
86 let mut f = line.split('\t');
87 let id = f
88 .next()
89 .ok_or_else(|| RsomicsError::InvalidInput("missing design row id".into()))?;
90 let row: Vec<f64> = f.map(parse_f64).collect::<Result<_>>()?;
91 if row.len() != coef_names.len() {
92 return Err(RsomicsError::InvalidInput(format!(
93 "design row '{id}' has {} values, header declares {} coefficients",
94 row.len(),
95 coef_names.len()
96 )));
97 }
98 x.push(row);
99 }
100 Ok(Design { x })
101}