Skip to main content

rsomics_limma_array_weights/
matrix.rs

1use std::fs::File;
2use std::io::{BufRead, BufReader};
3use std::path::Path;
4
5use rsomics_common::{Result, RsomicsError};
6
7fn open(path: &Path) -> Result<BufReader<File>> {
8    let f = File::open(path)
9        .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", path.display())))?;
10    Ok(BufReader::new(f))
11}
12
13fn parse_f64(s: &str) -> Result<f64> {
14    let t = s.trim();
15    t.parse::<f64>()
16        .map_err(|_| RsomicsError::InvalidInput(format!("non-numeric value '{t}'")))
17}
18
/// Expression matrix as produced by `read_expr`.
#[derive(Debug, Clone, PartialEq)]
pub struct Expr {
    /// Sample names, taken from the header fields after the first one.
    pub samples: Vec<String>,
    /// row-major [gene][sample]: one inner vector per gene row, holding one
    /// value per entry of `samples`.
    pub y: Vec<Vec<f64>>,
}
24
25pub fn read_expr(path: &Path) -> Result<Expr> {
26    let mut lines = open(path)?.lines();
27    let header = lines
28        .next()
29        .ok_or_else(|| RsomicsError::InvalidInput("empty expression matrix".into()))?
30        .map_err(RsomicsError::Io)?;
31    let samples: Vec<String> = header.split('\t').skip(1).map(str::to_string).collect();
32    if samples.is_empty() {
33        return Err(RsomicsError::InvalidInput(
34            "expression matrix needs at least one sample column".into(),
35        ));
36    }
37    let mut y = Vec::new();
38    for line in lines {
39        let line = line.map_err(RsomicsError::Io)?;
40        if line.is_empty() {
41            continue;
42        }
43        let mut f = line.split('\t');
44        let gene = f
45            .next()
46            .ok_or_else(|| RsomicsError::InvalidInput("missing gene id".into()))?;
47        let row: Vec<f64> = f.map(parse_f64).collect::<Result<_>>()?;
48        if row.len() != samples.len() {
49            return Err(RsomicsError::InvalidInput(format!(
50                "gene '{gene}' has {} values, header declares {} samples",
51                row.len(),
52                samples.len()
53            )));
54        }
55        y.push(row);
56    }
57    if y.is_empty() {
58        return Err(RsomicsError::InvalidInput("no genes in matrix".into()));
59    }
60    Ok(Expr { samples, y })
61}
62
/// Design matrix as produced by `read_design`.
#[derive(Debug, Clone, PartialEq)]
pub struct Design {
    /// row-major [sample][coef]: one inner vector per data row of the design
    /// file, holding one value per coefficient column of its header.
    pub x: Vec<Vec<f64>>,
}
67
68pub fn read_design(path: &Path) -> Result<Design> {
69    let mut lines = open(path)?.lines();
70    let header = lines
71        .next()
72        .ok_or_else(|| RsomicsError::InvalidInput("empty design matrix".into()))?
73        .map_err(RsomicsError::Io)?;
74    let coef_names: Vec<String> = header.split('\t').skip(1).map(str::to_string).collect();
75    if coef_names.is_empty() {
76        return Err(RsomicsError::InvalidInput(
77            "design matrix needs at least one coefficient column".into(),
78        ));
79    }
80    let mut x = Vec::new();
81    for line in lines {
82        let line = line.map_err(RsomicsError::Io)?;
83        if line.is_empty() {
84            continue;
85        }
86        let mut f = line.split('\t');
87        let id = f
88            .next()
89            .ok_or_else(|| RsomicsError::InvalidInput("missing design row id".into()))?;
90        let row: Vec<f64> = f.map(parse_f64).collect::<Result<_>>()?;
91        if row.len() != coef_names.len() {
92            return Err(RsomicsError::InvalidInput(format!(
93                "design row '{id}' has {} values, header declares {} coefficients",
94                row.len(),
95                coef_names.len()
96            )));
97        }
98        x.push(row);
99    }
100    Ok(Design { x })
101}