Skip to main content

rsomics_ilr/
table.rs

1use std::io::BufRead;
2
3use rsomics_common::{Result, RsomicsError};
4
/// A samples × features composition matrix with row (sample) and column
/// (feature) IDs, stored row-major. Mirrors what skbio ingests from a DataFrame.
#[derive(Debug, Clone, PartialEq)]
pub struct Table {
    /// Sample (row) IDs, one per row of the matrix.
    pub samples: Vec<String>,
    /// Feature (column) IDs, one per column of the matrix.
    pub features: Vec<String>,
    /// Matrix values in row-major order: the values of sample `i` occupy
    /// `data[i * features.len()..(i + 1) * features.len()]`.
    pub data: Vec<f64>,
}
12
13impl Table {
14    pub fn n_samples(&self) -> usize {
15        self.samples.len()
16    }
17
18    pub fn n_features(&self) -> usize {
19        self.features.len()
20    }
21
22    pub fn row(&self, i: usize) -> &[f64] {
23        let m = self.n_features();
24        &self.data[i * m..(i + 1) * m]
25    }
26
27    /// Header line is feature IDs (the corner cell is ignored); each following
28    /// line is a sample ID followed by its component values.
29    pub fn parse(reader: impl BufRead, delim: char) -> Result<Self> {
30        let mut lines = reader.lines();
31        let header = lines
32            .next()
33            .ok_or_else(|| RsomicsError::InvalidInput("empty table".into()))?
34            .map_err(RsomicsError::Io)?;
35        let features: Vec<String> = header
36            .split(delim)
37            .skip(1)
38            .map(|s| s.trim().to_string())
39            .collect();
40        if features.is_empty() {
41            return Err(RsomicsError::InvalidInput(
42                "table header has no feature columns".into(),
43            ));
44        }
45        let m = features.len();
46
47        let mut samples = Vec::new();
48        let mut data = Vec::new();
49        for line in lines {
50            let line = line.map_err(RsomicsError::Io)?;
51            if line.trim().is_empty() {
52                continue;
53            }
54            let mut cells = line.split(delim);
55            let id = cells
56                .next()
57                .ok_or_else(|| RsomicsError::InvalidInput("table row has no cells".into()))?;
58            samples.push(id.trim().to_string());
59            let before = data.len();
60            for cell in cells {
61                let v: f64 = cell.trim().parse().map_err(|_| {
62                    RsomicsError::InvalidInput(format!("non-numeric table value: '{cell}'"))
63                })?;
64                data.push(v);
65            }
66            if data.len() - before != m {
67                return Err(RsomicsError::InvalidInput(format!(
68                    "sample '{}' has {} values, expected {m}",
69                    samples.last().unwrap(),
70                    data.len() - before
71                )));
72            }
73        }
74        if samples.is_empty() {
75            return Err(RsomicsError::InvalidInput("table has no samples".into()));
76        }
77        Ok(Self {
78            samples,
79            features,
80            data,
81        })
82    }
83}