use std::collections::HashMap;
use std::io::BufRead;
use rsomics_common::{Result, RsomicsError};
/// A table of per-row coordinates stored in one flat buffer.
///
/// `parse_coords` builds this with one entry in `ids` and `naxes` entries in
/// `data` for every row it reads.
pub struct Coords {
    /// Row identifiers, in the order the rows were read.
    pub ids: Vec<String>,
    /// Number of values stored for each row.
    pub naxes: usize,
    /// Row values laid end to end: row `i` occupies
    /// `data[i * naxes..(i + 1) * naxes]`.
    pub data: Vec<f64>,
}

impl Coords {
    /// Returns the `naxes` values belonging to row `i`.
    ///
    /// # Panics
    ///
    /// Panics if the computed range reaches past the end of `data`.
    pub fn row(&self, i: usize) -> &[f64] {
        let start = i * self.naxes;
        let end = (i + 1) * self.naxes;
        &self.data[start..end]
    }
}
/// Parses a delimited coordinate table into a [`Coords`].
///
/// Every line that is non-blank and does not start with `#` is one row: the
/// first `delim`-separated field is the row ID and each remaining field is
/// parsed as an `f64`. Trailing whitespace on the line is discarded before
/// splitting, and each field is trimmed before use.
///
/// The first row fixes the number of axes. Every later row must have the same
/// number of values, including when the first row has an ID but no values.
///
/// # Errors
///
/// * `RsomicsError::Io` if reading a line fails.
/// * `RsomicsError::InvalidInput` if a value is not numeric, if a row's value
///   count differs from the first row's, or if the input contains no rows.
pub fn parse_coords<R: BufRead>(reader: R, delim: char) -> Result<Coords> {
    let mut ids = Vec::new();
    let mut data = Vec::new();
    // `None` until the first row is seen. A plain `0` cannot serve as the
    // "unset" marker because a first row without values has a real width of 0,
    // and later rows would then silently redefine the width.
    let mut naxes: Option<usize> = None;

    for line in reader.lines() {
        let line = line.map_err(RsomicsError::Io)?;
        let t = line.trim_end();
        if t.is_empty() || t.starts_with('#') {
            continue;
        }

        let mut fields = t.split(delim);
        // `split` always yields at least one item, so the fallback is never used.
        let id = fields.next().unwrap_or("").trim().to_string();
        let vals: Vec<f64> = fields
            .map(|f| {
                f.trim().parse().map_err(|_| {
                    RsomicsError::InvalidInput(format!("sample '{id}': '{f}' is not numeric"))
                })
            })
            .collect::<Result<_>>()?;

        match naxes {
            None => naxes = Some(vals.len()),
            Some(expected) if vals.len() != expected => {
                return Err(RsomicsError::InvalidInput(format!(
                    "sample '{id}' has {} axes, expected {expected}",
                    vals.len()
                )));
            }
            Some(_) => {}
        }

        ids.push(id);
        data.extend(vals);
    }

    // The width is set exactly when at least one row was read.
    let naxes =
        naxes.ok_or_else(|| RsomicsError::InvalidInput("no coordinate rows".into()))?;
    Ok(Coords { ids, naxes, data })
}
/// Reads a list of numbers separated by commas, tabs, spaces, or line breaks.
///
/// Empty tokens (for example between two consecutive separators) are ignored,
/// so values may be spread over any number of lines.
///
/// # Errors
///
/// * `RsomicsError::Io` if reading a line fails.
/// * `RsomicsError::InvalidInput` if a token does not parse as `f64`, or if no
///   values are found at all.
pub fn parse_prop<R: BufRead>(reader: R) -> Result<Vec<f64>> {
    const SEPARATORS: [char; 3] = [',', '\t', ' '];

    let mut values: Vec<f64> = Vec::new();
    for record in reader.lines() {
        let record = record.map_err(RsomicsError::Io)?;
        let tokens = record
            .split(SEPARATORS)
            .map(str::trim)
            .filter(|tok| !tok.is_empty());
        for tok in tokens {
            match tok.parse::<f64>() {
                Ok(v) => values.push(v),
                Err(_) => {
                    return Err(RsomicsError::InvalidInput(format!(
                        "proportion '{tok}' is not numeric"
                    )));
                }
            }
        }
    }

    if values.is_empty() {
        Err(RsomicsError::InvalidInput(
            "empty proportion-explained vector".into(),
        ))
    } else {
        Ok(values)
    }
}
/// Per-sample metadata keyed by sample ID.
///
/// `parse_metadata` fills `columns` from the header (without its first field)
/// and stores one value per column for every sample row.
pub struct Metadata {
    /// Category names, in header order.
    pub columns: Vec<String>,
    /// Values of each sample, positionally aligned with `columns`.
    pub rows: HashMap<String, Vec<String>>,
}

impl Metadata {
    /// Returns the position of the category called `name` within `columns`.
    ///
    /// # Errors
    ///
    /// Returns `RsomicsError::InvalidInput` if no column has that name.
    pub fn col_index(&self, name: &str) -> Result<usize> {
        for (idx, column) in self.columns.iter().enumerate() {
            if column == name {
                return Ok(idx);
            }
        }
        Err(RsomicsError::InvalidInput(format!(
            "category '{name}' not in metadata"
        )))
    }

    /// Returns the value stored for sample `sid` in column number `col`.
    ///
    /// # Panics
    ///
    /// Panics if `sid` is not a key of `rows` or if `col` is out of bounds for
    /// that sample's values.
    pub fn value<'a>(&'a self, sid: &str, col: usize) -> &'a str {
        let record = &self.rows[sid];
        record[col].as_str()
    }
}
/// Parses a delimited sample-metadata table into a [`Metadata`].
///
/// The first non-blank line is the header. Leading `#` characters are removed
/// from it, its first field (the ID column) is skipped, and the remaining
/// fields become the column names. Every following line that is non-blank and
/// does not start with `#` is a sample row: the first field is the sample ID
/// and the rest are its values. All fields are trimmed.
///
/// A row must provide exactly one value per column. Empty cells are kept, even
/// in the last column when `delim` is itself whitespace (e.g. a tab). Surplus
/// *empty* fields at the end of a row, as left by stray trailing delimiters,
/// are ignored. If a sample ID occurs more than once, the last row wins.
///
/// # Errors
///
/// * `RsomicsError::Io` if reading a line fails.
/// * `RsomicsError::InvalidInput` if there is no header line, or if a row's
///   value count does not match the number of columns.
pub fn parse_metadata<R: BufRead>(reader: R, delim: char) -> Result<Metadata> {
    let mut lines = reader.lines();

    // Skip leading blank lines; the first line with content is the header.
    let header = loop {
        let candidate = match lines.next() {
            Some(l) => l.map_err(RsomicsError::Io)?,
            None => return Err(RsomicsError::InvalidInput("empty metadata".into())),
        };
        if !candidate.trim().is_empty() {
            break candidate;
        }
    };
    let columns: Vec<String> = header
        .trim_start_matches('#')
        .split(delim)
        .skip(1)
        .map(|s| s.trim().to_string())
        .collect();

    let mut rows = HashMap::new();
    for line in lines {
        let line = line.map_err(RsomicsError::Io)?;
        if line.trim().is_empty() || line.starts_with('#') {
            continue;
        }
        // Drop trailing whitespace but never the delimiter itself: a plain
        // `trim_end()` would eat trailing tabs and with them an empty last
        // cell, making a well-formed row look one value short.
        let t = line.trim_end_matches(|c: char| c.is_whitespace() && c != delim);

        let mut fields = t.split(delim).map(str::trim);
        // `split` always yields at least one item, so the fallback is never used.
        let sid = fields.next().unwrap_or("").to_string();
        let mut vals: Vec<String> = fields.map(str::to_string).collect();

        // Stray trailing delimiters show up as extra empty fields; discard
        // them so such rows keep parsing to the same values as before.
        while vals.len() > columns.len() && matches!(vals.last(), Some(v) if v.is_empty()) {
            vals.pop();
        }

        if vals.len() != columns.len() {
            return Err(RsomicsError::InvalidInput(format!(
                "metadata row '{sid}' has {} values, expected {}",
                vals.len(),
                columns.len()
            )));
        }
        rows.insert(sid, vals);
    }
    Ok(Metadata { columns, rows })
}