use std::io::BufRead;
use rsomics_common::{Result, RsomicsError};
pub struct DistanceMatrix {
pub ids: Vec<String>,
pub data: Vec<f64>,
pub n: usize,
}
impl DistanceMatrix {
pub fn read<R: BufRead>(reader: R, source: &str) -> Result<Self> {
let mut lines = reader.lines();
let header = lines
.next()
.ok_or_else(|| RsomicsError::InvalidInput(format!("{source}: empty matrix")))?
.map_err(RsomicsError::Io)?;
let ids: Vec<String> = header
.split('\t')
.skip(1)
.map(str::trim)
.map(str::to_owned)
.collect();
let n = ids.len();
if n < 3 {
return Err(RsomicsError::InvalidInput(format!(
"{source}: need at least 3 ids, found {n}"
)));
}
let mut data = vec![0.0f64; n * n];
let mut row = 0;
for line in lines {
let line = line.map_err(RsomicsError::Io)?;
if line.is_empty() {
continue;
}
if row >= n {
return Err(RsomicsError::InvalidInput(format!(
"{source}: more rows than the {n} ids in the header"
)));
}
let mut fields = line.split('\t');
let rid = fields.next().unwrap_or("").trim();
if rid != ids[row] {
return Err(RsomicsError::InvalidInput(format!(
"{source}: row {row} id '{rid}' != header id '{}'",
ids[row]
)));
}
let mut col = 0;
for f in fields {
if col >= n {
return Err(RsomicsError::InvalidInput(format!(
"{source}: row {row} has more than {n} values"
)));
}
data[row * n + col] = f.trim().parse::<f64>().map_err(|_| {
RsomicsError::InvalidInput(format!(
"{source}: row {row} col {col}: not a number: '{f}'"
))
})?;
col += 1;
}
if col != n {
return Err(RsomicsError::InvalidInput(format!(
"{source}: row {row} has {col} values, expected {n}"
)));
}
row += 1;
}
if row != n {
return Err(RsomicsError::InvalidInput(format!(
"{source}: {row} data rows, expected {n}"
)));
}
Ok(DistanceMatrix { ids, data, n })
}
pub fn condensed(&self) -> Vec<f64> {
let n = self.n;
let mut v = Vec::with_capacity(n * (n - 1) / 2);
for i in 0..n {
for j in (i + 1)..n {
v.push(self.data[i * n + j]);
}
}
v
}
}