use std::io::BufRead;
use rsomics_common::{Result, RsomicsError};
pub struct DistanceMatrix {
pub ids: Vec<String>,
pub data: Vec<f64>,
pub n: usize,
}
impl DistanceMatrix {
pub fn read<R: BufRead>(reader: R, source: &str) -> Result<Self> {
let mut lines = reader.lines();
let header = lines
.next()
.ok_or_else(|| RsomicsError::InvalidInput(format!("{source}: empty matrix")))?
.map_err(RsomicsError::Io)?;
let ids: Vec<String> = header
.split('\t')
.skip(1)
.map(str::trim)
.map(str::to_owned)
.collect();
let n = ids.len();
if n < 3 {
return Err(RsomicsError::InvalidInput(format!(
"{source}: need at least 3 ids, found {n}"
)));
}
let mut data = vec![0.0f64; n * n];
let mut row = 0;
for line in lines {
let line = line.map_err(RsomicsError::Io)?;
if line.is_empty() {
continue;
}
if row >= n {
return Err(RsomicsError::InvalidInput(format!(
"{source}: more rows than the {n} ids in the header"
)));
}
let mut fields = line.split('\t');
let rid = fields.next().unwrap_or("").trim();
if rid != ids[row] {
return Err(RsomicsError::InvalidInput(format!(
"{source}: row {row} id '{rid}' != header id '{}'",
ids[row]
)));
}
let mut col = 0;
for f in fields {
if col >= n {
return Err(RsomicsError::InvalidInput(format!(
"{source}: row {row} has more than {n} values"
)));
}
data[row * n + col] = f.trim().parse::<f64>().map_err(|_| {
RsomicsError::InvalidInput(format!(
"{source}: row {row} col {col}: not a number: '{f}'"
))
})?;
col += 1;
}
if col != n {
return Err(RsomicsError::InvalidInput(format!(
"{source}: row {row} has {col} values, expected {n}"
)));
}
row += 1;
}
if row != n {
return Err(RsomicsError::InvalidInput(format!(
"{source}: {row} data rows, expected {n}"
)));
}
Ok(DistanceMatrix { ids, data, n })
}
pub fn reorder_like(&self, target: &[String], source: &str) -> Result<Vec<f64>> {
let n = self.n;
let pos: std::collections::HashMap<&str, usize> = self
.ids
.iter()
.enumerate()
.map(|(i, s)| (s.as_str(), i))
.collect();
let perm: Vec<usize> = target
.iter()
.map(|id| {
pos.get(id.as_str()).copied().ok_or_else(|| {
RsomicsError::InvalidInput(format!(
"{source}: id '{id}' missing; the two matrices must share ids"
))
})
})
.collect::<Result<_>>()?;
let mut out = vec![0.0f64; n * n];
for (i, &pi) in perm.iter().enumerate() {
for (j, &pj) in perm.iter().enumerate() {
out[i * n + j] = self.data[pi * n + pj];
}
}
Ok(out)
}
pub fn condensed(data: &[f64], n: usize) -> Vec<f64> {
let mut v = Vec::with_capacity(n * (n - 1) / 2);
for i in 0..n {
for j in (i + 1)..n {
v.push(data[i * n + j]);
}
}
v
}
}