use std::io::{BufRead, Write};
use rayon::prelude::*;
use rsomics_common::{Result, RsomicsError};
mod table;
pub use table::Table;
pub struct IlrMatrix {
pub samples: Vec<String>,
pub coords: Vec<String>,
pub data: Vec<f64>,
}
fn basis_scales(d: usize) -> Vec<f64> {
(1..d)
.map(|i| (i as f64 / (i as f64 + 1.0)).sqrt())
.collect()
}
pub fn ilr(table: &Table, pseudocount: f64) -> Result<IlrMatrix> {
let d = table.n_features();
if d < 2 {
return Err(RsomicsError::InvalidInput(format!(
"ilr needs at least 2 features, got {d}"
)));
}
let inv_d = 1.0 / d as f64;
let scales = basis_scales(d);
let mut data = vec![0.0_f64; table.n_samples() * (d - 1)];
let bad: Option<f64> = data
.par_chunks_mut(d - 1)
.zip(table.data.par_chunks(d))
.map(|(out, row)| {
let mut clr = Vec::with_capacity(d);
let mut mean = 0.0;
for &x in row {
let v = x + pseudocount;
if v <= 0.0 || !v.is_finite() {
return Some(x);
}
let l = v.ln();
clr.push(l);
mean += l;
}
mean *= inv_d;
let mut prefix = 0.0;
for (k, o) in out.iter_mut().enumerate() {
let i = k + 1;
prefix += clr[k] - mean;
*o = scales[k] * (prefix / i as f64 - (clr[i] - mean));
}
None
})
.reduce(|| None, |a, b| a.or(b));
if let Some(v) = bad {
return Err(RsomicsError::InvalidInput(format!(
"value {v} is non-positive after pseudocount {pseudocount} — ilr's log is undefined (skbio requires strictly positive data)"
)));
}
let coords = (0..d - 1).map(|k| format!("ilr{k}")).collect();
Ok(IlrMatrix {
samples: table.samples.clone(),
coords,
data,
})
}
impl IlrMatrix {
pub fn write_tsv<W: Write>(&self, mut out: W, delim: char) -> Result<()> {
let k = self.coords.len();
let mut line = String::with_capacity(16 * (k + 1));
line.push_str("sample");
for c in &self.coords {
line.push(delim);
line.push_str(c);
}
line.push('\n');
out.write_all(line.as_bytes()).map_err(RsomicsError::Io)?;
let mut buf = ryu_like::Buffer::new();
for (i, sample) in self.samples.iter().enumerate() {
line.clear();
line.push_str(sample);
for &v in &self.data[i * k..(i + 1) * k] {
line.push(delim);
line.push_str(buf.format(v));
}
line.push('\n');
out.write_all(line.as_bytes()).map_err(RsomicsError::Io)?;
}
Ok(())
}
}
mod ryu_like {
use std::fmt::Write;
pub struct Buffer {
s: String,
}
impl Buffer {
pub fn new() -> Self {
Self {
s: String::with_capacity(24),
}
}
pub fn format(&mut self, v: f64) -> &str {
self.s.clear();
let _ = write!(self.s, "{v}");
&self.s
}
}
}
pub fn run<W: Write>(
table_reader: impl BufRead,
out: W,
delim: char,
pseudocount: f64,
) -> Result<()> {
let table = Table::parse(table_reader, delim)?;
let res = ilr(&table, pseudocount)?;
res.write_tsv(out, delim)
}
#[cfg(test)]
mod tests {
use super::*;
fn parse(txt: &str) -> Table {
Table::parse(txt.as_bytes(), '\t').unwrap()
}
#[test]
fn docstring_example() {
let t = parse("\tf1\tf2\tf3\tf4\ns1\t0.1\t0.3\t0.4\t0.2\n");
let r = ilr(&t, 0.0).unwrap();
let want = [-0.7768362, -0.68339802, 0.11704769];
assert_eq!(r.data.len(), 3);
for (g, w) in r.data.iter().zip(want) {
assert!((g - w).abs() < 1e-7, "{g} vs {w}");
}
}
#[test]
fn output_width_is_d_minus_one() {
let r = ilr(&parse("\tf1\tf2\tf3\ns1\t5\t1\t9\ns2\t2\t8\t4\n"), 0.0).unwrap();
assert_eq!(r.coords.len(), 2);
assert_eq!(r.data.len(), 4);
}
#[test]
fn scale_invariant() {
let a = ilr(&parse("\tf1\tf2\tf3\tf4\ns1\t0.1\t0.3\t0.4\t0.2\n"), 0.0).unwrap();
let b = ilr(&parse("\tf1\tf2\tf3\tf4\ns1\t1\t3\t4\t2\n"), 0.0).unwrap();
for (x, y) in a.data.iter().zip(&b.data) {
assert!((x - y).abs() < 1e-12);
}
}
#[test]
fn zero_without_pseudocount_errors() {
assert!(ilr(&parse("\tf1\tf2\tf3\ns1\t0\t5\t1\n"), 0.0).is_err());
}
#[test]
fn pseudocount_admits_zero() {
let r = ilr(&parse("\tf1\tf2\tf3\ns1\t0\t5\t1\n"), 0.5).unwrap();
assert!(r.data.iter().all(|v| v.is_finite()));
}
#[test]
fn single_feature_errors() {
assert!(ilr(&parse("\tf1\ns1\t5\n"), 0.0).is_err());
}
}