1use std::io::{BufRead, Write};
2
3use rayon::prelude::*;
4use rsomics_common::{Result, RsomicsError};
5
6mod table;
7
8pub use table::Table;
9
10pub struct IlrMatrix {
11 pub samples: Vec<String>,
12 pub coords: Vec<String>,
13 pub data: Vec<f64>,
14}
15
16fn basis_scales(d: usize) -> Vec<f64> {
20 (1..d)
21 .map(|i| (i as f64 / (i as f64 + 1.0)).sqrt())
22 .collect()
23}
24
25pub fn ilr(table: &Table, pseudocount: f64) -> Result<IlrMatrix> {
41 let d = table.n_features();
42 if d < 2 {
43 return Err(RsomicsError::InvalidInput(format!(
44 "ilr needs at least 2 features, got {d}"
45 )));
46 }
47 let inv_d = 1.0 / d as f64;
48 let scales = basis_scales(d);
49
50 let mut data = vec![0.0_f64; table.n_samples() * (d - 1)];
51 let bad: Option<f64> = data
52 .par_chunks_mut(d - 1)
53 .zip(table.data.par_chunks(d))
54 .map(|(out, row)| {
55 let mut clr = Vec::with_capacity(d);
56 let mut mean = 0.0;
57 for &x in row {
58 let v = x + pseudocount;
59 if v <= 0.0 || !v.is_finite() {
60 return Some(x);
61 }
62 let l = v.ln();
63 clr.push(l);
64 mean += l;
65 }
66 mean *= inv_d;
67
68 let mut prefix = 0.0;
69 for (k, o) in out.iter_mut().enumerate() {
70 let i = k + 1;
71 prefix += clr[k] - mean;
72 *o = scales[k] * (prefix / i as f64 - (clr[i] - mean));
73 }
74 None
75 })
76 .reduce(|| None, |a, b| a.or(b));
77
78 if let Some(v) = bad {
79 return Err(RsomicsError::InvalidInput(format!(
80 "value {v} is non-positive after pseudocount {pseudocount} — ilr's log is undefined (skbio requires strictly positive data)"
81 )));
82 }
83
84 let coords = (0..d - 1).map(|k| format!("ilr{k}")).collect();
85 Ok(IlrMatrix {
86 samples: table.samples.clone(),
87 coords,
88 data,
89 })
90}
91
92impl IlrMatrix {
93 pub fn write_tsv<W: Write>(&self, mut out: W, delim: char) -> Result<()> {
99 let k = self.coords.len();
100 let mut line = String::with_capacity(16 * (k + 1));
101 line.push_str("sample");
102 for c in &self.coords {
103 line.push(delim);
104 line.push_str(c);
105 }
106 line.push('\n');
107 out.write_all(line.as_bytes()).map_err(RsomicsError::Io)?;
108
109 let mut buf = ryu_like::Buffer::new();
110 for (i, sample) in self.samples.iter().enumerate() {
111 line.clear();
112 line.push_str(sample);
113 for &v in &self.data[i * k..(i + 1) * k] {
114 line.push(delim);
115 line.push_str(buf.format(v));
116 }
117 line.push('\n');
118 out.write_all(line.as_bytes()).map_err(RsomicsError::Io)?;
119 }
120 Ok(())
121 }
122}
123
124mod ryu_like {
127 use std::fmt::Write;
128
129 pub struct Buffer {
130 s: String,
131 }
132
133 impl Buffer {
134 pub fn new() -> Self {
135 Self {
136 s: String::with_capacity(24),
137 }
138 }
139
140 pub fn format(&mut self, v: f64) -> &str {
141 self.s.clear();
142 let _ = write!(self.s, "{v}");
143 &self.s
144 }
145 }
146}
147
148pub fn run<W: Write>(
151 table_reader: impl BufRead,
152 out: W,
153 delim: char,
154 pseudocount: f64,
155) -> Result<()> {
156 let table = Table::parse(table_reader, delim)?;
157 let res = ilr(&table, pseudocount)?;
158 res.write_tsv(out, delim)
159}
160
161#[cfg(test)]
162mod tests {
163 use super::*;
164
165 fn parse(txt: &str) -> Table {
166 Table::parse(txt.as_bytes(), '\t').unwrap()
167 }
168
169 #[test]
170 fn docstring_example() {
171 let t = parse("\tf1\tf2\tf3\tf4\ns1\t0.1\t0.3\t0.4\t0.2\n");
173 let r = ilr(&t, 0.0).unwrap();
174 let want = [-0.7768362, -0.68339802, 0.11704769];
175 assert_eq!(r.data.len(), 3);
176 for (g, w) in r.data.iter().zip(want) {
177 assert!((g - w).abs() < 1e-7, "{g} vs {w}");
178 }
179 }
180
181 #[test]
182 fn output_width_is_d_minus_one() {
183 let r = ilr(&parse("\tf1\tf2\tf3\ns1\t5\t1\t9\ns2\t2\t8\t4\n"), 0.0).unwrap();
184 assert_eq!(r.coords.len(), 2);
185 assert_eq!(r.data.len(), 4);
186 }
187
188 #[test]
189 fn scale_invariant() {
190 let a = ilr(&parse("\tf1\tf2\tf3\tf4\ns1\t0.1\t0.3\t0.4\t0.2\n"), 0.0).unwrap();
191 let b = ilr(&parse("\tf1\tf2\tf3\tf4\ns1\t1\t3\t4\t2\n"), 0.0).unwrap();
192 for (x, y) in a.data.iter().zip(&b.data) {
193 assert!((x - y).abs() < 1e-12);
194 }
195 }
196
197 #[test]
198 fn zero_without_pseudocount_errors() {
199 assert!(ilr(&parse("\tf1\tf2\tf3\ns1\t0\t5\t1\n"), 0.0).is_err());
200 }
201
202 #[test]
203 fn pseudocount_admits_zero() {
204 let r = ilr(&parse("\tf1\tf2\tf3\ns1\t0\t5\t1\n"), 0.5).unwrap();
205 assert!(r.data.iter().all(|v| v.is_finite()));
206 }
207
208 #[test]
209 fn single_feature_errors() {
210 assert!(ilr(&parse("\tf1\ns1\t5\n"), 0.0).is_err());
211 }
212}