rsomics_limma_vooma/
lib.rs1mod fit;
15mod lowess;
16mod matrix;
17
18use std::io::{BufWriter, Write};
19use std::path::Path;
20
21use rsomics_common::{Result, RsomicsError};
22
23pub use matrix::{Design, Expr, read_design, read_expr};
24
25fn lowess_span(n: usize) -> f64 {
29 if n <= 50 {
30 return 1.0;
31 }
32 (0.3 + 0.7 * (50.0 / n as f64).powf(1.0 / 3.0)).min(1.0)
33}
34
35pub struct Vooma {
36 pub samples: Vec<String>,
37 pub genes: Vec<String>,
38 pub weights: Vec<Vec<f64>>,
40 pub trend_x: Vec<f64>,
42 pub trend_y: Vec<f64>,
43}
44
45pub fn vooma(expr: &Expr, design: &Design) -> Result<Vooma> {
46 let n = expr.samples.len();
47 let ng = expr.genes.len();
48 if design.x.len() != n {
49 return Err(RsomicsError::InvalidInput(format!(
50 "design has {} rows, expression has {n} samples",
51 design.x.len()
52 )));
53 }
54
55 let fit = fit::lm_fit(&expr.y, &design.x)?;
56
57 let sx = &fit.amean;
58 let sy: Vec<f64> = fit.sigma.iter().map(|&s| s.sqrt()).collect();
59
60 let mut order: Vec<usize> = (0..ng).collect();
61 order.sort_by(|&a, &b| sx[a].partial_cmp(&sx[b]).unwrap());
62 let lx: Vec<f64> = order.iter().map(|&i| sx[i]).collect();
63 let ly: Vec<f64> = order.iter().map(|&i| sy[i]).collect();
64 let delta = 0.01 * (lx[ng - 1] - lx[0]);
65 let fitted_line = lowess::lowess(&lx, &ly, lowess_span(ng), 3, delta);
66 let trend = lowess::Trend::new(&lx, &fitted_line);
67
68 let mut weights = vec![vec![0.0; n]; ng];
70 for (gi, beta) in fit.coefficients.iter().enumerate() {
71 for (j, w) in weights[gi].iter_mut().enumerate() {
72 let xrow = &design.x[j];
73 let predicted_mean: f64 = beta.iter().zip(xrow).map(|(&b, &xij)| b * xij).sum();
74 let sd = trend.eval(predicted_mean);
75 *w = 1.0 / sd.powi(4);
76 }
77 }
78
79 Ok(Vooma {
80 samples: expr.samples.clone(),
81 genes: expr.genes.clone(),
82 weights,
83 trend_x: lx,
84 trend_y: fitted_line,
85 })
86}
87
88pub fn write_weights(v: &Vooma, out: &mut dyn Write) -> Result<()> {
89 let mut w = BufWriter::with_capacity(1 << 20, out);
90 write!(w, "gene").map_err(RsomicsError::Io)?;
91 for s in &v.samples {
92 write!(w, "\t{s}").map_err(RsomicsError::Io)?;
93 }
94 writeln!(w).map_err(RsomicsError::Io)?;
95
96 let mut fmt = ryu::Buffer::new();
97 let mut line = String::with_capacity(v.samples.len() * 16);
98 for (gene, row) in v.genes.iter().zip(&v.weights) {
99 line.clear();
100 line.push_str(gene);
101 for &val in row {
102 line.push('\t');
103 line.push_str(fmt.format(val));
104 }
105 line.push('\n');
106 w.write_all(line.as_bytes()).map_err(RsomicsError::Io)?;
107 }
108 w.flush().map_err(RsomicsError::Io)?;
109 Ok(())
110}
111
112pub fn write_trend(v: &Vooma, path: &Path) -> Result<()> {
113 let f = std::fs::File::create(path).map_err(RsomicsError::Io)?;
114 let mut w = BufWriter::new(f);
115 writeln!(w, "AveLogExpr\tsqrtSD").map_err(RsomicsError::Io)?;
116 let mut xb = ryu::Buffer::new();
117 let mut yb = ryu::Buffer::new();
118 for (&x, &y) in v.trend_x.iter().zip(&v.trend_y) {
119 writeln!(w, "{}\t{}", xb.format(x), yb.format(y)).map_err(RsomicsError::Io)?;
120 }
121 w.flush().map_err(RsomicsError::Io)?;
122 Ok(())
123}
124
125#[cfg(test)]
126mod tests {
127 use super::*;
128
129 #[test]
130 fn small_span_caps_at_one() {
131 assert_eq!(lowess_span(40), 1.0);
132 assert_eq!(lowess_span(50), 1.0);
133 assert!(lowess_span(1000) < 1.0 && lowess_span(1000) > 0.3);
134 }
135
136 #[test]
137 fn intercept_only_weights_positive() {
138 let expr = Expr {
139 samples: vec!["s1".into(), "s2".into(), "s3".into(), "s4".into()],
140 genes: (0..60).map(|i| format!("g{i}")).collect(),
141 y: (0..60)
142 .map(|i| {
143 let m = (i % 10) as f64;
144 vec![m + 0.1, m - 0.1, m + 0.2, m - 0.2]
145 })
146 .collect(),
147 };
148 let design = Design {
149 coef_names: vec!["Intercept".into()],
150 x: vec![vec![1.0]; 4],
151 };
152 let v = vooma(&expr, &design).unwrap();
153 assert_eq!(v.weights.len(), 60);
154 assert!(v.weights.iter().all(|r| r.iter().all(|&w| w > 0.0)));
155 }
156}