Skip to main content

rsomics_limma_vooma/
lib.rs

1//! vooma: mean-variance modelling at the observational level for arrays.
2//!
3//! Clean-room reimplementation of limma's `vooma()` for a log-expression
4//! matrix and design. Reference: Law CW (2013), "Precision weights for gene
5//! expression analysis", PhD Thesis, University of Melbourne
6//! (hdl.handle.net/11343/38150). No limma (GPL) source was consulted; the
7//! method follows the published documentation and is validated black-box
8//! against the binary.
9//!
10//! lmFit(y, design) gives per-gene residual sd (sigma) and row mean (Amean);
11//! the trend is a lowess of sqrt(sigma) on Amean; each observation's precision
12//! weight is `1 / trend(fitted)^4` where fitted is its model-predicted mean.
13
14mod fit;
15mod lowess;
16mod matrix;
17
18use std::io::{BufWriter, Write};
19use std::path::Path;
20
21use rsomics_common::{Result, RsomicsError};
22
23pub use matrix::{Design, Expr, read_design, read_expr};
24
25/// limma `chooseLowessSpan(n, small.n=50, min.span=0.3, power=1/3)`: the span
26/// vooma uses by default. (The man page's "power=1.3" is a documentation slip;
27/// the binary uses the chooseLowessSpan default 1/3 — verified black-box.)
28fn lowess_span(n: usize) -> f64 {
29    if n <= 50 {
30        return 1.0;
31    }
32    (0.3 + 0.7 * (50.0 / n as f64).powf(1.0 / 3.0)).min(1.0)
33}
34
35pub struct Vooma {
36    pub samples: Vec<String>,
37    pub genes: Vec<String>,
38    /// precision weights, row-major [gene][sample]
39    pub weights: Vec<Vec<f64>>,
40    /// the fitted mean-variance trend, as (Amean, sqrt-sd) line points
41    pub trend_x: Vec<f64>,
42    pub trend_y: Vec<f64>,
43}
44
45pub fn vooma(expr: &Expr, design: &Design) -> Result<Vooma> {
46    let n = expr.samples.len();
47    let ng = expr.genes.len();
48    if design.x.len() != n {
49        return Err(RsomicsError::InvalidInput(format!(
50            "design has {} rows, expression has {n} samples",
51            design.x.len()
52        )));
53    }
54
55    let fit = fit::lm_fit(&expr.y, &design.x)?;
56
57    let sx = &fit.amean;
58    let sy: Vec<f64> = fit.sigma.iter().map(|&s| s.sqrt()).collect();
59
60    let mut order: Vec<usize> = (0..ng).collect();
61    order.sort_by(|&a, &b| sx[a].partial_cmp(&sx[b]).unwrap());
62    let lx: Vec<f64> = order.iter().map(|&i| sx[i]).collect();
63    let ly: Vec<f64> = order.iter().map(|&i| sy[i]).collect();
64    let delta = 0.01 * (lx[ng - 1] - lx[0]);
65    let fitted_line = lowess::lowess(&lx, &ly, lowess_span(ng), 3, delta);
66    let trend = lowess::Trend::new(&lx, &fitted_line);
67
68    // weight = 1 / trend(fitted)^4, fitted = X beta per observation
69    let mut weights = vec![vec![0.0; n]; ng];
70    for (gi, beta) in fit.coefficients.iter().enumerate() {
71        for (j, w) in weights[gi].iter_mut().enumerate() {
72            let xrow = &design.x[j];
73            let predicted_mean: f64 = beta.iter().zip(xrow).map(|(&b, &xij)| b * xij).sum();
74            let sd = trend.eval(predicted_mean);
75            *w = 1.0 / sd.powi(4);
76        }
77    }
78
79    Ok(Vooma {
80        samples: expr.samples.clone(),
81        genes: expr.genes.clone(),
82        weights,
83        trend_x: lx,
84        trend_y: fitted_line,
85    })
86}
87
88pub fn write_weights(v: &Vooma, out: &mut dyn Write) -> Result<()> {
89    let mut w = BufWriter::with_capacity(1 << 20, out);
90    write!(w, "gene").map_err(RsomicsError::Io)?;
91    for s in &v.samples {
92        write!(w, "\t{s}").map_err(RsomicsError::Io)?;
93    }
94    writeln!(w).map_err(RsomicsError::Io)?;
95
96    let mut fmt = ryu::Buffer::new();
97    let mut line = String::with_capacity(v.samples.len() * 16);
98    for (gene, row) in v.genes.iter().zip(&v.weights) {
99        line.clear();
100        line.push_str(gene);
101        for &val in row {
102            line.push('\t');
103            line.push_str(fmt.format(val));
104        }
105        line.push('\n');
106        w.write_all(line.as_bytes()).map_err(RsomicsError::Io)?;
107    }
108    w.flush().map_err(RsomicsError::Io)?;
109    Ok(())
110}
111
112pub fn write_trend(v: &Vooma, path: &Path) -> Result<()> {
113    let f = std::fs::File::create(path).map_err(RsomicsError::Io)?;
114    let mut w = BufWriter::new(f);
115    writeln!(w, "AveLogExpr\tsqrtSD").map_err(RsomicsError::Io)?;
116    let mut xb = ryu::Buffer::new();
117    let mut yb = ryu::Buffer::new();
118    for (&x, &y) in v.trend_x.iter().zip(&v.trend_y) {
119        writeln!(w, "{}\t{}", xb.format(x), yb.format(y)).map_err(RsomicsError::Io)?;
120    }
121    w.flush().map_err(RsomicsError::Io)?;
122    Ok(())
123}
124
125#[cfg(test)]
126mod tests {
127    use super::*;
128
129    #[test]
130    fn small_span_caps_at_one() {
131        assert_eq!(lowess_span(40), 1.0);
132        assert_eq!(lowess_span(50), 1.0);
133        assert!(lowess_span(1000) < 1.0 && lowess_span(1000) > 0.3);
134    }
135
136    #[test]
137    fn intercept_only_weights_positive() {
138        let expr = Expr {
139            samples: vec!["s1".into(), "s2".into(), "s3".into(), "s4".into()],
140            genes: (0..60).map(|i| format!("g{i}")).collect(),
141            y: (0..60)
142                .map(|i| {
143                    let m = (i % 10) as f64;
144                    vec![m + 0.1, m - 0.1, m + 0.2, m - 0.2]
145                })
146                .collect(),
147        };
148        let design = Design {
149            coef_names: vec!["Intercept".into()],
150            x: vec![vec![1.0]; 4],
151        };
152        let v = vooma(&expr, &design).unwrap();
153        assert_eq!(v.weights.len(), 60);
154        assert!(v.weights.iter().all(|r| r.iter().all(|&w| w > 0.0)));
155    }
156}