limma/
voom.rs

1//! Mean-variance modelling of count data at the observation level. Port of
2//! limma's `voom` (`voom.R`) and the span heuristic `chooseLowessSpan`.
3//!
4//! `voom` transforms counts to log2-counts-per-million, fits a gene-wise linear
5//! model, fits a LOWESS trend of the residual standard deviation against the
6//! average log-count, and converts that trend into per-observation precision
7//! weights for entry to the usual `lmFit` -> `eBayes` pipeline.
8//!
9//! The current port covers the default path: an optional design and library
10//! sizes, `normalize.method = "none"`, no offsets and no blocking/correlation.
11//! Per-array prior weights are supported via [`voom_weighted`], which backs
12//! [`voom_with_quality_weights`] (voom + gene-by-gene array quality weights).
13
14use anyhow::{bail, Result};
15use ndarray::{Array1, Array2, Axis};
16
17use crate::arrayweights::array_weights_gene_by_gene;
18use crate::fit::{lmfit, lmfit_weighted, MArrayLM};
19use crate::lowess::LowessInterpolator;
20
21/// Choose a LOWESS span as a function of the number of points. Port of
22/// `chooseLowessSpan`: larger spans for small datasets, smaller for large ones.
23pub fn choose_lowess_span(n: usize, small_n: f64, min_span: f64, power: f64) -> f64 {
24    (min_span + (1.0 - min_span) * (small_n / n as f64).powf(power)).min(1.0)
25}
26
27/// Result of `voom`: the log-CPM matrix `E`, the per-observation precision
28/// `weights`, the design used, the per-sample library sizes, and the span.
29#[derive(Clone, Debug)]
30pub struct VoomOutput {
31    pub e: Array2<f64>,       // n_genes x n_samples, log2-CPM
32    pub weights: Array2<f64>, // n_genes x n_samples
33    pub design: Array2<f64>,
34    pub lib_size: Array1<f64>,
35    pub span: f64,
36}
37
38/// Result of `vooma`: the per-observation precision `weights`, the design
39/// used, and the span.
40#[derive(Clone, Debug)]
41pub struct VoomaOutput {
42    pub weights: Array2<f64>, // n_genes x n_samples
43    pub design: Array2<f64>,
44    pub span: f64,
45}
46
47/// `voom(counts, design, lib.size, span, adaptive.span)` with
48/// `normalize.method = "none"`.
49///
50/// * `counts` — `n_genes x n_samples` non-negative counts (no NaN).
51/// * `design` — optional `n_samples x p` design; defaults to a grand-mean
52///   intercept.
53/// * `lib_size` — optional per-sample library sizes; defaults to column sums.
54/// * `span` — LOWESS span used when `adaptive_span` is false.
55/// * `adaptive_span` — when true, the span is chosen by [`choose_lowess_span`]
56///   (`small.n = 50, min.span = 0.3, power = 1/3`), matching voom's default.
57pub fn voom(
58    counts: &Array2<f64>,
59    design: Option<&Array2<f64>>,
60    lib_size: Option<&Array1<f64>>,
61    span: f64,
62    adaptive_span: bool,
63) -> Result<VoomOutput> {
64    voom_weighted(counts, design, lib_size, span, adaptive_span, None)
65}
66
67/// `voom` with optional per-array prior weights (the `weights=` argument used by
68/// [`voom_with_quality_weights`]). When `array_weights` is `Some(aw)` the
69/// internal `lmFit` is weighted by the broadcast matrix `W[g,j] = aw[j]`, which
70/// changes `fit.sigma` (hence the trend) and the fitted values; the returned
71/// `weights` are still `1 / trend(fitted_logcount)^4` and do not themselves
72/// include `aw`.
73pub(crate) fn voom_weighted(
74    counts: &Array2<f64>,
75    design: Option<&Array2<f64>>,
76    lib_size: Option<&Array1<f64>>,
77    span: f64,
78    adaptive_span: bool,
79    array_weights: Option<&Array1<f64>>,
80) -> Result<VoomOutput> {
81    let ngenes = counts.nrows();
82    let nsamples = counts.ncols();
83    if ngenes < 2 {
84        bail!("Need at least two genes to fit a mean-variance trend");
85    }
86    let mut min_count = f64::INFINITY;
87    for &c in counts.iter() {
88        if c.is_nan() {
89            bail!("NA counts not allowed");
90        }
91        min_count = min_count.min(c);
92    }
93    if min_count < 0.0 {
94        bail!("Negative counts not allowed");
95    }
96
97    let design_owned;
98    let design = match design {
99        Some(d) => {
100            if d.nrows() != nsamples {
101                bail!(
102                    "design rows ({}) does not match number of samples ({})",
103                    d.nrows(),
104                    nsamples
105                );
106            }
107            d
108        }
109        None => {
110            design_owned = Array2::<f64>::ones((nsamples, 1));
111            &design_owned
112        }
113    };
114    let p = design.ncols();
115
116    let lib_size: Array1<f64> = match lib_size {
117        Some(l) => {
118            if l.len() != nsamples {
119                bail!(
120                    "lib_size length ({}) does not match number of samples ({})",
121                    l.len(),
122                    nsamples
123                );
124            }
125            l.clone()
126        }
127        None => counts.sum_axis(Axis(0)),
128    };
129
130    let span = if adaptive_span {
131        choose_lowess_span(ngenes, 50.0, 0.3, 1.0 / 3.0)
132    } else {
133        span
134    };
135
136    // Log2-counts-per-million (normalize.method = "none").
137    let mut y = Array2::<f64>::zeros((ngenes, nsamples));
138    for g in 0..ngenes {
139        for s in 0..nsamples {
140            y[[g, s]] = ((counts[[g, s]] + 0.5) / (lib_size[s] + 1.0) * 1e6).log2();
141        }
142    }
143
144    let gene_names: Vec<String> = (0..ngenes).map(|i| i.to_string()).collect();
145    let coef_names: Vec<String> = (0..p).map(|j| j.to_string()).collect();
146    let fit = match array_weights {
147        Some(aw) => {
148            let mut wmat = Array2::<f64>::zeros((ngenes, nsamples));
149            for g in 0..ngenes {
150                for s in 0..nsamples {
151                    wmat[[g, s]] = aw[s];
152                }
153            }
154            lmfit_weighted(&y, design, &wmat, gene_names, coef_names)?
155        }
156        None => lmfit(&y, design, gene_names, coef_names)?,
157    };
158
159    // With too little replication for a trend, all weights are 1.
160    let n_with_reps = fit.df_residual.iter().filter(|&&d| d > 0.0).count();
161    if n_with_reps < 2 {
162        return Ok(VoomOutput {
163            e: y,
164            weights: Array2::ones((ngenes, nsamples)),
165            design: design.clone(),
166            lib_size,
167            span,
168        });
169    }
170
171    // Trend of sqrt-standard-deviation against average log-count.
172    let mean_log_lib = lib_size.iter().map(|&l| (l + 1.0).log2()).sum::<f64>() / nsamples as f64;
173    let log2_1e6 = 1e6_f64.log2();
174    let sx_all: Vec<f64> = (0..ngenes)
175        .map(|g| fit.amean[g] + mean_log_lib - log2_1e6)
176        .collect();
177    let sy_all: Vec<f64> = (0..ngenes).map(|g| fit.sigma[g].sqrt()).collect();
178
179    // Genes with all-zero counts are excluded from the trend fit (they still
180    // receive weights below).
181    let (sx, sy): (Vec<f64>, Vec<f64>) = (0..ngenes)
182        .filter(|&g| counts.row(g).sum() != 0.0)
183        .map(|g| (sx_all[g], sy_all[g]))
184        .unzip();
185    let trend = LowessInterpolator::fit(&sx, &sy, span, 3);
186
187    // Apply the trend to each fitted observation.
188    let fitted = fit.coefficients.dot(&design.t()); // n_genes x n_samples
189    let mut weights = Array2::<f64>::zeros((ngenes, nsamples));
190    for g in 0..ngenes {
191        for s in 0..nsamples {
192            let fitted_cpm = 2f64.powf(fitted[[g, s]]);
193            let fitted_count = 1e-6 * fitted_cpm * (lib_size[s] + 1.0);
194            let pred = trend.eval(fitted_count.log2());
195            weights[[g, s]] = 1.0 / pred.powi(4);
196        }
197    }
198
199    Ok(VoomOutput {
200        e: y,
201        weights,
202        design: design.clone(),
203        lib_size,
204        span,
205    })
206}
207
208/// Result of [`voom_with_quality_weights`]: a [`VoomOutput`]-style payload whose
209/// `weights` already fold in the per-sample `sample_weights`.
210#[derive(Clone, Debug)]
211pub struct VoomQualityWeights {
212    pub e: Array2<f64>,       // n_genes x n_samples, log2-CPM
213    pub weights: Array2<f64>, // n_genes x n_samples (voom weights scaled by sample_weights)
214    pub design: Array2<f64>,
215    pub lib_size: Array1<f64>,
216    pub span: f64,
217    pub sample_weights: Array1<f64>, // n_samples array quality weights (aw)
218}
219
220/// `voomWithQualityWeights(counts, design, span, adaptive.span, var.design,
221/// method="genebygene")` — combine voom observation weights with array quality
222/// weights estimated gene-by-gene.
223///
224/// Mirrors limma's orchestration: voom → [`array_weights_gene_by_gene`] (using
225/// the voom weights as observation weights) → voom weighted by those array
226/// weights → re-estimate array weights → scale each sample column of the voom
227/// weights by the final array weights. The `plot`, `var.group`, REML method and
228/// `DGEList`/`EList` inputs are out of scope; this is the matrix +
229/// `method="genebygene"` path.
230///
231/// * `var_design` — optional centred, full-rank variance design `Z2`; defaults
232///   to `contr.sum(narrays)`.
233/// * `prior_n` — prior support for the array weights (limma default `10`).
234pub fn voom_with_quality_weights(
235    counts: &Array2<f64>,
236    design: Option<&Array2<f64>>,
237    lib_size: Option<&Array1<f64>>,
238    span: f64,
239    adaptive_span: bool,
240    var_design: Option<&Array2<f64>>,
241    prior_n: f64,
242) -> Result<VoomQualityWeights> {
243    // 1. voom without array weights.
244    let v1 = voom_weighted(counts, design, lib_size, span, adaptive_span, None)?;
245
246    // 2. Array weights on top of the voom weights (voom weights act as the
247    //    observation weights for the gene-by-gene estimator). The resolved voom
248    //    design matches what arrayWeights would build for a full-rank design.
249    let aw1 = array_weights_gene_by_gene(&v1.e, &v1.design, Some(&v1.weights), var_design, prior_n);
250
251    // 3. voom again, now weighted by the array weights.
252    let v2 = voom_weighted(counts, design, lib_size, span, adaptive_span, Some(&aw1))?;
253
254    // 4. Re-estimate array weights on the reweighted voom fit.
255    let aw2 = array_weights_gene_by_gene(&v2.e, &v2.design, Some(&v2.weights), var_design, prior_n);
256
257    // 5. Scale each sample column of the voom weights by the final array weights.
258    let mut weights = v2.weights;
259    let (ng, ns) = weights.dim();
260    for g in 0..ng {
261        for s in 0..ns {
262            weights[[g, s]] *= aw2[s];
263        }
264    }
265
266    Ok(VoomQualityWeights {
267        e: v2.e,
268        weights,
269        design: v2.design,
270        lib_size: v2.lib_size,
271        span: v2.span,
272        sample_weights: aw2,
273    })
274}
275
276/// `vooma(y, design, span, legacy.span)` — the continuous-data analog of
277/// `voom`, with no predictor and no blocking.
278///
279/// * `y` — `n_genes x n_samples` log-expression matrix (must be finite; R's
280///   vooma disallows NAs).
281/// * `design` — optional `n_samples x p` design; defaults to a grand-mean
282///   intercept.
283/// * `span` — LOWESS span; when `None` it is chosen adaptively
284///   (`small.n = 50, power = 1/3`, or `small.n = 10, power = 1/2` when
285///   `legacy_span`).
286pub fn vooma(
287    y: &Array2<f64>,
288    design: Option<&Array2<f64>>,
289    span: Option<f64>,
290    legacy_span: bool,
291) -> Result<VoomaOutput> {
292    let ngenes = y.nrows();
293    let nsamples = y.ncols();
294    if ngenes < 1 {
295        bail!("y has no rows");
296    }
297
298    let design_owned;
299    let design = match design {
300        Some(d) => {
301            if d.nrows() != nsamples {
302                bail!(
303                    "design rows ({}) does not match number of samples ({})",
304                    d.nrows(),
305                    nsamples
306                );
307            }
308            d
309        }
310        None => {
311            design_owned = Array2::<f64>::ones((nsamples, 1));
312            &design_owned
313        }
314    };
315    let p = design.ncols();
316
317    let gene_names: Vec<String> = (0..ngenes).map(|i| i.to_string()).collect();
318    let coef_names: Vec<String> = (0..p).map(|j| j.to_string()).collect();
319    let fit = lmfit(y, design, gene_names, coef_names)?;
320
321    if fit.amean.iter().any(|a| a.is_nan()) {
322        bail!("y contains entirely NA rows");
323    }
324
325    let sx: Vec<f64> = fit.amean.to_vec();
326    let sy: Vec<f64> = (0..ngenes).map(|g| fit.sigma[g].sqrt()).collect();
327
328    let span = match span {
329        Some(s) => s,
330        None => {
331            if legacy_span {
332                choose_lowess_span(ngenes, 10.0, 0.3, 0.5)
333            } else {
334                choose_lowess_span(ngenes, 50.0, 0.3, 1.0 / 3.0)
335            }
336        }
337    };
338    let trend = LowessInterpolator::fit(&sx, &sy, span, 3);
339
340    let mu = fit.coefficients.dot(&design.t()); // n_genes x n_samples
341    let mut weights = Array2::<f64>::zeros((ngenes, nsamples));
342    for g in 0..ngenes {
343        for s in 0..nsamples {
344            weights[[g, s]] = 1.0 / trend.eval(mu[[g, s]]).powi(4);
345        }
346    }
347
348    Ok(VoomaOutput {
349        weights,
350        design: design.clone(),
351        span,
352    })
353}
354
355/// Result of [`vooma_by_group`].
356#[derive(Clone, Debug)]
357pub struct VoomaByGroupOutput {
358    /// Observation weights (`n_genes x n_samples`), assembled from the per-group
359    /// vooma fits; a singleton group's column borrows the global fit's weights.
360    pub weights: Array2<f64>,
361    /// The design used: the supplied design, or the `~0+group` indicator.
362    pub design: Array2<f64>,
363    /// Reported LOWESS span: the last non-singleton group's span, falling back
364    /// to the global fit's span.
365    pub span: f64,
366}
367
368/// Drop all-zero columns of `m`, preserving order. Reduces a `~0+group`
369/// per-group subdesign — whose only non-zero column is that group's indicator —
370/// to an intercept, reproducing limma's rank-deficient `lm.fit` fitted values
371/// (the group mean) without a pivoted QR.
372fn drop_zero_columns(m: &Array2<f64>) -> Array2<f64> {
373    let keep: Vec<usize> = (0..m.ncols())
374        .filter(|&j| m.column(j).iter().any(|&v| v != 0.0))
375        .collect();
376    m.select(Axis(1), &keep)
377}
378
379/// `voomaByGroup(y, group, design, span, legacy.span)` — fit a separate vooma
380/// mean-variance trend within each level of `group` and assemble the per-group
381/// observation weights into one `n_genes x n_samples` matrix for entry to
382/// `lmFit`.
383///
384/// * `y` — `n_genes x n_samples` log-expression (finite; no NAs, as in vooma).
385/// * `group` — length-`n_samples` label per sample; levels are taken in sorted
386///   order (matching R's factor levels).
387/// * `design` — optional `n_samples x p` design. When `None` it defaults to the
388///   `~0+group` indicator (one column per level, in sorted order); each
389///   per-group subdesign then reduces to an intercept (the group mean), matching
390///   limma's rank-deficient `lm.fit`.
391/// * `span`, `legacy_span` — forwarded to each per-group [`vooma`].
392///
393/// Singleton groups (a single sample) borrow their weights from a global vooma
394/// fit over all samples, matching limma. The plot-only `voom.xy` trend points
395/// are not returned, and blocking/`correlation` is not supported.
396pub fn vooma_by_group(
397    y: &Array2<f64>,
398    group: &[usize],
399    design: Option<&Array2<f64>>,
400    span: Option<f64>,
401    legacy_span: bool,
402) -> Result<VoomaByGroupOutput> {
403    let ngenes = y.nrows();
404    let narrays = y.ncols();
405    if group.len() != narrays {
406        bail!(
407            "length of group ({}) must equal number of samples ({})",
408            group.len(),
409            narrays
410        );
411    }
412
413    // Sorted unique levels; map each sample to its level index.
414    let mut levels: Vec<usize> = group.to_vec();
415    levels.sort_unstable();
416    levels.dedup();
417    let ngroups = levels.len();
418    let intgroup: Vec<usize> = group
419        .iter()
420        .map(|g| levels.iter().position(|l| l == g).unwrap())
421        .collect();
422
423    // Design: supplied, or the ~0+group indicator (sorted level order).
424    let design: Array2<f64> = match design {
425        Some(d) => {
426            if d.nrows() != narrays {
427                bail!(
428                    "design rows ({}) does not match number of samples ({})",
429                    d.nrows(),
430                    narrays
431                );
432            }
433            d.clone()
434        }
435        None => {
436            let mut ind = Array2::<f64>::zeros((narrays, ngroups));
437            for (s, &gi) in intgroup.iter().enumerate() {
438                ind[[s, gi]] = 1.0;
439            }
440            ind
441        }
442    };
443
444    // Sample columns belonging to each group level.
445    let mut cols_of: Vec<Vec<usize>> = vec![Vec::new(); ngroups];
446    for (s, &gi) in intgroup.iter().enumerate() {
447        cols_of[gi].push(s);
448    }
449    let has_singleton = cols_of.iter().any(|c| c.len() == 1);
450
451    // Global fit, used to supply weights for any singleton group.
452    let voomall = if has_singleton {
453        Some(vooma(y, Some(&design), span, legacy_span)?)
454    } else {
455        None
456    };
457
458    let mut weights = Array2::<f64>::zeros((ngenes, narrays));
459    let mut last_span: Option<f64> = None;
460    for cols in &cols_of {
461        if cols.len() == 1 {
462            let va = voomall.as_ref().unwrap();
463            let s = cols[0];
464            for g in 0..ngenes {
465                weights[[g, s]] = va.weights[[g, s]];
466            }
467        } else {
468            let yi = y.select(Axis(1), cols);
469            let di = drop_zero_columns(&design.select(Axis(0), cols));
470            let voomi = vooma(&yi, Some(&di), span, legacy_span)?;
471            for (k, &s) in cols.iter().enumerate() {
472                for g in 0..ngenes {
473                    weights[[g, s]] = voomi.weights[[g, k]];
474                }
475            }
476            last_span = Some(voomi.span);
477        }
478    }
479
480    let out_span = last_span
481        .or_else(|| voomall.as_ref().map(|v| v.span))
482        .unwrap_or(0.0);
483
484    Ok(VoomaByGroupOutput {
485        weights,
486        design,
487        span: out_span,
488    })
489}
490
491/// Result of [`vooma_lm_fit`].
492#[derive(Clone, Debug)]
493pub struct VoomaLmFit {
494    /// Weighted least-squares fit carrying the vooma weights, ready for `eBayes`.
495    pub fit: MArrayLM,
496    /// vooma observation weights (`n_genes x n_samples`) used in the final fit.
497    pub weights: Array2<f64>,
498    /// LOWESS span used for the mean-variance trend.
499    pub span: f64,
500}
501
502/// `voomaLmFit(y, design, prior.weights, span, legacy.span)` — vooma followed
503/// by a weighted `lmFit`, returning a fit ready for `eBayes`.
504///
505/// Implements the common, non-iterating path. An optional `prior_weights`
506/// matrix is supported: as in limma the first fit uses the prior weights, and
507/// the final weights are `vooma_weights * prior_weights`. The block-correlation,
508/// sample-weight (`sample.weights`/`var.design`/`var.group`) and precision
509/// `predictor` options are not implemented — they require
510/// duplicateCorrelation/gls.series or arrayWeights iteration.
511pub fn vooma_lm_fit(
512    y: &Array2<f64>,
513    design: Option<&Array2<f64>>,
514    prior_weights: Option<&Array2<f64>>,
515    span: Option<f64>,
516    legacy_span: bool,
517    gene_names: Vec<String>,
518    coef_names: Vec<String>,
519) -> Result<VoomaLmFit> {
520    let ngenes = y.nrows();
521    let nsamples = y.ncols();
522    if nsamples < 2 {
523        bail!("Too few samples");
524    }
525    if ngenes < 2 {
526        bail!("Need multiple rows");
527    }
528
529    let design_owned;
530    let design = match design {
531        Some(d) => {
532            if d.nrows() != nsamples {
533                bail!(
534                    "design rows ({}) does not match number of samples ({})",
535                    d.nrows(),
536                    nsamples
537                );
538            }
539            d
540        }
541        None => {
542            design_owned = Array2::<f64>::ones((nsamples, 1));
543            &design_owned
544        }
545    };
546
547    if let Some(pw) = prior_weights {
548        if pw.nrows() != ngenes || pw.ncols() != nsamples {
549            bail!(
550                "prior_weights dimensions ({}x{}) must match y ({}x{})",
551                pw.nrows(),
552                pw.ncols(),
553                ngenes,
554                nsamples
555            );
556        }
557    }
558
559    // First fit: weighted when prior weights are supplied, otherwise OLS.
560    let fit0 = match prior_weights {
561        Some(pw) => lmfit_weighted(y, design, pw, gene_names.clone(), coef_names.clone())?,
562        None => lmfit(y, design, gene_names.clone(), coef_names.clone())?,
563    };
564    if fit0.amean.iter().any(|a| a.is_nan()) {
565        bail!("y contains entirely NA rows");
566    }
567
568    // Mean-variance trend on (average expression, sqrt residual SD).
569    let sx: Vec<f64> = fit0.amean.to_vec();
570    let sy: Vec<f64> = (0..ngenes).map(|g| fit0.sigma[g].sqrt()).collect();
571    let span = match span {
572        Some(s) => s,
573        None => {
574            if legacy_span {
575                choose_lowess_span(ngenes, 10.0, 0.3, 0.5)
576            } else {
577                choose_lowess_span(ngenes, 50.0, 0.3, 1.0 / 3.0)
578            }
579        }
580    };
581    let trend = LowessInterpolator::fit(&sx, &sy, span, 3);
582
583    // vooma weights from the fitted values, then combine with prior weights.
584    let mu = fit0.coefficients.dot(&design.t());
585    let mut weights = Array2::<f64>::zeros((ngenes, nsamples));
586    for g in 0..ngenes {
587        for s in 0..nsamples {
588            weights[[g, s]] = 1.0 / trend.eval(mu[[g, s]]).powi(4);
589        }
590    }
591    if let Some(pw) = prior_weights {
592        weights = &weights * pw;
593    }
594
595    let fit = lmfit_weighted(y, design, &weights, gene_names, coef_names)?;
596    Ok(VoomaLmFit { fit, weights, span })
597}
598
599#[cfg(test)]
600#[allow(clippy::excessive_precision)]
601mod tests {
602    use super::*;
603    use ndarray::array;
604
605    fn counts_fixture() -> Array2<f64> {
606        array![
607            [10., 12., 8., 11.],
608            [100., 110., 95., 105.],
609            [5., 7., 6., 4.],
610            [200., 190., 210., 205.],
611            [50., 45., 55., 60.],
612            [1., 2., 0., 3.],
613            [1000., 1100., 900., 950.],
614            [30., 25., 35., 40.],
615        ]
616    }
617
618    fn design_fixture() -> Array2<f64> {
619        // model.matrix(~group), group = A A B B.
620        array![[1., 0.], [1., 0.], [1., 1.], [1., 1.]]
621    }
622
623    #[test]
624    fn choose_span_matches_r() {
625        // limma:::chooseLowessSpan(8, 50, 0.3, 1/3) clamps to 1.
626        assert_eq!(choose_lowess_span(8, 50.0, 0.3, 1.0 / 3.0), 1.0);
627        // A large dataset gives a span below 1.
628        let s = choose_lowess_span(1000, 50.0, 0.3, 1.0 / 3.0);
629        let expect = 0.3 + 0.7 * (50.0_f64 / 1000.0).powf(1.0 / 3.0);
630        assert!((s - expect).abs() < 1e-12 && s < 1.0);
631    }
632
633    #[test]
634    fn voom_matches_r() {
635        let counts = counts_fixture();
636        let design = design_fixture();
637        let v = voom(&counts, Some(&design), None, 0.5, false).unwrap();
638
639        // Library sizes are column sums.
640        let lib = array![1396.0, 1491.0, 1309.0, 1378.0];
641        for (a, b) in v.lib_size.iter().zip(lib.iter()) {
642            assert!((a - b).abs() < 1e-9);
643        }
644
645        // E = log2((counts+0.5)/(lib+1)*1e6) is a deterministic closed form.
646        for g in 0..counts.nrows() {
647            for s in 0..counts.ncols() {
648                let expect = ((counts[[g, s]] + 0.5) / (lib[s] + 1.0) * 1e6).log2();
649                assert!((v.e[[g, s]] - expect).abs() < 1e-9);
650            }
651        }
652
653        // Weights from R limma 3.68.3 (scratch/voom_ref.R).
654        let want = array![
655            [
656                28.935889399719,
657                28.935889399719,
658                10.326303259626,
659                17.013134202459
660            ],
661            [
662                78.797663894764,
663                80.528056879469,
664                77.731587915647,
665                79.054873411272
666            ],
667            [
668                0.641934244773,
669                0.877223560596,
670                0.507411168766,
671                0.507411168766
672            ],
673            [
674                97.087653321213,
675                99.514156468690,
676                100.012320428675,
677                107.314600669557
678            ],
679            [
680                50.936936306788,
681                57.095789543676,
682                65.164440442125,
683                66.225455485966
684            ],
685            [
686                0.507411168766,
687                0.507411168766,
688                0.507411168766,
689                0.507411168766
690            ],
691            [
692                2193.696131719829,
693                2193.696131719829,
694                1703.605796348448,
695                1968.434108844519
696            ],
697            [
698                28.935889399719,
699                28.935889399719,
700                35.153539352782,
701                38.113189099620
702            ],
703        ];
704        for (a, b) in v.weights.iter().zip(want.iter()) {
705            let rel = (a - b).abs() / b.abs();
706            assert!(rel < 1e-9, "weight {a} vs {b} (rel {rel:e})");
707        }
708    }
709
710    #[test]
711    fn vooma_matches_r() {
712        // Continuous log-expression fixture (scratch/vooma_ref.R).
713        let y = array![
714            [5.10, 4.80, 6.20, 5.50],
715            [2.30, 3.10, 2.80, 3.50],
716            [7.70, 7.20, 8.10, 6.90],
717            [1.10, 0.90, 1.40, 1.20],
718            [9.30, 9.10, 8.80, 9.50],
719            [4.40, 4.90, 5.20, 4.10],
720            [6.60, 6.20, 6.90, 6.40],
721            [3.30, 3.80, 3.10, 2.90],
722            [8.10, 7.60, 8.40, 8.00],
723            [0.50, 1.20, 0.80, 0.30],
724        ];
725        let design = design_fixture();
726        let v = vooma(&y, Some(&design), None, false).unwrap();
727        assert_eq!(v.span, 1.0); // ngenes=10 -> adaptive span clamps to 1
728
729        let want = array![
730            [
731                5.769009938122,
732                5.769009938122,
733                5.710681439260,
734                5.710681439260
735            ],
736            [
737                7.764204399417,
738                7.764204399417,
739                7.299187981925,
740                7.299187981925
741            ],
742            [
743                6.131523915696,
744                6.131523915696,
745                6.146966664045,
746                6.146966664045
747            ],
748            [
749                10.027067353155,
750                10.027067353155,
751                9.549770567334,
752                9.549770567334
753            ],
754            [
755                6.732012491201,
756                6.732012491201,
757                6.722541890163,
758                6.722541890163
759            ],
760            [
761                5.873692933054,
762                5.873692933054,
763                5.873692933054,
764                5.873692933054
765            ],
766            [
767                5.828720439884,
768                5.828720439884,
769                5.892290676457,
770                5.892290676457
771            ],
772            [
773                6.892732120849,
774                6.892732120849,
775                7.443835424811,
776                7.443835424811
777            ],
778            [
779                6.257090085442,
780                6.257090085442,
781                6.374681498763,
782                6.374681498763
783            ],
784            [
785                10.292360128274,
786                10.292360128274,
787                10.566485009140,
788                10.566485009140
789            ],
790        ];
791        for (a, b) in v.weights.iter().zip(want.iter()) {
792            let rel = (a - b).abs() / b.abs();
793            assert!(rel < 1e-9, "weight {a} vs {b} (rel {rel:e})");
794        }
795    }
796
797    fn rclose(a: f64, b: f64) -> bool {
798        (a - b).abs() <= 1e-7 * (1.0 + b.abs())
799    }
800
801    /// Rebuild scratch/voomalmfit_ref.R's 50x6 expression and prior-weight
802    /// matrices from the same purely rational, 0-indexed formula.
803    fn vlf_fixture() -> (Array2<f64>, Array2<f64>, Array2<f64>) {
804        let ngenes = 50usize;
805        let narrays = 6usize;
806        let group = [0.0, 0.0, 0.0, 1.0, 1.0, 1.0];
807        let mut y = Array2::<f64>::zeros((ngenes, narrays));
808        let mut pw = Array2::<f64>::zeros((ngenes, narrays));
809        for g in 0..ngenes {
810            let gi = g as i64;
811            let base = ((gi % 7) - 3) as f64;
812            let extra = (((gi * 5) % 11) - 5) as f64;
813            let lvl = 5.0 + (gi % 10) as f64 * 0.2;
814            let eff = base * 0.25;
815            for k in 0..narrays {
816                let ki = k as i64;
817                let noise = (((gi * 13 + ki * 17) % 19) - 9) as f64 * 0.04;
818                y[[g, k]] = lvl + eff * group[k] + extra * 0.05 + noise;
819                pw[[g, k]] = 0.6 + (((gi * 3 + ki * 7) % 9) as f64) * 0.1;
820            }
821        }
822        let mut design = Array2::<f64>::zeros((narrays, 2));
823        for k in 0..narrays {
824            design[[k, 0]] = 1.0;
825            design[[k, 1]] = group[k];
826        }
827        (y, design, pw)
828    }
829
830    /// Rebuild scratch/vwqw_ref.R's fixture B: integer counts (10x6) from a
831    /// 0-indexed rational formula, plus the `~group` design.
832    fn vwqw_counts_fixture() -> (Array2<f64>, Array2<f64>) {
833        let ngenes = 10usize;
834        let narrays = 6usize;
835        let grp = [0i64, 0, 0, 1, 1, 1];
836        let mut counts = Array2::<f64>::zeros((ngenes, narrays));
837        for g in 0..ngenes {
838            let gi = g as i64;
839            for j in 0..narrays {
840                let ji = j as i64;
841                let c = 20 + gi * 15 + ji * 3 + ((gi * 5 + ji * 7) % 13) * 4 + grp[j] * gi * 2;
842                counts[[g, j]] = c as f64;
843            }
844        }
845        let mut design = Array2::<f64>::zeros((narrays, 2));
846        for j in 0..narrays {
847            design[[j, 0]] = 1.0;
848            design[[j, 1]] = grp[j] as f64;
849        }
850        (counts, design)
851    }
852
853    #[test]
854    fn voom_with_quality_weights_matches_r() {
855        let (counts, design) = vwqw_counts_fixture();
856        let v = voom_with_quality_weights(&counts, Some(&design), None, 0.5, false, None, 10.0)
857            .unwrap();
858
859        // Final per-sample array quality weights.
860        let aw_want = [
861            1.1428522224351862,
862            0.69948202996388553,
863            1.1916899290606626,
864            1.1563249275179759,
865            0.74938331550111592,
866            1.2113961366163049,
867        ];
868        for (g, x) in v.sample_weights.iter().zip(aw_want.iter()) {
869            assert!(rclose(*g, *x), "sample weight: got {g}, want {x}");
870        }
871
872        // Final voom weights (voom weights scaled by the array weights).
873        assert!(rclose(v.weights[[0, 0]], 4.6250384357077206));
874        assert!(rclose(v.weights[[0, 3]], 9.0644002874417744));
875        assert!(rclose(v.weights[[0, 5]], 10.34938541502423));
876        assert!(rclose(v.weights[[4, 2]], 28.109033884186356));
877        assert!(rclose(v.weights[[5, 1]], 16.86267872361735));
878        assert!(rclose(v.weights[[9, 0]], 75.782464040995038));
879        assert!(rclose(v.weights[[9, 5]], 117.79877846147826));
880
881        // E is the deterministic log2-CPM, identical to plain voom.
882        assert!(rclose(v.e[[0, 0]], 14.185832765530236));
883        assert!(rclose(v.e[[9, 5]], 17.166249779108728));
884    }
885
886    #[test]
887    fn vooma_lm_fit_matches_r() {
888        let (y, design, pw) = vlf_fixture();
889        let gn: Vec<String> = (0..50).map(|i| format!("g{i}")).collect();
890        let cn = vec!["Intercept".to_string(), "group".to_string()];
891
892        // Scenario 1: no prior weights (first fit is OLS).
893        let r = vooma_lm_fit(&y, Some(&design), None, None, false, gn.clone(), cn.clone()).unwrap();
894        assert_eq!(r.span, 1.0); // ngenes=50 == small.n -> span clamps to 1
895        assert!(rclose(r.fit.coefficients[[0, 0]], 4.8166666666666673));
896        assert!(rclose(r.fit.coefficients[[0, 1]], -0.73666666666666669));
897        assert!(rclose(r.fit.coefficients[[2, 1]], 0.016666666666666802));
898        assert!(rclose(r.fit.sigma[0], 2.0001523415288656));
899        assert!(rclose(r.fit.sigma[4], 0.50739386270805098));
900        assert!(rclose(r.fit.stdev_unscaled[[0, 0]], 0.07765105731999547));
901        assert!(rclose(r.fit.stdev_unscaled[[0, 1]], 0.10897521719515758));
902        assert_eq!(r.fit.df_residual[0], 4.0);
903        // Within an intercept+group design, weights take two values per gene.
904        assert!(rclose(r.weights[[0, 0]], 55.282032012091854));
905        assert!(rclose(r.weights[[0, 3]], 57.019909902580309));
906        assert!(rclose(r.weights[[24, 0]], 43.694024586743168));
907        assert!(rclose(r.weights[[49, 0]], 33.652368280339203));
908        assert!(rclose(r.weights[[49, 3]], 48.718872300466991));
909
910        // Scenario 2: with prior weights (first fit is weighted).
911        let r2 = vooma_lm_fit(&y, Some(&design), Some(&pw), None, false, gn, cn).unwrap();
912        assert_eq!(r2.span, 1.0);
913        assert!(rclose(r2.fit.coefficients[[0, 0]], 4.9046666666666683));
914        assert!(rclose(r2.fit.coefficients[[0, 1]], -0.83800000000000119));
915        assert!(rclose(r2.fit.sigma[0], 1.8170323609512815));
916        assert!(rclose(r2.fit.stdev_unscaled[[0, 0]], 0.07413980745014162));
917        assert!(rclose(r2.weights[[0, 0]], 36.385394507083909));
918        assert!(rclose(r2.weights[[0, 5]], 91.56829234031899));
919        assert!(rclose(r2.weights[[49, 0]], 30.459503135870523));
920    }
921
922    #[test]
923    fn vooma_by_group_matches_r() {
924        // See scratch/voomabygroup_ref.R.
925        let ngenes = 12usize;
926        let mky = |narrays: usize| -> Array2<f64> {
927            Array2::from_shape_fn((ngenes, narrays), |(g0, s0)| {
928                5.0 + 0.25 * g0 as f64 + 0.1 * s0 as f64 + 0.15 * ((g0 * (s0 + 1)) % 5) as f64
929            })
930        };
931        let rclose = |a: f64, b: f64| (a - b).abs() <= 1e-7 * (1.0 + b.abs());
932
933        // Per-group weights: column k is the weight shared by every sample in
934        // sorted group k. Within-group columns are identical (intercept fit).
935        let gw = array![
936            [128.57974235629533, 201.43265373150086, 169.52805974815644],
937            [51.72862401222929, 18.636960776554076, 37.75223112586292],
938            [37.559193159543049, 20.439481407168092, 13.083239709395334],
939            [37.614806241386034, 146.28421635333993, 8.9183551719300596],
940            [106.95854312349614, 172.92065689564916, 12.144674253781014],
941            [69.276054650291854, 206.54103286812787, 10.92966941384552],
942            [63.332917660356472, 18.636960776554002, 15.654008874403512],
943            [32.388566989657896, 20.439481407168017, 12.99773820977671],
944            [37.212711780045019, 146.28421635334118, 9.8481582955355105],
945            [100.30030326001432, 172.92065689564888, 15.212910649940969],
946            [70.172943221746678, 206.54103286812716, 14.112001483913859],
947            [124.95596005142109, 32.391260048340605, 24.542006411260687],
948        ];
949
950        // Scenario A: three groups of two, default ~0+group design, span 0.5.
951        let ya = mky(6);
952        let group_a = [0usize, 0, 1, 1, 2, 2];
953        let va = vooma_by_group(&ya, &group_a, None, Some(0.5), false).unwrap();
954        assert!((va.span - 0.5).abs() < 1e-12);
955        for ((g, s), _) in va.weights.indexed_iter() {
956            let e = gw[[g, group_a[s]]];
957            assert!(rclose(va.weights[[g, s]], e), "A[{g},{s}]");
958        }
959
960        // Scenario B: singleton last group (sample 4) borrows the global fit.
961        let single_b = [
962            92.423854171536973,
963            46.676048745081182,
964            36.257599360043535,
965            52.633157173375849,
966            97.417756778627549,
967            67.100924018777832,
968            43.554031111104514,
969            36.052167162893426,
970            52.218302840608807,
971            99.089796796342043,
972            71.514137307408575,
973            49.250767908710742,
974        ];
975        let yb = mky(5);
976        let group_b = [0usize, 0, 1, 1, 2];
977        let vb = vooma_by_group(&yb, &group_b, None, Some(0.5), false).unwrap();
978        assert!((vb.span - 0.5).abs() < 1e-12);
979        for g in 0..ngenes {
980            // groups 0 and 1 reuse the per-group weights from scenario A
981            assert!(rclose(vb.weights[[g, 0]], gw[[g, 0]]), "B[{g},0]");
982            assert!(rclose(vb.weights[[g, 1]], gw[[g, 0]]), "B[{g},1]");
983            assert!(rclose(vb.weights[[g, 2]], gw[[g, 1]]), "B[{g},2]");
984            assert!(rclose(vb.weights[[g, 3]], gw[[g, 1]]), "B[{g},3]");
985            assert!(rclose(vb.weights[[g, 4]], single_b[g]), "B[{g},4]");
986        }
987    }
988}
limma/voom.rs

limma/
voom.rs