Skip to main content

limma/
normwithin.rs

1//! Two-colour within-array normalization. Port of limma's `MA.RG`
2//! (`RGList` -> `MAList`), `RG.MA` (the inverse), and `normalizeWithinArrays`
3//! for the deterministic intensity-dependent methods none/median/loess/
4//! printtiploess. The composite/control methods (which call R's `stats::loess`
5//! restricted to control spots) and robustspline (`normalizeRobustSpline`) are
6//! out of scope for this pure-Rust statistical port.
7
8use anyhow::{bail, Result};
9use ndarray::Array2;
10
11use crate::lowess::loess_fit;
12use crate::norm::median_finite;
13use crate::normexp::{background_correct_matrix, BackgroundMethod};
14use crate::weightedmedian::weighted_median;
15
16/// `MA.RG`: convert foreground/background red–green intensities to an `(M, A)`
17/// pair. Each channel is background-corrected with `method`/`offset` (delegating
18/// to [`background_correct_matrix`]; the `rb`/`gb` background matrices may be
19/// absent), non-positive corrected intensities become `NaN`, and then
20/// `M = log2 R - log2 G`, `A = (log2 R + log2 G) / 2`.
21pub fn ma_from_rg(
22    r: &Array2<f64>,
23    g: &Array2<f64>,
24    rb: Option<&Array2<f64>>,
25    gb: Option<&Array2<f64>>,
26    method: BackgroundMethod,
27    offset: f64,
28) -> (Array2<f64>, Array2<f64>) {
29    let rc = background_correct_matrix(r, rb, method, offset);
30    let gc = background_correct_matrix(g, gb, method, offset);
31    let log2r = rc.mapv(|v| if v <= 0.0 { f64::NAN } else { v.log2() });
32    let log2g = gc.mapv(|v| if v <= 0.0 { f64::NAN } else { v.log2() });
33    let m = &log2r - &log2g;
34    let a = (&log2r + &log2g).mapv(|v| v / 2.0);
35    (m, a)
36}
37
38/// `RG.MA`: inverse of [`ma_from_rg`] on already-`(M, A)` data, with
39/// `R = 2^(A + M/2)` and `G = 2^(A - M/2)`.
40pub fn rg_from_ma(m: &Array2<f64>, a: &Array2<f64>) -> (Array2<f64>, Array2<f64>) {
41    let r = Array2::from_shape_fn(m.dim(), |(i, j)| (a[[i, j]] + m[[i, j]] / 2.0).exp2());
42    let g = Array2::from_shape_fn(m.dim(), |(i, j)| (a[[i, j]] - m[[i, j]] / 2.0).exp2());
43    (r, g)
44}
45
46/// Print-tip layout: `ngrid_r * ngrid_c` tip groups, each printing
47/// `nspot_r * nspot_c` spots. Spots are laid out as contiguous row blocks, one
48/// block per tip, so block `t` occupies rows `[t*nspots, (t+1)*nspots)`.
49#[derive(Clone, Copy, Debug)]
50pub struct PrinterLayout {
51    pub ngrid_r: usize,
52    pub ngrid_c: usize,
53    pub nspot_r: usize,
54    pub nspot_c: usize,
55}
56
57/// Intensity-dependent within-array normalization method. Mirrors the
58/// deterministic subset of limma's `normalizeWithinArrays` `method` choices.
59#[derive(Clone, Copy, Debug, PartialEq, Eq)]
60pub enum WithinArrayMethod {
61    None,
62    Median,
63    Loess,
64    PrintTipLoess,
65}
66
67/// `normalizeWithinArrays` on the `(M, A)` matrix path; returns the normalized
68/// `M` matrix (`A` is unchanged). `weights` is an optional full
69/// `n_probes x n_arrays` weight matrix — expand a vector beforehand with
70/// [`crate::weights::as_matrix_weights`] to match limma's `asMatrixWeights`.
71/// `layout` is required for [`WithinArrayMethod::PrintTipLoess`]. `span` and
72/// `iterations` follow limma's defaults of `0.3` and `4`.
73///
74/// `Median` subtracts each column's median (or weighted median when weights are
75/// supplied); `Loess` and `PrintTipLoess` replace each column (or each print-tip
76/// block) with the residuals of a [`loess_fit`] of `M` on `A`, using limma's
77/// `loessFit` weight clamps (`min.weight = 1e-5`, `max.weight = 1e5`,
78/// `equal.weights.as.null = TRUE`).
79pub fn normalize_within_arrays(
80    m: &Array2<f64>,
81    a: &Array2<f64>,
82    method: WithinArrayMethod,
83    weights: Option<&Array2<f64>>,
84    span: f64,
85    iterations: usize,
86    layout: Option<PrinterLayout>,
87) -> Result<Array2<f64>> {
88    let (nprobes, narrays) = m.dim();
89    if a.dim() != (nprobes, narrays) {
90        bail!("dimensions of A do not match M");
91    }
92    if let Some(w) = weights {
93        if w.dim() != (nprobes, narrays) {
94            bail!("dimensions of weights do not match M");
95        }
96    }
97
98    let mut out = m.clone();
99    match method {
100        WithinArrayMethod::None => {}
101        WithinArrayMethod::Median => {
102            for j in 0..narrays {
103                let col: Vec<f64> = (0..nprobes).map(|i| m[[i, j]]).collect();
104                let center = match weights {
105                    None => median_finite(&col),
106                    Some(w) => {
107                        let wc: Vec<f64> = (0..nprobes).map(|i| w[[i, j]]).collect();
108                        weighted_median(&col, Some(&wc), true)
109                    }
110                };
111                for i in 0..nprobes {
112                    out[[i, j]] -= center;
113                }
114            }
115        }
116        WithinArrayMethod::Loess => {
117            for j in 0..narrays {
118                let y: Vec<f64> = (0..nprobes).map(|i| m[[i, j]]).collect();
119                let x: Vec<f64> = (0..nprobes).map(|i| a[[i, j]]).collect();
120                let w = weights.map(|w| (0..nprobes).map(|i| w[[i, j]]).collect::<Vec<f64>>());
121                let fit = loess_fit(&y, &x, w.as_deref(), span, iterations, 1e-5, 1e5, true);
122                for i in 0..nprobes {
123                    out[[i, j]] = fit.residuals[i];
124                }
125            }
126        }
127        WithinArrayMethod::PrintTipLoess => {
128            let layout = match layout {
129                Some(l) => l,
130                None => bail!("layout argument required for printtiploess"),
131            };
132            let nspots = layout.nspot_r * layout.nspot_c;
133            let ntips = layout.ngrid_r * layout.ngrid_c;
134            if ntips * nspots != nprobes {
135                bail!("printer layout information does not match M row dimension");
136            }
137            for j in 0..narrays {
138                for t in 0..ntips {
139                    let start = t * nspots;
140                    let y: Vec<f64> = (start..start + nspots).map(|i| m[[i, j]]).collect();
141                    let x: Vec<f64> = (start..start + nspots).map(|i| a[[i, j]]).collect();
142                    let w = weights.map(|w| {
143                        (start..start + nspots)
144                            .map(|i| w[[i, j]])
145                            .collect::<Vec<f64>>()
146                    });
147                    let fit = loess_fit(&y, &x, w.as_deref(), span, iterations, 1e-5, 1e5, true);
148                    for (k, i) in (start..start + nspots).enumerate() {
149                        out[[i, j]] = fit.residuals[k];
150                    }
151                }
152            }
153        }
154    }
155    Ok(out)
156}
157
158#[cfg(test)]
159#[allow(clippy::excessive_precision)]
160mod tests {
161    use super::*;
162
163    fn rclose(a: f64, b: f64) -> bool {
164        if a.is_nan() && b.is_nan() {
165            return true;
166        }
167        (a - b).abs() <= 1e-8 * (1.0 + b.abs())
168    }
169
170    fn assert_mat(out: &Array2<f64>, expected: &[[f64; 2]], label: &str) {
171        assert_eq!(out.nrows(), expected.len(), "{label}: row count");
172        for i in 0..expected.len() {
173            for j in 0..2 {
174                assert!(
175                    rclose(out[[i, j]], expected[i][j]),
176                    "{label}[{i},{j}]: {} vs {}",
177                    out[[i, j]],
178                    expected[i][j]
179                );
180            }
181        }
182    }
183
184    /// 16x2 RGList of positive integers plus a 16x2 weight matrix, built from
185    /// the same rational formulas as `scratch/normwithin_ref.R` so the inputs
186    /// reconstruct bit-identically.
187    fn rg_fixture() -> (Array2<f64>, Array2<f64>, Array2<f64>) {
188        let (nprobe, narray) = (16usize, 2usize);
189        let mut r = Array2::zeros((nprobe, narray));
190        let mut g = Array2::zeros((nprobe, narray));
191        let mut w = Array2::zeros((nprobe, narray));
192        for g0 in 0..nprobe {
193            for j0 in 0..narray {
194                let (gi, ji) = (g0 as i64, j0 as i64);
195                r[[g0, j0]] = (120 + gi * 8 + ji * 15 + ((gi * 3 + ji * 5) % 7) * 4) as f64;
196                g[[g0, j0]] = (90 + gi * 6 + ji * 11 + ((gi * 7 + ji * 2) % 9) * 5) as f64;
197                w[[g0, j0]] = 0.5 + ((gi * 5 + ji * 3) % 6) as f64 * 0.2;
198            }
199        }
200        (r, g, w)
201    }
202
203    const MA_RG_M: [[f64; 2]; 16] = [
204        [0.41503749927884392, 0.48170853892413135],
205        [0.095860015407516208, 0.45820535843521704],
206        [0.33324340811519715, 0.17425092684510179],
207        [0.30541300810434535, 0.14295795384204357],
208        [0.53144699139415419, 0.35453276031929004],
209        [0.035623909730721159, 0.54916177929330967],
210        [0.23815973719476435, 0.10982327795275104],
211        [0.2115041051937121, 0.28647096107046011],
212        [0.40525647848625823, 0.26445648090299212],
213        [0.58496250072115608, 0.43457768567448873],
214        [0.16905825762477988, 0.41727597147484374],
215        [0.33304412708153563, 0.2183054638132047],
216        [0.31375416344166229, 0.36499681677924833],
217        [0.47226236797179411, 0.34845438939755002],
218        [0.11651400872642448, 0.49084032335660677],
219        [0.26303440583379434, 0.16505924627049762],
220    ];
221
222    const MA_RG_A: [[f64; 2]; 16] = [
223        [6.6993718459690967, 7.0352701358121719],
224        [7.0813530092412087, 6.9705696656187559],
225        [7.1553063908297645, 7.2965788290515015],
226        [7.0952210093914125, 7.2414039783633335],
227        [7.1605412590050204, 7.3065493971046109],
228        [7.3397400497527236, 7.3620437308969944],
229        [7.4044820874596304, 7.5225171890593732],
230        [7.3536795660404408, 7.5778637081719546],
231        [7.4120816048720792, 7.53310767673368],
232        [7.4624062518028902, 7.5836110570830604],
233        [7.6159105893287018, 7.5395548638520387],
234        [7.6663679506239735, 7.7673642146583965],
235        [7.6244826318038292, 7.8118550284692336],
236        [7.6707594116226216, 7.7741400368859033],
237        [7.7997239907643596, 7.8152757700092508],
238        [7.8457627205830196, 7.9342786645513055],
239    ];
240
241    const MEDIAN_W: [[f64; 2]; 16] = [
242        [0.10962449117449857, 0.12717577860484131],
243        [-0.20955299269682914, 0.10367259811592699],
244        [0.027830400010851797, -0.18028183347418825],
245        [0.0, -0.21157480647724647],
246        [0.22603398328980884, 0.0],
247        [-0.26978909837362419, 0.19462901897401963],
248        [-0.067253270909580998, -0.244709482366539],
249        [-0.093908902910633252, -0.068061799248829935],
250        [0.09984347038191288, -0.090076279416297922],
251        [0.27954949261681072, 0.080044925355198693],
252        [-0.13635475047956547, 0.062743211155553702],
253        [0.027631118977190283, -0.13622729650608534],
254        [0.0083411553373169411, 0.010464056459958293],
255        [0.16684935986744875, -0.0060783709217400173],
256        [-0.18889899937792087, 0.13630756303731673],
257        [-0.042378602270551013, -0.18947351404879242],
258    ];
259
260    const MEDIAN_NW: [[f64; 2]; 16] = [
261        [0.1054539135058401, 0.13021496406571131],
262        [-0.21372357036548761, 0.106711783576797],
263        [0.023659822342193326, -0.17724264801331824],
264        [-0.0041705776686584706, -0.20853562101637646],
265        [0.22186340562115037, 0.0030391854608700086],
266        [-0.27395967604228266, 0.19766820443488964],
267        [-0.071423848578239468, -0.24167029690566899],
268        [-0.098079480579291722, -0.065022613787959926],
269        [0.095672892713254409, -0.087037093955427913],
270        [0.27537891494815225, 0.083084110816068701],
271        [-0.14052532814822394, 0.065782396616423711],
272        [0.023460541308531813, -0.13318811104521533],
273        [0.0041705776686584706, 0.013503241920828302],
274        [0.16267878219879028, -0.0030391854608700086],
275        [-0.19306957704657934, 0.13934674849818673],
276        [-0.046549179939209484, -0.18643432858792242],
277    ];
278
279    const LOESS_W: [[f64; 2]; 16] = [
280        [0.00055153930084905767, 0.040766574671558864],
281        [-0.05462147727184874, -0.031536546731143034],
282        [-0.083729258405399243, -0.12846041655967877],
283        [0.10480902294731109, 0.034911045736998325],
284        [0.09627798945378796, 0.016020662298251942],
285        [-0.077779198347911027, 0.00038373974104910502],
286        [-0.10952844588387034, -0.014499911899852336],
287        [0.04658188483750525, -0.053388739684320896],
288        [0.027754294544550362, -0.020896786039700466],
289        [-0.0010882906757228028, 0.10054069295286538],
290        [-0.15531488856191, 0.050255652330072564],
291        [-0.00065330016751252717, -0.045214392284930405],
292        [0.048617415339603731, 0.068999777369400661],
293        [0.13926875256791815, 0.060288430542558835],
294        [-0.10202136195850819, 0.19672064993775162],
295        [0.080029632718627397, -0.056917140958086421],
296    ];
297
298    const LOESS_NW: [[f64; 2]; 16] = [
299        [0.00019866839190113561, 0.052089024255749572],
300        [-0.0071443390267586226, -0.031013979820145199],
301        [-0.12337619317269455, -0.083454141763385392],
302        [0.014405249188203129, 0.0061021005317352106],
303        [0.058572163128462251, 0.053685820650237748],
304        [-0.022654346312755431, -0.0050963209323636738],
305        [-0.067806670626851084, 0.0084931005176177021],
306        [0.044129881046102576, -0.081071603211831045],
307        [0.057408864779656454, -0.023930936344493725],
308        [-0.0044569372157868825, 0.052364421661424354],
309        [-0.017566722061604867, 0.014048973574872226],
310        [-0.068824788806081671, -0.016670189235056643],
311        [0.029708142677324356, -0.060772958164841795],
312        [0.054009937039857014, 0.026403796859023776],
313        [-0.0019611190047384292, 0.051238823650141874],
314        [0.0006145261927541501, -0.00028034644366714545],
315    ];
316
317    const PTL_W: [[f64; 2]; 16] = [
318        [0.057442612862069288, 0.050210159050610603],
319        [-0.15367694883732977, -0.042914759848677944],
320        [0.10462697482950263, 0.023936987064198948],
321        [0.059799135951460455, -0.066727564073601572],
322        [0.028994964361376607, -0.087523395525124892],
323        [-0.17681878527877404, 0.15695329404950134],
324        [0.13049339157281115, -0.13824025525783132],
325        [0.021620657322331531, 0.088122324229944468],
326        [-0.081404690682113659, -0.12133937127726957],
327        [0.14551881405230987, 0.083158932563343516],
328        [-0.12635881757690007, 0.035868649106876822],
329        [0.084969054640595579, -0.0080345231137678996],
330        [-0.13233555553422785, -0.003760362730623472],
331        [0.082385589860677996, -0.069634857554475182],
332        [-0.11670800020386694, 0.12655755164241353],
333        [0.085736175673749124, -0.043565030693951998],
334    ];
335
336    #[test]
337    fn ma_rg_and_round_trip_match_r() {
338        let (r, g, _w) = rg_fixture();
339        let (m, a) = ma_from_rg(&r, &g, None, None, BackgroundMethod::Subtract, 0.0);
340        assert_mat(&m, &MA_RG_M, "MA.RG M");
341        assert_mat(&a, &MA_RG_A, "MA.RG A");
342
343        // RG.MA inverts MA.RG back to the original intensities.
344        let (r2, g2) = rg_from_ma(&m, &a);
345        for i in 0..16 {
346            for j in 0..2 {
347                assert!(rclose(r2[[i, j]], r[[i, j]]), "RG.MA R[{i},{j}]");
348                assert!(rclose(g2[[i, j]], g[[i, j]]), "RG.MA G[{i},{j}]");
349            }
350        }
351    }
352
353    #[test]
354    fn normalize_within_arrays_methods_match_r() {
355        let (r, g, w) = rg_fixture();
356        let (m, a) = ma_from_rg(&r, &g, None, None, BackgroundMethod::Subtract, 0.0);
357        let layout = Some(PrinterLayout {
358            ngrid_r: 2,
359            ngrid_c: 2,
360            nspot_r: 2,
361            nspot_c: 2,
362        });
363
364        // none: M is returned unchanged.
365        let none =
366            normalize_within_arrays(&m, &a, WithinArrayMethod::None, None, 0.3, 4, layout).unwrap();
367        assert_eq!(none, m);
368
369        let med_w =
370            normalize_within_arrays(&m, &a, WithinArrayMethod::Median, Some(&w), 0.3, 4, None)
371                .unwrap();
372        assert_mat(&med_w, &MEDIAN_W, "median weighted");
373
374        let med_nw =
375            normalize_within_arrays(&m, &a, WithinArrayMethod::Median, None, 0.3, 4, None).unwrap();
376        assert_mat(&med_nw, &MEDIAN_NW, "median noweights");
377
378        let loess_w =
379            normalize_within_arrays(&m, &a, WithinArrayMethod::Loess, Some(&w), 0.3, 4, None)
380                .unwrap();
381        assert_mat(&loess_w, &LOESS_W, "loess weighted");
382
383        let loess_nw =
384            normalize_within_arrays(&m, &a, WithinArrayMethod::Loess, None, 0.3, 4, None).unwrap();
385        assert_mat(&loess_nw, &LOESS_NW, "loess noweights");
386
387        let ptl_w = normalize_within_arrays(
388            &m,
389            &a,
390            WithinArrayMethod::PrintTipLoess,
391            Some(&w),
392            0.3,
393            4,
394            layout,
395        )
396        .unwrap();
397        assert_mat(&ptl_w, &PTL_W, "printtiploess weighted");
398
399        // printtiploess with no weights uses classic lowess on 4-spot tips,
400        // which interpolates exactly -> all-zero residuals.
401        let ptl_nw = normalize_within_arrays(
402            &m,
403            &a,
404            WithinArrayMethod::PrintTipLoess,
405            None,
406            0.3,
407            4,
408            layout,
409        )
410        .unwrap();
411        for i in 0..16 {
412            for j in 0..2 {
413                assert!(
414                    rclose(ptl_nw[[i, j]], 0.0),
415                    "printtiploess noweights[{i},{j}]"
416                );
417            }
418        }
419    }
420}