1use anyhow::{bail, Result};
8use ndarray::{Array1, Array2, Axis};
9
10use crate::lowess::{approx_rule2, loess_fit_unweighted};
11
12#[derive(Clone, Copy, Debug, PartialEq, Eq)]
14pub enum NormalizeMethod {
15 None,
16 Scale,
17 Quantile,
18 CyclicLoess,
19}
20
21impl NormalizeMethod {
22 pub fn parse(s: &str) -> Result<Self> {
24 Ok(match s {
25 "none" => Self::None,
26 "scale" => Self::Scale,
27 "quantile" => Self::Quantile,
28 "cyclicloess" => Self::CyclicLoess,
29 other => bail!(
30 "unknown normalize method '{other}' (expected none|scale|quantile|cyclicloess)"
31 ),
32 })
33 }
34}
35
36pub fn normalize_between_arrays(x: &Array2<f64>, method: NormalizeMethod) -> Array2<f64> {
40 match method {
41 NormalizeMethod::None => x.clone(),
42 NormalizeMethod::Scale => normalize_median_values(x),
43 NormalizeMethod::Quantile => normalize_quantiles(x, true),
44 NormalizeMethod::CyclicLoess => normalize_cyclic_loess(x, 0.7, true, 3, CyclicMethod::Fast),
45 }
46}
47
48pub(crate) fn median_finite(v: &[f64]) -> f64 {
52 let mut s: Vec<f64> = v.iter().copied().filter(|x| x.is_finite()).collect();
53 let n = s.len();
54 if n == 0 {
55 return f64::NAN;
56 }
57 s.sort_by(|a, b| a.partial_cmp(b).unwrap());
58 if n % 2 == 1 {
59 s[n / 2]
60 } else {
61 0.5 * (s[n / 2 - 1] + s[n / 2])
62 }
63}
64
65pub fn normalize_median_values(x: &Array2<f64>) -> Array2<f64> {
68 let n_cols = x.ncols();
69 if n_cols <= 1 {
70 return x.clone();
71 }
72 let log_med: Vec<f64> = (0..n_cols)
73 .map(|j| median_finite(&x.column(j).to_vec()).ln())
74 .collect();
75 let mean = log_med.iter().sum::<f64>() / n_cols as f64;
76 let scale: Vec<f64> = log_med.iter().map(|&lm| (lm - mean).exp()).collect();
77 let mut out = x.clone();
78 for (j, &s) in scale.iter().enumerate() {
79 out.column_mut(j).mapv_inplace(|v| v / s);
80 }
81 out
82}
83
84pub fn normalize_median_abs_values(x: &Array2<f64>) -> Array2<f64> {
88 let n_cols = x.ncols();
89 if n_cols <= 1 {
90 return x.clone();
91 }
92 let log_med: Vec<f64> = (0..n_cols)
93 .map(|j| {
94 let absvals: Vec<f64> = x.column(j).iter().map(|v| v.abs()).collect();
95 median_finite(&absvals).ln()
96 })
97 .collect();
98 let mean = log_med.iter().sum::<f64>() / n_cols as f64;
99 let scale: Vec<f64> = log_med.iter().map(|&lm| (lm - mean).exp()).collect();
100 let mut out = x.clone();
101 for (j, &s) in scale.iter().enumerate() {
102 out.column_mut(j).mapv_inplace(|v| v / s);
103 }
104 out
105}
106
107fn rank_average_finite(col: &[f64]) -> Vec<f64> {
111 let n = col.len();
112 let mut idx: Vec<usize> = (0..n).filter(|&k| col[k].is_finite()).collect();
113 idx.sort_by(|&a, &b| col[a].partial_cmp(&col[b]).unwrap());
114 let mut ranks = vec![f64::NAN; n];
115 let mut i = 0usize;
116 while i < idx.len() {
117 let mut j = i;
118 while j < idx.len() && col[idx[j]] == col[idx[i]] {
119 j += 1;
120 }
121 let avg = (i + 1 + j) as f64 / 2.0; for k in i..j {
123 ranks[idx[k]] = avg;
124 }
125 i = j;
126 }
127 ranks
128}
129
130pub fn normalize_quantiles(a: &Array2<f64>, ties: bool) -> Array2<f64> {
135 let nr = a.nrows();
136 let nc = a.ncols();
137 if nc <= 1 || nr == 0 {
138 return a.clone();
139 }
140 let grid: Vec<f64> = (0..nr).map(|k| k as f64 / (nr - 1) as f64).collect();
142
143 let mut s = Array2::<f64>::zeros((nr, nc));
146 let mut order = vec![vec![0usize; 0]; nc];
147 let mut nobs = vec![nr; nc];
148 for j in 0..nc {
149 let col = a.column(j);
150 let mut idx: Vec<usize> = (0..nr).filter(|&k| col[k].is_finite()).collect();
151 idx.sort_by(|&p, &q| col[p].partial_cmp(&col[q]).unwrap());
152 let sorted: Vec<f64> = idx.iter().map(|&k| col[k]).collect();
153 let nobsj = sorted.len();
154 nobs[j] = nobsj;
155 order[j] = idx;
156 if nobsj == nr {
157 for k in 0..nr {
158 s[[k, j]] = sorted[k];
159 }
160 } else {
161 let sub: Vec<f64> = (0..nobsj)
163 .map(|k| k as f64 / (nobsj - 1).max(1) as f64)
164 .collect();
165 for (k, &gi) in grid.iter().enumerate() {
166 s[[k, j]] = approx_rule2(&sub, &sorted, gi);
167 }
168 }
169 }
170 let m: Array1<f64> = s.mean_axis(Axis(1)).unwrap();
171 let m_vec = m.to_vec();
172
173 let mut out = a.clone();
174 for j in 0..nc {
175 let col = a.column(j);
176 if ties {
177 let r = rank_average_finite(&col.to_vec());
178 for k in 0..nr {
179 if col[k].is_finite() {
180 let pos = (r[k] - 1.0) / (nobs[j] - 1) as f64;
181 out[[k, j]] = approx_rule2(&grid, &m_vec, pos);
182 }
183 }
184 } else if nobs[j] == nr {
185 for (rank0, &k) in order[j].iter().enumerate() {
186 out[[k, j]] = m_vec[rank0];
187 }
188 } else {
189 for (rank0, &k) in order[j].iter().enumerate() {
190 let pos = rank0 as f64 / (nobs[j] - 1) as f64;
191 out[[k, j]] = approx_rule2(&grid, &m_vec, pos);
192 }
193 }
194 }
195 out
196}
197
198#[derive(Clone, Copy, Debug, PartialEq, Eq)]
200pub enum CyclicMethod {
201 Fast,
202 Pairs,
203 Affy,
204}
205
206pub fn normalize_cyclic_loess(
211 x: &Array2<f64>,
212 span: f64,
213 adaptive_span: bool,
214 iterations: usize,
215 method: CyclicMethod,
216) -> Array2<f64> {
217 let nr = x.nrows();
218 let n = x.ncols();
219 let span = if adaptive_span {
220 crate::voom::choose_lowess_span(nr, 50.0, 0.3, 1.0 / 3.0)
221 } else {
222 span
223 };
224 let mut x = x.clone();
225 let mut m = vec![0.0f64; nr];
229 let mut a = vec![0.0f64; nr];
230 match method {
231 CyclicMethod::Fast => {
232 for _ in 0..iterations {
233 for (g, ag) in a.iter_mut().enumerate() {
234 *ag = row_nanmean(&x, g);
235 }
236 for i in 0..n {
237 for g in 0..nr {
238 m[g] = x[[g, i]] - a[g];
239 }
240 let f = loess_fit_unweighted(&m, &a, span, 4).0;
241 for g in 0..nr {
242 x[[g, i]] -= f[g];
243 }
244 }
245 }
246 }
247 CyclicMethod::Pairs => {
248 for _ in 0..iterations {
249 for i in 0..n - 1 {
250 for j in i + 1..n {
251 for g in 0..nr {
252 m[g] = x[[g, j]] - x[[g, i]];
253 a[g] = 0.5 * (x[[g, j]] + x[[g, i]]);
254 }
255 let f = loess_fit_unweighted(&m, &a, span, 4).0;
256 for g in 0..nr {
257 x[[g, i]] += f[g] / 2.0;
258 x[[g, j]] -= f[g] / 2.0;
259 }
260 }
261 }
262 }
263 }
264 CyclicMethod::Affy => {
265 for _ in 0..iterations {
266 let mut adjustment = Array2::<f64>::zeros((nr, n));
267 for i in 0..n - 1 {
268 for j in i + 1..n {
269 for g in 0..nr {
270 m[g] = x[[g, j]] - x[[g, i]];
271 a[g] = 0.5 * (x[[g, j]] + x[[g, i]]);
272 }
273 let f = loess_fit_unweighted(&m, &a, span, 4).0;
274 for g in 0..nr {
275 adjustment[[g, j]] += f[g];
276 adjustment[[g, i]] -= f[g];
277 }
278 }
279 }
280 for g in 0..nr {
281 for c in 0..n {
282 x[[g, c]] -= adjustment[[g, c]] / n as f64;
283 }
284 }
285 }
286 }
287 }
288 x
289}
290
291fn row_nanmean(x: &Array2<f64>, g: usize) -> f64 {
292 let mut sum = 0.0;
293 let mut cnt = 0usize;
294 for &v in x.row(g) {
295 if v.is_finite() {
296 sum += v;
297 cnt += 1;
298 }
299 }
300 if cnt > 0 {
301 sum / cnt as f64
302 } else {
303 f64::NAN
304 }
305}
306
307#[cfg(test)]
308mod tests {
309 use super::*;
310 use ndarray::array;
311
312 fn fixture() -> Array2<f64> {
313 array![
314 [5.1, 4.8, 6.2, 5.5],
315 [2.3, 3.1, 2.8, 3.5],
316 [7.7, 7.2, 8.1, 6.9],
317 [1.1, 0.9, 1.4, 1.2],
318 [9.3, 9.1, 8.8, 9.5],
319 [4.4, 4.9, 5.2, 4.1],
320 ]
321 }
322
323 fn assert_close(got: &Array2<f64>, want: &Array2<f64>, tol: f64) {
324 assert_eq!(got.dim(), want.dim());
325 for (a, b) in got.iter().zip(want.iter()) {
326 assert!((a - b).abs() < tol, "got {a} want {b}");
327 }
328 }
329
330 #[test]
333 fn quantile_matches_r() {
334 let got = normalize_quantiles(&fixture(), true);
335 let want = array![
336 [5.425, 4.625, 5.425, 5.425],
337 [2.925, 2.925, 2.925, 2.925],
338 [7.475, 7.475, 7.475, 7.475],
339 [1.150, 1.150, 1.150, 1.150],
340 [9.175, 9.175, 9.175, 9.175],
341 [4.625, 5.425, 4.625, 4.625],
342 ];
343 assert_close(&got, &want, 1e-9);
344 }
345
346 #[test]
347 fn scale_matches_r() {
348 let got = normalize_median_values(&fixture());
349 let want = array![
350 [
351 5.379778894332,
352 4.958922934739,
353 5.450102801447,
354 5.741287729347
355 ],
356 [
357 2.426174795483,
358 3.202637728685,
359 2.461336749041,
360 3.653546736857
361 ],
362 [
363 8.122411271834,
364 7.438384402108,
365 7.120295595439,
366 7.202706424090
367 ],
368 [
369 1.160344467405,
370 0.929798050264,
371 1.230668374520,
372 1.252644595494
373 ],
374 [
375 9.810185042605,
376 9.401291397109,
377 7.735629782699,
378 9.916769714327
379 ],
380 [
381 4.641377869620,
382 5.062233829212,
383 4.571053962504,
384 4.279869034604
385 ],
386 ];
387 assert_close(&got, &want, 1e-9);
388 }
389
390 #[test]
391 fn scale_abs_matches_r() {
392 let nan = f64::NAN;
393 let x = array![
394 [1.0, -2.0, 3.0],
395 [-4.0, 5.0, -6.0],
396 [7.0, -8.0, nan],
397 [-10.0, 11.0, 12.0],
398 [13.0, nan, -15.0],
399 [16.0, -17.0, 18.0],
400 ];
401 let got = normalize_median_abs_values(&x);
402 let want = array![
403 [1.09937146549536, -2.33616436417763, 2.33616436417763],
404 [-4.39748586198142, 5.84041091044408, -4.67232872835526],
405 [7.69560025846748, -9.34465745671052, nan],
406 [-10.9937146549535, 12.848904002977, 9.34465745671052],
407 [14.2918290514396, nan, -11.6808218208881],
408 [17.5899434479257, -19.8573970955099, 14.0169861850658],
409 ];
410 assert_eq!(got.dim(), want.dim());
411 for (a, b) in got.iter().zip(want.iter()) {
412 if b.is_nan() {
413 assert!(a.is_nan(), "got {a} want NaN");
414 } else {
415 assert!((a - b).abs() < 1e-9, "got {a} want {b}");
416 }
417 }
418 let one = x.column(0).to_owned().insert_axis(Axis(1));
420 let got1 = normalize_median_abs_values(&one);
421 assert_eq!(got1, one);
422 }
423
424 #[test]
425 fn cyclicloess_matches_r() {
426 let got = normalize_cyclic_loess(&fixture(), 0.7, true, 3, CyclicMethod::Fast);
427 let want = array![
428 [
429 5.296751774373,
430 4.950393059958,
431 5.410921516718,
432 5.838353274939
433 ],
434 [
435 2.578701290681,
436 3.133887404904,
437 2.497436394874,
438 3.373476538746
439 ],
440 [
441 7.652064568973,
442 7.390792439486,
443 7.799979876589,
444 7.020333019702
445 ],
446 [
447 1.278090240451,
448 0.963550076309,
449 1.356899073035,
450 0.913640591855
451 ],
452 [
453 9.065987375234,
454 9.278688667701,
455 8.955145308355,
456 9.419784843868
457 ],
458 [
459 4.764093788823,
460 4.932065839925,
461 4.658296269798,
462 4.136224331580
463 ],
464 ];
465 assert_close(&got, &want, 1e-6);
466 }
467
468 #[test]
469 fn quantile_with_missing_keeps_na() {
470 let mut x = fixture();
471 x[[2, 1]] = f64::NAN;
472 let got = normalize_quantiles(&x, true);
473 assert!(got[[2, 1]].is_nan());
476 let want = array![
477 [5.4100, 4.9325, 5.4100, 5.4100],
478 [2.8150, 3.2250, 2.8150, 2.8150],
479 [7.1100, f64::NAN, 7.1100, 7.1100],
480 [1.1500, 1.1500, 1.1500, 1.1500],
481 [9.1750, 9.1750, 9.1750, 9.1750],
482 [4.4550, 6.6850, 4.4550, 4.4550],
483 ];
484 for g in 0..x.nrows() {
485 for j in 0..x.ncols() {
486 if g == 2 && j == 1 {
487 continue;
488 }
489 assert!((got[[g, j]] - want[[g, j]]).abs() < 1e-9, "[{g},{j}]");
490 }
491 }
492 }
493
494 #[test]
495 fn single_column_is_identity() {
496 let x = array![[1.0], [2.0], [3.0]];
497 assert_eq!(normalize_quantiles(&x, true), x);
498 assert_eq!(normalize_median_values(&x), x);
499 }
500}