1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
//! Proportion of expressed probes per array (Shi & Smyth).
//!
//! Pure-Rust port of the numeric core of limma's `propexpr.R` ([`propexpr`]):
//! the branch taking an explicit matrix of regular probes plus a matrix of
//! negative-control probes. The `status`/grep label-extraction wrapper is an
//! annotation-layer concern and is out of scope for the numeric port.
use ndarray::Array2;
fn median(v: &mut [f64]) -> f64 {
v.sort_by(|a, b| a.partial_cmp(b).unwrap());
let n = v.len();
if n % 2 == 1 {
v[n / 2]
} else {
(v[n / 2 - 1] + v[n / 2]) / 2.0
}
}
/// Exponential CDF `pexp(q, rate) = 1 - exp(-rate*q)` for `q >= 0`, else 0.
fn pexp(q: f64, rate: f64) -> f64 {
if q < 0.0 {
0.0
} else {
1.0 - (-rate * q).exp()
}
}
/// `propexpr(x, neg.x)` for explicitly supplied matrices. For each column
/// (array) it estimates the proportion of expressed (regular) probes by
/// comparing the regular-probe values `x` against the negative-control values
/// `neg_x`, fitting an exponential to the negative tail. `NaN` entries are
/// dropped per column. Returns one proportion per column, clamped to `[0, 1]`.
pub fn propexpr(x: &Array2<f64>, neg_x: &Array2<f64>) -> Vec<f64> {
let narrays = x.ncols();
assert_eq!(
neg_x.ncols(),
narrays,
"x and neg_x must have equal columns"
);
let mut out = vec![0.0_f64; narrays];
for (i, o) in out.iter_mut().enumerate() {
let mut b: Vec<f64> = neg_x
.column(i)
.iter()
.copied()
.filter(|v| !v.is_nan())
.collect();
let r: Vec<f64> = x
.column(i)
.iter()
.copied()
.filter(|v| !v.is_nan())
.collect();
let nb = b.len() as f64;
let nr = r.len() as f64;
let mu = b.iter().sum::<f64>() / nb;
let alpha = (r.iter().sum::<f64>() / nr - mu).max(10.0);
let b1 = median(&mut b);
let p1 = b.iter().map(|&bj| pexp(b1 - bj, 1.0 / alpha)).sum::<f64>() / nb;
let pb = (b.iter().filter(|&&v| v < b1).count() as f64
+ b.iter().filter(|&&v| v == b1).count() as f64 / 2.0)
/ nb;
let p = (r.iter().filter(|&&v| v < b1).count() as f64
+ r.iter().filter(|&&v| v == b1).count() as f64 / 2.0)
/ nr;
*o = ((pb - p) / (pb - p1)).clamp(0.0, 1.0);
}
out
}
#[cfg(test)]
mod tests {
use super::*;
use ndarray::array;
fn close(a: &[f64], b: &[f64], tol: f64) -> bool {
a.len() == b.len()
&& a.iter()
.zip(b)
.all(|(&x, &y)| (x - y).abs() <= tol + tol * y.abs())
}
#[test]
fn two_arrays_match_r() {
// Reference: propexpr(reg, neg.x = neg) in limma 3.68.3 (columns of
// overlapping regular vs negative-control intensities).
let neg = array![
[4.0, 3.0],
[5.0, 4.0],
[5.0, 5.0],
[6.0, 5.0],
[6.0, 6.0],
[7.0, 7.0],
];
let reg = array![
[5.0, 4.0],
[5.0, 5.0],
[6.0, 5.0],
[7.0, 6.0],
[8.0, 8.0],
[9.0, 9.0],
[10.0, 11.0],
[12.0, 13.0],
];
let want = [0.54285538831644, 0.550748101666388];
assert!(close(&propexpr(®, &neg), &want, 1e-9));
}
}