Skip to main content

limma/
proptruenull.rs

1//! Proportion of true null hypotheses.
2//!
3//! Pure-Rust port of limma's `propTrueNull.R` and `convest.R`.
4//! [`prop_true_null`] mirrors `propTrueNull(p, method)` with the three
5//! closed-form estimators (`lfdr`, `mean`, `hist`) and the `convest` method;
6//! [`convest`] is the standalone convex decreasing density estimate of Langaas,
7//! Lindqvist & Ferkingstad (2005).
8
9/// Method for [`prop_true_null`], mirroring `propTrueNull(p, method=...)`.
10#[derive(Clone, Copy, Debug, PartialEq, Eq)]
11pub enum PropTrueNullMethod {
12    /// `"lfdr"` (limma's default): average local false discovery rate.
13    Lfdr,
14    /// `"mean"`: twice the mean of the capped p-values.
15    Mean,
16    /// `"hist"`: histogram tail-mean method (uses `nbins`).
17    Hist,
18    /// `"convest"`: convex decreasing density estimate ([`convest`]).
19    Convest,
20}
21
22/// `propTrueNull(p, method, nbins)`: estimate the proportion of true null
23/// hypotheses among the p-values `p`. `nbins` is only used by
24/// [`PropTrueNullMethod::Hist`] (limma's default is 20); the `Convest` arm runs
25/// [`convest`] with limma's defaults (`niter = 100`, `tol = 1e-6`).
26pub fn prop_true_null(p: &[f64], method: PropTrueNullMethod, nbins: usize) -> f64 {
27    match method {
28        PropTrueNullMethod::Lfdr => by_local_fdr(p),
29        PropTrueNullMethod::Mean => by_mean_p(p),
30        PropTrueNullMethod::Hist => from_histogram(p, nbins),
31        PropTrueNullMethod::Convest => convest(p, 100, 1e-6),
32    }
33}
34
35/// `.propTrueNullByLocalFDR`: average local FDR over the sorted p-values.
36fn by_local_fdr(p: &[f64]) -> f64 {
37    let n = p.len();
38    let mut ps = p.to_vec();
39    ps.sort_by(|a, b| b.partial_cmp(a).unwrap()); // decreasing
40    let nf = n as f64;
41    let n1 = (n + 1) as f64;
42    let mut s = 0.0;
43    for (k, &pv) in ps.iter().enumerate() {
44        let i = (n - k) as f64; // n, n-1, ..., 1
45        let q = (nf / i * pv).min(1.0);
46        s += i * q;
47    }
48    s / nf / n1 * 2.0
49}
50
51/// `.propTrueNullByMeanP`: twice the mean of `pmin(p_(i), (i-0.5)/n)`.
52fn by_mean_p(p: &[f64]) -> f64 {
53    let n = p.len();
54    let mut ps = p.to_vec();
55    ps.sort_by(|a, b| a.partial_cmp(b).unwrap()); // increasing
56    let nf = n as f64;
57    let mut s = 0.0;
58    for (k, &pv) in ps.iter().enumerate() {
59        let i = (k + 1) as f64;
60        s += pv.min((i - 0.5) / nf);
61    }
62    2.0 * s / nf
63}
64
65/// `.propTrueNullFromHistogram`: tail-mean histogram method on `nbins` bins.
66fn from_histogram(p: &[f64], nbins: usize) -> f64 {
67    let nb = nbins as f64;
68    // Bins are the right-closed intervals (-0.1, 1/nbins], ..., ((nbins-1)/nbins, 1].
69    let mut counts = vec![0.0_f64; nbins];
70    for &pv in p {
71        for t in 1..=nbins {
72            if pv <= t as f64 / nb {
73                counts[t - 1] += 1.0;
74                break;
75            }
76        }
77    }
78    // tail_means[k] = mean of counts[k..nbins] = rev(cumsum(rev(counts))/(1:nbins)).
79    let mut tail_means = vec![0.0_f64; nbins];
80    let mut cum = 0.0;
81    for k in (0..nbins).rev() {
82        cum += counts[k];
83        tail_means[k] = cum / (nbins - k) as f64;
84    }
85    // First k where the tail mean is at least the bin's own count (always exists
86    // at k = nbins-1, where they are equal).
87    let mut idx = nbins - 1;
88    for k in 0..nbins {
89        if tail_means[k] >= counts[k] {
90            idx = k;
91            break;
92        }
93    }
94    tail_means[idx] / tail_means[0]
95}
96
97/// `convest(p, niter, tol)`: convex decreasing density estimate of pi0.
98///
99/// Iteratively builds the least-concave-majorant density on the 0.01 grid and
100/// returns its value at 1, the estimated proportion of true nulls. `p` must lie
101/// in `[0, 1]` with no NaNs (limma's `convest` errors otherwise); an empty `p`
102/// returns NaN, matching R's `NA`.
103pub fn convest(p: &[f64], niter: usize, tol: f64) -> f64 {
104    let m = p.len();
105    if m == 0 {
106        return f64::NAN;
107    }
108    let ny = tol;
109    let mut pv: Vec<f64> = p.to_vec();
110    pv.sort_by(|a, b| a.partial_cmp(b).unwrap());
111    let p = pv; // sorted ascending
112
113    // p.c = ceiling(100*p)/100, p.f = floor(100*p)/100 (used in the interpolation).
114    let p_c: Vec<f64> = p.iter().map(|&v| (100.0 * v).ceil() / 100.0).collect();
115
116    // x.grid = (0:100)/100 (101 points); the t-grid for theta is (1:100)/100.
117    let x_grid: Vec<f64> = (0..=100).map(|t| t as f64 / 100.0).collect();
118
119    let mut f_hat = vec![1.0_f64; 101]; // density estimate at the x-grid
120    let mut f_hat_p = vec![1.0_f64; m]; // density estimate at the p-values
121
122    let mut theta_hat = argmax_theta(&p, None);
123    let mut f_theta_hat = triangular(theta_hat, &x_grid);
124    let mut f_theta_hat_p = triangular(theta_hat, &p);
125
126    let mut pi0 = f_hat[100];
127
128    for _ in 0..niter {
129        let f_hat_diff: Vec<f64> = (0..m).map(|i| f_hat_p[i] - f_theta_hat_p[i]).collect();
130
131        let s: f64 = (0..m).map(|i| f_hat_diff[i] / f_hat_p[i]).sum();
132        let eps = if s > 0.0 {
133            0.0
134        } else {
135            let mut l = 0.0_f64;
136            let mut u = 1.0_f64;
137            let mut e = 0.5_f64;
138            while (u - l).abs() > ny {
139                e = (l + u) / 2.0;
140                let cond: f64 = (0..m)
141                    .filter(|&i| f_hat_p[i] > 0.0)
142                    .map(|i| f_hat_diff[i] / ((1.0 - e) * f_hat_p[i] + e * f_theta_hat_p[i]))
143                    .sum();
144                if cond < 0.0 {
145                    l = e;
146                } else {
147                    u = e;
148                }
149            }
150            e
151        };
152
153        for k in 0..101 {
154            f_hat[k] = (1.0 - eps) * f_hat[k] + eps * f_theta_hat[k];
155        }
156        pi0 = f_hat[100];
157
158        // f.hat.p: linear interpolation of f.hat between its floor/ceil grid points.
159        for i in 0..m {
160            let idx_f = (100.0 * p[i]).floor() as usize;
161            let idx_c = (100.0 * p[i]).ceil() as usize;
162            let ff = f_hat[idx_f];
163            let fc = f_hat[idx_c];
164            f_hat_p[i] = 100.0 * (ff - fc) * (p_c[i] - p[i]) + fc;
165        }
166
167        theta_hat = argmax_theta(&p, Some(&f_hat_p));
168        f_theta_hat = triangular(theta_hat, &x_grid);
169        f_theta_hat_p = triangular(theta_hat, &p);
170
171        // If the Unif[0,1] density fits better, switch f.theta.hat to it.
172        let s1: f64 = (0..m).map(|i| f_theta_hat_p[i] / f_hat_p[i]).sum();
173        let s2: f64 = (0..m).map(|i| 1.0 / f_hat_p[i]).sum();
174        if s1 < s2 {
175            f_theta_hat = vec![1.0; 101];
176            f_theta_hat_p = vec![1.0; m];
177        }
178    }
179    pi0
180}
181
182/// `0.01 * which.max_theta sum_i w_i * 2(theta - p_i) 1[p_i < theta] / theta^2`,
183/// with `w_i = 1` (`weights = None`) or `w_i = 1/f_hat_p_i`. `which.max` keeps
184/// the first maximizing grid point, matching R.
185fn argmax_theta(p: &[f64], weights: Option<&[f64]>) -> f64 {
186    let mut best_val = f64::NEG_INFINITY;
187    let mut best_t = 1usize;
188    for t in 1..=100 {
189        let theta = t as f64 / 100.0;
190        let t2 = theta * theta;
191        let mut s = 0.0;
192        for (i, &pi) in p.iter().enumerate() {
193            if pi < theta {
194                let term = 2.0 * (theta - pi) / t2;
195                s += match weights {
196                    Some(w) => term / w[i],
197                    None => term,
198                };
199            }
200        }
201        if s > best_val {
202            best_val = s;
203            best_t = t;
204        }
205    }
206    best_t as f64 / 100.0
207}
208
209/// Triangular density `2(theta - x) 1[x < theta] / theta^2` on the points `xs`.
210fn triangular(theta: f64, xs: &[f64]) -> Vec<f64> {
211    let t2 = theta * theta;
212    xs.iter()
213        .map(|&x| {
214            if x < theta {
215                2.0 * (theta - x) / t2
216            } else {
217                0.0
218            }
219        })
220        .collect()
221}
222
223#[cfg(test)]
224mod tests {
225    use super::*;
226
227    // A fixed p-value vector with signal at the low end so pi0 < 1 and the
228    // methods give distinct estimates. Mirrors scratch/proptruenull_ref.R.
229    const P: [f64; 30] = [
230        0.0001, 0.0007, 0.0023, 0.0041, 0.0089, 0.012, 0.018, 0.027, 0.035, 0.052, 0.071, 0.098,
231        0.13, 0.17, 0.21, 0.28, 0.33, 0.41, 0.47, 0.55, 0.58, 0.63, 0.69, 0.74, 0.78, 0.83, 0.88,
232        0.91, 0.95, 0.99,
233    ];
234
235    fn close(a: f64, b: f64, tol: f64) -> bool {
236        (a - b).abs() <= tol + tol * b.abs()
237    }
238
239    #[test]
240    fn closed_form_methods_match_r() {
241        assert!(
242            close(
243                prop_true_null(&P, PropTrueNullMethod::Lfdr, 20),
244                0.700587096774194,
245                1e-12
246            ),
247            "lfdr: {}",
248            prop_true_null(&P, PropTrueNullMethod::Lfdr, 20)
249        );
250        assert!(
251            close(
252                prop_true_null(&P, PropTrueNullMethod::Mean, 20),
253                0.723495555555556,
254                1e-12
255            ),
256            "mean: {}",
257            prop_true_null(&P, PropTrueNullMethod::Mean, 20)
258        );
259        assert!(
260            close(
261                prop_true_null(&P, PropTrueNullMethod::Hist, 20),
262                0.666666666666667,
263                1e-12
264            ),
265            "hist20: {}",
266            prop_true_null(&P, PropTrueNullMethod::Hist, 20)
267        );
268        assert!(
269            close(
270                prop_true_null(&P, PropTrueNullMethod::Hist, 10),
271                0.666666666666667,
272                1e-12
273            ),
274            "hist10: {}",
275            prop_true_null(&P, PropTrueNullMethod::Hist, 10)
276        );
277    }
278
279    #[test]
280    fn convest_matches_r() {
281        assert!(
282            close(convest(&P, 100, 1e-6), 0.654984323974808, 1e-7),
283            "convest100: {}",
284            convest(&P, 100, 1e-6)
285        );
286        assert!(
287            close(convest(&P, 50, 1e-6), 0.654639572943371, 1e-7),
288            "convest50: {}",
289            convest(&P, 50, 1e-6)
290        );
291        assert!(
292            close(
293                prop_true_null(&P, PropTrueNullMethod::Convest, 20),
294                0.654984323974808,
295                1e-7
296            ),
297            "convest via dispatch: {}",
298            prop_true_null(&P, PropTrueNullMethod::Convest, 20)
299        );
300    }
301
302    #[test]
303    fn convest_empty_is_nan() {
304        assert!(convest(&[], 100, 1e-6).is_nan());
305    }
306}