1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
//! Mid-p z-score equivalents of hypergeometric deviates.
//!
//! Pure-Rust port of limma's `zscoreHyper.R` ([`zscore_hyper`]): the signed
//! standard-normal deviate matching the mid-p-value of a hypergeometric count.
//! Tail probabilities are accumulated in log space (via [`logsumexp`]) so the
//! result stays accurate deep into either tail, as in R's `log.p = TRUE` path.
use crate::logsumexp::logsumexp;
use crate::special::ln_gamma;
use crate::zscore::zscore_from_log_tails;
use std::f64::consts::LN_2;
/// `ln C(n, k)` (log binomial coefficient); `-inf` when `k > n`.
fn lchoose(n: u64, k: u64) -> f64 {
if k > n {
return f64::NEG_INFINITY;
}
ln_gamma((n + 1) as f64) - ln_gamma((k + 1) as f64) - ln_gamma((n - k + 1) as f64)
}
/// `ln dhyper(x; m, n, k)`: log hypergeometric pmf for drawing `x` white balls
/// in `k` draws from an urn of `m` white and `n` black. `-inf` outside the
/// support `max(0, k-n) <= x <= min(m, k)`.
fn ln_dhyper(x: i64, m: u64, n: u64, k: u64) -> f64 {
if x < 0 {
return f64::NEG_INFINITY;
}
let x = x as u64;
if x > m || x > k || k - x > n {
return f64::NEG_INFINITY;
}
lchoose(m, x) + lchoose(n, k - x) - lchoose(m + n, k)
}
/// `zscoreHyper(q, m, n, k)`: z-score equivalent of a hypergeometric deviate `q`
/// (white balls drawn) for an urn of `m` white and `n` black with `k` draws.
/// Uses the mid-p-value — the full tail plus half the point mass at `q` — and
/// maps the smaller tail through the normal quantile in log space.
pub fn zscore_hyper(q: i64, m: u64, n: u64, k: u64) -> f64 {
let lo = k.saturating_sub(n) as i64;
let hi = m.min(k) as i64;
let ln_d = ln_dhyper(q, m, n, k) - LN_2;
// ln P(X > q) and ln P(X < q), summed over the support in log space.
let mut ln_pupper = f64::NEG_INFINITY;
for x in (q + 1).max(lo)..=hi {
ln_pupper = logsumexp(ln_pupper, ln_dhyper(x, m, n, k));
}
let mut ln_plower = f64::NEG_INFINITY;
for x in lo..=(q - 1).min(hi) {
ln_plower = logsumexp(ln_plower, ln_dhyper(x, m, n, k));
}
// Add half the point mass to each tail (mid-p), preserving log accuracy.
let pmidupper = logsumexp(ln_pupper, ln_d);
let pmidlower = logsumexp(ln_plower, ln_d);
zscore_from_log_tails(pmidlower, pmidupper)
}
#[cfg(test)]
mod tests {
use super::*;
fn close(a: f64, b: f64, tol: f64) -> bool {
(a - b).abs() <= tol + tol * b.abs()
}
#[test]
fn zscore_hyper_matches_r() {
// Reference: zscoreHyper(q, m, n, k) in limma 3.68.3.
let q = [5, 1, 8, 0, 3];
let want = [
1.85530391853958,
-1.35476719565062,
4.47368039807182,
-2.29868959056222,
0.281691411313753,
];
for i in 0..q.len() {
let got = zscore_hyper(q[i], 10, 20, 8);
assert!(
close(got, want[i], 1e-9),
"q={}: got {got}, want {}",
q[i],
want[i]
);
}
}
#[test]
fn zscore_hyper_is_symmetric_about_mean() {
// m=n, k=20: the mean is q=10, where z is ~0, and q=5/15 are mirror
// images. Reference: zscoreHyper(c(15,10,5), 50, 50, 20).
let want = [2.45887335894704, 5.56583284934354e-16, -2.45887335894704];
let q = [15, 10, 5];
for i in 0..q.len() {
let got = zscore_hyper(q[i], 50, 50, 20);
assert!(close(got, want[i], 1e-9), "q={}: got {got}", q[i]);
}
}
}