Skip to main content

limma/
zscore.rs

1//! z-score equivalents of t-distribution deviates.
2//!
3//! Pure-Rust port of limma's `zscore.R`: [`zscore_t`] (`zscoreT`, with the
4//! exact quantile method and the Bailey/Hill/Wallace closed-form
5//! approximations) and [`t_zscore`] (`tZscore`). The exact method and
6//! `tZscore` work in log-probability space (via [`crate::special::f_lsf`] and
7//! [`crate::special::norm_isf_log`]) so they stay accurate deep into the tails,
8//! matching R's `log.p = TRUE` calls.
9
10use crate::special::{f_lsf, ln_gamma_lr, ln_gamma_ur, ln_norm_cdf, norm_isf_log};
11use statrs::distribution::{Continuous, ContinuousCDF, StudentsT};
12
13/// Method for [`zscore_t`], mirroring `zscoreT(x, df, approx, method)`.
14#[derive(Clone, Copy, Debug, PartialEq, Eq)]
15pub enum ZscoreTMethod {
16    /// `approx = FALSE`: exact quantile mapping through the t and normal CDFs.
17    Exact,
18    /// `approx = TRUE, method = "bailey"`.
19    Bailey,
20    /// `approx = TRUE, method = "hill"`.
21    Hill,
22    /// `approx = TRUE, method = "wallace"`.
23    Wallace,
24}
25
26/// `sign(x)` with R's convention `sign(0) == 0`.
27fn rsign(x: f64) -> f64 {
28    if x > 0.0 {
29        1.0
30    } else if x < 0.0 {
31        -1.0
32    } else {
33        0.0
34    }
35}
36
37/// `zscoreT(x, df, ...)`: z-score equivalent of a t-deviate `x` on `df` degrees
38/// of freedom.
39pub fn zscore_t(x: f64, df: f64, method: ZscoreTMethod) -> f64 {
40    match method {
41        ZscoreTMethod::Exact => zscore_t_quantile(x, df),
42        ZscoreTMethod::Bailey => zscore_t_bailey(x, df),
43        ZscoreTMethod::Hill => zscore_t_hill(x, df),
44        ZscoreTMethod::Wallace => zscore_t_wallace(x, df),
45    }
46}
47
48/// `.zscoreTWallace`: Wallace (1959) closed-form approximation.
49fn zscore_t_wallace(x: f64, df: f64) -> f64 {
50    ((df + 0.125) / (df + 0.375)) * (df * (x / df * x).ln_1p()).sqrt() * rsign(x)
51}
52
53/// `.zscoreTBailey`: Bailey closed-form approximation (limma's `approx` default).
54fn zscore_t_bailey(x: f64, df: f64) -> f64 {
55    ((df + 0.125) / (df + 1.125))
56        * ((df + 19.0 / 12.0) * (x / (df + 1.0 / 12.0) * x).ln_1p()).sqrt()
57        * rsign(x)
58}
59
60/// `.zscoreTHill`: Hill (1970) closed-form approximation.
61fn zscore_t_hill(x: f64, df: f64) -> f64 {
62    let a = df - 0.5;
63    let b = 48.0 * a * a;
64    let mut z = a * (x / df * x).ln_1p();
65    z = (((((-0.4 * z - 3.3) * z - 24.0) * z - 85.5) / (0.8 * z * z + 100.0 + b) + z + 3.0) / b
66        + 1.0)
67        * z.sqrt();
68    z * rsign(x)
69}
70
71/// `.zscoreTQuantile`: exact mapping `qnorm(pt(|x|, df, upper, log), upper, log)`.
72fn zscore_t_quantile(x: f64, df: f64) -> f64 {
73    let lp = ln_t_sf_pos(x.abs(), df);
74    norm_isf_log(lp) * rsign(x)
75}
76
77/// `ln P(T > t)` for `t >= 0` on `df` degrees of freedom, via the F survival
78/// identity `T^2 ~ F(1, df)`: `P(T > t) = 0.5 * P(F_{1,df} > t^2)`.
79fn ln_t_sf_pos(t: f64, df: f64) -> f64 {
80    -std::f64::consts::LN_2 + f_lsf(t * t, 1.0, df)
81}
82
83/// `tZscore(z, df)`: t-statistic equivalent of a z-score deviate `z`.
84pub fn t_zscore(z: f64, df: f64) -> f64 {
85    let lp = ln_norm_cdf(-z.abs()); // ln P(Z > |z|)
86    t_isf_log(lp, df) * rsign(z)
87}
88
89/// Inverse upper-tail t from a log probability: returns `t >= 0` such that
90/// `ln P(T > t) = lp` (assumes `lp <= ln 0.5`). Newton on the log survival,
91/// which stays accurate in the tail where `exp(lp)` would underflow.
92fn t_isf_log(lp: f64, df: f64) -> f64 {
93    if lp >= -std::f64::consts::LN_2 {
94        return 0.0;
95    }
96    let dist = StudentsT::new(0.0, 1.0, df).expect("valid df");
97
98    let mut t = if lp > -700.0 {
99        let g = dist.inverse_cdf(1.0 - lp.exp());
100        if g.is_finite() && g > 0.0 {
101            g
102        } else {
103            1.0
104        }
105    } else {
106        let mut tt = 1.0;
107        while ln_t_sf_pos(tt, df) > lp {
108            tt *= 2.0;
109        }
110        tt
111    };
112
113    for _ in 0..100 {
114        let lsf = ln_t_sf_pos(t, df);
115        let lpdf = dist.pdf(t).ln();
116        let h = lsf - lp;
117        let dh = -(lpdf - lsf).exp(); // d/dt ln P(T>t) = -pdf/sf
118        let step = h / dh;
119        let mut tn = t - step;
120        if tn <= 0.0 {
121            tn = 0.5 * t;
122        }
123        let converged = (tn - t).abs() < 1e-13 * (1.0 + tn);
124        t = tn;
125        if converged {
126            break;
127        }
128    }
129    t
130}
131
132/// Distribution-agnostic core of `zscore(q, distribution, ...)`: given the log
133/// lower- and upper-tail probabilities of a deviate, return the signed
134/// standard-normal deviate with the same tail probability. The smaller tail is
135/// mapped through `qnorm(..., log.p = TRUE)`, so precision is preserved into
136/// the tails regardless of the source distribution.
137pub fn zscore_from_log_tails(log_lower: f64, log_upper: f64) -> f64 {
138    if log_upper < log_lower {
139        norm_isf_log(log_upper)
140    } else {
141        -norm_isf_log(log_lower)
142    }
143}
144
145/// `zscoreGamma(q, shape, scale)`: z-score equivalent of a gamma deviate. The
146/// branch is selected by whether `q` exceeds the mean `shape*scale` (as in
147/// limma), then the corresponding tail probability is mapped through the normal
148/// quantile in log space.
149pub fn zscore_gamma(q: f64, shape: f64, scale: f64) -> f64 {
150    if q > shape * scale {
151        norm_isf_log(ln_gamma_ur(shape, q / scale))
152    } else {
153        -norm_isf_log(ln_gamma_lr(shape, q / scale))
154    }
155}
156
157#[cfg(test)]
158mod tests {
159    use super::*;
160
161    const X: [f64; 8] = [0.5, 1.2, -2.0, 3.5, 8.0, -12.0, 0.0, 25.0];
162    const DF: [f64; 8] = [5.0, 10.0, 3.0, 20.0, 50.0, 8.0, 7.0, 4.0];
163
164    fn close(a: f64, b: f64, tol: f64) -> bool {
165        (a - b).abs() <= tol + tol * b.abs()
166    }
167
168    fn check(method: ZscoreTMethod, want: &[f64], tol: f64) {
169        for i in 0..X.len() {
170            let got = zscore_t(X[i], DF[i], method);
171            assert!(
172                close(got, want[i], tol),
173                "{:?} i={i}: got {got}, want {}",
174                method,
175                want[i]
176            );
177        }
178    }
179
180    #[test]
181    fn zscore_t_exact_matches_r() {
182        // Reference: zscoreT(x, df, approx=FALSE).
183        check(
184            ZscoreTMethod::Exact,
185            &[
186                0.470078587598904,
187                1.1316150720248,
188                -1.47830571549245,
189                3.05439849233135,
190                6.3895913409388,
191                -4.73936721150427,
192                0.0,
193                4.32580582047361,
194            ],
195            1e-7,
196        );
197    }
198
199    #[test]
200    fn zscore_t_approx_methods_match_r() {
201        check(
202            ZscoreTMethod::Bailey,
203            &[
204                0.47040618216154,
205                1.13171290668369,
206                -1.47913851858807,
207                3.05419638749544,
208                6.38909334210128,
209                -4.72198983061799,
210                0.0,
211                4.26852597651621,
212            ],
213            1e-9,
214        );
215        check(
216            ZscoreTMethod::Hill,
217            &[
218                0.470078899765056,
219                1.13161507280532,
220                -1.47838714521046,
221                3.0543984920903,
222                6.38959133924561,
223                -4.73937919940718,
224                0.0,
225                4.32687312869554,
226            ],
227            1e-9,
228        );
229        check(
230            ZscoreTMethod::Wallace,
231            &[
232                0.470941044862427,
233                1.13192574792552,
234                -1.4762330589056,
235                3.05330279152777,
236                6.38754780925417,
237                -4.70852441428392,
238                0.0,
239                4.24090262987019,
240            ],
241            1e-9,
242        );
243    }
244
245    #[test]
246    fn t_zscore_matches_r() {
247        // Reference: tZscore(z, df).
248        let z = [0.3, 1.0, -1.5, 2.5, 4.0, -6.0, 0.0, 3.1];
249        let dfz = [5.0, 10.0, 3.0, 20.0, 50.0, 8.0, 7.0, 4.0];
250        let want = [
251            0.316825549865956,
252            1.05256241525734,
253            -2.04335742531369,
254            2.74732044128031,
255            4.36716311562917,
256            -29.3392145892267,
257            0.0,
258            7.23627391940524,
259        ];
260        for i in 0..z.len() {
261            let got = t_zscore(z[i], dfz[i]);
262            assert!(
263                close(got, want[i], 1e-7),
264                "i={i}: got {got}, want {}",
265                want[i]
266            );
267        }
268    }
269
270    #[test]
271    fn zscore_t_round_trips_through_t_zscore() {
272        // zscoreT(Exact) and tZscore are inverses.
273        for &(x, df) in &[(1.3_f64, 9.0_f64), (-2.7, 14.0), (4.5, 6.0)] {
274            let z = zscore_t(x, df, ZscoreTMethod::Exact);
275            let back = t_zscore(z, df);
276            assert!(close(back, x, 1e-7), "x={x}: round-trip {back}");
277        }
278    }
279
280    #[test]
281    fn zscore_gamma_matches_r() {
282        // Reference: zscoreGamma(q, shape, scale) in limma 3.68.3.
283        let q = [0.5, 9.0, 10.0, 2.0];
284        let shape = [2.0, 5.0, 3.0, 1.0];
285        let scale = [1.0, 1.0, 2.0, 4.0];
286        let want = [
287            -1.33949979584716,
288            1.59852006747439,
289            1.15204145464159,
290            -0.270288020738736,
291        ];
292        for i in 0..q.len() {
293            let got = zscore_gamma(q[i], shape[i], scale[i]);
294            assert!(
295                close(got, want[i], 1e-9),
296                "i={i}: got {got}, want {}",
297                want[i]
298            );
299        }
300    }
301
302    #[test]
303    fn zscore_generic_recovers_normal() {
304        // zscore(q, "norm") is the identity; the tail-selection core must
305        // reproduce it from the log normal tail probabilities.
306        for &qi in &[-2.5_f64, -0.3, 0.8, 3.1] {
307            let lo = crate::special::ln_norm_cdf(qi);
308            let hi = crate::special::ln_norm_cdf(-qi);
309            let got = zscore_from_log_tails(lo, hi);
310            assert!(close(got, qi, 1e-9), "q={qi}: got {got}");
311        }
312    }
313}