single_statistics/testing/correction/
mod.rs

1use anyhow::{Result, anyhow};
2use single_utilities::traits::FloatOps;
3use std::cmp::Ordering;
4
5/// Multiple testing correction methods to control for false positives
6/// when performing many statistical tests simultaneously.
7
8/// Apply Bonferroni correction to p-values
9///
10/// Bonferroni correction is a simple but conservative method that multiplies
11/// each p-value by the number of tests.
12///
13/// # Arguments
14/// * `p_values` - A slice of p-values to adjust
15///
16/// # Returns
17/// * `Result<Vec<f64>>` - Vector of adjusted p-values
18///
19/// # Example
20/// ```
21/// let p_values = vec![0.01, 0.03, 0.05];
22/// let adjusted = single_statistics::testing::correction::bonferroni_correction(&p_values).unwrap();
23/// ```
24pub fn bonferroni_correction<T>(p_values: &[T]) -> Result<Vec<T>>
25where
26    T: FloatOps,
27{
28    let n = p_values.len();
29
30    if n == 0 {
31        return Err(anyhow!("Empty p-value array"));
32    }
33
34    // Validate p-values
35    for (i, &p) in p_values.iter().enumerate() {
36        if !(T::zero()..=T::one()).contains(&p) {
37            return Err(anyhow!("Invalid p-value at index {:?}: {:?}", i, p));
38        }
39    }
40
41    // Multiply each p-value by n, capping at 1.0
42    let n_t = T::from(n).unwrap();
43    let adjusted = p_values
44        .iter()
45        .map(|&p| num_traits::Float::min(p * n_t, T::one()))
46        .collect();
47
48    Ok(adjusted)
49}
50
51/// Apply Benjamini-Hochberg (BH) procedure for controlling false discovery rate
52///
53/// The BH procedure controls the false discovery rate (FDR), which is the expected
54/// proportion of false positives among all rejected null hypotheses.
55///
56/// # Arguments
57/// * `p_values` - A slice of p-values to adjust
58///
59/// # Returns
60/// * `Result<Vec<f64>>` - Vector of adjusted p-values
61///
62/// # Example
63/// ```
64/// let p_values = vec![0.01, 0.03, 0.05];
65/// let adjusted = single_statistics::testing::correction::benjamini_hochberg_correction(&p_values).unwrap();
66/// ```
67pub fn benjamini_hochberg_correction<T>(p_values: &[T]) -> Result<Vec<T>>
68where
69    T: FloatOps,
70{
71    let n = p_values.len();
72    if n == 0 {
73        return Err(anyhow::anyhow!("Empty p-value array"));
74    }
75
76    // Validate p-values
77    for (i, &p) in p_values.iter().enumerate() {
78        if !(T::zero()..=T::one()).contains(&p) {
79            return Err(anyhow::anyhow!("Invalid p-value at index {:?}: {:?}", i, p));
80        }
81    }
82
83    let n_f = T::from(n).unwrap();
84
85    let mut indices: Vec<usize> = (0..n).collect();
86    indices.sort_unstable_by(|&a, &b| {
87        p_values[a]
88            .partial_cmp(&p_values[b])
89            .unwrap_or(Ordering::Equal)
90    });
91
92    let mut adjusted = vec![T::zero(); n];
93    let mut running_min = T::one();
94
95    for i in (0..n).rev() {
96        let orig_idx = indices[i];
97        let rank = T::from(i + 1).unwrap();
98        let adjustment = num_traits::Float::min(p_values[orig_idx] * n_f / rank, T::one());
99        running_min = num_traits::Float::min(adjustment, running_min);
100        adjusted[orig_idx] = running_min;
101    }
102
103    Ok(adjusted)
104}
105
106/// Apply Benjamini-Yekutieli (BY) procedure for controlling false discovery rate under dependence
107///
108/// The BY procedure is a more conservative variant of the BH procedure that is valid
109/// under arbitrary dependence structures among the tests.
110///
111/// # Arguments
112/// * `p_values` - A slice of p-values to adjust
113///
114/// # Returns
115/// * `Result<Vec<f64>>` - Vector of adjusted p-values
116///
117/// # Example
118/// ```
119/// let p_values = vec![0.01, 0.03, 0.05];
120/// let adjusted = single_statistics::testing::correction::benjamini_yekutieli_correction(&p_values).unwrap();
121/// ```
122pub fn benjamini_yekutieli_correction<T>(p_values: &[T]) -> Result<Vec<T>>
123where
124    T: FloatOps,
125{
126    let n = p_values.len();
127    if n == 0 {
128        return Err(anyhow!("Empty p-value array"));
129    }
130
131    // Calculate the correction factor
132    let c_n: T = (1..=n).map(|i| T::one() / T::from(i).unwrap()).sum();
133
134    // Create index-value pairs and sort by p-value
135    let mut indexed_p_values: Vec<(usize, T)> =
136        p_values.iter().enumerate().map(|(i, &p)| (i, p)).collect();
137
138    // Sort in ascending order
139    indexed_p_values.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(Ordering::Equal));
140
141    // Calculate adjusted p-values with monitoring of minimum value
142    let mut adjusted_p_values = vec![T::zero(); n];
143    let mut current_min = T::one();
144    let n_f64 = T::from(n).unwrap();
145    // Process from largest to smallest p-value
146    for i in (0..n).rev() {
147        let (orig_idx, p_val) = indexed_p_values[i];
148        let rank = i + 1;
149
150        // Calculate adjustment and take minimum of current and previous
151        let adjustment =
152            num_traits::Float::min(p_val * c_n * n_f64 / T::from(rank).unwrap(), T::one());
153        current_min = num_traits::Float::min(adjustment, current_min);
154        adjusted_p_values[orig_idx] = current_min;
155    }
156
157    Ok(adjusted_p_values)
158}
159
160/// Apply Holm-Bonferroni (step-down) method for controlling family-wise error rate
161///
162/// The Holm procedure is a step-down method that controls the family-wise error rate (FWER)
163/// and is uniformly more powerful than the standard Bonferroni correction.
164///
165/// # Arguments
166/// * `p_values` - A slice of p-values to adjust
167///
168/// # Returns
169/// * `Result<Vec<f64>>` - Vector of adjusted p-values
170///
171/// # Example
172/// ```
173/// let p_values = vec![0.01, 0.03, 0.05];
174/// let adjusted = single_statistics::testing::correction::holm_bonferroni_correction(&p_values).unwrap();
175/// ```
176pub fn holm_bonferroni_correction<T>(p_values: &[T]) -> Result<Vec<T>>
177where
178    T: FloatOps,
179{
180    let n = p_values.len();
181
182    if n == 0 {
183        return Err(anyhow!("Empty p-value array"));
184    }
185
186    let zero = T::zero();
187    let one = T::one();
188
189    // Validate p-values
190    for (i, &p) in p_values.iter().enumerate() {
191        if p < zero || p > one {
192            return Err(anyhow!("Invalid p-value at index {}: {:?}", i, p));
193        }
194    }
195
196    // Create index-value pairs and sort by p-value
197    let mut indexed_p_values: Vec<(usize, T)> =
198        p_values.iter().enumerate().map(|(i, &p)| (i, p)).collect();
199
200    indexed_p_values.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(Ordering::Equal));
201
202    // Initialize the result vector
203    let mut adjusted_p_values = vec![zero; n];
204
205    // Calculate adjusted p-values
206    for (i, &(idx, p_val)) in indexed_p_values.iter().enumerate() {
207        // Convert (n - i) to type T
208        let multiplier = T::from((n - i) as f64).unwrap_or(one);
209        let adjusted_p = p_val * multiplier;
210        adjusted_p_values[idx] = num_traits::Float::min(adjusted_p, one);
211    }
212
213    Ok(adjusted_p_values)
214}
215
216/// Apply Hochberg's step-up method for controlling family-wise error rate
217///
218/// Hochberg's procedure is a step-up method that controls the family-wise error rate (FWER)
219/// and is more powerful than Holm's procedure when all tests are independent.
220///
221/// # Arguments
222/// * `p_values` - A slice of p-values to adjust
223///
224/// # Returns
225/// * `Result<Vec<f64>>` - Vector of adjusted p-values
226///
227/// # Example
228/// ```
229/// let p_values = vec![0.01, 0.03, 0.05];
230/// let adjusted = single_statistics::testing::correction::hochberg_correction(&p_values).unwrap();
231/// ```
232pub fn hochberg_correction<T>(p_values: &[T]) -> Result<Vec<T>>
233where
234    T: FloatOps,
235{
236    let n = p_values.len();
237
238    if n == 0 {
239        return Err(anyhow!("Empty p-value array"));
240    }
241
242    let zero = T::zero();
243    let one = T::one();
244
245    // Create index-value pairs and sort by p-value (descending)
246    let mut indexed_p_values: Vec<(usize, T)> =
247        p_values.iter().enumerate().map(|(i, &p)| (i, p)).collect();
248
249    indexed_p_values.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));
250
251    // Calculate adjusted p-values
252    let mut adjusted_p_values = vec![zero; n];
253
254    // Largest p-value remains the same
255    adjusted_p_values[indexed_p_values[0].0] = indexed_p_values[0].1;
256
257    // Process remaining p-values from second-largest to smallest
258    for i in 1..n {
259        let current_index = indexed_p_values[i].0;
260        let prev_index = indexed_p_values[i - 1].0;
261
262        // Convert n and (n - i) to type T
263        let n_t = T::from(n).unwrap_or(one);
264        let denominator_t = T::from(n - i).unwrap_or(one);
265
266        let hochberg_value =
267            num_traits::Float::min(indexed_p_values[i].1 * n_t / denominator_t, one);
268        adjusted_p_values[current_index] =
269            num_traits::Float::min(hochberg_value, adjusted_p_values[prev_index]);
270    }
271
272    Ok(adjusted_p_values)
273}
274
275/// Apply Storey's q-value method for controlling false discovery rate
276///
277/// Storey's q-value method estimates the proportion of true null hypotheses (π0)
278/// and uses this to obtain more powerful FDR control than the BH procedure.
279///
280/// # Arguments
281/// * `p_values` - A slice of p-values to adjust
282/// * `lambda` - Tuning parameter for π0 estimation (between 0 and 1, typically 0.5)
283///
284/// # Returns
285/// * `Result<Vec<f64>>` - Vector of q-values
286///
287/// # Example
288/// ```
289/// let p_values = vec![0.01, 0.03, 0.05];
290/// let qvalues = single_statistics::testing::correction::storey_qvalues(&p_values, 0.5).unwrap();
291/// ```
292pub fn storey_qvalues<T>(p_values: &[T], lambda: T) -> Result<Vec<T>>
293where
294    T: FloatOps,
295{
296    let n = p_values.len();
297
298    if n == 0 {
299        return Err(anyhow!("Empty p-value array"));
300    }
301
302    let zero = T::zero();
303    let one = T::one();
304
305    if lambda <= zero || lambda >= one {
306        return Err(anyhow!("Lambda must be between 0 and 1, got {:?}", lambda));
307    }
308
309    // Validate p-values
310    for (i, &p) in p_values.iter().enumerate() {
311        if p < zero || p > one {
312            return Err(anyhow!("Invalid p-value at index {}: {:?}", i, p));
313        }
314    }
315
316    // Estimate pi0 (proportion of true null hypotheses)
317    let w = p_values.iter().filter(|&&p| p > lambda).count();
318    let w_t = T::from(w).unwrap_or(zero);
319    let n_t = T::from(n).unwrap_or(one);
320
321    let pi0 = w_t / (n_t * (one - lambda));
322    let pi0 = num_traits::Float::min(pi0, one); // Ensure pi0 doesn't exceed 1
323
324    // First apply Benjamini-Hochberg to get base adjusted values
325    let bh_adjusted = benjamini_hochberg_correction(p_values)?;
326
327    // Multiply by pi0 to get q-values
328    let q_values = bh_adjusted
329        .iter()
330        .map(|&p| num_traits::Float::min((p * pi0), one))
331        .collect();
332
333    Ok(q_values)
334}
335
336/// Apply adaptive Storey's q-value method with automatic lambda selection
337///
338/// This version of Storey's method tries multiple lambda values and selects the one
339/// that minimizes the mean-squared error of the π0 estimate.
340///
341/// # Arguments
342/// * `p_values` - A slice of p-values to adjust
343///
344/// # Returns
345/// * `Result<Vec<f64>>` - Vector of q-values
346///
347/// # Example
348/// ```
349/// let p_values = vec![0.01, 0.03, 0.05];
350/// let qvalues = single_statistics::testing::correction::adaptive_storey_qvalues(&p_values).unwrap();
351/// ```
352pub fn adaptive_storey_qvalues<T>(p_values: &[T]) -> Result<Vec<T>>
353where
354    T: FloatOps,
355{
356    let n = p_values.len();
357
358    if n == 0 {
359        return Err(anyhow!("Empty p-value array"));
360    }
361
362    let zero = T::zero();
363    let one = T::one();
364
365    // Validate p-values
366    for (i, &p) in p_values.iter().enumerate() {
367        if p < zero || p > one {
368            return Err(anyhow!("Invalid p-value at index {}: {:?}", i, p));
369        }
370    }
371
372    // Define lambda grid - convert f64 values to type T
373    let lambda_values = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9];
374    let lambda_grid: Vec<T> = lambda_values
375        .iter()
376        .filter_map(|&val| T::from(val))
377        .collect();
378
379    // Calculate pi0 estimates for different lambda values
380    let mut pi0_estimates = Vec::with_capacity(lambda_grid.len());
381    for &lambda in &lambda_grid {
382        let w = p_values.iter().filter(|&&p| p > lambda).count();
383        let w_t = T::from(w as f64).unwrap_or(zero);
384        let n_t = T::from(n as f64).unwrap_or(one);
385
386        let pi0 = w_t / (n_t * (one - lambda));
387        pi0_estimates.push(num_traits::Float::min(pi0, one));
388    }
389
390    // Fit smooth cubic spline (simplified here with linear interpolation to the final value)
391    // In practice, a proper spline fitting would be better
392    let pi0_sum: T = pi0_estimates.iter().copied().sum();
393    let estimates_len = T::from(pi0_estimates.len() as f64).unwrap_or(one);
394    let pi0_mean = pi0_sum / estimates_len;
395
396    let pi0 = if pi0_estimates.is_empty() {
397        one
398    } else {
399        num_traits::Float::min(pi0_mean, one)
400    };
401
402    // First apply Benjamini-Hochberg to get base adjusted values
403    let bh_adjusted = benjamini_hochberg_correction(p_values)?;
404
405    // Multiply by pi0 to get q-values
406    let q_values = bh_adjusted
407        .iter()
408        .map(|&p| num_traits::Float::min((p * pi0), one))
409        .collect();
410
411    Ok(q_values)
412}
413
414#[cfg(test)]
415mod tests {
416    use super::*;
417
418    fn assert_vec_relative_eq(a: &[f64], b: &[f64], epsilon: f64) {
419        assert_eq!(a.len(), b.len(), "Vectors have different lengths");
420        for (i, (x, y)) in a.iter().zip(b.iter()).enumerate() {
421            if (x - y).abs() > epsilon {
422                panic!("Vectors differ at index {}: {} != {}", i, x, y);
423            }
424        }
425    }
426
427    #[test]
428    fn test_bonferroni() {
429        let p_values = vec![0.01, 0.02, 0.03, 0.1, 0.2];
430        let expected = vec![0.05, 0.1, 0.15, 0.5, 1.0];
431        let adjusted = bonferroni_correction(&p_values).unwrap();
432        assert_vec_relative_eq(&adjusted, &expected, 1e-10);
433    }
434
435    use approx::assert_relative_eq;
436
437    #[test]
438    fn test_benjamini_hochberg_empty_input() {
439        // Test with empty input
440        let result = benjamini_hochberg_correction::<f64>(&[]);
441        assert!(result.is_err());
442        assert_eq!(result.unwrap_err().to_string(), "Empty p-value array");
443    }
444
445    #[test]
446    fn test_benjamini_hochberg_invalid_pvalues() {
447        // Test with invalid p-values (negative)
448        let result = benjamini_hochberg_correction(&[0.01, -0.5, 0.03]);
449        assert!(result.is_err());
450        assert!(
451            result
452                .unwrap_err()
453                .to_string()
454                .contains("Invalid p-value at index 1")
455        );
456
457        // Test with invalid p-values (greater than 1)
458        let result = benjamini_hochberg_correction(&[0.01, 1.5, 0.03]);
459        assert!(result.is_err());
460        assert!(
461            result
462                .unwrap_err()
463                .to_string()
464                .contains("Invalid p-value at index 1")
465        );
466    }
467
468    #[test]
469    fn test_benjamini_hochberg_identical_pvalues() {
470        // Test with identical p-values
471        let p_values = vec![0.05, 0.05, 0.05];
472        let expected = vec![0.05, 0.05, 0.05];
473        let adjusted = benjamini_hochberg_correction(&p_values).unwrap();
474
475        for (a, e) in adjusted.iter().zip(expected.iter()) {
476            assert_relative_eq!(*a, *e, epsilon = 1e-10);
477        }
478    }
479
480    #[test]
481    fn test_benjamini_hochberg_ordered_pvalues() {
482        // Test with ordered p-values
483        let p_values = vec![0.01, 0.02, 0.03, 0.04, 0.05];
484        let expected = vec![0.05, 0.05, 0.05, 0.05, 0.05];
485        let adjusted = benjamini_hochberg_correction(&p_values).unwrap();
486
487        for (i, (&a, &e)) in adjusted.iter().zip(expected.iter()).enumerate() {
488            assert_relative_eq!(a, e, epsilon = 1e-10, max_relative = 1e-10);
489            // If assert fails, this message would help identify which element had issues
490            if num_traits::Float::abs(a - e) > 1e-10 {
491                panic!("mismatch at index {}: expected {}, got {}", i, e, a);
492            }
493        }
494    }
495
496    #[test]
497    fn test_benjamini_hochberg_unordered_pvalues() {
498        // Test with unordered p-values
499        let p_values = vec![0.05, 0.01, 0.1, 0.04, 0.02];
500        let expected = vec![0.0625, 0.05, 0.1, 0.0625, 0.05];
501        let adjusted = benjamini_hochberg_correction(&p_values).unwrap();
502
503        for (i, (&a, &e)) in adjusted.iter().zip(expected.iter()).enumerate() {
504            //assert_relative_eq!(*a, *e, epsilon = 1e-3, max_relative = 1e-3);
505            // If assert fails, this message would help identify which element had issues
506            if num_traits::Float::abs(a - e) > 1e-3 {
507                panic!(
508                    "mismatch at index {}: expected {}, got {}, whole: {:?}",
509                    i, e, a, adjusted
510                );
511            }
512        }
513    }
514
515    #[test]
516    fn test_benjamini_hochberg_edge_cases() {
517        // Test with very small p-values
518        let p_values = vec![1e-10, 1e-9, 1e-8];
519        let adjusted = benjamini_hochberg_correction(&p_values).unwrap();
520        // All should be adjusted but still very small
521        assert!(adjusted.iter().all(|&p| p > 0.0 && p < 0.001));
522
523        // Test with p-value of 1.0
524        let p_values = vec![0.1, 0.2, 1.0];
525        let adjusted = benjamini_hochberg_correction(&p_values).unwrap();
526        // The last one should remain 1.0
527        assert_relative_eq!(adjusted[2], 1.0, epsilon = 1e-10);
528    }
529
530    #[test]
531    fn test_benjamini_hochberg_real_example() {
532        // A more realistic example based on common scientific data
533        let pvalues = vec![0.1, 0.2, 0.3, 0.4, 0.1];
534        let expected = [0.25, 0.3333333333333333, 0.375, 0.4, 0.25];
535        let adjusted = benjamini_hochberg_correction(&pvalues).unwrap();
536
537        for (i, (&a, &e)) in adjusted.iter().zip(expected.iter()).enumerate() {
538            assert_relative_eq!(a, e, epsilon = 1e-3, max_relative = 1e-3);
539            // If assert fails, this message would help identify which element had issues
540            if num_traits::Float::abs(a - e) > 1e-3 {
541                panic!("mismatch at index {}: expected {}, got {}", i, e, a);
542            }
543        }
544    }
545
546    #[test]
547    fn test_benjamini_hochberg_single_pvalue() {
548        // Test with a single p-value
549        let p_values = vec![0.025];
550        let adjusted = benjamini_hochberg_correction(&p_values).unwrap();
551        assert_relative_eq!(adjusted[0], 0.025, epsilon = 1e-10);
552    }
553    #[test]
554    fn test_holm_bonferroni() {
555        let p_values = vec![0.01, 0.02, 0.03];
556        let expected = vec![0.03, 0.04, 0.03];
557        let adjusted = holm_bonferroni_correction(&p_values).unwrap();
558        assert_vec_relative_eq(&adjusted, &expected, 1e-10);
559    }
560
561    #[test]
562    fn test_storey_qvalues() {
563        let p_values = vec![0.01, 0.02, 0.03, 0.6, 0.7];
564        let qvalues = storey_qvalues(&p_values, 0.5).unwrap();
565        // Verify qvalues are between 0 and 1
566        for &q in &qvalues {
567            assert!((0.0..=1.0).contains(&q));
568        }
569        // Verify ordering is preserved
570        assert!(qvalues[0] <= qvalues[2]);
571        assert!(qvalues[3] <= qvalues[4]);
572    }
573
574    #[test]
575    fn test_invalid_inputs() {
576        // Empty array
577        assert!(bonferroni_correction::<f32>(&[]).is_err());
578        assert!(benjamini_hochberg_correction::<f32>(&[]).is_err());
579        assert!(holm_bonferroni_correction::<f32>(&[]).is_err());
580
581        // Invalid lambda
582        assert!(storey_qvalues(&[0.5], -0.1).is_err());
583        assert!(storey_qvalues(&[0.5], 1.0).is_err());
584
585        // Invalid p-values
586        let invalid_p = vec![-0.1, 0.5, 1.1];
587        assert!(bonferroni_correction(&invalid_p).is_err());
588        assert!(benjamini_hochberg_correction(&invalid_p).is_err());
589    }
590}