single_statistics/testing/correction/
mod.rs

1use anyhow::{Result, anyhow};
2use single_utilities::traits::FloatOps;
3use std::cmp::Ordering;
4
5/// Multiple testing correction methods to control for false positives
6/// when performing many statistical tests simultaneously.
7
8/// Apply Bonferroni correction to p-values
9///
10/// Bonferroni correction is a simple but conservative method that multiplies
11/// each p-value by the number of tests.
12///
13/// # Arguments
14/// * `p_values` - A slice of p-values to adjust
15///
16/// # Returns
17/// * `Result<Vec<f64>>` - Vector of adjusted p-values
18///
19/// # Example
20/// ```
21/// let p_values = vec![0.01, 0.03, 0.05];
22/// let adjusted = single_statistics::testing::correction::bonferroni_correction(&p_values).unwrap();
23/// ```
24pub fn bonferroni_correction<T>(p_values: &[T]) -> Result<Vec<T>>
25where
26T: FloatOps {
27    let n = p_values.len();
28
29    if n == 0 {
30        return Err(anyhow!("Empty p-value array"));
31    }
32
33    // Validate p-values
34    for (i, &p) in p_values.iter().enumerate() {
35        if !(T::zero()..=T::one()).contains(&p) {
36            return Err(anyhow!("Invalid p-value at index {:?}: {:?}", i, p));
37        }
38    }
39
40    // Multiply each p-value by n, capping at 1.0
41    let n_t = T::from(n).unwrap();
42    let adjusted = p_values.iter().map(|&p| num_traits::Float::min((p * n_t), T::one())).collect();
43
44
45
46    Ok(adjusted)
47}
48
49/// Apply Benjamini-Hochberg (BH) procedure for controlling false discovery rate
50///
51/// The BH procedure controls the false discovery rate (FDR), which is the expected
52/// proportion of false positives among all rejected null hypotheses.
53///
54/// # Arguments
55/// * `p_values` - A slice of p-values to adjust
56///
57/// # Returns
58/// * `Result<Vec<f64>>` - Vector of adjusted p-values
59///
60/// # Example
61/// ```
62/// let p_values = vec![0.01, 0.03, 0.05];
63/// let adjusted = single_statistics::testing::correction::benjamini_hochberg_correction(&p_values).unwrap();
64/// ```
65pub fn benjamini_hochberg_correction<T>(p_values: &[T]) -> Result<Vec<T>>
66where
67    T: FloatOps,
68{
69    let n = p_values.len();
70    if n == 0 {
71        return Err(anyhow!("Empty p-value array"));
72    }
73
74    // Validate p-values
75    for (i, &p) in p_values.iter().enumerate() {
76        if !(T::zero()..=T::one()).contains(&p) {
77            return Err(anyhow!("Invalid p-value at index {:?}: {:?}", i, p));
78        }
79    }
80
81    // Create index-value pairs and sort by p-value in ascending order
82    // Create index-value pairs and sort by p-value
83    let mut indexed_p_values: Vec<(usize, T)> =
84        p_values.iter().enumerate().map(|(i, &p)| (i, p)).collect();
85
86    // Sort in ascending order
87    indexed_p_values.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(Ordering::Equal));
88
89    // Calculate adjusted p-values with monitoring of minimum value
90    let mut adjusted_p_values = vec![T::zero(); n];
91    let mut current_min = T::one();
92
93    // Process from largest to smallest p-value
94    let n_t = T::from(n).unwrap();
95    for i in (0..n).rev() {
96        let (orig_idx, p_val) = indexed_p_values[i];
97        let rank = T::from(i + 1).unwrap();
98
99        // Calculate adjustment and take minimum of current and previous
100        let adjustment = num_traits::Float::min((p_val * n_t / rank), T::one());
101        current_min = num_traits::Float::min(adjustment, current_min);
102        adjusted_p_values[orig_idx] = current_min;
103    }
104
105    Ok(adjusted_p_values)
106}
107
108/// Apply Benjamini-Yekutieli (BY) procedure for controlling false discovery rate under dependence
109///
110/// The BY procedure is a more conservative variant of the BH procedure that is valid
111/// under arbitrary dependence structures among the tests.
112///
113/// # Arguments
114/// * `p_values` - A slice of p-values to adjust
115///
116/// # Returns
117/// * `Result<Vec<f64>>` - Vector of adjusted p-values
118///
119/// # Example
120/// ```
121/// let p_values = vec![0.01, 0.03, 0.05];
122/// let adjusted = single_statistics::testing::correction::benjamini_yekutieli_correction(&p_values).unwrap();
123/// ```
124pub fn benjamini_yekutieli_correction<T>(p_values: &[T]) -> Result<Vec<T>>
125where
126    T: FloatOps,
127{
128    let n = p_values.len();
129    if n == 0 {
130        return Err(anyhow!("Empty p-value array"));
131    }
132
133    // Calculate the correction factor
134    let c_n: T = (1..=n).map(|i| T::one() / T::from(i).unwrap()).sum();
135
136    // Create index-value pairs and sort by p-value
137    let mut indexed_p_values: Vec<(usize, T)> =
138        p_values.iter().enumerate().map(|(i, &p)| (i, p)).collect();
139
140    // Sort in ascending order
141    indexed_p_values.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(Ordering::Equal));
142
143    // Calculate adjusted p-values with monitoring of minimum value
144    let mut adjusted_p_values = vec![T::zero(); n];
145    let mut current_min = T::one();
146    let n_f64 = T::from(n).unwrap();
147    // Process from largest to smallest p-value
148    for i in (0..n).rev() {
149        let (orig_idx, p_val) = indexed_p_values[i];
150        let rank = i + 1;
151
152        // Calculate adjustment and take minimum of current and previous
153        let adjustment =
154            num_traits::Float::min((p_val * c_n * n_f64 / T::from(rank).unwrap()), T::one());
155        current_min = num_traits::Float::min(adjustment, current_min);
156        adjusted_p_values[orig_idx] = current_min;
157    }
158
159    Ok(adjusted_p_values)
160}
161
162/// Apply Holm-Bonferroni (step-down) method for controlling family-wise error rate
163///
164/// The Holm procedure is a step-down method that controls the family-wise error rate (FWER)
165/// and is uniformly more powerful than the standard Bonferroni correction.
166///
167/// # Arguments
168/// * `p_values` - A slice of p-values to adjust
169///
170/// # Returns
171/// * `Result<Vec<f64>>` - Vector of adjusted p-values
172///
173/// # Example
174/// ```
175/// let p_values = vec![0.01, 0.03, 0.05];
176/// let adjusted = single_statistics::testing::correction::holm_bonferroni_correction(&p_values).unwrap();
177/// ```
178pub fn holm_bonferroni_correction<T>(p_values: &[T]) -> Result<Vec<T>>
179where
180    T: FloatOps,
181{
182    let n = p_values.len();
183
184    if n == 0 {
185        return Err(anyhow!("Empty p-value array"));
186    }
187
188    let zero = T::zero();
189    let one = T::one();
190
191    // Validate p-values
192    for (i, &p) in p_values.iter().enumerate() {
193        if p < zero || p > one {
194            return Err(anyhow!("Invalid p-value at index {}: {:?}", i, p));
195        }
196    }
197
198    // Create index-value pairs and sort by p-value
199    let mut indexed_p_values: Vec<(usize, T)> =
200        p_values.iter().enumerate().map(|(i, &p)| (i, p)).collect();
201
202    indexed_p_values.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(Ordering::Equal));
203
204    // Initialize the result vector
205    let mut adjusted_p_values = vec![zero; n];
206
207    // Calculate adjusted p-values
208    for (i, &(idx, p_val)) in indexed_p_values.iter().enumerate() {
209        // Convert (n - i) to type T
210        let multiplier = T::from((n - i) as f64).unwrap_or(one);
211        let adjusted_p = p_val * multiplier;
212        adjusted_p_values[idx] = num_traits::Float::min(adjusted_p, one);
213    }
214
215    Ok(adjusted_p_values)
216}
217
218/// Apply Hochberg's step-up method for controlling family-wise error rate
219///
220/// Hochberg's procedure is a step-up method that controls the family-wise error rate (FWER)
221/// and is more powerful than Holm's procedure when all tests are independent.
222///
223/// # Arguments
224/// * `p_values` - A slice of p-values to adjust
225///
226/// # Returns
227/// * `Result<Vec<f64>>` - Vector of adjusted p-values
228///
229/// # Example
230/// ```
231/// let p_values = vec![0.01, 0.03, 0.05];
232/// let adjusted = single_statistics::testing::correction::hochberg_correction(&p_values).unwrap();
233/// ```
234pub fn hochberg_correction<T>(p_values: &[T]) -> Result<Vec<T>>
235where
236    T: FloatOps,
237{
238    let n = p_values.len();
239
240    if n == 0 {
241        return Err(anyhow!("Empty p-value array"));
242    }
243
244    let zero = T::zero();
245    let one = T::one();
246
247    // Create index-value pairs and sort by p-value (descending)
248    let mut indexed_p_values: Vec<(usize, T)> =
249        p_values.iter().enumerate().map(|(i, &p)| (i, p)).collect();
250
251    indexed_p_values.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));
252
253    // Calculate adjusted p-values
254    let mut adjusted_p_values = vec![zero; n];
255
256    // Largest p-value remains the same
257    adjusted_p_values[indexed_p_values[0].0] = indexed_p_values[0].1;
258
259    // Process remaining p-values from second-largest to smallest
260    for i in 1..n {
261        let current_index = indexed_p_values[i].0;
262        let prev_index = indexed_p_values[i - 1].0;
263
264        // Convert n and (n - i) to type T
265        let n_t = T::from(n).unwrap_or(one);
266        let denominator_t = T::from(n - i).unwrap_or(one);
267
268        let hochberg_value =
269            num_traits::Float::min((indexed_p_values[i].1 * n_t / denominator_t), one);
270        adjusted_p_values[current_index] =
271            num_traits::Float::min(hochberg_value, adjusted_p_values[prev_index]);
272    }
273
274    Ok(adjusted_p_values)
275}
276
277/// Apply Storey's q-value method for controlling false discovery rate
278///
279/// Storey's q-value method estimates the proportion of true null hypotheses (π0)
280/// and uses this to obtain more powerful FDR control than the BH procedure.
281///
282/// # Arguments
283/// * `p_values` - A slice of p-values to adjust
284/// * `lambda` - Tuning parameter for π0 estimation (between 0 and 1, typically 0.5)
285///
286/// # Returns
287/// * `Result<Vec<f64>>` - Vector of q-values
288///
289/// # Example
290/// ```
291/// let p_values = vec![0.01, 0.03, 0.05];
292/// let qvalues = single_statistics::testing::correction::storey_qvalues(&p_values, 0.5).unwrap();
293/// ```
294pub fn storey_qvalues<T>(p_values: &[T], lambda: T) -> Result<Vec<T>>
295where
296    T: FloatOps,
297{
298    let n = p_values.len();
299
300    if n == 0 {
301        return Err(anyhow!("Empty p-value array"));
302    }
303
304    let zero = T::zero();
305    let one = T::one();
306
307    if lambda <= zero || lambda >= one {
308        return Err(anyhow!("Lambda must be between 0 and 1, got {:?}", lambda));
309    }
310
311    // Validate p-values
312    for (i, &p) in p_values.iter().enumerate() {
313        if p < zero || p > one {
314            return Err(anyhow!("Invalid p-value at index {}: {:?}", i, p));
315        }
316    }
317
318    // Estimate pi0 (proportion of true null hypotheses)
319    let w = p_values.iter().filter(|&&p| p > lambda).count();
320    let w_t = T::from(w).unwrap_or(zero);
321    let n_t = T::from(n).unwrap_or(one);
322
323    let pi0 = w_t / (n_t * (one - lambda));
324    let pi0 = num_traits::Float::min(pi0, one); // Ensure pi0 doesn't exceed 1
325
326    // First apply Benjamini-Hochberg to get base adjusted values
327    let bh_adjusted = benjamini_hochberg_correction(p_values)?;
328
329    // Multiply by pi0 to get q-values
330    let q_values = bh_adjusted
331        .iter()
332        .map(|&p| num_traits::Float::min((p * pi0), one))
333        .collect();
334
335    Ok(q_values)
336}
337
338/// Apply adaptive Storey's q-value method with automatic lambda selection
339///
340/// This version of Storey's method tries multiple lambda values and selects the one
341/// that minimizes the mean-squared error of the π0 estimate.
342///
343/// # Arguments
344/// * `p_values` - A slice of p-values to adjust
345///
346/// # Returns
347/// * `Result<Vec<f64>>` - Vector of q-values
348///
349/// # Example
350/// ```
351/// let p_values = vec![0.01, 0.03, 0.05];
352/// let qvalues = single_statistics::testing::correction::adaptive_storey_qvalues(&p_values).unwrap();
353/// ```
354pub fn adaptive_storey_qvalues<T>(p_values: &[T]) -> Result<Vec<T>>
355where
356    T: FloatOps,
357{
358    let n = p_values.len();
359
360    if n == 0 {
361        return Err(anyhow!("Empty p-value array"));
362    }
363
364    let zero = T::zero();
365    let one = T::one();
366
367    // Validate p-values
368    for (i, &p) in p_values.iter().enumerate() {
369        if p < zero || p > one {
370            return Err(anyhow!("Invalid p-value at index {}: {:?}", i, p));
371        }
372    }
373
374    // Define lambda grid - convert f64 values to type T
375    let lambda_values = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9];
376    let lambda_grid: Vec<T> = lambda_values
377        .iter()
378        .filter_map(|&val| T::from(val))
379        .collect();
380
381    // Calculate pi0 estimates for different lambda values
382    let mut pi0_estimates = Vec::with_capacity(lambda_grid.len());
383    for &lambda in &lambda_grid {
384        let w = p_values.iter().filter(|&&p| p > lambda).count();
385        let w_t = T::from(w as f64).unwrap_or(zero);
386        let n_t = T::from(n as f64).unwrap_or(one);
387
388        let pi0 = w_t / (n_t * (one - lambda));
389        pi0_estimates.push(num_traits::Float::min(pi0, one));
390    }
391
392    // Fit smooth cubic spline (simplified here with linear interpolation to the final value)
393    // In practice, a proper spline fitting would be better
394    let pi0_sum: T = pi0_estimates.iter().copied().sum();
395    let estimates_len = T::from(pi0_estimates.len() as f64).unwrap_or(one);
396    let pi0_mean = pi0_sum / estimates_len;
397
398    let pi0 = if pi0_estimates.is_empty() {
399        one
400    } else {
401        num_traits::Float::min(pi0_mean, one)
402    };
403
404    // First apply Benjamini-Hochberg to get base adjusted values
405    let bh_adjusted = benjamini_hochberg_correction(p_values)?;
406
407    // Multiply by pi0 to get q-values
408    let q_values = bh_adjusted.iter().map(|&p| num_traits::Float::min((p * pi0), one)).collect();
409
410    Ok(q_values)
411}
412
413#[cfg(test)]
414mod tests {
415    use super::*;
416
417    fn assert_vec_relative_eq(a: &[f64], b: &[f64], epsilon: f64) {
418        assert_eq!(a.len(), b.len(), "Vectors have different lengths");
419        for (i, (x, y)) in a.iter().zip(b.iter()).enumerate() {
420            if (x - y).abs() > epsilon {
421                panic!("Vectors differ at index {}: {} != {}", i, x, y);
422            }
423        }
424    }
425
426    #[test]
427    fn test_bonferroni() {
428        let p_values = vec![0.01, 0.02, 0.03, 0.1, 0.2];
429        let expected = vec![0.05, 0.1, 0.15, 0.5, 1.0];
430        let adjusted = bonferroni_correction(&p_values).unwrap();
431        assert_vec_relative_eq(&adjusted, &expected, 1e-10);
432    }
433
434    use approx::assert_relative_eq;
435
436    #[test]
437    fn test_benjamini_hochberg_empty_input() {
438        // Test with empty input
439        let result = benjamini_hochberg_correction::<f64>(&[]);
440        assert!(result.is_err());
441        assert_eq!(result.unwrap_err().to_string(), "Empty p-value array");
442    }
443
444    #[test]
445    fn test_benjamini_hochberg_invalid_pvalues() {
446        // Test with invalid p-values (negative)
447        let result = benjamini_hochberg_correction(&[0.01, -0.5, 0.03]);
448        assert!(result.is_err());
449        assert!(
450            result
451                .unwrap_err()
452                .to_string()
453                .contains("Invalid p-value at index 1")
454        );
455
456        // Test with invalid p-values (greater than 1)
457        let result = benjamini_hochberg_correction(&[0.01, 1.5, 0.03]);
458        assert!(result.is_err());
459        assert!(
460            result
461                .unwrap_err()
462                .to_string()
463                .contains("Invalid p-value at index 1")
464        );
465    }
466
467    #[test]
468    fn test_benjamini_hochberg_identical_pvalues() {
469        // Test with identical p-values
470        let p_values = vec![0.05, 0.05, 0.05];
471        let expected = vec![0.05, 0.05, 0.05];
472        let adjusted = benjamini_hochberg_correction(&p_values).unwrap();
473
474        for (a, e) in adjusted.iter().zip(expected.iter()) {
475            assert_relative_eq!(*a, *e, epsilon = 1e-10);
476        }
477    }
478
479    #[test]
480    fn test_benjamini_hochberg_ordered_pvalues() {
481        // Test with ordered p-values
482        let p_values = vec![0.01, 0.02, 0.03, 0.04, 0.05];
483        let expected = vec![0.05, 0.05, 0.05, 0.05, 0.05];
484        let adjusted = benjamini_hochberg_correction(&p_values).unwrap();
485
486        for (i, (&a, &e)) in adjusted.iter().zip(expected.iter()).enumerate() {
487            assert_relative_eq!(a, e, epsilon = 1e-10, max_relative = 1e-10);
488            // If assert fails, this message would help identify which element had issues
489            if num_traits::Float::abs(a - e) > 1e-10 {
490                panic!("mismatch at index {}: expected {}, got {}", i, e, a);
491            }
492        }
493    }
494
495    #[test]
496    fn test_benjamini_hochberg_unordered_pvalues() {
497        // Test with unordered p-values
498        let p_values = vec![0.05, 0.01, 0.1, 0.04, 0.02];
499        let expected = vec![0.0625, 0.05, 0.1, 0.0625, 0.05];
500        let adjusted = benjamini_hochberg_correction(&p_values).unwrap();
501
502        for (i, (&a, &e)) in adjusted.iter().zip(expected.iter()).enumerate() {
503            //assert_relative_eq!(*a, *e, epsilon = 1e-3, max_relative = 1e-3);
504            // If assert fails, this message would help identify which element had issues
505            if num_traits::Float::abs(a - e) > 1e-3 {
506                panic!(
507                    "mismatch at index {}: expected {}, got {}, whole: {:?}",
508                    i, e, a, adjusted
509                );
510            }
511        }
512    }
513
514    #[test]
515    fn test_benjamini_hochberg_edge_cases() {
516        // Test with very small p-values
517        let p_values = vec![1e-10, 1e-9, 1e-8];
518        let adjusted = benjamini_hochberg_correction(&p_values).unwrap();
519        // All should be adjusted but still very small
520        assert!(adjusted.iter().all(|&p| p > 0.0 && p < 0.001));
521
522        // Test with p-value of 1.0
523        let p_values = vec![0.1, 0.2, 1.0];
524        let adjusted = benjamini_hochberg_correction(&p_values).unwrap();
525        // The last one should remain 1.0
526        assert_relative_eq!(adjusted[2], 1.0, epsilon = 1e-10);
527    }
528
529    #[test]
530    fn test_benjamini_hochberg_real_example() {
531        // A more realistic example based on common scientific data
532        let pvalues = vec![0.1, 0.2, 0.3, 0.4, 0.1];
533        let expected = [0.25, 0.3333333333333333, 0.375, 0.4, 0.25];
534        let adjusted = benjamini_hochberg_correction(&pvalues).unwrap();
535
536        for (i, (&a, &e)) in adjusted.iter().zip(expected.iter()).enumerate() {
537            assert_relative_eq!(a, e, epsilon = 1e-3, max_relative = 1e-3);
538            // If assert fails, this message would help identify which element had issues
539            if num_traits::Float::abs(a - e) > 1e-3 {
540                panic!("mismatch at index {}: expected {}, got {}", i, e, a);
541            }
542        }
543    }
544
545    #[test]
546    fn test_benjamini_hochberg_single_pvalue() {
547        // Test with a single p-value
548        let p_values = vec![0.025];
549        let adjusted = benjamini_hochberg_correction(&p_values).unwrap();
550        assert_relative_eq!(adjusted[0], 0.025, epsilon = 1e-10);
551    }
552    #[test]
553    fn test_holm_bonferroni() {
554        let p_values = vec![0.01, 0.02, 0.03];
555        let expected = vec![0.03, 0.04, 0.03];
556        let adjusted = holm_bonferroni_correction(&p_values).unwrap();
557        assert_vec_relative_eq(&adjusted, &expected, 1e-10);
558    }
559
560    #[test]
561    fn test_storey_qvalues() {
562        let p_values = vec![0.01, 0.02, 0.03, 0.6, 0.7];
563        let qvalues = storey_qvalues(&p_values, 0.5).unwrap();
564        // Verify qvalues are between 0 and 1
565        for &q in &qvalues {
566            assert!((0.0..=1.0).contains(&q));
567        }
568        // Verify ordering is preserved
569        assert!(qvalues[0] <= qvalues[2]);
570        assert!(qvalues[3] <= qvalues[4]);
571    }
572
573    #[test]
574    fn test_invalid_inputs() {
575        // Empty array
576        assert!(bonferroni_correction::<f32>(&[]).is_err());
577        assert!(benjamini_hochberg_correction::<f32>(&[]).is_err());
578        assert!(holm_bonferroni_correction::<f32>(&[]).is_err());
579
580        // Invalid lambda
581        assert!(storey_qvalues(&[0.5], -0.1).is_err());
582        assert!(storey_qvalues(&[0.5], 1.0).is_err());
583
584        // Invalid p-values
585        let invalid_p = vec![-0.1, 0.5, 1.1];
586        assert!(bonferroni_correction(&invalid_p).is_err());
587        assert!(benjamini_hochberg_correction(&invalid_p).is_err());
588    }
589}