single_statistics/testing/correction/
mod.rs

1//! Multiple testing correction methods for controlling false positives in differential expression analysis.
2//!
3//! When testing thousands of genes simultaneously (as is common in single-cell RNA-seq analysis),
4//! the probability of false positives increases dramatically. These correction methods help control
5//! either the Family-Wise Error Rate (FWER) or False Discovery Rate (FDR).
6//!
7//! ## Available Methods
8//!
9//! - **Bonferroni**: Conservative FWER control, multiplies p-values by number of tests
10//! - **Benjamini-Hochberg**: FDR control, less conservative than Bonferroni
11//! - **Benjamini-Yekutieli**: FDR control for dependent tests
12//! - **Holm-Bonferroni**: Step-down FWER control, less conservative than Bonferroni
13//! - **Storey's q-value**: Improved FDR estimation
14//!
15//! ## Choosing a Method
16//!
17//! - **For single-cell DE analysis**: Use Benjamini-Hochberg (most common)
18//! - **For very strict control**: Use Bonferroni or Holm-Bonferroni
19//! - **For dependent tests**: Use Benjamini-Yekutieli
20//! - **For large datasets**: Consider Storey's q-value
21
22use anyhow::{Result, anyhow};
23use single_utilities::traits::FloatOps;
24use std::cmp::Ordering;
25
26/// Apply Bonferroni correction to p-values
27///
28/// Bonferroni correction is a simple but conservative method that multiplies
29/// each p-value by the number of tests.
30///
31/// # Arguments
32/// * `p_values` - A slice of p-values to adjust
33///
34/// # Returns
35/// * `Result<Vec<f64>>` - Vector of adjusted p-values
36///
37/// # Example
38/// ```
39/// let p_values = vec![0.01, 0.03, 0.05];
40/// let adjusted = single_statistics::testing::correction::bonferroni_correction(&p_values).unwrap();
41/// ```
42pub fn bonferroni_correction<T>(p_values: &[T]) -> Result<Vec<T>>
43where
44    T: FloatOps,
45{
46    let n = p_values.len();
47
48    if n == 0 {
49        return Err(anyhow!("Empty p-value array"));
50    }
51
52    // Validate p-values
53    for (i, &p) in p_values.iter().enumerate() {
54        if !(T::zero()..=T::one()).contains(&p) {
55            return Err(anyhow!("Invalid p-value at index {:?}: {:?}", i, p));
56        }
57    }
58
59    // Multiply each p-value by n, capping at 1.0
60    let n_t = T::from(n).unwrap();
61    let adjusted = p_values
62        .iter()
63        .map(|&p| num_traits::Float::min(p * n_t, T::one()))
64        .collect();
65
66    Ok(adjusted)
67}
68
69/// Apply Benjamini-Hochberg (BH) procedure for controlling false discovery rate
70///
71/// The BH procedure controls the false discovery rate (FDR), which is the expected
72/// proportion of false positives among all rejected null hypotheses.
73///
74/// # Arguments
75/// * `p_values` - A slice of p-values to adjust
76///
77/// # Returns
78/// * `Result<Vec<f64>>` - Vector of adjusted p-values
79///
80/// # Example
81/// ```
82/// let p_values = vec![0.01, 0.03, 0.05];
83/// let adjusted = single_statistics::testing::correction::benjamini_hochberg_correction(&p_values).unwrap();
84/// ```
85pub fn benjamini_hochberg_correction<T>(p_values: &[T]) -> Result<Vec<T>>
86where
87    T: FloatOps,
88{
89    let n = p_values.len();
90    if n == 0 {
91        return Err(anyhow::anyhow!("Empty p-value array"));
92    }
93
94    // Validate p-values
95    for (i, &p) in p_values.iter().enumerate() {
96        if !(T::zero()..=T::one()).contains(&p) {
97            return Err(anyhow::anyhow!("Invalid p-value at index {:?}: {:?}", i, p));
98        }
99    }
100    
101    let n_f = T::from(n).unwrap();
102
103    let mut indices: Vec<usize> = (0..n).collect();
104    indices.sort_unstable_by(|&a, &b| {
105        p_values[a]
106            .partial_cmp(&p_values[b])
107            .unwrap_or(Ordering::Equal)
108    });
109
110    let mut adjusted = vec![T::zero(); n];
111    let mut running_min = T::one();
112
113    for i in (0..n).rev() {
114        let orig_idx = indices[i];
115        let rank = T::from(i + 1).unwrap();
116        let adjustment = num_traits::Float::min(p_values[orig_idx] * n_f / rank, T::one());
117        running_min = num_traits::Float::min(adjustment, running_min);
118        adjusted[orig_idx] = running_min;
119    }
120
121    Ok(adjusted)
122}
123
124/// Apply Benjamini-Yekutieli (BY) procedure for controlling false discovery rate under dependence
125///
126/// The BY procedure is a more conservative variant of the BH procedure that is valid
127/// under arbitrary dependence structures among the tests.
128///
129/// # Arguments
130/// * `p_values` - A slice of p-values to adjust
131///
132/// # Returns
133/// * `Result<Vec<f64>>` - Vector of adjusted p-values
134///
135/// # Example
136/// ```
137/// let p_values = vec![0.01, 0.03, 0.05];
138/// let adjusted = single_statistics::testing::correction::benjamini_yekutieli_correction(&p_values).unwrap();
139/// ```
140pub fn benjamini_yekutieli_correction<T>(p_values: &[T]) -> Result<Vec<T>>
141where
142    T: FloatOps,
143{
144    let n = p_values.len();
145    if n == 0 {
146        return Err(anyhow!("Empty p-value array"));
147    }
148
149    // Calculate the correction factor
150    let c_n: T = (1..=n).map(|i| T::one() / T::from(i).unwrap()).sum();
151
152    // Create index-value pairs and sort by p-value
153    let mut indexed_p_values: Vec<(usize, T)> =
154        p_values.iter().enumerate().map(|(i, &p)| (i, p)).collect();
155
156    // Sort in ascending order
157    indexed_p_values.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(Ordering::Equal));
158
159    // Calculate adjusted p-values with monitoring of minimum value
160    let mut adjusted_p_values = vec![T::zero(); n];
161    let mut current_min = T::one();
162    let n_f64 = T::from(n).unwrap();
163    // Process from largest to smallest p-value
164    for i in (0..n).rev() {
165        let (orig_idx, p_val) = indexed_p_values[i];
166        let rank = i + 1;
167
168        // Calculate adjustment and take minimum of current and previous
169        let adjustment =
170            num_traits::Float::min(p_val * c_n * n_f64 / T::from(rank).unwrap(), T::one());
171        current_min = num_traits::Float::min(adjustment, current_min);
172        adjusted_p_values[orig_idx] = current_min;
173    }
174
175    Ok(adjusted_p_values)
176}
177
178/// Apply Holm-Bonferroni (step-down) method for controlling family-wise error rate
179///
180/// The Holm procedure is a step-down method that controls the family-wise error rate (FWER)
181/// and is uniformly more powerful than the standard Bonferroni correction.
182///
183/// # Arguments
184/// * `p_values` - A slice of p-values to adjust
185///
186/// # Returns
187/// * `Result<Vec<f64>>` - Vector of adjusted p-values
188///
189/// # Example
190/// ```
191/// let p_values = vec![0.01, 0.03, 0.05];
192/// let adjusted = single_statistics::testing::correction::holm_bonferroni_correction(&p_values).unwrap();
193/// ```
194pub fn holm_bonferroni_correction<T>(p_values: &[T]) -> Result<Vec<T>>
195where
196    T: FloatOps,
197{
198    let n = p_values.len();
199
200    if n == 0 {
201        return Err(anyhow!("Empty p-value array"));
202    }
203
204    let zero = T::zero();
205    let one = T::one();
206
207    // Validate p-values
208    for (i, &p) in p_values.iter().enumerate() {
209        if p < zero || p > one {
210            return Err(anyhow!("Invalid p-value at index {}: {:?}", i, p));
211        }
212    }
213
214    // Create index-value pairs and sort by p-value
215    let mut indexed_p_values: Vec<(usize, T)> =
216        p_values.iter().enumerate().map(|(i, &p)| (i, p)).collect();
217
218    indexed_p_values.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(Ordering::Equal));
219
220    // Initialize the result vector
221    let mut adjusted_p_values = vec![zero; n];
222
223    // Calculate adjusted p-values
224    for (i, &(idx, p_val)) in indexed_p_values.iter().enumerate() {
225        // Convert (n - i) to type T
226        let multiplier = T::from((n - i) as f64).unwrap_or(one);
227        let adjusted_p = p_val * multiplier;
228        adjusted_p_values[idx] = num_traits::Float::min(adjusted_p, one);
229    }
230
231    Ok(adjusted_p_values)
232}
233
234/// Apply Hochberg's step-up method for controlling family-wise error rate
235///
236/// Hochberg's procedure is a step-up method that controls the family-wise error rate (FWER)
237/// and is more powerful than Holm's procedure when all tests are independent.
238///
239/// # Arguments
240/// * `p_values` - A slice of p-values to adjust
241///
242/// # Returns
243/// * `Result<Vec<f64>>` - Vector of adjusted p-values
244///
245/// # Example
246/// ```
247/// let p_values = vec![0.01, 0.03, 0.05];
248/// let adjusted = single_statistics::testing::correction::hochberg_correction(&p_values).unwrap();
249/// ```
250pub fn hochberg_correction<T>(p_values: &[T]) -> Result<Vec<T>>
251where
252    T: FloatOps,
253{
254    let n = p_values.len();
255
256    if n == 0 {
257        return Err(anyhow!("Empty p-value array"));
258    }
259
260    let zero = T::zero();
261    let one = T::one();
262
263    // Create index-value pairs and sort by p-value (descending)
264    let mut indexed_p_values: Vec<(usize, T)> =
265        p_values.iter().enumerate().map(|(i, &p)| (i, p)).collect();
266
267    indexed_p_values.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal));
268
269    // Calculate adjusted p-values
270    let mut adjusted_p_values = vec![zero; n];
271
272    // Largest p-value remains the same
273    adjusted_p_values[indexed_p_values[0].0] = indexed_p_values[0].1;
274
275    // Process remaining p-values from second-largest to smallest
276    for i in 1..n {
277        let current_index = indexed_p_values[i].0;
278        let prev_index = indexed_p_values[i - 1].0;
279
280        // Convert n and (n - i) to type T
281        let n_t = T::from(n).unwrap_or(one);
282        let denominator_t = T::from(n - i).unwrap_or(one);
283
284        let hochberg_value =
285            num_traits::Float::min(indexed_p_values[i].1 * n_t / denominator_t, one);
286        adjusted_p_values[current_index] =
287            num_traits::Float::min(hochberg_value, adjusted_p_values[prev_index]);
288    }
289
290    Ok(adjusted_p_values)
291}
292
293/// Apply Storey's q-value method for controlling false discovery rate
294///
295/// Storey's q-value method estimates the proportion of true null hypotheses (π0)
296/// and uses this to obtain more powerful FDR control than the BH procedure.
297///
298/// # Arguments
299/// * `p_values` - A slice of p-values to adjust
300/// * `lambda` - Tuning parameter for π0 estimation (between 0 and 1, typically 0.5)
301///
302/// # Returns
303/// * `Result<Vec<f64>>` - Vector of q-values
304///
305/// # Example
306/// ```
307/// let p_values = vec![0.01, 0.03, 0.05];
308/// let qvalues = single_statistics::testing::correction::storey_qvalues(&p_values, 0.5).unwrap();
309/// ```
310pub fn storey_qvalues<T>(p_values: &[T], lambda: T) -> Result<Vec<T>>
311where
312    T: FloatOps,
313{
314    let n = p_values.len();
315
316    if n == 0 {
317        return Err(anyhow!("Empty p-value array"));
318    }
319
320    let zero = T::zero();
321    let one = T::one();
322
323    if lambda <= zero || lambda >= one {
324        return Err(anyhow!("Lambda must be between 0 and 1, got {:?}", lambda));
325    }
326
327    // Validate p-values
328    for (i, &p) in p_values.iter().enumerate() {
329        if p < zero || p > one {
330            return Err(anyhow!("Invalid p-value at index {}: {:?}", i, p));
331        }
332    }
333
334    // Estimate pi0 (proportion of true null hypotheses)
335    let w = p_values.iter().filter(|&&p| p > lambda).count();
336    let w_t = T::from(w).unwrap_or(zero);
337    let n_t = T::from(n).unwrap_or(one);
338
339    let pi0 = w_t / (n_t * (one - lambda));
340    let pi0 = num_traits::Float::min(pi0, one); // Ensure pi0 doesn't exceed 1
341
342    // First apply Benjamini-Hochberg to get base adjusted values
343    let bh_adjusted = benjamini_hochberg_correction(p_values)?;
344
345    // Multiply by pi0 to get q-values
346    let q_values = bh_adjusted
347        .iter()
348        .map(|&p| num_traits::Float::min(p * pi0, one))
349        .collect();
350
351    Ok(q_values)
352}
353
354/// Apply adaptive Storey's q-value method with automatic lambda selection
355///
356/// This version of Storey's method tries multiple lambda values and selects the one
357/// that minimizes the mean-squared error of the π0 estimate.
358///
359/// # Arguments
360/// * `p_values` - A slice of p-values to adjust
361///
362/// # Returns
363/// * `Result<Vec<f64>>` - Vector of q-values
364///
365/// # Example
366/// ```
367/// let p_values = vec![0.01, 0.03, 0.05];
368/// let qvalues = single_statistics::testing::correction::adaptive_storey_qvalues(&p_values).unwrap();
369/// ```
370pub fn adaptive_storey_qvalues<T>(p_values: &[T]) -> Result<Vec<T>>
371where
372    T: FloatOps,
373{
374    let n = p_values.len();
375
376    if n == 0 {
377        return Err(anyhow!("Empty p-value array"));
378    }
379
380    let zero = T::zero();
381    let one = T::one();
382
383    // Validate p-values
384    for (i, &p) in p_values.iter().enumerate() {
385        if p < zero || p > one {
386            return Err(anyhow!("Invalid p-value at index {}: {:?}", i, p));
387        }
388    }
389
390    // Define lambda grid - convert f64 values to type T
391    let lambda_values = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9];
392    let lambda_grid: Vec<T> = lambda_values
393        .iter()
394        .filter_map(|&val| T::from(val))
395        .collect();
396
397    // Calculate pi0 estimates for different lambda values
398    let mut pi0_estimates = Vec::with_capacity(lambda_grid.len());
399    for &lambda in &lambda_grid {
400        let w = p_values.iter().filter(|&&p| p > lambda).count();
401        let w_t = T::from(w as f64).unwrap_or(zero);
402        let n_t = T::from(n as f64).unwrap_or(one);
403
404        let pi0 = w_t / (n_t * (one - lambda));
405        pi0_estimates.push(num_traits::Float::min(pi0, one));
406    }
407
408    // Fit smooth cubic spline (simplified here with linear interpolation to the final value)
409    // In practice, a proper spline fitting would be better
410    let pi0_sum: T = pi0_estimates.iter().copied().sum();
411    let estimates_len = T::from(pi0_estimates.len() as f64).unwrap_or(one);
412    let pi0_mean = pi0_sum / estimates_len;
413
414    let pi0 = if pi0_estimates.is_empty() {
415        one
416    } else {
417        num_traits::Float::min(pi0_mean, one)
418    };
419
420    // First apply Benjamini-Hochberg to get base adjusted values
421    let bh_adjusted = benjamini_hochberg_correction(p_values)?;
422
423    // Multiply by pi0 to get q-values
424    let q_values = bh_adjusted
425        .iter()
426        .map(|&p| num_traits::Float::min((p * pi0), one))
427        .collect();
428
429    Ok(q_values)
430}
431
432#[cfg(test)]
433mod tests {
434    use super::*;
435
436    fn assert_vec_relative_eq(a: &[f64], b: &[f64], epsilon: f64) {
437        assert_eq!(a.len(), b.len(), "Vectors have different lengths");
438        for (i, (x, y)) in a.iter().zip(b.iter()).enumerate() {
439            if (x - y).abs() > epsilon {
440                panic!("Vectors differ at index {}: {} != {}", i, x, y);
441            }
442        }
443    }
444
445    #[test]
446    fn test_bonferroni() {
447        let p_values = vec![0.01, 0.02, 0.03, 0.1, 0.2];
448        let expected = vec![0.05, 0.1, 0.15, 0.5, 1.0];
449        let adjusted = bonferroni_correction(&p_values).unwrap();
450        assert_vec_relative_eq(&adjusted, &expected, 1e-10);
451    }
452
453    use approx::assert_relative_eq;
454
455    #[test]
456    fn test_benjamini_hochberg_empty_input() {
457        // Test with empty input
458        let result = benjamini_hochberg_correction::<f64>(&[]);
459        assert!(result.is_err());
460        assert_eq!(result.unwrap_err().to_string(), "Empty p-value array");
461    }
462
463    #[test]
464    fn test_benjamini_hochberg_invalid_pvalues() {
465        // Test with invalid p-values (negative)
466        let result = benjamini_hochberg_correction(&[0.01, -0.5, 0.03]);
467        assert!(result.is_err());
468        assert!(
469            result
470                .unwrap_err()
471                .to_string()
472                .contains("Invalid p-value at index 1")
473        );
474
475        // Test with invalid p-values (greater than 1)
476        let result = benjamini_hochberg_correction(&[0.01, 1.5, 0.03]);
477        assert!(result.is_err());
478        assert!(
479            result
480                .unwrap_err()
481                .to_string()
482                .contains("Invalid p-value at index 1")
483        );
484    }
485
486    #[test]
487    fn test_benjamini_hochberg_identical_pvalues() {
488        // Test with identical p-values
489        let p_values = vec![0.05, 0.05, 0.05];
490        let expected = vec![0.05, 0.05, 0.05];
491        let adjusted = benjamini_hochberg_correction(&p_values).unwrap();
492
493        for (a, e) in adjusted.iter().zip(expected.iter()) {
494            assert_relative_eq!(*a, *e, epsilon = 1e-10);
495        }
496    }
497
498    #[test]
499    fn test_benjamini_hochberg_ordered_pvalues() {
500        // Test with ordered p-values
501        let p_values = vec![0.01, 0.02, 0.03, 0.04, 0.05];
502        let expected = vec![0.05, 0.05, 0.05, 0.05, 0.05];
503        let adjusted = benjamini_hochberg_correction(&p_values).unwrap();
504
505        for (i, (&a, &e)) in adjusted.iter().zip(expected.iter()).enumerate() {
506            assert_relative_eq!(a, e, epsilon = 1e-10, max_relative = 1e-10);
507            // If assert fails, this message would help identify which element had issues
508            if num_traits::Float::abs(a - e) > 1e-10 {
509                panic!("mismatch at index {}: expected {}, got {}", i, e, a);
510            }
511        }
512    }
513
514    #[test]
515    fn test_benjamini_hochberg_unordered_pvalues() {
516        // Test with unordered p-values
517        let p_values = vec![0.05, 0.01, 0.1, 0.04, 0.02];
518        let expected = vec![0.0625, 0.05, 0.1, 0.0625, 0.05];
519        let adjusted = benjamini_hochberg_correction(&p_values).unwrap();
520
521        for (i, (&a, &e)) in adjusted.iter().zip(expected.iter()).enumerate() {
522            //assert_relative_eq!(*a, *e, epsilon = 1e-3, max_relative = 1e-3);
523            // If assert fails, this message would help identify which element had issues
524            if num_traits::Float::abs(a - e) > 1e-3 {
525                panic!(
526                    "mismatch at index {}: expected {}, got {}, whole: {:?}",
527                    i, e, a, adjusted
528                );
529            }
530        }
531    }
532
533    #[test]
534    fn test_benjamini_hochberg_edge_cases() {
535        // Test with very small p-values
536        let p_values = vec![1e-10, 1e-9, 1e-8];
537        let adjusted = benjamini_hochberg_correction(&p_values).unwrap();
538        // All should be adjusted but still very small
539        assert!(adjusted.iter().all(|&p| p > 0.0 && p < 0.001));
540
541        // Test with p-value of 1.0
542        let p_values = vec![0.1, 0.2, 1.0];
543        let adjusted = benjamini_hochberg_correction(&p_values).unwrap();
544        // The last one should remain 1.0
545        assert_relative_eq!(adjusted[2], 1.0, epsilon = 1e-10);
546    }
547
548    #[test]
549    fn test_benjamini_hochberg_real_example() {
550        // A more realistic example based on common scientific data
551        let pvalues = vec![0.1, 0.2, 0.3, 0.4, 0.1];
552        let expected = [0.25, 0.3333333333333333, 0.375, 0.4, 0.25];
553        let adjusted = benjamini_hochberg_correction(&pvalues).unwrap();
554
555        for (i, (&a, &e)) in adjusted.iter().zip(expected.iter()).enumerate() {
556            assert_relative_eq!(a, e, epsilon = 1e-3, max_relative = 1e-3);
557            // If assert fails, this message would help identify which element had issues
558            if num_traits::Float::abs(a - e) > 1e-3 {
559                panic!("mismatch at index {}: expected {}, got {}", i, e, a);
560            }
561        }
562    }
563
564    #[test]
565    fn test_benjamini_hochberg_single_pvalue() {
566        // Test with a single p-value
567        let p_values = vec![0.025];
568        let adjusted = benjamini_hochberg_correction(&p_values).unwrap();
569        assert_relative_eq!(adjusted[0], 0.025, epsilon = 1e-10);
570    }
571    #[test]
572    fn test_holm_bonferroni() {
573        let p_values = vec![0.01, 0.02, 0.03];
574        let expected = vec![0.03, 0.04, 0.03];
575        let adjusted = holm_bonferroni_correction(&p_values).unwrap();
576        assert_vec_relative_eq(&adjusted, &expected, 1e-10);
577    }
578
579    #[test]
580    fn test_storey_qvalues() {
581        let p_values = vec![0.01, 0.02, 0.03, 0.6, 0.7];
582        let qvalues = storey_qvalues(&p_values, 0.5).unwrap();
583        // Verify qvalues are between 0 and 1
584        for &q in &qvalues {
585            assert!((0.0..=1.0).contains(&q));
586        }
587        // Verify ordering is preserved
588        assert!(qvalues[0] <= qvalues[2]);
589        assert!(qvalues[3] <= qvalues[4]);
590    }
591
592    #[test]
593    fn test_invalid_inputs() {
594        // Empty array
595        assert!(bonferroni_correction::<f32>(&[]).is_err());
596        assert!(benjamini_hochberg_correction::<f32>(&[]).is_err());
597        assert!(holm_bonferroni_correction::<f32>(&[]).is_err());
598
599        // Invalid lambda
600        assert!(storey_qvalues(&[0.5], -0.1).is_err());
601        assert!(storey_qvalues(&[0.5], 1.0).is_err());
602
603        // Invalid p-values
604        let invalid_p = vec![-0.1, 0.5, 1.1];
605        assert!(bonferroni_correction(&invalid_p).is_err());
606        assert!(benjamini_hochberg_correction(&invalid_p).is_err());
607    }
608}