single_statistics/testing/
mod.rs

1use std::collections::HashMap;
2
3pub mod inference;
4pub mod correction;
5pub mod effect;
6
7pub mod utils;
8
/// Identifies a statistical test method.
///
/// Derives `PartialEq`, `Eq` and `Hash` (in addition to `Debug`, `Clone`,
/// `Copy`) so a method can be compared with `==` and used as a key in hashed
/// collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TestMethod {
    /// t-test; the payload selects the variance assumption (see [`TTestType`]).
    TTest(TTestType),
    /// Mann-Whitney test.
    MannWhitney,
    /// Negative-binomial based test.
    NegativeBinomial,
    /// Zero-inflated model based test.
    ZeroInflated,
}

/// Variance assumption used by a t-test.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TTestType {
    /// Student's t-test: equal variance.
    Student,
    /// Welch's t-test: unequal variance.
    Welch,
}
22
/// Alternative-hypothesis selector for a test.
///
/// How each variant is interpreted is defined by the code that consumes it.
/// Derives `PartialEq`, `Eq` and `Hash` (in addition to `Debug`, `Clone`,
/// `Copy`) so values can be compared with `==` and used as keys in hashed
/// collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Alternative {
    /// Two-sided alternative.
    TwoSided,
    /// One-sided alternative: "less".
    Less,
    /// One-sided alternative: "greater".
    Greater,
}
29
/// Outcome of a single statistical test.
///
/// Only `statistic` and `p_value` are mandatory; every other field starts out
/// empty and can be filled in with the chainable `with_*` methods.
#[derive(Clone, Debug)]
pub struct TestResult {
    /// Value of the test statistic (e.g., t-statistic, U statistic).
    pub statistic: f64,
    /// P-value of the test.
    pub p_value: f64,
    /// Confidence interval for the effect size/difference, when available.
    pub confidence_interval: Option<(f64, f64)>,
    /// Degrees of freedom (for parametric inference).
    pub degrees_of_freedom: Option<f64>,
    /// Effect size measurement.
    pub effect_size: Option<f64>,
    /// Standard error of the effect size or test statistic.
    pub standard_error: Option<f64>,
    /// Extra test-specific values, keyed by name.
    pub metadata: HashMap<String, f64>,
}

impl TestResult {
    /// Builds a result carrying only the statistic and the p-value.
    ///
    /// All optional fields are `None` and the metadata map is empty.
    pub fn new(statistic: f64, p_value: f64) -> Self {
        Self {
            statistic,
            p_value,
            confidence_interval: None,
            degrees_of_freedom: None,
            effect_size: None,
            standard_error: None,
            metadata: HashMap::new(),
        }
    }

    /// Builds a result like [`TestResult::new`], with the effect size already set.
    pub fn with_effect_size(statistic: f64, p_value: f64, effect_size: f64) -> Self {
        let mut result = Self::new(statistic, p_value);
        result.effect_size = Some(effect_size);
        result
    }

    /// Returns the result with its confidence interval set to `(lower, upper)`.
    pub fn with_confidence_interval(self, lower: f64, upper: f64) -> Self {
        Self {
            confidence_interval: Some((lower, upper)),
            ..self
        }
    }

    /// Returns the result with its degrees of freedom set to `df`.
    pub fn with_degrees_of_freedom(self, df: f64) -> Self {
        Self {
            degrees_of_freedom: Some(df),
            ..self
        }
    }

    /// Returns the result with its standard error set to `se`.
    pub fn with_standard_error(self, se: f64) -> Self {
        Self {
            standard_error: Some(se),
            ..self
        }
    }

    /// Returns the result with `value` stored in the metadata under `key`.
    ///
    /// A value previously stored under the same key is replaced.
    pub fn with_metadata(self, key: &str, value: f64) -> Self {
        let mut updated = self;
        updated.metadata.insert(String::from(key), value);
        updated
    }

    /// Reports whether the p-value is strictly below `alpha`.
    ///
    /// A NaN p-value is never significant, because every comparison with NaN
    /// is false.
    pub fn is_significant(&self, alpha: f64) -> bool {
        alpha > self.p_value
    }
}
104
/// Results of running the same test across many features (e.g. genes).
///
/// The per-feature vectors are expected to be index-aligned: position `i` in
/// each of them refers to the same feature. This is not checked here.
#[derive(Debug, Clone)]
pub struct MultipleTestResults {
    /// Test statistics for each feature/gene
    pub statistics: Vec<f64>,
    /// Raw (unadjusted) p-values
    pub p_values: Vec<f64>,
    /// Adjusted p-values (after multiple testing correction)
    pub adjusted_p_values: Option<Vec<f64>>,
    /// Effect sizes (if calculated)
    pub effect_sizes: Option<Vec<f64>>,
    /// Confidence intervals (if calculated)
    pub confidence_intervals: Option<Vec<(f64, f64)>>,
    /// Feature-specific metadata
    pub feature_metadata: Option<Vec<HashMap<String, f64>>>,
    /// Global metadata about the test
    pub global_metadata: HashMap<String, String>,
}

impl MultipleTestResults {
    /// Create a new results object from test statistics and raw p-values.
    ///
    /// All optional fields start as `None` and the global metadata is empty.
    pub fn new(statistics: Vec<f64>, p_values: Vec<f64>) -> Self {
        MultipleTestResults {
            statistics,
            p_values,
            adjusted_p_values: None,
            effect_sizes: None,
            confidence_intervals: None,
            feature_metadata: None,
            global_metadata: HashMap::new(),
        }
    }

    /// Add adjusted p-values to the results.
    ///
    /// Once set, the adjusted values (not the raw ones) drive
    /// [`significant_indices`](Self::significant_indices),
    /// [`num_significant`](Self::num_significant) and
    /// [`top_features`](Self::top_features).
    pub fn with_adjusted_p_values(mut self, adjusted_p_values: Vec<f64>) -> Self {
        self.adjusted_p_values = Some(adjusted_p_values);
        self
    }

    /// Add effect sizes to the results
    pub fn with_effect_sizes(mut self, effect_sizes: Vec<f64>) -> Self {
        self.effect_sizes = Some(effect_sizes);
        self
    }

    /// Add confidence intervals to the results
    pub fn with_confidence_intervals(mut self, confidence_intervals: Vec<(f64, f64)>) -> Self {
        self.confidence_intervals = Some(confidence_intervals);
        self
    }

    /// Add global metadata about the test.
    ///
    /// A value previously stored under the same key is replaced.
    pub fn with_global_metadata(mut self, key: &str, value: &str) -> Self {
        self.global_metadata.insert(key.to_string(), value.to_string());
        self
    }

    /// P-values used for ranking and thresholding: the adjusted ones when
    /// present, otherwise the raw ones.
    fn effective_p_values(&self) -> &[f64] {
        self.adjusted_p_values
            .as_deref()
            .unwrap_or(self.p_values.as_slice())
    }

    /// Get indices of significant features at the given threshold.
    ///
    /// A feature is significant when its (adjusted, if available) p-value is
    /// strictly below `alpha`. NaN p-values are never significant. Indices are
    /// returned in ascending order.
    pub fn significant_indices(&self, alpha: f64) -> Vec<usize> {
        self.effective_p_values()
            .iter()
            .enumerate()
            .filter(|&(_, &p)| p < alpha)
            .map(|(i, _)| i)
            .collect()
    }

    /// Get the number of significant features at the given threshold.
    ///
    /// Same criterion as [`significant_indices`](Self::significant_indices),
    /// but counts directly instead of materialising the index vector.
    pub fn num_significant(&self, alpha: f64) -> usize {
        self.effective_p_values()
            .iter()
            .filter(|&&p| p < alpha)
            .count()
    }

    /// Get top n features by p-value.
    ///
    /// Returns at most `n` feature indices ordered by ascending (adjusted, if
    /// available) p-value. Ties keep their original index order. NaN p-values
    /// are ranked after every real p-value, so an undefined result never
    /// displaces a genuine one.
    pub fn top_features(&self, n: usize) -> Vec<usize> {
        use std::cmp::Ordering;

        let p_values = self.effective_p_values();

        let mut indices: Vec<usize> = (0..p_values.len()).collect();
        // `partial_cmp` alone is not a total order once NaN is involved
        // (NaN would compare "equal" to everything), which leaves the result
        // dependent on input order and is a documented panic condition for
        // `sort_by`. Handling NaN explicitly restores a total order.
        indices.sort_by(|&a, &b| {
            let (pa, pb) = (p_values[a], p_values[b]);
            match (pa.is_nan(), pb.is_nan()) {
                (true, true) => Ordering::Equal,
                (true, false) => Ordering::Greater,
                (false, true) => Ordering::Less,
                (false, false) => pa.partial_cmp(&pb).unwrap_or(Ordering::Equal),
            }
        });
        indices.truncate(n);
        indices
    }
}