// single_statistics/testing/mod.rs

1use single_utilities::traits::{FloatOps, FloatOpsTS};
2use std::collections::HashMap;
3
4pub mod correction;
5pub mod effect;
6pub mod inference;
7
8pub mod utils;
9
/// Statistical test that can be selected for a comparison.
///
/// Derives `PartialEq`, `Eq` and `Hash` so a chosen method can be compared with
/// `==` and used as a key in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TestMethod {
    /// t-test; the payload selects the variance assumption (see [`TTestType`]).
    TTest(TTestType),
    /// Mann-Whitney test.
    MannWhitney,
    /// Negative-binomial based test.
    NegativeBinomial,
    /// Zero-inflated model based test.
    ZeroInflated,
}

/// Variance assumption carried by [`TestMethod::TTest`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TTestType {
    /// Student's t-test: equal variance.
    Student,
    /// Welch's t-test: unequal variance.
    Welch,
}
23
/// Alternative hypothesis selector for a test.
///
/// Derives `PartialEq`, `Eq` and `Hash` so the selected alternative can be
/// compared with `==` and used as a key in hash-based collections.
///
/// NOTE(review): which group or direction `Less` and `Greater` refer to is
/// decided by the test implementations, which are not part of this file —
/// confirm there before relying on a particular convention.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Alternative {
    /// Two-sided alternative.
    TwoSided,
    /// One-sided alternative ("less").
    Less,
    /// One-sided alternative ("greater").
    Greater,
}
30
/// Outcome of a single statistical test.
///
/// Only `statistic` and `p_value` are mandatory; every other field is optional
/// and left empty by the basic constructor. `PartialEq` is derived so results
/// can be compared directly (for example with `assert_eq!` in tests); note that
/// for floating-point `T` a result containing NaN never equals itself.
#[derive(Debug, Clone, PartialEq)]
pub struct TestResult<T> {
    /// The test statistic value (e.g., t-statistic, U statistic).
    pub statistic: T,
    /// The p-value of the test.
    pub p_value: T,
    /// Confidence interval `(lower, upper)` for the effect size/difference, if available.
    pub confidence_interval: Option<(T, T)>,
    /// Degrees of freedom (for parametric inference), if available.
    pub degrees_of_freedom: Option<T>,
    /// Effect size measurement, if available.
    pub effect_size: Option<T>,
    /// Standard error of the effect size or test statistic, if available.
    pub standard_error: Option<T>,
    /// Additional test-specific values, keyed by name.
    pub metadata: HashMap<String, T>,
}
48
49impl<T> TestResult<T>
50where
51    T: FloatOps,
52{
53    /// Create a new test result with minimal information
54    pub fn new(statistic: T, p_value: T) -> Self {
55        TestResult {
56            statistic,
57            p_value,
58            confidence_interval: None,
59            degrees_of_freedom: None,
60            effect_size: None,
61            standard_error: None,
62            metadata: HashMap::new(),
63        }
64    }
65
66    /// Create a new test result with effect size
67    pub fn with_effect_size(statistic: T, p_value: T, effect_size: T) -> Self {
68        TestResult {
69            statistic,
70            p_value,
71            confidence_interval: None,
72            degrees_of_freedom: None,
73            effect_size: Some(effect_size),
74            standard_error: None,
75            metadata: HashMap::new(),
76        }
77    }
78
79    /// Add confidence interval to the result
80    pub fn with_confidence_interval(mut self, lower: T, upper: T) -> Self {
81        self.confidence_interval = Some((lower, upper));
82        self
83    }
84
85    /// Add degrees of freedom to the result
86    pub fn with_degrees_of_freedom(mut self, df: T) -> Self {
87        self.degrees_of_freedom = Some(df);
88        self
89    }
90
91    /// Add standard error to the result
92    pub fn with_standard_error(mut self, se: T) -> Self {
93        self.standard_error = Some(se);
94        self
95    }
96
97    /// Add additional metadata
98    pub fn with_metadata(mut self, key: &str, value: T) -> Self {
99        self.metadata.insert(key.to_string(), value);
100        self
101    }
102
103    /// Check if the result is statistically significant at the given threshold
104    pub fn is_significant(&self, alpha: T) -> bool {
105        self.p_value < alpha
106    }
107}
108
/// Results of running the same test across many features/genes.
///
/// `PartialEq` is derived so result sets can be compared directly (for example
/// with `assert_eq!` in tests).
///
/// NOTE(review): the per-feature vectors are presumably index-aligned (entry
/// `i` of each vector describing feature `i`), but nothing in this type
/// enforces equal lengths — confirm against the code that fills it.
#[derive(Debug, Clone, PartialEq)]
pub struct MultipleTestResults<T> {
    /// Test statistics for each feature/gene.
    pub statistics: Vec<T>,
    /// Raw (unadjusted) p-values.
    pub p_values: Vec<T>,
    /// Adjusted p-values (after multiple testing correction), if computed.
    pub adjusted_p_values: Option<Vec<T>>,
    /// Effect sizes, if calculated.
    pub effect_sizes: Option<Vec<T>>,
    /// Confidence intervals as `(lower, upper)` pairs, if calculated.
    pub confidence_intervals: Option<Vec<(T, T)>>,
    /// Feature-specific metadata, one map per feature, if provided.
    pub feature_metadata: Option<Vec<HashMap<String, T>>>,
    /// Global metadata about the test, as string key/value pairs.
    pub global_metadata: HashMap<String, String>,
}
126
127impl<T> MultipleTestResults<T>
128where
129    T: FloatOps,
130{
131    /// Create a new results object from p-values
132    pub fn new(statistics: Vec<T>, p_values: Vec<T>) -> Self {
133        MultipleTestResults {
134            statistics,
135            p_values,
136            adjusted_p_values: None,
137            effect_sizes: None,
138            confidence_intervals: None,
139            feature_metadata: None,
140            global_metadata: HashMap::new(),
141        }
142    }
143
144    /// Add adjusted p-values to the results
145    pub fn with_adjusted_p_values(mut self, adjusted_p_values: Vec<T>) -> Self {
146        self.adjusted_p_values = Some(adjusted_p_values);
147        self
148    }
149
150    /// Add effect sizes to the results
151    pub fn with_effect_sizes(mut self, effect_sizes: Vec<T>) -> Self {
152        self.effect_sizes = Some(effect_sizes);
153        self
154    }
155
156    /// Add confidence intervals to the results
157    pub fn with_confidence_intervals(mut self, confidence_intervals: Vec<(T, T)>) -> Self {
158        self.confidence_intervals = Some(confidence_intervals);
159        self
160    }
161
162    /// Add global metadata about the test
163    pub fn with_global_metadata(mut self, key: &str, value: &str) -> Self {
164        self.global_metadata
165            .insert(key.to_string(), value.to_string());
166        self
167    }
168
169    /// Get indices of significant features at the given threshold
170    pub fn significant_indices(&self, alpha: T) -> Vec<usize> {
171        match &self.adjusted_p_values {
172            Some(adj_p_values) => adj_p_values
173                .iter()
174                .enumerate()
175                .filter_map(|(i, &p)| if p < alpha { Some(i) } else { None })
176                .collect(),
177            None => self
178                .p_values
179                .iter()
180                .enumerate()
181                .filter_map(|(i, &p)| if p < alpha { Some(i) } else { None })
182                .collect(),
183        }
184    }
185
186    /// Get the number of significant features at the given threshold
187    pub fn num_significant(&self, alpha: T) -> usize {
188        self.significant_indices(alpha).len()
189    }
190
191    /// Get top n features by p-value
192    pub fn top_features(&self, n: usize) -> Vec<usize> {
193        let p_values = match &self.adjusted_p_values {
194            Some(adj_p) => adj_p,
195            None => &self.p_values,
196        };
197
198        let mut indices: Vec<usize> = (0..p_values.len()).collect();
199        indices.sort_by(|&a, &b| {
200            p_values[a]
201                .partial_cmp(&p_values[b])
202                .unwrap_or(std::cmp::Ordering::Equal)
203        });
204        indices.truncate(n);
205        indices
206    }
207}