single_statistics/testing/inference/mod.rs
1use crate::testing::utils::{extract_unique_groups, get_group_indices};
2use crate::testing::{
3 Alternative, MultipleTestResults, TTestType, TestMethod, TestResult, correction,
4};
5use nalgebra_sparse::CsrMatrix;
6use single_utilities::traits::FloatOpsTS;
7
8pub mod discrete;
9
10pub mod parametric;
11
12pub mod nonparametric;
13
14/// Statistical testing methods for sparse matrices, particularly suited for single-cell data.
15///
16/// This trait extends sparse matrix types (like `CsrMatrix`) with statistical testing capabilities.
17/// It provides methods for differential expression analysis and other statistical comparisons
18/// optimized for single-cell RNA-seq data.
19///
20/// # Matrix Format
21///
22/// The expected matrix format is **genes × cells** (features × observations), where:
23/// - Rows represent genes/features
24/// - Columns represent cells/observations
25/// - Values represent expression levels (normalized counts, log-transformed, etc.)
26///
27
28pub trait MatrixStatTests<T>
29where
30 T: FloatOpsTS,
31{
32 /// Perform t-tests comparing two groups of cells for all genes.
33 ///
34 /// Runs Student's or Welch's t-test for each gene (row) in the matrix, comparing
35 /// expression levels between the specified groups of cells (columns).
36 ///
37 /// # Arguments
38 ///
39 /// * `group1_indices` - Column indices for the first group of cells
40 /// * `group2_indices` - Column indices for the second group of cells
41 /// * `test_type` - Type of t-test (`Student` or `Welch`)
42 ///
43 /// # Returns
44 ///
45 /// Vector of `TestResult` objects, one per gene, containing test statistics and p-values.
46 ///
47
48 fn t_test(
49 &self,
50 group1_indices: &[usize],
51 group2_indices: &[usize],
52 test_type: TTestType,
53 ) -> anyhow::Result<Vec<TestResult<f64>>>;
54
55 /// Perform Mann-Whitney U tests comparing two groups of cells for all genes.
56 ///
57 /// Runs non-parametric Mann-Whitney U (Wilcoxon rank-sum) tests for each gene,
58 /// comparing the distributions between the specified groups.
59 ///
60 /// # Arguments
61 ///
62 /// * `group1_indices` - Column indices for the first group of cells
63 /// * `group2_indices` - Column indices for the second group of cells
64 /// * `alternative` - Type of alternative hypothesis (two-sided, less, greater)
65 ///
66 /// # Returns
67 ///
68 /// Vector of `TestResult` objects containing U statistics and p-values.
69 fn mann_whitney_test(
70 &self,
71 group1_indices: &[usize],
72 group2_indices: &[usize],
73 alternative: Alternative,
74 ) -> anyhow::Result<Vec<TestResult<f64>>>;
75
76 /// Comprehensive differential expression analysis with multiple testing correction.
77 ///
78 /// This is the main method for differential expression analysis. It performs the
79 /// specified statistical test on all genes and applies multiple testing correction
80 /// to control the false discovery rate.
81 ///
82 /// # Arguments
83 ///
84 /// * `group_ids` - Vector assigning each cell to a group (currently supports 2 groups)
85 /// * `test_method` - Statistical test to perform
86 ///
87 /// # Returns
88 ///
89 /// `MultipleTestResults` containing statistics, p-values, adjusted p-values, and
90 /// metadata for all genes.
91 ///
92
93 fn differential_expression(
94 &self,
95 group_ids: &[usize],
96 test_method: TestMethod,
97 ) -> anyhow::Result<MultipleTestResults<f64>>;
98}
99
100impl<T> MatrixStatTests<T> for CsrMatrix<T>
101where
102 T: FloatOpsTS,
103 f64: std::convert::From<T>,
104{
105 fn t_test(
106 &self,
107 group1_indices: &[usize],
108 group2_indices: &[usize],
109 test_type: TTestType,
110 ) -> anyhow::Result<Vec<TestResult<f64>>> {
111 parametric::t_test_matrix_groups(self, group1_indices, group2_indices, test_type)
112 }
113
114 fn mann_whitney_test(
115 &self,
116 group1_indices: &[usize],
117 group2_indices: &[usize],
118 alternative: Alternative,
119 ) -> anyhow::Result<Vec<TestResult<f64>>> {
120 nonparametric::mann_whitney_matrix_groups(self, group1_indices, group2_indices, alternative)
121 }
122
123 fn differential_expression(
124 &self,
125 group_ids: &[usize],
126 test_method: TestMethod,
127 ) -> anyhow::Result<MultipleTestResults<f64>> {
128 let unique_groups = extract_unique_groups(group_ids);
129 if unique_groups.len() != 2 {
130 return Err(anyhow::anyhow!(
131 "Currently only two-group comparisons are supported"
132 ));
133 }
134
135 let (group1_indices, group2_indices) = get_group_indices(group_ids, &unique_groups);
136
137 match test_method {
138 TestMethod::TTest(test_type) => {
139 // Run t-tests
140 let results = self.t_test(&group1_indices, &group2_indices, test_type)?;
141
142 // Extract statistics and p-values
143 let statistics: Vec<_> = results.iter().map(|r| r.statistic).collect();
144 let p_values: Vec<_> = results.iter().map(|r| r.p_value).collect();
145
146 // Apply multiple testing correction
147 let adjusted_p_values = correction::benjamini_hochberg_correction(&p_values)?;
148
149 // Extract effect sizes if available
150 let effect_sizes = results
151 .iter()
152 .map(|r| r.effect_size)
153 .collect::<Option<Vec<_>>>()
154 .unwrap_or_else(|| vec![0.0; results.len()])
155 .into_iter()
156 .filter_map(|x| Option::from(x))
157 .collect::<Vec<_>>();
158
159 let mut result = MultipleTestResults::new(statistics, p_values)
160 .with_adjusted_p_values(adjusted_p_values)
161 .with_global_metadata("test_type", "t_test");
162
163 if !effect_sizes.is_empty() {
164 result = result.with_effect_sizes(effect_sizes);
165 }
166
167 Ok(result)
168 }
169
170 TestMethod::MannWhitney => {
171 // Run Mann-Whitney tests
172 let results = self.mann_whitney_test(
173 &group1_indices,
174 &group2_indices,
175 Alternative::TwoSided,
176 )?;
177
178 // Extract statistics and p-values
179 let statistics: Vec<_> = results.iter().map(|r| r.statistic).collect();
180 let p_values: Vec<_> = results.iter().map(|r| r.p_value).collect();
181
182 // Apply multiple testing correction
183 let adjusted_p_values = correction::benjamini_hochberg_correction(&p_values)?;
184
185 Ok(MultipleTestResults::new(statistics, p_values)
186 .with_adjusted_p_values(adjusted_p_values)
187 .with_global_metadata("test_type", "mann_whitney"))
188 }
189
190 // Implement other test methods similarly
191 _ => Err(anyhow::anyhow!("Test method not implemented yet")),
192 }
193 }
194}