single_statistics/testing/inference/
mod.rs

1use crate::testing::utils::{extract_unique_groups, get_group_indices};
2use crate::testing::{
3    Alternative, MultipleTestResults, TTestType, TestMethod, TestResult, correction,
4};
5use nalgebra_sparse::CsrMatrix;
6use single_utilities::traits::FloatOpsTS;
7
8pub mod discrete;
9
10pub mod parametric;
11
12pub mod nonparametric;
13
14/// Statistical testing methods for sparse matrices, particularly suited for single-cell data.
15///
16/// This trait extends sparse matrix types (like `CsrMatrix`) with statistical testing capabilities.
17/// It provides methods for differential expression analysis and other statistical comparisons
18/// optimized for single-cell RNA-seq data.
19///
20/// # Matrix Format
21///
22/// The expected matrix format is **genes × cells** (features × observations), where:
23/// - Rows represent genes/features
24/// - Columns represent cells/observations
25/// - Values represent expression levels (normalized counts, log-transformed, etc.)
26///
27
28pub trait MatrixStatTests<T>
29where
30    T: FloatOpsTS,
31{
32    /// Perform t-tests comparing two groups of cells for all genes.
33    ///
34    /// Runs Student's or Welch's t-test for each gene (row) in the matrix, comparing
35    /// expression levels between the specified groups of cells (columns).
36    ///
37    /// # Arguments
38    ///
39    /// * `group1_indices` - Column indices for the first group of cells
40    /// * `group2_indices` - Column indices for the second group of cells  
41    /// * `test_type` - Type of t-test (`Student` or `Welch`)
42    ///
43    /// # Returns
44    ///
45    /// Vector of `TestResult` objects, one per gene, containing test statistics and p-values.
46    ///
47
48    fn t_test(
49        &self,
50        group1_indices: &[usize],
51        group2_indices: &[usize],
52        test_type: TTestType,
53    ) -> anyhow::Result<Vec<TestResult<f64>>>;
54
55    /// Perform Mann-Whitney U tests comparing two groups of cells for all genes.
56    ///
57    /// Runs non-parametric Mann-Whitney U (Wilcoxon rank-sum) tests for each gene,
58    /// comparing the distributions between the specified groups.
59    ///
60    /// # Arguments
61    ///
62    /// * `group1_indices` - Column indices for the first group of cells
63    /// * `group2_indices` - Column indices for the second group of cells
64    /// * `alternative` - Type of alternative hypothesis (two-sided, less, greater)
65    ///
66    /// # Returns
67    ///
68    /// Vector of `TestResult` objects containing U statistics and p-values.
69    fn mann_whitney_test(
70        &self,
71        group1_indices: &[usize],
72        group2_indices: &[usize],
73        alternative: Alternative,
74    ) -> anyhow::Result<Vec<TestResult<f64>>>;
75
76    /// Comprehensive differential expression analysis with multiple testing correction.
77    ///
78    /// This is the main method for differential expression analysis. It performs the
79    /// specified statistical test on all genes and applies multiple testing correction
80    /// to control the false discovery rate.
81    ///
82    /// # Arguments
83    ///
84    /// * `group_ids` - Vector assigning each cell to a group (currently supports 2 groups)
85    /// * `test_method` - Statistical test to perform
86    ///
87    /// # Returns
88    ///
89    /// `MultipleTestResults` containing statistics, p-values, adjusted p-values, and
90    /// metadata for all genes.
91    ///
92
93    fn differential_expression(
94        &self,
95        group_ids: &[usize],
96        test_method: TestMethod,
97    ) -> anyhow::Result<MultipleTestResults<f64>>;
98}
99
100impl<T> MatrixStatTests<T> for CsrMatrix<T>
101where
102    T: FloatOpsTS,
103    f64: std::convert::From<T>,
104{
105    fn t_test(
106        &self,
107        group1_indices: &[usize],
108        group2_indices: &[usize],
109        test_type: TTestType,
110    ) -> anyhow::Result<Vec<TestResult<f64>>> {
111        parametric::t_test_matrix_groups(self, group1_indices, group2_indices, test_type)
112    }
113
114    fn mann_whitney_test(
115        &self,
116        group1_indices: &[usize],
117        group2_indices: &[usize],
118        alternative: Alternative,
119    ) -> anyhow::Result<Vec<TestResult<f64>>> {
120        nonparametric::mann_whitney_matrix_groups(self, group1_indices, group2_indices, alternative)
121    }
122
123    fn differential_expression(
124        &self,
125        group_ids: &[usize],
126        test_method: TestMethod,
127    ) -> anyhow::Result<MultipleTestResults<f64>> {
128        let unique_groups = extract_unique_groups(group_ids);
129        if unique_groups.len() != 2 {
130            return Err(anyhow::anyhow!(
131                "Currently only two-group comparisons are supported"
132            ));
133        }
134
135        let (group1_indices, group2_indices) = get_group_indices(group_ids, &unique_groups);
136
137        match test_method {
138            TestMethod::TTest(test_type) => {
139                // Run t-tests
140                let results = self.t_test(&group1_indices, &group2_indices, test_type)?;
141
142                // Extract statistics and p-values
143                let statistics: Vec<_> = results.iter().map(|r| r.statistic).collect();
144                let p_values: Vec<_> = results.iter().map(|r| r.p_value).collect();
145
146                // Apply multiple testing correction
147                let adjusted_p_values = correction::benjamini_hochberg_correction(&p_values)?;
148
149                // Extract effect sizes if available
150                let effect_sizes = results
151                    .iter()
152                    .map(|r| r.effect_size)
153                    .collect::<Option<Vec<_>>>()
154                    .unwrap_or_else(|| vec![0.0; results.len()])
155                    .into_iter()
156                    .filter_map(|x| Option::from(x))
157                    .collect::<Vec<_>>();
158
159                let mut result = MultipleTestResults::new(statistics, p_values)
160                    .with_adjusted_p_values(adjusted_p_values)
161                    .with_global_metadata("test_type", "t_test");
162
163                if !effect_sizes.is_empty() {
164                    result = result.with_effect_sizes(effect_sizes);
165                }
166
167                Ok(result)
168            }
169
170            TestMethod::MannWhitney => {
171                // Run Mann-Whitney tests
172                let results = self.mann_whitney_test(
173                    &group1_indices,
174                    &group2_indices,
175                    Alternative::TwoSided,
176                )?;
177
178                // Extract statistics and p-values
179                let statistics: Vec<_> = results.iter().map(|r| r.statistic).collect();
180                let p_values: Vec<_> = results.iter().map(|r| r.p_value).collect();
181
182                // Apply multiple testing correction
183                let adjusted_p_values = correction::benjamini_hochberg_correction(&p_values)?;
184
185                Ok(MultipleTestResults::new(statistics, p_values)
186                    .with_adjusted_p_values(adjusted_p_values)
187                    .with_global_metadata("test_type", "mann_whitney"))
188            }
189
190            // Implement other test methods similarly
191            _ => Err(anyhow::anyhow!("Test method not implemented yet")),
192        }
193    }
194}