single_statistics/testing/
utils.rs

1//! Utility functions for statistical testing operations.
2
3use nalgebra_sparse::CsrMatrix;
4use single_utilities::traits::FloatOpsTS;
5
6/// Extract unique group identifiers from a group assignment vector.
7///
8/// Returns a sorted vector of unique group IDs, removing duplicates.
9pub fn extract_unique_groups(group_ids: &[usize]) -> Vec<usize> {
10    let mut unique_groups = group_ids.to_vec();
11    unique_groups.sort();
12    unique_groups.dedup();
13    unique_groups
14}
15
16/// Extract indices of cells belonging to each of the two groups.
17///
18/// Returns a tuple of (group1_indices, group2_indices) where each vector contains
19/// the row/column indices of cells belonging to that group.
20pub fn get_group_indices(group_ids: &[usize], unique_groups: &[usize]) -> (Vec<usize>, Vec<usize>) {
21    let group1 = unique_groups[0];
22    let group2 = unique_groups[1];
23
24    let group1_indices = group_ids.iter()
25        .enumerate()
26        .filter_map(|(i, &g)| if g == group1 { Some(i) } else { None })
27        .collect();
28
29    let group2_indices = group_ids.iter()
30        .enumerate()
31        .filter_map(|(i, &g)| if g == group2 { Some(i) } else { None })
32        .collect();
33
34    (group1_indices, group2_indices)
35}
36
37/// Efficiently compute summary statistics for all genes across two groups of cells.
38///
39/// This function traverses the sparse matrix once and computes sum and sum-of-squares
40/// for each gene within each group, optimized for sparse matrix structures.
41///
42/// # Returns
43///
44/// Tuple of (group1_sums, group1_sum_squares, group2_sums, group2_sum_squares).
45pub(crate) fn accumulate_gene_statistics_two_groups<T>(
46    matrix: &CsrMatrix<T>,
47    group1_indices: &[usize],
48    group2_indices: &[usize],
49) -> anyhow::Result<(Vec<T>, Vec<T>, Vec<T>, Vec<T>)>
50where
51    T: FloatOpsTS,
52{
53    let n_genes = matrix.ncols();
54
55    // Pre-allocate all accumulation vectors
56    let mut group1_sums = vec![T::zero(); n_genes];
57    let mut group1_sum_squares = vec![T::zero(); n_genes];
58    let mut group2_sums = vec![T::zero(); n_genes];
59    let mut group2_sum_squares = vec![T::zero(); n_genes];
60    
61    for &row_idx in group1_indices {
62        let row = matrix.row(row_idx);
63        for (col_idx, &value) in row.col_indices().iter().zip(row.values().iter()) {
64            group1_sums[*col_idx] += value;
65            group1_sum_squares[*col_idx] += value * value;
66        }
67    }
68    
69    for &row_idx in group2_indices {
70        let row = matrix.row(row_idx);
71        for (col_idx, &value) in row.col_indices().iter().zip(row.values().iter()) {
72            group2_sums[*col_idx] += value;
73            group2_sum_squares[*col_idx] += value * value;
74        }
75    }
76
77    Ok((group1_sums, group1_sum_squares, group2_sums, group2_sum_squares))
78}
79
80pub(crate) fn extract_gene_values_optimized<T>(
81    matrix: &CsrMatrix<T>,
82    gene_idx: usize,
83    group1_indices: &[usize],
84    group2_indices: &[usize],
85) -> (Vec<T>, Vec<T>)
86where
87    T: FloatOpsTS,
88{
89    // Pre-allocate with known capacity
90    let mut group1_values = Vec::with_capacity(group1_indices.len());
91    let mut group2_values = Vec::with_capacity(group2_indices.len());
92
93    // Extract values efficiently
94    for &row_idx in group1_indices {
95        let value = matrix.get_entry(row_idx, gene_idx)
96            .map(|entry| entry.into_value())
97            .unwrap_or(T::zero());
98        group1_values.push(value);
99    }
100
101    for &row_idx in group2_indices {
102        let value = matrix.get_entry(row_idx, gene_idx)
103            .map(|entry| entry.into_value())
104            .unwrap_or(T::zero());
105        group2_values.push(value);
106    }
107
108    (group1_values, group2_values)
109}