intc/
utils.rs

1use anyhow::{bail, Result};
2use hashbrown::HashMap;
3use ndarray::{Array1, Array2, Axis};
4use std::hash::Hash;
5
6/// Validates the provided token is found one and only once in the gene set
7pub fn validate_token(encode_map: &HashMap<usize, &str>, token: &str) -> Result<usize> {
8    let ntc_index = encode_map
9        .iter()
10        .filter(|(_idx, gene)| gene.contains(token))
11        .map(|(idx, _gene)| *idx)
12        .collect::<Vec<usize>>();
13
14    if ntc_index.len() != 1 {
15        bail!("Multiple potential genes found with provided non-targeting control token")
16    }
17    Ok(ntc_index[0])
18}
19
20/// Select the ranks for a provided embedding. Applies a filter which selects all ranks
21/// for the current gene index
22pub fn select_values(current_idx: usize, encodings: &[usize], values: &Array1<f64>) -> Array1<f64> {
23    encodings
24        .iter()
25        .zip(values.iter())
26        .filter(|(idx, _ranks)| **idx == current_idx)
27        .map(|(_, value)| *value)
28        .collect()
29}
30
/// Collects the gene names stored under indices `0..map.len()` in ascending index
/// order, leaving out the entry at `ntc_index` (the non-targeting control).
///
/// # Panics
/// Panics if any index in `0..map.len()` other than `ntc_index` is missing from
/// `map`, i.e. the keys are expected to be contiguous starting at zero.
pub fn reconstruct_names(map: &HashMap<usize, &str>, ntc_index: usize) -> Vec<String> {
    let total = map.len();
    let mut names = Vec::with_capacity(total);
    for idx in 0..total {
        if idx == ntc_index {
            continue;
        }
        let gene = map.get(&idx).unwrap();
        names.push(gene.to_string());
    }
    names
}
38
/// Generates `n_pseudo` placeholder gene names of the form `pseudogene-<i>`,
/// numbered from zero in ascending order.
pub fn build_pseudo_names(n_pseudo: usize) -> Vec<String> {
    let mut names = Vec::with_capacity(n_pseudo);
    for i in 0..n_pseudo {
        names.push(format!("pseudogene-{}", i));
    }
    names
}
43
44/// Performs an argsort on a 1D ndarray and returns an array of indices
45pub fn argsort<T>(array: &Array1<T>, ascending: bool) -> Vec<usize>
46where
47    T: PartialOrd,
48{
49    let mut indices: Vec<usize> = (0..array.len()).collect();
50    if ascending {
51        indices.sort_by(|&a, &b| array[a].partial_cmp(&array[b]).unwrap());
52    } else {
53        indices.sort_by(|&a, &b| array[b].partial_cmp(&array[a]).unwrap());
54    }
55    indices
56}
57
/// Performs an ascending argsort on a slice and returns the sorting indices.
///
/// The parameter is a slice so that vectors, arrays and sub-slices are all
/// accepted; a `&Vec<T>` argument still works through deref coercion. The sort is
/// stable: equal elements keep their original relative index order.
///
/// # Arguments
/// * `vec` - the values to rank
///
/// # Panics
/// Panics if two elements cannot be compared (`partial_cmp` returns `None`, as
/// happens for a floating point NaN).
pub fn argsort_vec<T>(vec: &[T]) -> Vec<usize>
where
    T: PartialOrd,
{
    let mut indices: Vec<usize> = (0..vec.len()).collect();
    indices.sort_by(|&a, &b| vec[a].partial_cmp(&vec[b]).unwrap());
    indices
}
67
68/// Calculates the diagonal product of fold changes and pvalues
69pub fn diagonal_product(log2_fold_changes: &Array1<f64>, pvalues: &Array1<f64>) -> Array1<f64> {
70    log2_fold_changes * pvalues.mapv(|x| -x.log10())
71}
72
73/// Calculates the diagonal product of fold changes and pvalues in a 2D elementwise context
74pub fn diagonal_product_matrix(
75    log2_fold_changes: &Array2<f64>,
76    pvalues: &Array2<f64>,
77) -> Array2<f64> {
78    log2_fold_changes * pvalues.mapv(|x| -x.log10())
79}
80
81/// Calculates the cumulative sum of an array
82pub fn cumulative_sum(arr: &Array1<f64>) -> Array1<f64> {
83    let mut cumsum = arr.clone();
84    for i in 1..arr.len() {
85        cumsum[i] = cumsum[i - 1] + arr[i];
86    }
87    cumsum
88}
89
/// Recovers the indices of all unique values in a slice and returns a hashmap
/// from each unique value to the positions at which it occurs.
///
/// The positions stored for a value are in ascending order, because the slice is
/// scanned from front to back.
///
/// # Arguments
/// * `vec` - the slice to be searched and hashed
pub fn unique_indices<T: Eq + Hash + Clone>(vec: &[T]) -> HashMap<T, Vec<usize>> {
    let mut map: HashMap<T, Vec<usize>> = HashMap::new();
    for (i, x) in vec.iter().enumerate() {
        // Only build the empty vector the first time a value is seen.
        map.entry(x.clone()).or_insert_with(Vec::new).push(i);
    }
    map
}
101
102pub fn aggregate_fold_changes(
103    gene_names: &[String],
104    fold_changes: &Array1<f64>,
105) -> HashMap<String, f64> {
106    let idx_map = unique_indices(gene_names);
107    idx_map
108        .iter()
109        .map(|(k, v)| {
110            let fc = fold_changes.select(Axis(0), v).mean().unwrap();
111            (k.clone(), fc)
112        })
113        .collect()
114}
115
/// Unit tests for the helpers in this module.
#[cfg(test)]
mod testing {
    use super::{argsort, argsort_vec};
    use hashbrown::HashMap;
    use ndarray::{array, Array1, Axis};
    use ndarray_rand::{rand_distr::Uniform, RandomExt};

    /// Ascending argsort of already-sorted input is the identity permutation.
    #[test]
    fn test_argsort_forward() {
        let array = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let sorted = argsort(&array, true);
        assert_eq!(sorted, vec![0, 1, 2, 3, 4]);
        assert_eq!(
            array.select(Axis(0), &sorted),
            array![1.0, 2.0, 3.0, 4.0, 5.0]
        );
    }

    /// Ascending argsort of reverse-sorted input reverses the indices.
    #[test]
    fn test_argsort_reverse() {
        let array = array![5.0, 4.0, 3.0, 2.0, 1.0];
        let sorted = argsort(&array, true);
        assert_eq!(sorted, vec![4, 3, 2, 1, 0]);
        assert_eq!(
            array.select(Axis(0), &sorted),
            array![1.0, 2.0, 3.0, 4.0, 5.0]
        );
    }

    /// Descending argsort (`ascending = false`) orders values from largest to smallest.
    #[test]
    fn test_argsort_descending() {
        let array = array![2.0, 5.0, 1.0, 4.0, 3.0];
        let sorted = argsort(&array, false);
        assert_eq!(sorted, vec![1, 3, 4, 0, 2]);
        assert_eq!(
            array.select(Axis(0), &sorted),
            array![5.0, 4.0, 3.0, 2.0, 1.0]
        );
    }

    /// Argsort over a plain vector returns ascending-order indices.
    #[test]
    fn test_argsort_vec() {
        let values = vec![3, 1, 2];
        assert_eq!(argsort_vec(&values), vec![1, 2, 0]);
    }

    /// Applying the argsort of a permutation undoes that permutation.
    #[test]
    fn test_reordering() {
        let pvalues = Array1::random(100, Uniform::new(0.0, 1.0));
        let order = argsort(&pvalues, true);
        let reorder = argsort_vec(&order);

        let sorted_pvalues = pvalues.select(Axis(0), &order);
        let resorted_pvalues = sorted_pvalues.select(Axis(0), &reorder);

        assert_ne!(pvalues, sorted_pvalues);
        assert_eq!(pvalues, resorted_pvalues);
    }

    #[test]
    fn test_select_values() {
        let encodings = vec![0, 0, 1, 1, 2, 2];
        let ranks = array![0.1, 0.2, 0.3, 0.4, 0.5, 0.6];
        let selected = super::select_values(1, &encodings, &ranks);
        assert_eq!(selected, array![0.3, 0.4]);
    }

    #[test]
    fn test_validate_token() {
        let mut map = HashMap::new();
        map.insert(0, "gene-0");
        map.insert(1, "gene-1");
        map.insert(2, "gene-2");
        map.insert(3, "gene-3");
        map.insert(4, "gene-4");
        let index = super::validate_token(&map, "gene-2").unwrap();
        assert_eq!(index, 2);
    }

    /// A token contained in several gene names is rejected as ambiguous.
    #[test]
    fn test_validate_token_duplicate() {
        let mut map = HashMap::new();
        map.insert(0, "gene-0");
        map.insert(1, "gene-1");
        map.insert(2, "gene-2");
        map.insert(3, "gene-3");
        map.insert(4, "gene-4");
        let index = super::validate_token(&map, "gene");
        assert!(index.is_err());
    }

    /// A token contained in no gene name is rejected as well.
    #[test]
    fn test_validate_token_missing() {
        let mut map = HashMap::new();
        map.insert(0, "gene-0");
        map.insert(1, "gene-1");
        let index = super::validate_token(&map, "non-targeting");
        assert!(index.is_err());
    }

    #[test]
    fn test_reconstruct_names() {
        let mut map = HashMap::new();
        map.insert(0, "gene-0");
        map.insert(1, "gene-1");
        map.insert(2, "gene-2");
        map.insert(3, "gene-3");
        map.insert(4, "gene-4");
        let names = super::reconstruct_names(&map, 2);
        assert_eq!(names, vec!["gene-0", "gene-1", "gene-3", "gene-4"]);
    }

    #[test]
    fn test_build_pseudo_names() {
        let names = super::build_pseudo_names(5);
        assert_eq!(
            names,
            vec![
                "pseudogene-0",
                "pseudogene-1",
                "pseudogene-2",
                "pseudogene-3",
                "pseudogene-4"
            ]
        );
    }

    #[test]
    fn test_cumulative_sum() {
        let values = array![1.0, 2.0, 3.0, 4.0];
        let cumsum = super::cumulative_sum(&values);
        assert_eq!(cumsum, array![1.0, 3.0, 6.0, 10.0]);
    }

    #[test]
    fn test_unique_indices() {
        let names = vec!["a", "b", "a", "c", "b"];
        let map = super::unique_indices(&names);
        assert_eq!(map.len(), 3);
        assert_eq!(map.get(&"a"), Some(&vec![0, 2]));
        assert_eq!(map.get(&"b"), Some(&vec![1, 4]));
        assert_eq!(map.get(&"c"), Some(&vec![3]));
    }

    #[test]
    fn test_aggregate_fold_changes() {
        let names = vec!["a".to_string(), "b".to_string(), "a".to_string()];
        let fold_changes = array![1.0, 5.0, 3.0];
        let aggregated = super::aggregate_fold_changes(&names, &fold_changes);
        assert_eq!(aggregated.len(), 2);
        assert_eq!(aggregated.get("a"), Some(&2.0));
        assert_eq!(aggregated.get("b"), Some(&5.0));
    }
}