1use anyhow::{bail, Result};
2use hashbrown::HashMap;
3use ndarray::{Array1, Array2, Axis};
4use std::hash::Hash;
5
6pub fn validate_token(encode_map: &HashMap<usize, &str>, token: &str) -> Result<usize> {
8 let ntc_index = encode_map
9 .iter()
10 .filter(|(_idx, gene)| gene.contains(token))
11 .map(|(idx, _gene)| *idx)
12 .collect::<Vec<usize>>();
13
14 if ntc_index.len() != 1 {
15 bail!("Multiple potential genes found with provided non-targeting control token")
16 }
17 Ok(ntc_index[0])
18}
19
20pub fn select_values(current_idx: usize, encodings: &[usize], values: &Array1<f64>) -> Array1<f64> {
23 encodings
24 .iter()
25 .zip(values.iter())
26 .filter(|(idx, _ranks)| **idx == current_idx)
27 .map(|(_, value)| *value)
28 .collect()
29}
30
/// Returns the names stored under keys `0..map.len()` in ascending key order,
/// leaving out the entry keyed by `ntc_index`.
///
/// # Panics
///
/// Panics if any key in `0..map.len()` other than `ntc_index` is missing from
/// `map`, i.e. when the keys are not the contiguous range starting at zero.
pub fn reconstruct_names(map: &HashMap<usize, &str>, ntc_index: usize) -> Vec<String> {
    let mut names = Vec::new();
    for key in 0..map.len() {
        if key == ntc_index {
            continue;
        }
        let name = map.get(&key).unwrap();
        names.push(name.to_string());
    }
    names
}
38
/// Generates `n_pseudo` placeholder gene names: `pseudogene-0`,
/// `pseudogene-1`, ... up to `pseudogene-{n_pseudo - 1}`.
pub fn build_pseudo_names(n_pseudo: usize) -> Vec<String> {
    let mut names = Vec::with_capacity(n_pseudo);
    for index in 0..n_pseudo {
        names.push(format!("pseudogene-{}", index));
    }
    names
}
43
44pub fn argsort<T>(array: &Array1<T>, ascending: bool) -> Vec<usize>
46where
47 T: PartialOrd,
48{
49 let mut indices: Vec<usize> = (0..array.len()).collect();
50 if ascending {
51 indices.sort_by(|&a, &b| array[a].partial_cmp(&array[b]).unwrap());
52 } else {
53 indices.sort_by(|&a, &b| array[b].partial_cmp(&array[a]).unwrap());
54 }
55 indices
56}
57
/// Returns the indices that would sort `vec` in ascending order.
///
/// Accepts any slice, so `&Vec<T>`, arrays and sub-slices all work. The sort
/// is stable: equal elements keep their original relative order.
///
/// # Panics
///
/// Panics if two elements cannot be compared (`partial_cmp` returns `None`,
/// as it does for a floating-point NaN).
pub fn argsort_vec<T>(vec: &[T]) -> Vec<usize>
where
    T: PartialOrd,
{
    let mut indices: Vec<usize> = (0..vec.len()).collect();
    indices.sort_by(|&a, &b| {
        vec[a]
            .partial_cmp(&vec[b])
            .expect("argsort_vec: elements must be mutually comparable (e.g. no NaN)")
    });
    indices
}
67
68pub fn diagonal_product(log2_fold_changes: &Array1<f64>, pvalues: &Array1<f64>) -> Array1<f64> {
70 log2_fold_changes * pvalues.mapv(|x| -x.log10())
71}
72
73pub fn diagonal_product_matrix(
75 log2_fold_changes: &Array2<f64>,
76 pvalues: &Array2<f64>,
77) -> Array2<f64> {
78 log2_fold_changes * pvalues.mapv(|x| -x.log10())
79}
80
81pub fn cumulative_sum(arr: &Array1<f64>) -> Array1<f64> {
83 let mut cumsum = arr.clone();
84 for i in 1..arr.len() {
85 cumsum[i] = cumsum[i - 1] + arr[i];
86 }
87 cumsum
88}
89
/// Groups the positions of `vec` by value.
///
/// Every distinct element becomes a key, mapped to the positions at which it
/// occurs in `vec`, listed in increasing order.
pub fn unique_indices<T: Eq + Hash + Clone>(vec: &[T]) -> HashMap<T, Vec<usize>> {
    let mut positions: HashMap<T, Vec<usize>> = HashMap::new();
    vec.iter().enumerate().for_each(|(position, item)| {
        positions.entry(item.clone()).or_default().push(position);
    });
    positions
}
101
102pub fn aggregate_fold_changes(
103 gene_names: &[String],
104 fold_changes: &Array1<f64>,
105) -> HashMap<String, f64> {
106 let idx_map = unique_indices(gene_names);
107 idx_map
108 .iter()
109 .map(|(k, v)| {
110 let fc = fold_changes.select(Axis(0), v).mean().unwrap();
111 (k.clone(), fc)
112 })
113 .collect()
114}
115
/// Unit tests for the helper functions in this module.
#[cfg(test)]
mod testing {
    use super::{argsort, argsort_vec};
    use hashbrown::HashMap;
    use ndarray::{array, Array1, Axis};
    use ndarray_rand::{rand_distr::Uniform, RandomExt};

    /// Builds the five-entry `index -> "gene-<index>"` map shared by several tests.
    fn gene_map() -> HashMap<usize, &'static str> {
        let mut map = HashMap::new();
        map.insert(0, "gene-0");
        map.insert(1, "gene-1");
        map.insert(2, "gene-2");
        map.insert(3, "gene-3");
        map.insert(4, "gene-4");
        map
    }

    #[test]
    fn test_argsort_forward() {
        let array = array![1.0, 2.0, 3.0, 4.0, 5.0];
        let sorted = argsort(&array, true);
        assert_eq!(sorted, vec![0, 1, 2, 3, 4]);
        assert_eq!(
            array.select(Axis(0), &sorted),
            array![1.0, 2.0, 3.0, 4.0, 5.0]
        );
    }

    // Ascending argsort of an input that is stored in reverse order.
    #[test]
    fn test_argsort_reverse() {
        let array = array![5.0, 4.0, 3.0, 2.0, 1.0];
        let sorted = argsort(&array, true);
        assert_eq!(sorted, vec![4, 3, 2, 1, 0]);
        assert_eq!(
            array.select(Axis(0), &sorted),
            array![1.0, 2.0, 3.0, 4.0, 5.0]
        );
    }

    // Exercises the `ascending == false` branch.
    #[test]
    fn test_argsort_descending() {
        let array = array![2.0, 5.0, 1.0, 4.0, 3.0];
        let sorted = argsort(&array, false);
        assert_eq!(sorted, vec![1, 3, 4, 0, 2]);
        assert_eq!(
            array.select(Axis(0), &sorted),
            array![5.0, 4.0, 3.0, 2.0, 1.0]
        );
    }

    // Argsorting the sort order yields the permutation that undoes the sort.
    #[test]
    fn test_reordering() {
        let pvalues = Array1::random(100, Uniform::new(0.0, 1.0));
        let order = argsort(&pvalues, true);
        let reorder = argsort_vec(&order);

        let sorted_pvalues = pvalues.select(Axis(0), &order);
        let resorted_pvalues = sorted_pvalues.select(Axis(0), &reorder);

        assert_ne!(pvalues, sorted_pvalues);
        assert_eq!(pvalues, resorted_pvalues);
    }

    #[test]
    fn test_select_values() {
        let encodings = vec![0, 0, 1, 1, 2, 2];
        let ranks = array![0.1, 0.2, 0.3, 0.4, 0.5, 0.6];
        let selected = super::select_values(1, &encodings, &ranks);
        assert_eq!(selected, array![0.3, 0.4]);
    }

    #[test]
    fn test_validate_token() {
        let map = gene_map();
        let index = super::validate_token(&map, "gene-2").unwrap();
        assert_eq!(index, 2);
    }

    // A token contained in several gene names is ambiguous and must be rejected.
    #[test]
    fn test_validate_token_duplicate() {
        let map = gene_map();
        let index = super::validate_token(&map, "gene");
        assert!(index.is_err());
    }

    // A token contained in no gene name must be rejected as well.
    #[test]
    fn test_validate_token_missing() {
        let map = gene_map();
        let index = super::validate_token(&map, "absent");
        assert!(index.is_err());
    }

    #[test]
    fn test_reconstruct_names() {
        let map = gene_map();
        let names = super::reconstruct_names(&map, 2);
        assert_eq!(names, vec!["gene-0", "gene-1", "gene-3", "gene-4"]);
    }

    #[test]
    fn test_build_pseudo_names() {
        let names = super::build_pseudo_names(5);
        assert_eq!(
            names,
            vec![
                "pseudogene-0",
                "pseudogene-1",
                "pseudogene-2",
                "pseudogene-3",
                "pseudogene-4"
            ]
        );
    }

    #[test]
    fn test_cumulative_sum() {
        let values = array![1.0, 2.0, 3.0, 4.0];
        let cumsum = super::cumulative_sum(&values);
        assert_eq!(cumsum, array![1.0, 3.0, 6.0, 10.0]);
    }

    #[test]
    fn test_unique_indices() {
        let groups = super::unique_indices(&[7usize, 9, 7, 3, 9]);
        assert_eq!(groups.len(), 3);
        assert_eq!(groups.get(&7), Some(&vec![0, 2]));
        assert_eq!(groups.get(&9), Some(&vec![1, 4]));
        assert_eq!(groups.get(&3), Some(&vec![3]));
    }
}