1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
use anyhow::{bail, Result};
use hashbrown::HashMap;
use ndarray::Array1;
/// Finds the single encoding index whose gene name contains `token`.
///
/// Every entry of `encode_map` is scanned and its gene name is tested with a
/// substring match, so `token` does not have to equal the full gene name.
///
/// # Errors
///
/// Returns an error when no gene name contains `token`, and a different error
/// when more than one gene name contains it, so a missing token can be told
/// apart from an ambiguous one.
pub fn validate_token(encode_map: &HashMap<usize, &str>, token: &str) -> Result<usize> {
    let mut matches = encode_map
        .iter()
        .filter(|(_idx, gene)| gene.contains(token))
        .map(|(idx, _gene)| *idx);

    // Only the first two matches are pulled: that is enough to tell
    // "none", "exactly one" and "more than one" apart without collecting.
    match matches.next() {
        None => bail!("No genes found with provided non-targeting control token"),
        Some(_) if matches.next().is_some() => {
            bail!("Multiple potential genes found with provided non-targeting control token")
        }
        Some(index) => Ok(index),
    }
}
pub fn select_ranks(current_idx: usize, encodings: &[usize], ranks: &Array1<f64>) -> Array1<f64> {
encodings
.iter()
.zip(ranks.iter())
.filter(|(idx, _ranks)| **idx == current_idx)
.map(|(_, ranks)| *ranks)
.collect()
}
/// Lists gene names in ascending index order, leaving out the entry at
/// `ntc_index`.
///
/// Indices `0..map.len()` are looked up one by one, so the keys of `map` are
/// expected to form exactly that contiguous range.
///
/// # Panics
///
/// Panics if any index in `0..map.len()` other than `ntc_index` is missing
/// from `map`.
pub fn reconstruct_names(map: &HashMap<usize, &str>, ntc_index: usize) -> Vec<String> {
    let mut names = Vec::new();
    for idx in 0..map.len() {
        if idx == ntc_index {
            continue;
        }
        let name = map.get(&idx).unwrap();
        names.push((*name).to_owned());
    }
    names
}
/// Generates `n_pseudo` placeholder gene names of the form `pseudogene-<i>`,
/// with `<i>` counting up from 0.
pub fn build_pseudo_names(n_pseudo: usize) -> Vec<String> {
    let mut names = Vec::with_capacity(n_pseudo);
    for idx in 0..n_pseudo {
        names.push(format!("pseudogene-{}", idx));
    }
    names
}
/// Returns the indices that would sort `array` in ascending order.
///
/// The sort is stable, so positions holding equal values keep their original
/// relative order. `NaN` values do not cause a panic: they are ordered after
/// every other value, and compare equal to each other.
pub fn argsort(array: &Array1<f64>) -> Vec<usize> {
    let mut indices: Vec<usize> = (0..array.len()).collect();
    indices.sort_by(|&a, &b| {
        let (lhs, rhs) = (array[a], array[b]);
        // `partial_cmp` is `None` only when at least one side is NaN; fall
        // back to comparing the NaN flags so the ordering stays total
        // (non-NaN < NaN, NaN == NaN).
        lhs.partial_cmp(&rhs)
            .unwrap_or_else(|| lhs.is_nan().cmp(&rhs.is_nan()))
    });
    indices
}
#[cfg(test)]
mod testing {
    use super::{argsort, build_pseudo_names, reconstruct_names, select_ranks, validate_token};
    use hashbrown::HashMap;
    use ndarray::{array, Axis};

    /// Shared fixture: indices 0 through 4 mapped to `gene-0` .. `gene-4`.
    fn gene_map() -> HashMap<usize, &'static str> {
        let mut genes = HashMap::new();
        for (idx, name) in ["gene-0", "gene-1", "gene-2", "gene-3", "gene-4"]
            .iter()
            .enumerate()
        {
            genes.insert(idx, *name);
        }
        genes
    }

    /// Already-ascending input yields the identity permutation.
    #[test]
    fn test_argsort_forward() {
        let ascending = array![1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(argsort(&ascending), vec![0, 1, 2, 3, 4]);
    }

    /// Descending input yields the reversed permutation.
    #[test]
    fn test_argsort_reverse() {
        let descending = array![5.0, 4.0, 3.0, 2.0, 1.0];
        assert_eq!(argsort(&descending), vec![4, 3, 2, 1, 0]);
    }

    /// Applying the argsort order twice to a mask gives the mask back.
    /// The p-values here are already sorted, so the order is the identity.
    #[test]
    fn test_reordering() {
        let pvalues = array![0.1, 0.2, 0.3, 0.4, 0.5, 0.6];
        let ntc_mask = array![0, 0, 0, 1, 0, 0];
        let order = argsort(&pvalues);
        let sorted_ntc_mask = ntc_mask.select(Axis(0), &order);
        assert_eq!(sorted_ntc_mask.select(Axis(0), &order), ntc_mask);
    }

    /// Only the ranks paired with encoding `1` are returned.
    #[test]
    fn test_select_ranks() {
        let encodings = vec![0, 0, 1, 1, 2, 2];
        let ranks = array![0.1, 0.2, 0.3, 0.4, 0.5, 0.6];
        assert_eq!(select_ranks(1, &encodings, &ranks), array![0.3, 0.4]);
    }

    /// A token matching exactly one gene resolves to that gene's index.
    #[test]
    fn test_validate_token() {
        let genes = gene_map();
        let index = validate_token(&genes, "gene-2").unwrap();
        assert_eq!(index, 2);
    }

    /// A token matching several genes is rejected.
    #[test]
    fn test_validate_token_duplicate() {
        let genes = gene_map();
        assert!(validate_token(&genes, "gene").is_err());
    }

    /// The name at the excluded index is dropped; the rest stay in index order.
    #[test]
    fn test_reconstruct_names() {
        let genes = gene_map();
        assert_eq!(
            reconstruct_names(&genes, 2),
            vec!["gene-0", "gene-1", "gene-3", "gene-4"]
        );
    }

    /// Pseudo names are numbered from zero.
    #[test]
    fn test_build_pseudo_names() {
        let expected = vec![
            "pseudogene-0",
            "pseudogene-1",
            "pseudogene-2",
            "pseudogene-3",
            "pseudogene-4",
        ];
        assert_eq!(build_pseudo_names(5), expected);
    }
}