1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
use anyhow::{bail, Result};
use hashbrown::HashMap;
use ndarray::Array1;

/// Validates that the provided token identifies exactly one gene in the gene set.
///
/// A gene is considered a match when its name *contains* `token` as a substring,
/// so a token such as `"gene"` matches `"gene-0"`, `"gene-1"`, and so on.
///
/// # Arguments
/// * `encode_map` - mapping from gene index to gene name
/// * `token` - the non-targeting control token to search for
///
/// # Returns
/// The index of the single gene whose name contains `token`.
///
/// # Errors
/// Returns an error if no gene name contains `token`, or if more than one does.
pub fn validate_token(encode_map: &HashMap<usize, &str>, token: &str) -> Result<usize> {
    let mut matches = encode_map
        .iter()
        .filter(|(_idx, gene)| gene.contains(token))
        .map(|(idx, _gene)| *idx);

    // Only the first two matches matter: that is enough to tell "none",
    // "exactly one" and "more than one" apart without collecting everything.
    match matches.next() {
        None => bail!("No genes found with provided non-targeting control token"),
        Some(_) if matches.next().is_some() => {
            bail!("Multiple potential genes found with provided non-targeting control token")
        }
        Some(idx) => Ok(idx),
    }
}

/// Extracts the ranks that belong to a single gene index.
///
/// `encodings` and `ranks` are walked in lockstep; every rank whose paired
/// encoding equals `current_idx` is kept, in its original order. Pairing stops
/// at the end of the shorter of the two inputs.
///
/// # Arguments
/// * `current_idx` - the gene index to select
/// * `encodings` - the gene index associated with each position
/// * `ranks` - the rank value at each position
///
/// # Returns
/// A new 1D array holding only the selected ranks.
pub fn select_ranks(current_idx: usize, encodings: &[usize], ranks: &Array1<f64>) -> Array1<f64> {
    ranks
        .iter()
        .zip(encodings)
        .filter_map(|(&rank, &gene_idx)| {
            if gene_idx == current_idx {
                Some(rank)
            } else {
                None
            }
        })
        .collect()
}

/// Builds a vector of gene names from the provided map, skipping the
/// non-targeting control index.
///
/// Names are emitted in ascending index order for every index in
/// `0..map.len()` other than `ntc_index`.
///
/// # Arguments
/// * `map` - mapping from gene index to gene name
/// * `ntc_index` - index of the non-targeting control, which is left out
///
/// # Panics
/// Panics if `map` has no entry for some index in `0..map.len()` other than
/// `ntc_index`, i.e. when the keys do not form a dense zero-based range.
pub fn reconstruct_names(map: &HashMap<usize, &str>, ntc_index: usize) -> Vec<String> {
    (0..map.len())
        .filter(|&idx| idx != ntc_index)
        .map(|idx| {
            map.get(&idx)
                // A gap in the keys is a broken invariant of the caller's
                // encoding; report which index is missing instead of a bare unwrap.
                .unwrap_or_else(|| panic!("gene index map has no entry for index {}", idx))
                .to_string()
        })
        .collect()
}

/// Generates `n_pseudo` placeholder gene names.
///
/// The names are `pseudogene-0`, `pseudogene-1`, ... up to
/// `pseudogene-{n_pseudo - 1}`; an empty vector is returned when `n_pseudo` is 0.
pub fn build_pseudo_names(n_pseudo: usize) -> Vec<String> {
    let mut names = Vec::with_capacity(n_pseudo);
    for number in 0..n_pseudo {
        names.push(format!("pseudogene-{}", number));
    }
    names
}

/// Performs an argsort on a 1D ndarray and returns the sorting indices.
///
/// The returned vector `order` satisfies: `array[order[0]] <= array[order[1]] <= ...`.
/// The sort is stable, so equal values keep their original relative order.
/// `NaN` values do not cause a panic; they are treated as equal to each other
/// and greater than every other value, so they end up at the back.
///
/// # Arguments
/// * `array` - the values to rank
///
/// # Returns
/// The indices of `array` in ascending order of their values.
pub fn argsort(array: &Array1<f64>) -> Vec<usize> {
    let mut indices: Vec<usize> = (0..array.len()).collect();
    indices.sort_by(|&a, &b| {
        let (lhs, rhs) = (array[a], array[b]);
        // `partial_cmp` is `None` only when at least one side is NaN; in that
        // case order by NaN-ness so the comparator remains a consistent total order.
        lhs.partial_cmp(&rhs)
            .unwrap_or_else(|| lhs.is_nan().cmp(&rhs.is_nan()))
    });
    indices
}

#[cfg(test)]
mod testing {
    use super::{argsort, build_pseudo_names, reconstruct_names, select_ranks, validate_token};
    use hashbrown::HashMap;
    use ndarray::{array, Axis};

    /// Shared fixture: maps the indices `0..5` to the names `gene-0` .. `gene-4`.
    fn gene_map() -> HashMap<usize, &'static str> {
        ["gene-0", "gene-1", "gene-2", "gene-3", "gene-4"]
            .iter()
            .copied()
            .enumerate()
            .collect()
    }

    /// Already-ascending input yields the identity permutation.
    #[test]
    fn test_argsort_forward() {
        let values = array![1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(argsort(&values), vec![0, 1, 2, 3, 4]);
    }

    /// Descending input yields the reversed permutation.
    #[test]
    fn test_argsort_reverse() {
        let values = array![5.0, 4.0, 3.0, 2.0, 1.0];
        assert_eq!(argsort(&values), vec![4, 3, 2, 1, 0]);
    }

    /// Applying the argsort order of sorted p-values twice leaves a mask unchanged.
    #[test]
    fn test_reordering() {
        let pvalues = array![0.1, 0.2, 0.3, 0.4, 0.5, 0.6];
        let ntc_mask = array![0, 0, 0, 1, 0, 0];
        let order = argsort(&pvalues);
        let once = ntc_mask.select(Axis(0), &order);
        let twice = once.select(Axis(0), &order);
        assert_eq!(twice, ntc_mask);
    }

    /// Only the ranks paired with the requested encoding are kept.
    #[test]
    fn test_select_ranks() {
        let encodings = vec![0, 0, 1, 1, 2, 2];
        let ranks = array![0.1, 0.2, 0.3, 0.4, 0.5, 0.6];
        assert_eq!(select_ranks(1, &encodings, &ranks), array![0.3, 0.4]);
    }

    /// A token matching exactly one gene resolves to that gene's index.
    #[test]
    fn test_validate_token() {
        let genes = gene_map();
        let found = validate_token(&genes, "gene-2").unwrap();
        assert_eq!(found, 2);
    }

    /// A token matching several genes is rejected.
    #[test]
    fn test_validate_token_duplicate() {
        let genes = gene_map();
        assert!(validate_token(&genes, "gene").is_err());
    }

    /// The control index is dropped and the remaining names stay in index order.
    #[test]
    fn test_reconstruct_names() {
        let genes = gene_map();
        let expected = vec!["gene-0", "gene-1", "gene-3", "gene-4"];
        assert_eq!(reconstruct_names(&genes, 2), expected);
    }

    /// Pseudo gene names are numbered from zero.
    #[test]
    fn test_build_pseudo_names() {
        let expected = vec![
            "pseudogene-0",
            "pseudogene-1",
            "pseudogene-2",
            "pseudogene-3",
            "pseudogene-4",
        ];
        assert_eq!(build_pseudo_names(5), expected);
    }
}