1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
use io_utils::open_for_read;
use std::{assert, format, i32, io::BufRead, str};
use string_utils::TextUtils;
use vector_utils::unique_sort;
/// Load exon records for `species` from an Ensembl release-94 GTF file.
///
/// `species` must be `"human"` or `"mouse"`; it selects which GTF file under
/// the hard-coded annotation root is read.
///
/// `exons` is cleared and then receives one tuple per accepted GTF line:
/// `(chromosome, start - 1, stop, is_forward, gene_name, exon_number)`.
/// A line is accepted only if it has at least nine tab-separated columns, its
/// ninth column has at least six `;`-separated attributes, the fifth attribute
/// contains `exon_number`, and the sixth contains `gene_name`. All other lines
/// are skipped silently. Finally the vector is passed to `unique_sort`
/// (presumably sorting and de-duplicating it in place; see `vector_utils`).
///
/// # Panics
///
/// Panics if `species` is neither `"human"` nor `"mouse"`, or if a line cannot
/// be read from the file. The helpers `open_for_read!`, `between`, and
/// `force_i32` presumably also panic on a missing file, missing quotes, or a
/// non-integer field respectively; confirm against their crates.
pub fn fetch_exons(species: &str, exons: &mut Vec<(String, i32, i32, bool, String, i32)>) {
    assert!(species == "human" || species == "mouse");
    let root = "/mnt/opt/meowmix_git/ensembl/release-94/gtf";
    let gtf = if species == "human" {
        format!(
            "{}/homo_sapiens/Homo_sapiens.GRCh38.94.chr_patch_hapl_scaff.gtf",
            root
        )
    } else {
        format!("{}/mus_musculus/Mus_musculus.GRCm38.94.gtf", root)
    };
    exons.clear();
    // Open the species-specific annotation file selected above.
    let f = open_for_read![&gtf];
    for line in f.lines() {
        let s = line.unwrap();
        let fields: Vec<&str> = s.split_terminator('\t').collect();
        // Lines with fewer than nine columns (e.g. headers) carry no record.
        if fields.len() < 9 {
            continue;
        }
        // The ninth column holds the `;`-separated attribute list.
        let fields8: Vec<&str> = fields[8].split_terminator(';').collect();
        if fields8.len() < 6 {
            continue;
        }
        // Only keep lines whose attributes sit at the expected positions:
        // exon number fifth, gene name sixth.
        if !fields8[4].contains("exon_number") || !fields8[5].contains("gene_name") {
            continue;
        }
        // Attribute values are the text between the double quotes.
        let exon = fields8[4].between("\"", "\"").force_i32();
        let gene = fields8[5].between("\"", "\"");
        let chr = fields[0];
        let (start, stop) = (fields[3].force_i32(), fields[4].force_i32());
        // Any strand value other than "+" is recorded as not-forward.
        let fw = fields[6] == "+";
        // The start is shifted down by one (GTF starts are 1-based, so this
        // presumably yields a zero-based, half-open interval).
        exons.push((chr.to_string(), start - 1, stop, fw, gene.to_string(), exon));
    }
    unique_sort(exons);
}