grass_runtime/
genome.rs

1use lazy_static::lazy_static;
2use std::{
3    cell::RefCell,
4    collections::HashMap,
5    fmt::Display,
6    hash::{Hash, Hasher},
7    io::{BufRead, BufReader, Read},
8    sync::RwLock,
9};
10
/// Backing store for the genome definition: chromosome names, their sizes and the
/// name-to-id index. The three collections are kept in step by the code that fills them.
#[derive(Default)]
pub struct Genome {
    /// Chromosome names; a chromosome id is an index into this list.
    chr_name_list: Vec<String>,
    /// Chromosome sizes, indexed by chromosome id; `None` while no size has been recorded.
    chr_size_list: Vec<Option<usize>>,
    /// Lookup table from chromosome name to chromosome id.
    name_id_map: HashMap<String, usize>,
}
17
/// A lightweight, copyable handle to a chromosome.
#[derive(Clone, Copy)]
pub enum ChrRef<'a> {
    /// A chromosome identified by its id (its index in the global genome storage).
    Assigned(usize),
    /// A chromosome known only by name; it carries the borrowed name instead of an id.
    Unassigned(&'a str),
    /// Placeholder for "no chromosome"; its name is rendered as `"."`.
    Dummy,
}
24
25impl<'a> Display for ChrRef<'a> {
26    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
27        let name = self.get_chr_name();
28        write!(f, "{}", name)
29    }
30}
31
32impl<'a> PartialEq<str> for ChrRef<'a> {
33    fn eq(&self, other: &str) -> bool {
34        if other == "." {
35            self == &ChrRef::Dummy
36        } else {
37            self == &ChrRef::Unassigned(other)
38        }
39    }
40}
41
42impl<'a> PartialEq<&String> for ChrRef<'a> {
43    fn eq(&self, other: &&String) -> bool {
44        self == other.as_str()
45    }
46}
47
48impl<'a> PartialEq for ChrRef<'a> {
49    fn eq(&self, other: &Self) -> bool {
50        match (self, other) {
51            (Self::Assigned(l0), Self::Assigned(r0)) => l0 == r0,
52            (Self::Unassigned(l0), Self::Unassigned(r0)) => l0 == r0,
53            (Self::Dummy, Self::Dummy) => true,
54            (_, Self::Dummy) => false,
55            (Self::Dummy, _) => false,
56            _ => {
57                let this_str = self.get_chr_name();
58                let that_str = other.get_chr_name();
59                this_str == that_str
60            }
61        }
62    }
63}
64
65impl<'a> PartialOrd for ChrRef<'a> {
66    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
67        if let Some(this_id) = self.id() {
68            if let Some(that_id) = other.id() {
69                return this_id.partial_cmp(&that_id);
70            }
71        }
72        None
73    }
74}
75
76impl<'a> Eq for ChrRef<'a> {}
77
78impl<'a> Ord for ChrRef<'a> {
79    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
80        let this_id = self.get_id_or_update();
81        let that_id = other.get_id_or_update();
82        this_id.cmp(&that_id)
83    }
84}
85
86impl<'a> ChrRef<'a> {
87    pub fn to_static(&self) -> ChrRef<'static> {
88        let id = self.get_id_or_update();
89        if id < usize::MAX {
90            ChrRef::Assigned(id)
91        } else {
92            ChrRef::Dummy
93        }
94    }
95    pub fn get_chr_name(&self) -> &'a str {
96        match self {
97            Self::Unassigned(name) => name,
98            Self::Assigned(id) => {
99                if let Some(name) = LAST_NAME.with(|cached_name| {
100                    if let Some(cached_name) = cached_name.borrow().as_ref() {
101                        if cached_name.0 == *id {
102                            return Some(cached_name.1);
103                        }
104                    }
105                    None
106                }) {
107                    return name;
108                }
109
110                let storage = GENOME_STORAGE.read().unwrap();
111
112                let ret = unsafe { std::mem::transmute(storage.chr_name_list[*id].as_str()) };
113
114                LAST_NAME.with(|cached_name| {
115                    *cached_name.borrow_mut() = Some((*id, ret));
116                });
117
118                ret
119            }
120            Self::Dummy => ".",
121        }
122    }
123    pub fn id(&self) -> Option<usize> {
124        match self {
125            Self::Unassigned(_) => None,
126            Self::Assigned(id) => Some(*id),
127            Self::Dummy => None,
128        }
129    }
130    pub fn get_id_or_update(&self) -> usize {
131        match self {
132            Self::Unassigned(name) => {
133                let mut storage = GENOME_STORAGE.write().unwrap();
134                let id = storage.chr_name_list.len();
135                storage.name_id_map.insert(name.to_string(), id);
136                storage.chr_name_list.push(name.to_string());
137                storage.chr_size_list.push(None);
138                id
139            }
140            Self::Assigned(id) => *id,
141            _ => usize::MAX,
142        }
143    }
144    pub fn get_chr_size(&self) -> Option<usize> {
145        self.id()
146            .map(|id| {
147                let storage = GENOME_STORAGE.read().unwrap();
148                storage.chr_size_list[id]
149            })
150            .unwrap_or(None)
151    }
152    pub fn verify_size(&self, size: usize) -> bool {
153        Some(size) == self.get_chr_size()
154    }
155    pub fn verify_size_or_update(&self, size: usize) -> bool {
156        if let Some(actual_size) = self.get_chr_size() {
157            return size == actual_size;
158        }
159        let mut storage = GENOME_STORAGE.write().unwrap();
160        storage.chr_size_list[self.get_id_or_update()] = Some(size);
161        true
162    }
163}
164
thread_local! {
    /// Per-thread memo of the most recent successful name lookup, stored as
    /// `(chromosome id, hash of the queried name)`.
    static LAST_QUERY: RefCell<Option<(usize, u64)>> = RefCell::default();
    /// Per-thread memo of the most recent id-to-name resolution, stored as
    /// `(chromosome id, chromosome name)`.
    static LAST_NAME: RefCell<Option<(usize, &'static str)>> = RefCell::default();
}
169
170impl Genome {
171    pub fn get_chr_by_id(id: usize) -> Option<ChrRef<'static>> {
172        let storage = GENOME_STORAGE.read().unwrap();
173        if storage.chr_name_list.len() > id {
174            Some(ChrRef::Assigned(id))
175        } else {
176            None
177        }
178    }
179    pub fn clear_genome_definition() {
180        let mut storage = GENOME_STORAGE.write().unwrap();
181        LAST_NAME.with(|last_name| {
182            *last_name.borrow_mut() = None;
183        });
184        LAST_QUERY.with(|last_query| {
185            *last_query.borrow_mut() = None;
186        });
187        *storage = Default::default();
188    }
189    pub fn get_chrom_sizes() -> Vec<(&'static str, usize)> {
190        let storage = GENOME_STORAGE.read().unwrap();
191
192        storage
193            .chr_name_list
194            .iter()
195            .zip(storage.chr_size_list.iter())
196            .filter_map(|(name, size)| {
197                let name = name.as_str();
198                let size = size.clone();
199                size.map(|size| {
200                    (
201                        unsafe { std::mem::transmute::<_, &'static str>(name) },
202                        size,
203                    )
204                })
205            })
206            .collect()
207    }
208    pub fn query_chr(name: &str) -> ChrRef {
209        let mut hasher = std::collections::hash_map::DefaultHasher::new();
210        name.hash(&mut hasher);
211        let hash = hasher.finish();
212
213        if let Some((id, cached_hash)) = LAST_QUERY.with(|id| id.borrow().clone()) {
214            // Definitely, hash == cached_hash doesn't means it's the same. But in practise, chrom
215            // name's hash code never collides
216            if hash == cached_hash {
217                return ChrRef::Assigned(id);
218            }
219        }
220
221        let storage = GENOME_STORAGE.read().unwrap();
222        if let Some(id) = storage.name_id_map.get(name) {
223            LAST_QUERY.with(|cache| {
224                *cache.borrow_mut() = Some((*id, hash));
225            });
226            return ChrRef::Assigned(*id);
227        }
228        ChrRef::Unassigned(name)
229    }
230    pub fn load_genome_file<R: Read>(reader: R) -> Result<(), Box<dyn std::error::Error>> {
231        let mut storage = GENOME_STORAGE.write()?;
232        if storage.chr_name_list.len() != 0 {
233            Err(std::io::Error::new(
234                std::io::ErrorKind::Other,
235                "Genome definition has been already loaded",
236            ))?;
237        }
238        let mut br = BufReader::new(reader);
239        let mut buf = String::new();
240        let mut id = 0;
241        while let Ok(sz) = br.read_line(&mut buf) {
242            if sz == 0 {
243                break;
244            }
245
246            let line = buf.trim_end();
247            let mut tokenized = line.split('\t');
248            if let Some(chr_name) = tokenized.next() {
249                if let Some(chr_size_txt) = tokenized.next() {
250                    let chr_size: usize = chr_size_txt.parse()?;
251
252                    storage.chr_name_list.push(chr_name.to_string());
253                    storage.chr_size_list.push(Some(chr_size));
254                    storage.name_id_map.insert(chr_name.to_string(), id);
255                }
256            }
257
258            buf.clear();
259            id += 1;
260        }
261        Ok(())
262    }
263}
264
265lazy_static! {
266    static ref GENOME_STORAGE: RwLock<Genome> = {
267        let inner = Default::default();
268        RwLock::new(inner)
269    };
270}