1use lazy_static::lazy_static;
2use std::{
3 cell::RefCell,
4 collections::HashMap,
5 fmt::Display,
6 hash::{Hash, Hasher},
7 io::{BufRead, BufReader, Read},
8 sync::RwLock,
9};
10
/// Process-wide table of known chromosomes.
///
/// The three collections are kept in lock-step: the chromosome with id `i`
/// has its name at `chr_name_list[i]`, its (possibly unknown) size at
/// `chr_size_list[i]`, and `name_id_map` maps that name back to `i`.
#[derive(Debug, Default)]
pub struct Genome {
    /// Chromosome names, indexed by chromosome id.
    chr_name_list: Vec<String>,
    /// Chromosome sizes, indexed by chromosome id; `None` while the size is unknown.
    chr_size_list: Vec<Option<usize>>,
    /// Reverse lookup from chromosome name to chromosome id.
    name_id_map: HashMap<String, usize>,
}
17
/// A lightweight, copyable handle to a chromosome.
#[derive(Debug, Clone, Copy)]
pub enum ChrRef<'a> {
    /// A chromosome identified by its numeric id (an index into the genome tables).
    Assigned(usize),
    /// A chromosome known only by a borrowed name; it carries no id.
    Unassigned(&'a str),
    /// Placeholder for "no chromosome"; its name is rendered as `"."`.
    Dummy,
}
24
25impl<'a> Display for ChrRef<'a> {
26 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
27 let name = self.get_chr_name();
28 write!(f, "{}", name)
29 }
30}
31
32impl<'a> PartialEq<str> for ChrRef<'a> {
33 fn eq(&self, other: &str) -> bool {
34 if other == "." {
35 self == &ChrRef::Dummy
36 } else {
37 self == &ChrRef::Unassigned(other)
38 }
39 }
40}
41
42impl<'a> PartialEq<&String> for ChrRef<'a> {
43 fn eq(&self, other: &&String) -> bool {
44 self == other.as_str()
45 }
46}
47
48impl<'a> PartialEq for ChrRef<'a> {
49 fn eq(&self, other: &Self) -> bool {
50 match (self, other) {
51 (Self::Assigned(l0), Self::Assigned(r0)) => l0 == r0,
52 (Self::Unassigned(l0), Self::Unassigned(r0)) => l0 == r0,
53 (Self::Dummy, Self::Dummy) => true,
54 (_, Self::Dummy) => false,
55 (Self::Dummy, _) => false,
56 _ => {
57 let this_str = self.get_chr_name();
58 let that_str = other.get_chr_name();
59 this_str == that_str
60 }
61 }
62 }
63}
64
65impl<'a> PartialOrd for ChrRef<'a> {
66 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
67 if let Some(this_id) = self.id() {
68 if let Some(that_id) = other.id() {
69 return this_id.partial_cmp(&that_id);
70 }
71 }
72 None
73 }
74}
75
76impl<'a> Eq for ChrRef<'a> {}
77
78impl<'a> Ord for ChrRef<'a> {
79 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
80 let this_id = self.get_id_or_update();
81 let that_id = other.get_id_or_update();
82 this_id.cmp(&that_id)
83 }
84}
85
86impl<'a> ChrRef<'a> {
87 pub fn to_static(&self) -> ChrRef<'static> {
88 let id = self.get_id_or_update();
89 if id < usize::MAX {
90 ChrRef::Assigned(id)
91 } else {
92 ChrRef::Dummy
93 }
94 }
95 pub fn get_chr_name(&self) -> &'a str {
96 match self {
97 Self::Unassigned(name) => name,
98 Self::Assigned(id) => {
99 if let Some(name) = LAST_NAME.with(|cached_name| {
100 if let Some(cached_name) = cached_name.borrow().as_ref() {
101 if cached_name.0 == *id {
102 return Some(cached_name.1);
103 }
104 }
105 None
106 }) {
107 return name;
108 }
109
110 let storage = GENOME_STORAGE.read().unwrap();
111
112 let ret = unsafe { std::mem::transmute(storage.chr_name_list[*id].as_str()) };
113
114 LAST_NAME.with(|cached_name| {
115 *cached_name.borrow_mut() = Some((*id, ret));
116 });
117
118 ret
119 }
120 Self::Dummy => ".",
121 }
122 }
123 pub fn id(&self) -> Option<usize> {
124 match self {
125 Self::Unassigned(_) => None,
126 Self::Assigned(id) => Some(*id),
127 Self::Dummy => None,
128 }
129 }
130 pub fn get_id_or_update(&self) -> usize {
131 match self {
132 Self::Unassigned(name) => {
133 let mut storage = GENOME_STORAGE.write().unwrap();
134 let id = storage.chr_name_list.len();
135 storage.name_id_map.insert(name.to_string(), id);
136 storage.chr_name_list.push(name.to_string());
137 storage.chr_size_list.push(None);
138 id
139 }
140 Self::Assigned(id) => *id,
141 _ => usize::MAX,
142 }
143 }
144 pub fn get_chr_size(&self) -> Option<usize> {
145 self.id()
146 .map(|id| {
147 let storage = GENOME_STORAGE.read().unwrap();
148 storage.chr_size_list[id]
149 })
150 .unwrap_or(None)
151 }
152 pub fn verify_size(&self, size: usize) -> bool {
153 Some(size) == self.get_chr_size()
154 }
155 pub fn verify_size_or_update(&self, size: usize) -> bool {
156 if let Some(actual_size) = self.get_chr_size() {
157 return size == actual_size;
158 }
159 let mut storage = GENOME_STORAGE.write().unwrap();
160 storage.chr_size_list[self.get_id_or_update()] = Some(size);
161 true
162 }
163}
164
thread_local! {
    /// Per-thread memo of the last successful name lookup: `(chromosome id, hash of the name)`.
    static LAST_QUERY: RefCell<Option<(usize, u64)>> = RefCell::new(None);
    /// Per-thread memo of the last id-to-name resolution: `(chromosome id, name)`.
    static LAST_NAME: RefCell<Option<(usize, &'static str)>> = RefCell::new(None);
}
169
170impl Genome {
171 pub fn get_chr_by_id(id: usize) -> Option<ChrRef<'static>> {
172 let storage = GENOME_STORAGE.read().unwrap();
173 if storage.chr_name_list.len() > id {
174 Some(ChrRef::Assigned(id))
175 } else {
176 None
177 }
178 }
179 pub fn clear_genome_definition() {
180 let mut storage = GENOME_STORAGE.write().unwrap();
181 LAST_NAME.with(|last_name| {
182 *last_name.borrow_mut() = None;
183 });
184 LAST_QUERY.with(|last_query| {
185 *last_query.borrow_mut() = None;
186 });
187 *storage = Default::default();
188 }
189 pub fn get_chrom_sizes() -> Vec<(&'static str, usize)> {
190 let storage = GENOME_STORAGE.read().unwrap();
191
192 storage
193 .chr_name_list
194 .iter()
195 .zip(storage.chr_size_list.iter())
196 .filter_map(|(name, size)| {
197 let name = name.as_str();
198 let size = size.clone();
199 size.map(|size| {
200 (
201 unsafe { std::mem::transmute::<_, &'static str>(name) },
202 size,
203 )
204 })
205 })
206 .collect()
207 }
208 pub fn query_chr(name: &str) -> ChrRef {
209 let mut hasher = std::collections::hash_map::DefaultHasher::new();
210 name.hash(&mut hasher);
211 let hash = hasher.finish();
212
213 if let Some((id, cached_hash)) = LAST_QUERY.with(|id| id.borrow().clone()) {
214 if hash == cached_hash {
217 return ChrRef::Assigned(id);
218 }
219 }
220
221 let storage = GENOME_STORAGE.read().unwrap();
222 if let Some(id) = storage.name_id_map.get(name) {
223 LAST_QUERY.with(|cache| {
224 *cache.borrow_mut() = Some((*id, hash));
225 });
226 return ChrRef::Assigned(*id);
227 }
228 ChrRef::Unassigned(name)
229 }
230 pub fn load_genome_file<R: Read>(reader: R) -> Result<(), Box<dyn std::error::Error>> {
231 let mut storage = GENOME_STORAGE.write()?;
232 if storage.chr_name_list.len() != 0 {
233 Err(std::io::Error::new(
234 std::io::ErrorKind::Other,
235 "Genome definition has been already loaded",
236 ))?;
237 }
238 let mut br = BufReader::new(reader);
239 let mut buf = String::new();
240 let mut id = 0;
241 while let Ok(sz) = br.read_line(&mut buf) {
242 if sz == 0 {
243 break;
244 }
245
246 let line = buf.trim_end();
247 let mut tokenized = line.split('\t');
248 if let Some(chr_name) = tokenized.next() {
249 if let Some(chr_size_txt) = tokenized.next() {
250 let chr_size: usize = chr_size_txt.parse()?;
251
252 storage.chr_name_list.push(chr_name.to_string());
253 storage.chr_size_list.push(Some(chr_size));
254 storage.name_id_map.insert(chr_name.to_string(), id);
255 }
256 }
257
258 buf.clear();
259 id += 1;
260 }
261 Ok(())
262 }
263}
264
265lazy_static! {
266 static ref GENOME_STORAGE: RwLock<Genome> = {
267 let inner = Default::default();
268 RwLock::new(inner)
269 };
270}