1use std::collections::HashMap;
29use std::hash::Hash;
30
31use crate::data_structures::interval_tree;
32use crate::data_structures::interval_tree::{IntervalTree, IntervalTreeIterator};
33use crate::utils::Interval;
34use bio_types::annot::loc::Loc;
35
36#[derive(Clone, Eq, PartialEq, Debug, Serialize, Deserialize)]
51pub struct AnnotMap<R, T>
52where
53 R: Hash + Eq,
54{
55 refid_itrees: HashMap<R, IntervalTree<isize, T>>,
56}
57
58impl<R, T> Default for AnnotMap<R, T>
59where
60 R: Eq + Hash,
61{
62 fn default() -> Self {
63 AnnotMap {
64 refid_itrees: HashMap::new(),
65 }
66 }
67}
68
69impl<R, T> AnnotMap<R, T>
70where
71 R: Eq + Hash,
72{
73 pub fn new() -> Self {
76 Default::default()
77 }
78
79 pub fn insert_at<L>(&mut self, data: T, location: &L)
105 where
106 R: Eq + Hash + Clone,
107 L: Loc<RefID = R>,
108 {
109 let itree = self
110 .refid_itrees
111 .entry(location.refid().clone())
112 .or_default();
113 let rng = location.start()..(location.start() + (location.length() as isize));
114 itree.insert(rng, data);
115 }
116
117 pub fn find<'a, L>(&'a self, location: &'a L) -> AnnotMapIterator<'a, R, T>
120 where
121 L: Loc<RefID = R>,
122 {
123 if let Some(itree) = self.refid_itrees.get(location.refid()) {
124 let interval = location.start()..(location.start() + (location.length() as isize));
125 let itree_iter = itree.find(interval);
126 AnnotMapIterator {
127 itree_iter: Some(itree_iter),
128 refid: location.refid(),
129 }
130 } else {
131 AnnotMapIterator {
132 itree_iter: None,
133 refid: location.refid(),
134 }
135 }
136 }
137}
138
139impl<R, T> AnnotMap<R, T>
140where
141 R: Eq + Hash + Clone,
142 T: Loc<RefID = R>,
143{
144 pub fn insert_loc(&mut self, data: T) {
173 let itree = self.refid_itrees.entry(data.refid().clone()).or_default();
174 let rng = data.start()..(data.start() + (data.length() as isize));
175 itree.insert(rng, data);
176 }
177}
178
179#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, Serialize)]
181pub struct Entry<'a, R, T>
182where
183 R: Eq + Hash,
184{
185 itree_entry: interval_tree::Entry<'a, isize, T>,
186 refid: &'a R,
187}
188
189impl<'a, R, T> Entry<'a, R, T>
190where
191 R: Eq + Hash,
192{
193 pub fn data(&self) -> &'a T {
195 self.itree_entry.data()
196 }
197
198 pub fn interval(&self) -> &'a Interval<isize> {
200 self.itree_entry.interval()
201 }
202
203 pub fn refid(&self) -> &'a R {
205 self.refid
206 }
207}
208
209#[derive(Clone, Eq, PartialEq, Hash, Debug, Serialize)]
214pub struct AnnotMapIterator<'a, R, T>
215where
216 R: Eq + Hash,
217{
218 itree_iter: Option<IntervalTreeIterator<'a, isize, T>>,
219 refid: &'a R,
220}
221
222impl<'a, R, T> Iterator for AnnotMapIterator<'a, R, T>
223where
224 R: 'a + Eq + Hash,
225 T: 'a,
226{
227 type Item = Entry<'a, R, T>;
228
229 fn next(&mut self) -> Option<Self::Item> {
230 match self.itree_iter {
231 Some(ref mut iter) => match iter.next() {
232 Some(next_itree) => Some(Entry {
233 itree_entry: next_itree,
234 refid: self.refid,
235 }),
236 None => None,
237 },
238 None => None,
239 }
240 }
241}
242
#[cfg(test)]
mod tests {
    use super::*;

    use bio_types::annot::contig::Contig;
    use bio_types::strand::ReqStrand;

    /// Builds a forward-strand contig on `refid` starting at `start` and
    /// spanning `length` positions.
    fn forward(refid: &str, start: isize, length: usize) -> Contig<String, ReqStrand> {
        Contig::new(refid.to_owned(), start, length, ReqStrand::Forward)
    }

    /// Collects the data of every entry `find` yields for `query`, in
    /// iteration order.
    fn hits<'a>(
        genes: &'a AnnotMap<String, String>,
        query: &'a Contig<String, ReqStrand>,
    ) -> Vec<&'a String> {
        genes.find(query).map(|entry| entry.data()).collect()
    }

    #[test]
    fn lookup() {
        let mut genes: AnnotMap<String, String> = AnnotMap::new();
        let tma22 = Contig::new(
            "chrX".to_owned(),
            461829,
            462426 - 461829,
            ReqStrand::Forward,
        );
        let tma19 = Contig::new(
            "chrXI".to_owned(),
            334412,
            334916 - 334412,
            ReqStrand::Reverse,
        );
        genes.insert_at("TMA22".to_owned(), &tma22);
        genes.insert_at("TMA19".to_owned(), &tma19);

        // (reference, start, length, expected gene names)
        let cases: Vec<(&str, isize, usize, Vec<&str>)> = vec![
            // Overlaps the end of TMA22 (461829..462426).
            ("chrX", 462400, 100, vec!["TMA22"]),
            // Overlaps the start of TMA19 (334412..334916).
            ("chrXI", 334400, 100, vec!["TMA19"]),
            // Begins exactly where TMA19 ends: no hit.
            ("chrXI", 334916, 100, vec![]),
            // Ends exactly where TMA22 begins: no hit.
            ("chrX", 461729, 100, vec![]),
            // TMA22's coordinates, but on the other chromosome.
            ("chrXI", 462400, 100, vec![]),
            // Reference identifier that was never inserted.
            ("NotFound", 0, 0, vec![]),
        ];

        for (refid, start, length, expected) in cases {
            let query = forward(refid, start, length);
            assert_eq!(
                hits(&genes, &query),
                expected,
                "query {}:{}+{}",
                refid,
                start,
                length
            );
        }
    }

    #[test]
    fn overlaps() {
        let mut genes: AnnotMap<String, String> = AnnotMap::new();

        // Four 1000-long genes on the same reference, staggered so that
        // neighbouring genes overlap.
        for &(name, start) in &[("a", 1000), ("b", 1300), ("c", 1700), ("d", 2200)] {
            genes.insert_at(name.to_owned(), &forward("chr01", start, 1000));
        }

        // (query start, expected gene names in sorted order); every query is
        // 100 long.
        let cases: Vec<(isize, Vec<&str>)> = vec![
            (1050, vec!["a"]),
            (1450, vec!["a", "b"]),
            (1850, vec!["a", "b", "c"]),
            (2250, vec!["b", "c", "d"]),
            (2650, vec!["c", "d"]),
        ];

        for (start, expected) in cases {
            let query = forward("chr01", start, 100);
            let mut found = hits(&genes, &query);
            // Sort so the assertion does not depend on iteration order.
            found.sort();
            assert_eq!(found, expected, "query chr01:{}+100", start);
        }
    }
}