use std::collections::HashMap;
use std::hash::Hash;
use crate::data_structures::interval_tree;
use crate::data_structures::interval_tree::{IntervalTree, IntervalTreeIterator};
use crate::utils::Interval;
use bio_types::annot::loc::Loc;
/// Container of annotations of type `T`, grouped by reference identifier `R`.
///
/// Each reference identifier owns its own interval tree over `isize`
/// coordinates; an annotation is stored in the tree of the reference it was
/// inserted under.
#[derive(Debug, Clone)]
pub struct AnnotMap<R: Eq + Hash, T> {
    /// One interval tree per reference identifier.
    refid_itrees: HashMap<R, IntervalTree<isize, T>>,
}
impl<R, T> Default for AnnotMap<R, T>
where
    R: Eq + Hash,
{
    /// Creates an empty map; equivalent to [`AnnotMap::new`].
    fn default() -> Self {
        AnnotMap {
            refid_itrees: HashMap::new(),
        }
    }
}

impl<R, T> AnnotMap<R, T>
where
    R: Eq + Hash,
{
    /// Creates an empty annotation map with no reference identifiers.
    pub fn new() -> Self {
        Self::default()
    }

    /// Stores `data` under the interval described by `location`.
    ///
    /// The interval is `location.start()..location.start() + location.length()`
    /// and it is inserted into the interval tree belonging to
    /// `location.refid()`. That tree is created on first use; the reference
    /// identifier is cloned to serve as the map key.
    pub fn insert_at<L>(&mut self, data: T, location: &L)
    where
        R: Clone,
        L: Loc<RefID = R>,
    {
        let start = location.start();
        // The length is cast to the tree's `isize` coordinate type.
        let end = start + (location.length() as isize);
        self.refid_itrees
            .entry(location.refid().clone())
            .or_insert_with(IntervalTree::new)
            .insert(start..end, data);
    }

    /// Returns an iterator over the entries found by querying the interval
    /// tree of `location.refid()` with the interval
    /// `location.start()..location.start() + location.length()`.
    ///
    /// If nothing was ever inserted for that reference identifier, the
    /// returned iterator is empty. Yielded entries borrow their reference
    /// identifier from `location`, which is why `location` must live as long
    /// as the borrow of `self`.
    pub fn find<'a, L>(&'a self, location: &'a L) -> AnnotMapIterator<'a, R, T>
    where
        L: Loc<RefID = R>,
    {
        let refid = location.refid();
        // The query interval is only computed when a tree exists for `refid`.
        let itree_iter = self.refid_itrees.get(refid).map(|itree| {
            let start = location.start();
            itree.find(start..(start + (location.length() as isize)))
        });
        AnnotMapIterator { itree_iter, refid }
    }
}
impl<R, T> AnnotMap<R, T>
where
    R: Eq + Hash + Clone,
    T: Loc<RefID = R>,
{
    /// Inserts `data` using `data` itself as the location.
    ///
    /// The value is stored in the interval tree of `data.refid()` (created on
    /// first use) under the interval
    /// `data.start()..data.start() + data.length()`.
    pub fn insert_loc(&mut self, data: T) {
        let begin = data.start();
        let end = begin + (data.length() as isize);
        // The key is cloned before `data` is moved into the tree.
        self.refid_itrees
            .entry(data.refid().clone())
            .or_insert_with(IntervalTree::new)
            .insert(begin..end, data);
    }
}
/// A single item yielded by [`AnnotMapIterator`].
///
/// Pairs an entry of the underlying interval tree with a borrowed reference
/// identifier.
#[derive(Debug, Clone)]
pub struct Entry<'a, R: Eq + Hash, T> {
    /// Entry of the per-reference interval tree (interval plus data).
    itree_entry: interval_tree::Entry<'a, isize, T>,
    /// Reference identifier associated with this entry.
    refid: &'a R,
}
impl<'a, R, T> Entry<'a, R, T>
where
R: Eq + Hash,
{
pub fn data(&self) -> &'a T {
self.itree_entry.data()
}
pub fn interval(&self) -> &'a Interval<isize> {
self.itree_entry.interval()
}
pub fn refid(&self) -> &'a R {
self.refid
}
}
/// Iterator over the [`Entry`] values produced by [`AnnotMap::find`].
pub struct AnnotMapIterator<'a, R: Eq + Hash, T> {
    /// Iterator over the matching interval tree, or `None` when the queried
    /// reference identifier has no tree.
    itree_iter: Option<IntervalTreeIterator<'a, isize, T>>,
    /// Reference identifier attached to every yielded entry.
    refid: &'a R,
}
impl<'a, R, T> Iterator for AnnotMapIterator<'a, R, T>
where
R: 'a + Eq + Hash,
T: 'a,
{
type Item = Entry<'a, R, T>;
fn next(&mut self) -> Option<Self::Item> {
match self.itree_iter {
Some(ref mut iter) => match iter.next() {
Some(next_itree) => Some(Entry {
itree_entry: next_itree,
refid: self.refid,
}),
None => None,
},
None => None,
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use bio_types::annot::contig::Contig;
    use bio_types::annot::loc::Loc;
    use bio_types::strand::ReqStrand;

    // Collects the data of every entry found for `query`, sorted so that the
    // comparison does not depend on the order in which the tree yields hits.
    fn sorted_hits<'a, L>(genes: &'a AnnotMap<String, String>, query: &'a L) -> Vec<&'a String>
    where
        L: Loc<RefID = String>,
    {
        let mut names: Vec<&String> = genes.find(query).map(|e| e.data()).collect();
        names.sort();
        names
    }

    // Two genes on different references; queries check hits, near misses on
    // either side, a wrong reference, and a reference that was never inserted.
    #[test]
    fn lookup() {
        let mut genes: AnnotMap<String, String> = AnnotMap::new();
        let features = vec![
            ("TMA22", "chrX", 461829, 462426 - 461829, ReqStrand::Forward),
            ("TMA19", "chrXI", 334412, 334916 - 334412, ReqStrand::Reverse),
        ];
        for (name, refid, start, length, strand) in features {
            genes.insert_at(
                name.to_owned(),
                &Contig::new(refid.to_owned(), start, length, strand),
            );
        }

        let cases = vec![
            ("chrX", 462400, 100, vec!["TMA22"]),
            ("chrXI", 334400, 100, vec!["TMA19"]),
            ("chrXI", 334916, 100, vec![]),
            ("chrX", 461729, 100, vec![]),
            ("chrXI", 462400, 100, vec![]),
            ("NotFound", 0, 0, vec![]),
        ];
        for (refid, start, length, expected) in cases {
            let query = Contig::new(refid.to_owned(), start, length, ReqStrand::Forward);
            assert_eq!(
                sorted_hits(&genes, &query),
                expected,
                "query {}:{}+{}",
                refid,
                start,
                length
            );
        }
    }

    // Four staggered, mutually overlapping features on one reference; each
    // query of length 100 must report exactly the features it overlaps.
    #[test]
    fn overlaps() {
        let mut genes: AnnotMap<String, String> = AnnotMap::new();
        for (name, start) in vec![("a", 1000), ("b", 1300), ("c", 1700), ("d", 2200)] {
            genes.insert_at(
                name.to_owned(),
                &Contig::new("chr01".to_owned(), start, 1000, ReqStrand::Forward),
            );
        }

        let cases = vec![
            (1050, vec!["a"]),
            (1450, vec!["a", "b"]),
            (1850, vec!["a", "b", "c"]),
            (2250, vec!["b", "c", "d"]),
            (2650, vec!["c", "d"]),
        ];
        for (start, expected) in cases {
            let query = Contig::new("chr01".to_owned(), start, 100, ReqStrand::Forward);
            assert_eq!(
                sorted_hits(&genes, &query),
                expected,
                "query chr01:{}+100",
                start
            );
        }
    }
}