rustybam 0.1.34

bioinformatics toolkit in rust
Documentation
use bio::data_structures::interval_tree::{Entry, IntervalTree};
use bio::io::*;
use itertools::Itertools;

pub trait IntervalTreeExt<N: Ord + Clone, D> {
    fn find_bed_overlaps(&self, rec: &bed::Record) -> Vec<Entry<u64, bed::Record>>;
}

impl IntervalTreeExt<u64, &bed::Record> for IntervalTree<u64, bed::Record> {
    fn find_bed_overlaps(&self, rec: &bed::Record) -> Vec<Entry<u64, bed::Record>> {
        self.find(rec.start()..rec.end())
            .filter(|entry| entry.data().chrom() == rec.chrom())
            .collect_vec()
    }
}

pub fn interval_tree_from_bed_file(path: &str) -> IntervalTree<u64, bed::Record> {
    let mut tree = IntervalTree::new();
    let mut reader = bed::Reader::from_file(path).unwrap();
    for rec in reader.records() {
        let rec = rec.unwrap();
        tree.insert(rec.start()..rec.end(), rec.clone());
    }
    tree
}

#[cfg(test)]
mod tests {
    use super::*;
    use bio::utils::Interval;

    #[test]
    fn test_chrom_specific_overlaps() {
        let mut tree = IntervalTree::new();
        let mut rec = bed::Record::new();
        rec.set_chrom("Range_1");
        rec.set_start(11);
        rec.set_end(20);

        let mut rec2 = bed::Record::new();
        rec2.set_chrom("Range_2");
        rec2.set_start(10);
        rec2.set_end(30);

        tree.insert(11..20, rec.clone());
        tree.insert(10..30, rec2.clone());

        for r in tree.find_bed_overlaps(&rec) {
            assert_eq!(r.interval(), &(Interval::from(rec.start()..rec.end())));
            assert_eq!(r.data().chrom(), "Range_1");
            eprintln!("found filtered range");
        }
        for r in tree.find(15..25) {
            if r.data().chrom() == "Range_1" {
                assert_eq!(r.interval(), &(Interval::from(rec.start()..rec.end())));
                assert_eq!(r.data().chrom(), "Range_1");
            } else if r.data().chrom() == "Range_2" {
                assert_eq!(r.interval(), &(Interval::from(rec2.start()..rec2.end())));
                assert_eq!(r.data().chrom(), "Range_2");
            }
        }
    }
}