gtars_scoring/
files.rs

1use std::collections::HashMap;
2use std::path::PathBuf;
3
4use anyhow::Result;
5use glob::glob;
6
7use gtars_core::models::region_set::RegionSet;
8use gtars_core::models::{Interval, Region};
9use gtars_core::utils::generate_region_to_id_map;
10use gtars_overlaprs::{Bits, Overlapper};
11
12#[allow(unused)]
13pub struct OverlapResult(Region, pub(crate) u32);
14
15pub trait FindOverlaps {
16    fn find_overlaps(&self, region: &Region) -> Option<Vec<OverlapResult>>;
17}
18
/// The set of file paths matched by a glob pattern, plus a cursor used to
/// walk through them one at a time.
///
/// `Debug` and `Clone` are derived so the value can be logged and so the
/// iteration state can be duplicated; both fields support them directly.
#[derive(Debug, Clone)]
pub struct FragmentFileGlob {
    /// Index into `files` of the next path to hand out.
    curr: usize,
    /// Paths that matched the pattern, in the order the glob produced them.
    files: Vec<PathBuf>,
}
23
24pub struct ConsensusSet {
25    len: usize,
26    overlap_trees: HashMap<String, Bits<u32, u32>>,
27}
28
29impl FragmentFileGlob {
30    pub fn new(pattern: &str) -> Result<Self> {
31        let files = glob(pattern)?;
32        let files = files
33            .map(|f| match f {
34                Ok(path) => Ok(path),
35                Err(_) => anyhow::bail!(format!("Error reading file entry: {:?}", f)),
36            })
37            .collect::<Result<Vec<_>>>()?;
38        let curr = 0_usize;
39        Ok(FragmentFileGlob { files, curr })
40    }
41
42    pub fn len(&self) -> usize {
43        self.files.len()
44    }
45
46    pub fn is_empty(&self) -> bool {
47        self.files.is_empty()
48    }
49}
50
51impl Iterator for FragmentFileGlob {
52    type Item = PathBuf;
53    fn next(&mut self) -> Option<Self::Item> {
54        let result = self.files.get(self.curr).cloned();
55        self.curr += 1;
56        result
57    }
58}
59
60impl ConsensusSet {
61    pub fn new(path: PathBuf) -> Result<Self> {
62        let regions = RegionSet::try_from(path.as_path())?.regions;
63        let len = regions.len();
64
65        let mut trees: HashMap<String, Bits<u32, u32>> = HashMap::new();
66        let mut intervals: HashMap<String, Vec<Interval<u32, u32>>> = HashMap::new();
67
68        let region_to_id_map = generate_region_to_id_map(&regions);
69
70        for region in regions.iter() {
71            // create interval
72            let interval = Interval {
73                start: region.start,
74                end: region.end,
75                val: *region_to_id_map.get(region).unwrap(),
76            };
77
78            // use chr to get the vector of intervals
79            let chr_intervals = intervals.entry(region.chr.clone()).or_default();
80
81            // push interval to vector
82            chr_intervals.push(interval);
83        }
84
85        // build the trees
86        for (chr, chr_intervals) in intervals.into_iter() {
87            let lapper: Bits<u32, u32> = Bits::build(chr_intervals);
88            trees.insert(chr.to_string(), lapper);
89        }
90
91        Ok(ConsensusSet {
92            overlap_trees: trees,
93            len,
94        })
95    }
96
97    pub fn len(&self) -> usize {
98        self.len
99    }
100
101    pub fn is_empty(&self) -> bool {
102        self.len == 0
103    }
104}
105
106impl FindOverlaps for ConsensusSet {
107    fn find_overlaps(&self, region: &Region) -> Option<Vec<OverlapResult>> {
108        let tree = self.overlap_trees.get(&region.chr);
109        if let Some(tree) = tree {
110            let olaps = tree.find(region.start, region.end);
111            let olaps = olaps
112                .into_iter()
113                .map(|olap| {
114                    OverlapResult(
115                        Region {
116                            chr: region.chr.clone(),
117                            start: region.start,
118                            end: region.end,
119                            rest: None,
120                        },
121                        olap.val,
122                    )
123                })
124                .collect();
125
126            Some(olaps)
127        } else {
128            None
129        }
130    }
131}