1use std::collections::HashMap;
2use std::path::PathBuf;
3
4use anyhow::Result;
5use glob::glob;
6
7use gtars_core::models::region_set::RegionSet;
8use gtars_core::models::{Interval, Region};
9use gtars_core::utils::generate_region_to_id_map;
10use gtars_overlaprs::{Bits, Overlapper};
11
12#[allow(unused)]
13pub struct OverlapResult(Region, pub(crate) u32);
14
15pub trait FindOverlaps {
16 fn find_overlaps(&self, region: &Region) -> Option<Vec<OverlapResult>>;
17}
18
/// The paths matched by a glob pattern, plus a cursor used to iterate them.
#[derive(Debug, Clone)]
pub struct FragmentFileGlob {
    /// Index into `files` of the next path to hand out.
    curr: usize,
    /// Every path produced by the glob, in the order the glob yielded them.
    files: Vec<PathBuf>,
}
23
24pub struct ConsensusSet {
25 len: usize,
26 overlap_trees: HashMap<String, Bits<u32, u32>>,
27}
28
29impl FragmentFileGlob {
30 pub fn new(pattern: &str) -> Result<Self> {
31 let files = glob(pattern)?;
32 let files = files
33 .map(|f| match f {
34 Ok(path) => Ok(path),
35 Err(_) => anyhow::bail!(format!("Error reading file entry: {:?}", f)),
36 })
37 .collect::<Result<Vec<_>>>()?;
38 let curr = 0_usize;
39 Ok(FragmentFileGlob { files, curr })
40 }
41
42 pub fn len(&self) -> usize {
43 self.files.len()
44 }
45
46 pub fn is_empty(&self) -> bool {
47 self.files.is_empty()
48 }
49}
50
51impl Iterator for FragmentFileGlob {
52 type Item = PathBuf;
53 fn next(&mut self) -> Option<Self::Item> {
54 let result = self.files.get(self.curr).cloned();
55 self.curr += 1;
56 result
57 }
58}
59
60impl ConsensusSet {
61 pub fn new(path: PathBuf) -> Result<Self> {
62 let regions = RegionSet::try_from(path.as_path())?.regions;
63 let len = regions.len();
64
65 let mut trees: HashMap<String, Bits<u32, u32>> = HashMap::new();
66 let mut intervals: HashMap<String, Vec<Interval<u32, u32>>> = HashMap::new();
67
68 let region_to_id_map = generate_region_to_id_map(®ions);
69
70 for region in regions.iter() {
71 let interval = Interval {
73 start: region.start,
74 end: region.end,
75 val: *region_to_id_map.get(region).unwrap(),
76 };
77
78 let chr_intervals = intervals.entry(region.chr.clone()).or_default();
80
81 chr_intervals.push(interval);
83 }
84
85 for (chr, chr_intervals) in intervals.into_iter() {
87 let lapper: Bits<u32, u32> = Bits::build(chr_intervals);
88 trees.insert(chr.to_string(), lapper);
89 }
90
91 Ok(ConsensusSet {
92 overlap_trees: trees,
93 len,
94 })
95 }
96
97 pub fn len(&self) -> usize {
98 self.len
99 }
100
101 pub fn is_empty(&self) -> bool {
102 self.len == 0
103 }
104}
105
106impl FindOverlaps for ConsensusSet {
107 fn find_overlaps(&self, region: &Region) -> Option<Vec<OverlapResult>> {
108 let tree = self.overlap_trees.get(®ion.chr);
109 if let Some(tree) = tree {
110 let olaps = tree.find(region.start, region.end);
111 let olaps = olaps
112 .into_iter()
113 .map(|olap| {
114 OverlapResult(
115 Region {
116 chr: region.chr.clone(),
117 start: region.start,
118 end: region.end,
119 rest: None,
120 },
121 olap.val,
122 )
123 })
124 .collect();
125
126 Some(olaps)
127 } else {
128 None
129 }
130 }
131}