grass_runtime/algorithm/
components.rs

1use std::{
2    cmp::{Ordering, Reverse},
3    collections::{BinaryHeap, HashMap},
4    fmt::{Debug, Formatter, Result},
5    hash::Hash,
6    iter::Enumerate,
7};
8
9use crate::ChrRef;
10use crate::{
11    property::{Region, RegionCore, Serializable},
12    record::ToSelfContained,
13};
14
15use super::Sorted;
16
17#[derive(Clone, Copy)]
18pub struct RegionComponent<T: Region> {
19    pub is_open: bool,
20    pub index: usize,
21    pub depth: usize,
22    pub value: T,
23}
24
25impl<T: Region + Serializable> Serializable for RegionComponent<T> {
26    fn dump<W: std::io::Write>(&self, mut fp: W) -> std::io::Result<()> {
27        self.value.dump(&mut fp)?;
28        write!(&mut fp, "\t{}", if self.is_open { "open" } else { "close" })?;
29        write!(&mut fp, "\t#{}\t{}", self.index, self.depth)
30    }
31}
32
33impl<T: RegionCore + ToSelfContained> ToSelfContained for RegionComponent<T>
34where
35    T::SelfContained: RegionCore,
36{
37    type SelfContained = RegionComponent<T::SelfContained>;
38
39    fn to_self_contained(&self) -> Self::SelfContained {
40        RegionComponent {
41            value: self.value.to_self_contained(),
42            is_open: self.is_open,
43            index: self.index,
44            depth: self.depth,
45        }
46    }
47}
48
49impl<T: Region> RegionCore for RegionComponent<T> {
50    fn start(&self) -> u32 {
51        self.value.start()
52    }
53
54    fn end(&self) -> u32 {
55        self.value.end()
56    }
57
58    fn chrom(&self) -> ChrRef<'static> {
59        self.value.chrom()
60    }
61}
62
63impl<T: Region> Debug for RegionComponent<T> {
64    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
65        if self.is_open {
66            write!(f, "Open(")?;
67        } else {
68            write!(f, "Close(")?;
69        }
70        let (chrom, pos) = self.position();
71        write!(f, "{}, {}, {})", chrom.to_string(), pos, self.depth)
72    }
73}
74
75impl<T: Region> RegionComponent<T> {
76    pub fn is_start(&self) -> bool {
77        self.is_open
78    }
79
80    pub fn is_end(&self) -> bool {
81        !self.is_open
82    }
83
84    pub fn depth(&self) -> usize {
85        self.depth
86    }
87
88    pub fn position(&self) -> (ChrRef<'static>, u32) {
89        if self.is_open {
90            (self.value.chrom(), self.value.start())
91        } else {
92            (self.value.chrom(), self.value.end())
93        }
94    }
95}
96
97impl<T: Region> PartialEq for RegionComponent<T> {
98    fn eq(&self, other: &RegionComponent<T>) -> bool {
99        self.position() == other.position()
100    }
101}
102
103impl<T: Region> PartialOrd for RegionComponent<T> {
104    fn partial_cmp(&self, other: &RegionComponent<T>) -> Option<Ordering> {
105        let ret = self
106            .position()
107            .cmp(&other.position())
108            .then_with(|| self.is_open.cmp(&other.is_open));
109        Some(ret)
110    }
111}
112
113impl<T: Region> Eq for RegionComponent<T> {}
114
115impl<T: Region> Ord for RegionComponent<T> {
116    fn cmp(&self, other: &Self) -> Ordering {
117        self.partial_cmp(other).unwrap()
118    }
119}
120
121pub struct ComponentsIter<I>
122where
123    I: Iterator + Sorted,
124    I::Item: Region + Clone,
125{
126    iter: Enumerate<I>,
127    peek_buffer: Option<(usize, I::Item)>,
128    heap: BinaryHeap<Reverse<RegionComponent<I::Item>>>,
129}
130
131impl<I> Sorted for ComponentsIter<I>
132where
133    I: Iterator + Sorted,
134    I::Item: Region + Clone,
135{
136}
137
138pub trait Components
139where
140    Self: Iterator + Sized + Sorted,
141{
142    fn components(self) -> ComponentsIter<Self>
143    where
144        Self::Item: Region + Clone,
145    {
146        let mut iter = self.enumerate();
147        let peek_buffer = iter.next();
148        ComponentsIter {
149            iter,
150            peek_buffer,
151            heap: BinaryHeap::new(),
152        }
153    }
154}
155
156impl<T> Components for T where T: Iterator + Sized + Sorted {}
157
158impl<I> Iterator for ComponentsIter<I>
159where
160    I: Iterator + Sorted,
161    I::Item: Region + Clone,
162{
163    type Item = RegionComponent<I::Item>;
164    fn next(&mut self) -> Option<Self::Item> {
165        if let Some((index, peek_buffer)) = self.peek_buffer.as_ref() {
166            let index = *index;
167            if self.heap.peek().map_or(false, |x| {
168                x.0.position() < (peek_buffer.chrom().clone(), peek_buffer.start())
169            }) {
170                let depth = self.heap.len();
171                return self.heap.pop().map(|Reverse(mut x)| {
172                    x.depth = depth - 1;
173                    x
174                });
175            }
176            let depth = self.heap.len() + 1;
177
178            self.heap.push(Reverse(RegionComponent {
179                index,
180                depth: 0,
181                value: peek_buffer.clone(),
182                is_open: false,
183            }));
184            let ret = Some(RegionComponent {
185                index,
186                depth,
187                is_open: true,
188                value: peek_buffer.clone(),
189            });
190            self.peek_buffer = self.iter.next();
191            ret
192        } else {
193            let depth = self.heap.len();
194            self.heap.pop().map(|Reverse(mut x)| {
195                x.depth = depth - 1;
196                x
197            })
198        }
199    }
200}
201
202pub struct TaggedComponent<I, R, T, F>
203where
204    I: Iterator<Item = RegionComponent<R>>,
205    R: Region + Clone,
206    T: Clone + Hash + Eq,
207    F: FnMut(&R) -> T,
208{
209    tag_func: F,
210    state: HashMap<T, usize>,
211    component_iter: I,
212}
213
214pub trait TaggedComponentExt<R>
215where
216    R: Region + Clone,
217    Self: Iterator<Item = RegionComponent<R>>,
218{
219    fn with_tag<T, F>(self, tag_func: F) -> TaggedComponent<Self, R, T, F>
220    where
221        T: Clone + Hash + Eq,
222        F: FnMut(&R) -> T,
223        Self: Sized,
224    {
225        TaggedComponent {
226            tag_func,
227            state: HashMap::new(),
228            component_iter: self,
229        }
230    }
231}
232
233impl<T, R> TaggedComponentExt<R> for T
234where
235    R: Region + Clone,
236    Self: Iterator<Item = RegionComponent<R>>,
237{
238}
239
240impl<I, R, T, F> Iterator for TaggedComponent<I, R, T, F>
241where
242    I: Iterator<Item = RegionComponent<R>>,
243    R: Region + Clone,
244    T: Clone + Hash + Eq,
245    F: FnMut(&R) -> T,
246{
247    type Item = (T, RegionComponent<R>);
248    fn next(&mut self) -> Option<Self::Item> {
249        let mut next_comp = self.component_iter.next()?;
250        let tag = (self.tag_func)(&next_comp.value);
251        let tagged_depth = if next_comp.is_open {
252            let cell = self.state.entry(tag.clone()).or_insert(0);
253            *cell += 1;
254            *cell
255        } else {
256            let depth = self
257                .state
258                .get_mut(&tag)
259                .map(|depth| {
260                    *depth -= 1;
261                    *depth
262                })
263                .unwrap_or(0);
264            if depth == 0 {
265                self.state.remove(&tag);
266            }
267            depth
268        };
269        next_comp.depth = tagged_depth;
270        Some((tag, next_comp))
271    }
272}
273
274#[cfg(test)]
275mod test {
276    use crate::{algorithm::AssumeSorted, record::Bed3, LineRecordStreamExt};
277
278    use super::Components;
279
280    #[test]
281    fn test_component_iter() -> Result<(), Box<dyn std::error::Error>> {
282        let input = include_bytes!("../../../data/a.bed");
283        let bed3 = input.into_record_iter::<Bed3>().assume_sorted();
284        let comp_iter = bed3.components();
285        comp_iter.count();
286        Ok(())
287    }
288}