1use std::{
2 cmp::{Ordering, Reverse},
3 collections::{BinaryHeap, HashMap},
4 fmt::{Debug, Formatter, Result},
5 hash::Hash,
6 iter::Enumerate,
7};
8
9use crate::ChrRef;
10use crate::{
11 property::{Region, RegionCore, Serializable},
12 record::ToSelfContained,
13};
14
15use super::Sorted;
16
17#[derive(Clone, Copy)]
18pub struct RegionComponent<T: Region> {
19 pub is_open: bool,
20 pub index: usize,
21 pub depth: usize,
22 pub value: T,
23}
24
25impl<T: Region + Serializable> Serializable for RegionComponent<T> {
26 fn dump<W: std::io::Write>(&self, mut fp: W) -> std::io::Result<()> {
27 self.value.dump(&mut fp)?;
28 write!(&mut fp, "\t{}", if self.is_open { "open" } else { "close" })?;
29 write!(&mut fp, "\t#{}\t{}", self.index, self.depth)
30 }
31}
32
33impl<T: RegionCore + ToSelfContained> ToSelfContained for RegionComponent<T>
34where
35 T::SelfContained: RegionCore,
36{
37 type SelfContained = RegionComponent<T::SelfContained>;
38
39 fn to_self_contained(&self) -> Self::SelfContained {
40 RegionComponent {
41 value: self.value.to_self_contained(),
42 is_open: self.is_open,
43 index: self.index,
44 depth: self.depth,
45 }
46 }
47}
48
49impl<T: Region> RegionCore for RegionComponent<T> {
50 fn start(&self) -> u32 {
51 self.value.start()
52 }
53
54 fn end(&self) -> u32 {
55 self.value.end()
56 }
57
58 fn chrom(&self) -> ChrRef<'static> {
59 self.value.chrom()
60 }
61}
62
63impl<T: Region> Debug for RegionComponent<T> {
64 fn fmt(&self, f: &mut Formatter<'_>) -> Result {
65 if self.is_open {
66 write!(f, "Open(")?;
67 } else {
68 write!(f, "Close(")?;
69 }
70 let (chrom, pos) = self.position();
71 write!(f, "{}, {}, {})", chrom.to_string(), pos, self.depth)
72 }
73}
74
75impl<T: Region> RegionComponent<T> {
76 pub fn is_start(&self) -> bool {
77 self.is_open
78 }
79
80 pub fn is_end(&self) -> bool {
81 !self.is_open
82 }
83
84 pub fn depth(&self) -> usize {
85 self.depth
86 }
87
88 pub fn position(&self) -> (ChrRef<'static>, u32) {
89 if self.is_open {
90 (self.value.chrom(), self.value.start())
91 } else {
92 (self.value.chrom(), self.value.end())
93 }
94 }
95}
96
97impl<T: Region> PartialEq for RegionComponent<T> {
98 fn eq(&self, other: &RegionComponent<T>) -> bool {
99 self.position() == other.position()
100 }
101}
102
103impl<T: Region> PartialOrd for RegionComponent<T> {
104 fn partial_cmp(&self, other: &RegionComponent<T>) -> Option<Ordering> {
105 let ret = self
106 .position()
107 .cmp(&other.position())
108 .then_with(|| self.is_open.cmp(&other.is_open));
109 Some(ret)
110 }
111}
112
113impl<T: Region> Eq for RegionComponent<T> {}
114
115impl<T: Region> Ord for RegionComponent<T> {
116 fn cmp(&self, other: &Self) -> Ordering {
117 self.partial_cmp(other).unwrap()
118 }
119}
120
121pub struct ComponentsIter<I>
122where
123 I: Iterator + Sorted,
124 I::Item: Region + Clone,
125{
126 iter: Enumerate<I>,
127 peek_buffer: Option<(usize, I::Item)>,
128 heap: BinaryHeap<Reverse<RegionComponent<I::Item>>>,
129}
130
131impl<I> Sorted for ComponentsIter<I>
132where
133 I: Iterator + Sorted,
134 I::Item: Region + Clone,
135{
136}
137
138pub trait Components
139where
140 Self: Iterator + Sized + Sorted,
141{
142 fn components(self) -> ComponentsIter<Self>
143 where
144 Self::Item: Region + Clone,
145 {
146 let mut iter = self.enumerate();
147 let peek_buffer = iter.next();
148 ComponentsIter {
149 iter,
150 peek_buffer,
151 heap: BinaryHeap::new(),
152 }
153 }
154}
155
156impl<T> Components for T where T: Iterator + Sized + Sorted {}
157
158impl<I> Iterator for ComponentsIter<I>
159where
160 I: Iterator + Sorted,
161 I::Item: Region + Clone,
162{
163 type Item = RegionComponent<I::Item>;
164 fn next(&mut self) -> Option<Self::Item> {
165 if let Some((index, peek_buffer)) = self.peek_buffer.as_ref() {
166 let index = *index;
167 if self.heap.peek().map_or(false, |x| {
168 x.0.position() < (peek_buffer.chrom().clone(), peek_buffer.start())
169 }) {
170 let depth = self.heap.len();
171 return self.heap.pop().map(|Reverse(mut x)| {
172 x.depth = depth - 1;
173 x
174 });
175 }
176 let depth = self.heap.len() + 1;
177
178 self.heap.push(Reverse(RegionComponent {
179 index,
180 depth: 0,
181 value: peek_buffer.clone(),
182 is_open: false,
183 }));
184 let ret = Some(RegionComponent {
185 index,
186 depth,
187 is_open: true,
188 value: peek_buffer.clone(),
189 });
190 self.peek_buffer = self.iter.next();
191 ret
192 } else {
193 let depth = self.heap.len();
194 self.heap.pop().map(|Reverse(mut x)| {
195 x.depth = depth - 1;
196 x
197 })
198 }
199 }
200}
201
202pub struct TaggedComponent<I, R, T, F>
203where
204 I: Iterator<Item = RegionComponent<R>>,
205 R: Region + Clone,
206 T: Clone + Hash + Eq,
207 F: FnMut(&R) -> T,
208{
209 tag_func: F,
210 state: HashMap<T, usize>,
211 component_iter: I,
212}
213
214pub trait TaggedComponentExt<R>
215where
216 R: Region + Clone,
217 Self: Iterator<Item = RegionComponent<R>>,
218{
219 fn with_tag<T, F>(self, tag_func: F) -> TaggedComponent<Self, R, T, F>
220 where
221 T: Clone + Hash + Eq,
222 F: FnMut(&R) -> T,
223 Self: Sized,
224 {
225 TaggedComponent {
226 tag_func,
227 state: HashMap::new(),
228 component_iter: self,
229 }
230 }
231}
232
233impl<T, R> TaggedComponentExt<R> for T
234where
235 R: Region + Clone,
236 Self: Iterator<Item = RegionComponent<R>>,
237{
238}
239
240impl<I, R, T, F> Iterator for TaggedComponent<I, R, T, F>
241where
242 I: Iterator<Item = RegionComponent<R>>,
243 R: Region + Clone,
244 T: Clone + Hash + Eq,
245 F: FnMut(&R) -> T,
246{
247 type Item = (T, RegionComponent<R>);
248 fn next(&mut self) -> Option<Self::Item> {
249 let mut next_comp = self.component_iter.next()?;
250 let tag = (self.tag_func)(&next_comp.value);
251 let tagged_depth = if next_comp.is_open {
252 let cell = self.state.entry(tag.clone()).or_insert(0);
253 *cell += 1;
254 *cell
255 } else {
256 let depth = self
257 .state
258 .get_mut(&tag)
259 .map(|depth| {
260 *depth -= 1;
261 *depth
262 })
263 .unwrap_or(0);
264 if depth == 0 {
265 self.state.remove(&tag);
266 }
267 depth
268 };
269 next_comp.depth = tagged_depth;
270 Some((tag, next_comp))
271 }
272}
273
274#[cfg(test)]
275mod test {
276 use crate::{algorithm::AssumeSorted, record::Bed3, LineRecordStreamExt};
277
278 use super::Components;
279
280 #[test]
281 fn test_component_iter() -> Result<(), Box<dyn std::error::Error>> {
282 let input = include_bytes!("../../../data/a.bed");
283 let bed3 = input.into_record_iter::<Bed3>().assume_sorted();
284 let comp_iter = bed3.components();
285 comp_iter.count();
286 Ok(())
287 }
288}