grass_runtime/algorithm/
invert.rs

1use std::iter::Peekable;
2
3use crate::{property::Region, record::Bed3, Genome};
4
5use super::{Components, ComponentsIter, Sorted};
6
7pub struct SortedInversion<I>
8where
9    I: Iterator + Sorted,
10    I::Item: Region + Clone,
11{
12    iter: Peekable<ComponentsIter<I>>,
13    chrom_id: usize,
14    last_end_pos: usize,
15}
16
17impl<I> Sorted for SortedInversion<I>
18where
19    I: Iterator + Sorted,
20    I::Item: Region + Clone,
21{
22}
23
24impl<I> Iterator for SortedInversion<I>
25where
26    I: Iterator + Sorted,
27    I::Item: Region + Clone,
28{
29    type Item = Bed3;
30
31    fn next(&mut self) -> Option<Self::Item> {
32        // First, find the next covered region
33        while let Some(next_comp) = self.iter.peek() {
34            if next_comp.depth != 0 {
35                break;
36            }
37            self.iter.next();
38        }
39
40        if let Some((end_chr, end)) = self.iter.peek().map(|c| c.position()) {
41            // Then we have a good end point for the inverted interval
42            if let Some(current_chr) = Genome::get_chr_by_id(self.chrom_id) {
43                if current_chr == end_chr {
44                    // This means the start point and the end point are on the same genome
45                    let region = Bed3 {
46                        chrom: current_chr,
47                        start: self.last_end_pos as u32,
48                        end: end,
49                    };
50
51                    // Then we need to fastward the frontier to the end of this covered region
52                    while self.iter.peek().map_or(false, |c| c.depth != 0) {
53                        self.iter.next();
54                    }
55                    self.last_end_pos = self.iter.peek().unwrap().position().1 as usize;
56
57                    return Some(region);
58                } else {
59                    // Otherwise, it means the suggested end point is in a different chromosome
60                    // So we need to report the rest of current chromosome
61                    let region = Bed3 {
62                        chrom: current_chr,
63                        start: self.last_end_pos as u32,
64                        end: current_chr.get_chr_size().map_or(u32::MAX, |x| x as u32),
65                    };
66
67                    self.chrom_id += 1;
68                    self.last_end_pos = 0;
69
70                    return Some(region);
71                }
72            }
73        }
74
75        // Otherwise, means we can't find any end point, then we just report regions that
76        // covers reset of the genome
77        if let Some(chr) = Genome::get_chr_by_id(self.chrom_id) {
78            let start = self.last_end_pos as u32;
79            let end = chr.get_chr_size().unwrap_or(usize::MAX) as u32;
80            self.chrom_id += 1;
81            self.last_end_pos = 0;
82            return Some(Bed3 {
83                chrom: chr,
84                start,
85                end,
86            });
87        } else {
88            return None;
89        }
90    }
91}
92
93pub trait SortedInversionExt
94where
95    Self: Iterator + Sorted + Sized,
96    Self::Item: Region + Clone,
97{
98    fn invert(self) -> SortedInversion<Self> {
99        SortedInversion {
100            iter: self.components().peekable(),
101            chrom_id: 0,
102            last_end_pos: 0,
103        }
104    }
105}
106
107impl<I> SortedInversionExt for I
108where
109    I: Iterator + Sorted + Sized,
110    I::Item: Region + Clone,
111{
112}