Skip to main content

gen_core/
range.rs

1use std::cmp::{max, min};
2
3use itertools::Itertools;
4
5#[derive(Clone, Debug, Eq, Hash, PartialEq)]
6pub struct Range {
7    pub start: i64,
8    pub end: i64,
9}
10
11impl Range {
12    pub fn extend_to(&self, other: &Range) -> Range {
13        Range {
14            start: self.start,
15            end: other.end,
16        }
17    }
18
19    // Returns true if this range is left-adjacent to the other range
20    pub fn left_adjoins(&self, other: &Range, modulus: Option<i64>) -> bool {
21        let mut other_start = other.start;
22        let mut self_end = self.end;
23        if let Some(modulus) = modulus {
24            other_start %= modulus;
25            self_end %= modulus;
26        }
27
28        self_end == other_start
29    }
30
31    pub fn is_wraparound(&self) -> bool {
32        self.start > self.end
33    }
34
35    pub fn overlap(&self, other: &Range) -> Vec<Range> {
36        /*
37           Returns the overlapping ranges between two ranges. If there are multiple overlapping ranges,
38           such as can be the case when a range wraps the origin, multiple ranges are returned. If
39           there are no overlapping ranges, an empty list is returned.
40
41           Examples:
42
43           Overlap between two non-wraparound ranges
44                 6            19
45                 |------------|        self
46           AAAAAAAAAAAAAAAAAAAAAAAA
47           |------------|              other
48           0            13
49                 |------|              overlap
50                 6      13
51
52           Overlap with a wraparound range
53             2   6
54           >-|   |---------------->    self
55           AAAAAAAAAAAAAAAAAAAAAAAA
56           |---|                       other
57           0   4
58           |-|                         overlap
59           0 2
60
61           Overlap with multiple wraparound ranges
62             2   6
63           >-|   |---------------->    self
64           AAAAAAAAAAAAAAAAAAAAAAAA
65           >---|        |--------->    other
66               4        13
67           >-|          |--------->    overlap
68             2          13
69
70           Multiple Overlaps
71               4        13
72           >---|        |--------->    self
73           AAAAAAAAAAAAAAAAAAAAAAAA
74             |----------------|        other
75             2                19
76             |-|        |-----|        overlaps
77             2 4        13    19
78        */
79
80        let start1 = self.start;
81        let end1 = self.end;
82        let start2 = other.start;
83        let end2 = other.end;
84
85        let mut self_intervals = vec![];
86        let mut other_intervals = vec![];
87
88        // split the ranges into pre-/post-origin segments
89        if self.is_wraparound() {
90            self_intervals.extend(vec![
91                Range {
92                    start: start1,
93                    end: i64::MAX,
94                },
95                Range {
96                    start: 1,
97                    end: end1,
98                },
99            ]);
100        } else {
101            self_intervals.push(self.clone());
102        }
103
104        if other.is_wraparound() {
105            other_intervals.extend(vec![
106                Range {
107                    start: start2,
108                    end: i64::MAX,
109                },
110                Range {
111                    start: 1,
112                    end: end2,
113                },
114            ]);
115        } else {
116            other_intervals.push(other.clone());
117        }
118
119        let overlaps = Range::find_pairwise_overlaps(self_intervals, other_intervals);
120
121        if overlaps.len() > 1 {
122            Range::consolidate_overlaps_about_the_origin(overlaps)
123        } else {
124            overlaps
125        }
126    }
127
128    fn find_pairwise_overlaps(intervals1: Vec<Range>, intervals2: Vec<Range>) -> Vec<Range> {
129        let mut overlaps = vec![];
130        for interval1 in intervals1 {
131            for interval2 in &intervals2 {
132                if interval1.end > interval2.start && interval1.start <= interval2.end {
133                    overlaps.push(Range {
134                        start: max(interval1.start, interval2.start),
135                        end: min(interval1.end, interval2.end),
136                    });
137                }
138            }
139        }
140
141        overlaps
142    }
143
144    // Consolidate the first and last overlaps if they lie on either side of the origin.
145    fn consolidate_overlaps_about_the_origin(overlaps: Vec<Range>) -> Vec<Range> {
146        let mut sorted_overlaps = overlaps
147            .clone()
148            .into_iter()
149            .sorted_by(|a, b| a.start.cmp(&b.start))
150            .collect::<Vec<Range>>();
151        let first = sorted_overlaps.first().unwrap().clone();
152        let last = sorted_overlaps.last().unwrap().clone();
153        if first.start == 0 && last.end == i64::MAX {
154            sorted_overlaps.pop();
155            sorted_overlaps.push(Range {
156                start: last.start,
157                end: first.end,
158            });
159        }
160
161        sorted_overlaps
162    }
163
164    pub fn contains(&self, index: i64) -> bool {
165        if self.is_wraparound() {
166            index >= self.start || index <= self.end
167        } else {
168            index >= self.start && index <= self.end
169        }
170    }
171
172    pub fn circular_mod(index: i64, sequence_length: i64, is_circular_contig: bool) -> i64 {
173        if is_circular_contig {
174            index % sequence_length
175        } else {
176            index
177        }
178    }
179
180    pub fn translate_index(
181        &self,
182        index: i64,
183        range2: &Range,
184        sequence_length: i64,
185        is_circular_contig: bool,
186    ) -> Result<i64, &'static str> {
187        if !self.contains(index) {
188            return Err("Index is not contained in range");
189        }
190
191        let offset = index - self.start;
192        Ok(Range::circular_mod(
193            range2.start + offset,
194            sequence_length,
195            is_circular_contig,
196        ))
197    }
198}
199
200#[derive(Clone, Debug, Eq, Hash, PartialEq)]
201pub struct RangeMapping {
202    pub source_range: Range,
203    pub target_range: Range,
204}
205
206impl RangeMapping {
207    pub fn merge_contiguous_mappings(mappings: Vec<RangeMapping>) -> Vec<RangeMapping> {
208        let mut grouped_mappings = vec![];
209        let mut current_group = vec![];
210
211        for mapping in mappings {
212            if current_group.is_empty() {
213                current_group.push(mapping);
214            } else {
215                let last_mapping = current_group.last().unwrap();
216                if last_mapping
217                    .source_range
218                    .left_adjoins(&mapping.source_range, None)
219                    && last_mapping
220                        .target_range
221                        .left_adjoins(&mapping.target_range, None)
222                {
223                    current_group.push(mapping);
224                } else {
225                    grouped_mappings.push(current_group);
226                    current_group = vec![mapping];
227                }
228            }
229        }
230
231        if !current_group.is_empty() {
232            grouped_mappings.push(current_group);
233        }
234
235        let mut merged_mappings = vec![];
236        for group in grouped_mappings {
237            let first = group.first().unwrap();
238            let last = group.last().unwrap();
239            merged_mappings.push(RangeMapping {
240                source_range: first.source_range.extend_to(&last.source_range),
241                target_range: first.target_range.extend_to(&last.target_range),
242            });
243        }
244
245        merged_mappings
246    }
247}
248
249#[cfg(test)]
250mod tests {
251    // Note this useful idiom: importing names from outer (for mod tests) scope.
252    use super::*;
253
254    #[test]
255    fn test_left_adjoins() {
256        let left_range = Range { start: 0, end: 2 };
257        let middle_range = Range { start: 1, end: 3 };
258        let right_range = Range { start: 2, end: 4 };
259
260        assert!(left_range.left_adjoins(&right_range, None));
261        assert!(!left_range.left_adjoins(&middle_range, None));
262        assert!(!middle_range.left_adjoins(&right_range, None));
263        assert!(!right_range.left_adjoins(&left_range, None));
264        assert!(!right_range.left_adjoins(&middle_range, None));
265        assert!(!middle_range.left_adjoins(&left_range, None));
266
267        assert!(right_range.left_adjoins(&left_range, Some(4)));
268        assert!(left_range.left_adjoins(&right_range, Some(4)));
269        assert!(!left_range.left_adjoins(&middle_range, Some(4)));
270        assert!(!middle_range.left_adjoins(&right_range, Some(4)));
271        assert!(!right_range.left_adjoins(&middle_range, Some(4)));
272        assert!(!middle_range.left_adjoins(&left_range, Some(4)));
273    }
274
275    #[test]
276    fn test_overlap() {
277        let range1 = Range { start: 0, end: 12 };
278        let range2 = Range { start: 4, end: 8 };
279        let range3 = Range { start: 10, end: 16 };
280
281        assert_eq!(range1.overlap(&range2), vec![Range { start: 4, end: 8 }]);
282        assert_eq!(range1.overlap(&range3), vec![Range { start: 10, end: 12 }]);
283        assert_eq!(range2.overlap(&range3), vec![]);
284    }
285
286    #[test]
287    fn test_merge_contiguous_ranges() {
288        let mappings = vec![
289            RangeMapping {
290                source_range: Range { start: 0, end: 2 },
291                target_range: Range { start: 2, end: 4 },
292            },
293            RangeMapping {
294                source_range: Range { start: 2, end: 5 },
295                target_range: Range { start: 4, end: 7 },
296            },
297            RangeMapping {
298                source_range: Range { start: 7, end: 8 },
299                target_range: Range { start: 9, end: 10 },
300            },
301        ];
302
303        let merged_mappings = RangeMapping::merge_contiguous_mappings(mappings);
304        assert_eq!(merged_mappings.len(), 2);
305        assert_eq!(
306            merged_mappings,
307            vec![
308                RangeMapping {
309                    source_range: Range { start: 0, end: 5 },
310                    target_range: Range { start: 2, end: 7 },
311                },
312                RangeMapping {
313                    source_range: Range { start: 7, end: 8 },
314                    target_range: Range { start: 9, end: 10 },
315                },
316            ]
317        );
318    }
319}