1use crate::index::IntervalIndex;
2use crate::interval::Interval;
3use crate::set::IntervalSet;
4
5fn mk(chrom: &str, start: u64, end: u64) -> Interval {
6 Interval {
7 chrom: chrom.to_string(),
8 start,
9 end,
10 strand: None,
11 }
12}
13
14#[must_use]
16pub fn merge(input: &IntervalSet) -> IntervalSet {
17 let mut work = input.clone();
18 work.sort();
19 let mut out = IntervalSet::new();
20 for (chrom, ivs) in work.iter_chroms() {
21 if ivs.is_empty() {
22 continue;
23 }
24 let mut cur_start = ivs[0].start;
25 let mut cur_end = ivs[0].end;
26 for iv in &ivs[1..] {
27 if iv.start <= cur_end {
28 cur_end = cur_end.max(iv.end);
29 } else {
30 out.push(mk(chrom, cur_start, cur_end));
31 cur_start = iv.start;
32 cur_end = iv.end;
33 }
34 }
35 out.push(mk(chrom, cur_start, cur_end));
36 }
37 out.sort();
38 out
39}
40
41#[must_use]
43pub fn intersect(a: &IntervalSet, b: &IntervalSet) -> IntervalSet {
44 let bx = IntervalIndex::build(b);
46 let mut out = IntervalSet::new();
47 for (chrom, a_ivs) in a.iter_chroms() {
48 for ai in a_ivs {
49 bx.for_each_overlap(chrom, ai.start, ai.end, |bi| {
50 let lo = ai.start.max(bi.start);
51 let hi = ai.end.min(bi.end);
52 if hi > lo {
53 out.push(mk(chrom, lo, hi));
54 }
55 });
56 }
57 }
58 out.sort();
59 out
60}
61
62#[must_use]
64pub fn subtract(a: &IntervalSet, b: &IntervalSet) -> IntervalSet {
65 let b_merged = merge(b);
66 let mut out = IntervalSet::new();
67 for (chrom, a_ivs) in a.iter_chroms() {
68 let b_ivs = b_merged.get(chrom).unwrap_or(&[]);
69 let mut av: Vec<&Interval> = a_ivs.iter().collect();
70 av.sort_unstable_by_key(|x| (x.start, x.end));
71 let mut lo = 0usize;
73 for ai in &av {
74 while lo < b_ivs.len() && b_ivs[lo].end <= ai.start {
75 lo += 1;
76 }
77 for (s, e) in subtract_one(ai, &b_ivs[lo..]) {
78 out.push(mk(chrom, s, e));
79 }
80 }
81 }
82 out.sort();
83 out
84}
85
86fn subtract_one(a: &Interval, b_ivs: &[Interval]) -> Vec<(u64, u64)> {
87 let mut cursor = a.start;
88 let mut out = Vec::new();
89 for b in b_ivs {
90 if b.end <= cursor {
91 continue;
92 }
93 if b.start >= a.end {
94 break;
95 }
96 if b.start > cursor {
97 out.push((cursor, b.start.min(a.end)));
98 }
99 cursor = cursor.max(b.end);
100 if cursor >= a.end {
101 break;
102 }
103 }
104 if cursor < a.end {
105 out.push((cursor, a.end));
106 }
107 out
108}
109
110#[must_use]
112pub fn complement(input: &IntervalSet, chrom_sizes: &[(String, u64)]) -> IntervalSet {
113 let merged = merge(input);
114 let mut out = IntervalSet::new();
115 for (chrom, size) in chrom_sizes {
116 let ivs = merged.get(chrom).unwrap_or(&[]);
117 let mut cursor: u64 = 0;
118 for iv in ivs {
119 if iv.start > cursor {
120 out.push(mk(chrom, cursor, iv.start));
121 }
122 cursor = cursor.max(iv.end);
123 if cursor >= *size {
124 break;
125 }
126 }
127 if cursor < *size {
128 out.push(mk(chrom, cursor, *size));
129 }
130 }
131 out.sort();
132 out
133}
134
135#[must_use]
136pub fn coverage_bases(input: &IntervalSet) -> Vec<(String, u64)> {
137 let merged = merge(input);
138 let mut out = Vec::new();
139 for (chrom, ivs) in merged.iter_chroms() {
140 let total: u64 = ivs.iter().map(Interval::len).sum();
141 out.push((chrom.to_string(), total));
142 }
143 out
144}
145
146#[cfg(test)]
147mod tests {
148 use super::*;
149
150 fn iv(chrom: &str, start: u64, end: u64) -> Interval {
151 Interval::new(chrom, start, end).unwrap()
152 }
153
154 fn set(items: impl IntoIterator<Item = Interval>) -> IntervalSet {
155 let mut s: IntervalSet = items.into_iter().collect();
156 s.sort();
157 s
158 }
159
160 #[test]
161 fn merge_collapses_overlapping_and_touching() {
162 let s = set([
163 iv("chr1", 100, 200),
164 iv("chr1", 150, 250),
165 iv("chr1", 250, 300),
166 iv("chr1", 400, 500),
167 ]);
168 let m = merge(&s);
169 let v = m.get("chr1").unwrap();
170 assert_eq!(v.len(), 2);
171 assert_eq!((v[0].start, v[0].end), (100, 300));
172 assert_eq!((v[1].start, v[1].end), (400, 500));
173 }
174
175 #[test]
176 fn merge_independent_per_chrom() {
177 let s = set([iv("chr1", 100, 200), iv("chr2", 100, 200)]);
178 let m = merge(&s);
179 assert_eq!(m.get("chr1").unwrap().len(), 1);
180 assert_eq!(m.get("chr2").unwrap().len(), 1);
181 }
182
183 #[test]
184 fn intersect_clips_to_overlap() {
185 let a = set([iv("chr1", 100, 200), iv("chr1", 300, 400)]);
186 let b = set([iv("chr1", 150, 350)]);
187 let i = intersect(&a, &b);
188 let v = i.get("chr1").unwrap();
189 assert_eq!(v.len(), 2);
190 assert_eq!((v[0].start, v[0].end), (150, 200));
191 assert_eq!((v[1].start, v[1].end), (300, 350));
192 }
193
194 #[test]
195 fn intersect_long_a_then_short_a_dense_b() {
196 let a = set([iv("chr1", 0, 100), iv("chr1", 10, 11)]);
197 let b = set([iv("chr1", 5, 15), iv("chr1", 50, 60)]);
198 let i = intersect(&a, &b);
199 let v = i.get("chr1").unwrap();
200 assert_eq!(
201 v.iter().map(|x| (x.start, x.end)).collect::<Vec<_>>(),
202 vec![(5, 15), (10, 11), (50, 60)]
203 );
204 }
205
206 #[test]
207 fn intersect_different_chroms_yield_nothing() {
208 let a = set([iv("chr1", 100, 200)]);
209 let b = set([iv("chr2", 100, 200)]);
210 assert!(intersect(&a, &b).is_empty());
211 }
212
213 #[test]
214 fn subtract_punches_holes() {
215 let a = set([iv("chr1", 100, 500)]);
216 let b = set([iv("chr1", 200, 300), iv("chr1", 400, 450)]);
217 let s = subtract(&a, &b);
218 let v = s.get("chr1").unwrap();
219 assert_eq!(v.len(), 3);
220 assert_eq!((v[0].start, v[0].end), (100, 200));
221 assert_eq!((v[1].start, v[1].end), (300, 400));
222 assert_eq!((v[2].start, v[2].end), (450, 500));
223 }
224
225 #[test]
226 fn subtract_full_cover_yields_nothing() {
227 let a = set([iv("chr1", 100, 200)]);
228 let b = set([iv("chr1", 50, 250)]);
229 assert!(subtract(&a, &b).is_empty());
230 }
231
232 #[test]
233 fn complement_against_genome_sizes() {
234 let s = set([iv("chr1", 100, 200), iv("chr1", 400, 500)]);
235 let sizes = vec![("chr1".to_string(), 1000)];
236 let c = complement(&s, &sizes);
237 let v = c.get("chr1").unwrap();
238 assert_eq!(v.len(), 3);
239 assert_eq!((v[0].start, v[0].end), (0, 100));
240 assert_eq!((v[1].start, v[1].end), (200, 400));
241 assert_eq!((v[2].start, v[2].end), (500, 1000));
242 }
243
244 #[test]
245 fn complement_chrom_with_no_intervals_is_full_length() {
246 let s = IntervalSet::new();
247 let sizes = vec![("chr1".to_string(), 1000)];
248 let c = complement(&s, &sizes);
249 let v = c.get("chr1").unwrap();
250 assert_eq!(v.len(), 1);
251 assert_eq!((v[0].start, v[0].end), (0, 1000));
252 }
253
254 #[test]
255 fn coverage_bases_dedupes_overlap() {
256 let s = set([iv("chr1", 100, 200), iv("chr1", 150, 250)]);
257 let cov = coverage_bases(&s);
258 assert_eq!(cov, vec![("chr1".to_string(), 150)]);
259 }
260}