1use itertools::Itertools;
2use std::cell::RefCell;
3use std::cmp::{max, min, Ordering};
4use std::collections::HashMap;
5use std::ops::Range;
6use superslice::*;
7
8trait FilterByBools<T> {
9 fn filter_by_bools(&self, keep: &[bool]) -> Vec<T>;
10}
11
12impl<T> FilterByBools<T> for Vec<T>
13where
14 T: Clone,
15{
16 fn filter_by_bools(&self, keep: &[bool]) -> Vec<T> {
17 if self.len() != keep.len() {
18 panic!("v and keep had unequal length");
19 }
20 self.iter()
21 .enumerate()
22 .filter(|(idx, _value)| keep[*idx])
23 .map(|(_idx, value)| value.clone())
24 .collect()
25 }
26}
27
28pub trait Rangable: num::PrimInt {}
31
32impl Rangable for u16 {}
33impl Rangable for u32 {}
34impl Rangable for u64 {}
35impl Rangable for u128 {}
36
37impl Rangable for i16 {}
38impl Rangable for i32 {}
39impl Rangable for i64 {}
40impl Rangable for i128 {}
41
42#[derive(Debug)]
56pub struct IntervalSetGeneric<T: Rangable + std::fmt::Debug> {
57 intervals: Vec<Range<T>>,
58 ids: Vec<Vec<u32>>,
59 root: RefCell<Option<IntervalSetEntry>>,
60}
61
62pub type IntervalSet = IntervalSetGeneric<u32>;
64
65#[derive(Debug)]
75struct IntervalSetEntry {
76 no: i32,
77 children: Vec<IntervalSetEntry>,
78}
79
80impl<T: Rangable + std::fmt::Debug> Clone for IntervalSetGeneric<T> {
81 fn clone(&self) -> IntervalSetGeneric<T> {
82 IntervalSetGeneric {
83 intervals: self.intervals.clone(),
84 ids: self.ids.clone(),
85 root: self.root.clone(),
86 }
87 }
88}
89
90impl Clone for IntervalSetEntry {
91 fn clone(&self) -> IntervalSetEntry {
92 IntervalSetEntry {
93 no: self.no,
94 children: self.children.clone(),
95 }
96 }
97}
98
99trait IntervalCollector<T: Rangable + std::fmt::Debug> {
100 fn collect(&mut self, iset: &IntervalSetGeneric<T>, no: u32);
101}
102
103struct VecIntervalCollector<T: Rangable> {
104 intervals: Vec<Range<T>>,
105 ids: Vec<Vec<u32>>,
106}
107
108impl<T: Rangable + std::fmt::Debug> VecIntervalCollector<T> {
109 fn new() -> VecIntervalCollector<T> {
110 VecIntervalCollector {
111 intervals: Vec::new(),
112 ids: Vec::new(),
113 }
114 }
115}
116
117impl<T: Rangable + std::fmt::Debug> IntervalCollector<T> for VecIntervalCollector<T> {
118 fn collect(&mut self, iset: &IntervalSetGeneric<T>, no: u32) {
119 self.intervals.push(iset.intervals[no as usize].clone());
120 self.ids.push(iset.ids[no as usize].clone());
121 }
122}
123struct TagIntervalCollector {
124 hit: Vec<bool>,
125}
126
127impl TagIntervalCollector {
128 fn new<T: Rangable + std::fmt::Debug>(iset: &IntervalSetGeneric<T>) -> TagIntervalCollector {
129 TagIntervalCollector {
130 hit: vec![false; iset.len()],
131 }
132 }
133}
134
135impl<T: Rangable + std::fmt::Debug> IntervalCollector<T> for TagIntervalCollector {
136 fn collect(&mut self, _iset: &IntervalSetGeneric<T>, no: u32) {
137 self.hit[no as usize] = true;
138 }
139}
140
141#[allow(clippy::needless_return)]
143fn nclist_range_sort<T: Rangable>(a: &Range<T>, b: &Range<T>) -> Ordering {
144 if a.start < b.start {
145 return Ordering::Less;
146 } else if a.start > b.start {
147 return Ordering::Greater;
148 } else if a.end > b.end {
149 return Ordering::Less; } else if a.end < b.end {
151 return Ordering::Greater;
152 } else {
153 return Ordering::Equal;
154 }
155}
156
157#[derive(Debug)]
158pub enum NestedIntervalError {
159 NegativeInterval,
161 IntervalIdMisMatch,
163}
164
165impl<T: Rangable + std::fmt::Debug> IntervalSetGeneric<T> {
166 pub fn new(intervals: &[Range<T>]) -> Result<IntervalSetGeneric<T>, NestedIntervalError> {
170 for r in intervals {
171 if r.start >= r.end {
172 return Err(NestedIntervalError::NegativeInterval);
173 }
174 }
175 let mut iv = intervals.to_vec();
176 iv.sort_unstable_by(nclist_range_sort);
177 let count = iv.len();
178 Ok(IntervalSetGeneric {
179 intervals: iv,
180 ids: (0..count).map(|x| vec![x as u32]).collect(),
181 root: RefCell::new(None),
182 })
183 }
184
185 pub fn new_with_ids(
191 intervals: &[Range<T>],
192 ids: &[u32],
193 ) -> Result<IntervalSetGeneric<T>, NestedIntervalError> {
194 let vec_ids = ids.iter().map(|x| vec![*x]).collect::<Vec<Vec<u32>>>();
195 Self::new_with_ids_multiple(intervals, vec_ids)
196 }
197 pub fn new_with_ids_multiple(
198 intervals: &[Range<T>],
199 ids: Vec<Vec<u32>>,
200 ) -> Result<IntervalSetGeneric<T>, NestedIntervalError> {
201 for r in intervals {
202 if r.start >= r.end {
203 return Err(NestedIntervalError::NegativeInterval);
204 }
205 }
206 if intervals.len() != ids.len() {
207 return Err(NestedIntervalError::IntervalIdMisMatch);
208 }
209 let mut idx: Vec<usize> = (0..intervals.len()).collect();
210 idx.sort_unstable_by(|idx_a, idx_b| {
211 nclist_range_sort(&intervals[*idx_a], &intervals[*idx_b])
212 });
213 let mut out_iv: Vec<Range<T>> = Vec::with_capacity(intervals.len());
214 let mut out_ids: Vec<Vec<u32>> = Vec::with_capacity(intervals.len());
215 for ii in 0..idx.len() {
216 out_iv.push(intervals[idx[ii]].clone());
217 out_ids.push(ids[idx[ii]].clone());
218 }
219 Ok(IntervalSetGeneric {
220 intervals: out_iv,
221 ids: out_ids,
222 root: RefCell::new(None),
223 })
224 }
225
226 fn new_filtered(&self, keep: &[bool]) -> IntervalSetGeneric<T> {
228 IntervalSetGeneric {
229 intervals: self.intervals.filter_by_bools(keep),
230 ids: self.ids.filter_by_bools(keep),
231 root: RefCell::new(None),
232 }
233 }
234
235 fn new_presorted(intervals: Vec<Range<T>>, ids: Vec<Vec<u32>>) -> IntervalSetGeneric<T> {
238 IntervalSetGeneric {
239 intervals,
240 ids,
241 root: RefCell::new(None),
242 }
243 }
244
245 pub fn len(&self) -> usize {
247 self.intervals.len()
248 }
249
250 pub fn is_empty(&self) -> bool {
252 self.len() == 0
253 }
254
255 fn build_tree(
257 &self,
258 parent: &mut IntervalSetEntry,
259 it: &mut std::iter::Peekable<
260 std::iter::Enumerate<std::slice::Iter<'_, std::ops::Range<T>>>,
261 >,
262 ) {
263 loop {
264 match it.peek() {
265 Some((_, next)) => {
266 if (parent.no != -1) && (next.end > self.intervals[parent.no as usize].end) {
267 return;
268 }
269 let (ii, r) = it.next().unwrap();
270 if r.start > r.end {
271 panic!("invalid interval end < start");
273 }
274 let entry = IntervalSetEntry {
275 no: ii as i32,
276 children: Vec::new(),
277 };
278 parent.children.push(entry);
279 self.build_tree(parent.children.last_mut().unwrap(), it);
280 }
281 None => {
282 return;
283 }
284 }
285 }
286 }
287
288 fn ensure_nclist(&self) {
290 let mut root = self.root.borrow_mut();
291 if root.is_none() {
292 let mut new_root = IntervalSetEntry {
293 no: -1,
294 children: Vec::new(),
295 };
296 self.build_tree(
297 &mut new_root,
298 &mut self.intervals.iter().enumerate().peekable(),
299 );
300 *root = Some(new_root);
301 }
302 }
303
304 fn depth_first_search<S: IntervalCollector<T>>(
305 &self,
306 node: &IntervalSetEntry,
307 query: &Range<T>,
308 collector: &mut S,
309 ) {
310 let children = &node.children[..];
311 let first = children
316 .upper_bound_by_key(&query.start, |entry| self.intervals[entry.no as usize].end);
317 if first == children.len() {
318 return;
319 }
320 for next in &children[first..] {
321 let next_iv = &self.intervals[next.no as usize];
322 if !next_iv.overlaps(query) {
323 return;
324 }
325 collector.collect(self, next.no as u32);
326 if !next.children.is_empty() {
327 self.depth_first_search(next, query, collector);
328 }
329 }
330 }
331 pub fn has_overlap(&self, query: &Range<T>) -> Result<bool, NestedIntervalError> {
333 if query.start > query.end {
334 return Err(NestedIntervalError::NegativeInterval);
335 }
336 self.ensure_nclist();
337 let binding = self.root.borrow();
339 let root = binding.as_ref();
340 let children = &root.unwrap().children[..];
341 let first = children
344 .upper_bound_by_key(&query.start, |entry| self.intervals[entry.no as usize].end);
345 if first == children.len() {
346 dbg!("no entry larger");
347 return Ok(false);
349 }
350 let next = &self.intervals[children[first].no as usize];
351 Ok(next.overlaps(query))
353 }
354
355 pub fn iter(
357 &self,
358 ) -> std::iter::Zip<std::slice::Iter<'_, std::ops::Range<T>>, std::slice::Iter<'_, Vec<u32>>>
359 {
360 self.intervals.iter().zip(self.ids.iter())
361 }
362
363 pub fn query_overlapping(&self, query: &Range<T>) -> IntervalSetGeneric<T> {
365 self.ensure_nclist();
366 let mut collector = VecIntervalCollector::new();
367 self.depth_first_search(self.root.borrow().as_ref().unwrap(), query, &mut collector);
368 IntervalSetGeneric::new_presorted(collector.intervals, collector.ids)
369 }
370
371 pub fn any_overlapping(&self) -> bool {
373 for (next, last) in self.intervals.iter().skip(1).zip(self.intervals.iter()) {
374 if last.overlaps(next) {
375 return true;
376 }
377 }
378 false
379 }
380 pub fn overlap_status(&self) -> Vec<bool> {
384 let mut result = vec![false; self.intervals.len()];
385 for (ii, (next, last)) in self
386 .intervals
387 .iter()
388 .skip(1)
389 .zip(self.intervals.iter())
390 .enumerate()
391 {
392 if last.overlaps(next) {
393 result[ii] = true; result[ii + 1] = true;
395 }
396 }
397 result
398 }
399
400 pub fn any_nested(&self) -> bool {
402 self.ensure_nclist();
403 for entry in self.root.borrow().as_ref().unwrap().children.iter() {
404 if !entry.children.is_empty() {
405 return true;
406 }
407 }
408 false
409 }
410
411 pub fn remove_duplicates(&self) -> IntervalSetGeneric<T> {
415 let mut keep = vec![false; self.len()];
416 keep[0] = true;
417 for (ix, (v1, v2)) in self
418 .intervals
419 .iter()
420 .skip(1)
421 .zip(self.intervals.iter())
422 .enumerate()
423 {
424 keep[ix + 1] = v1 != v2;
425 }
426 self.new_filtered(&keep)
427 }
428
429 pub fn remove_empty(&self) -> IntervalSetGeneric<T> {
431 let keep: Vec<bool> = self.intervals.iter().map(|r| r.start != r.end).collect();
432 self.new_filtered(&keep)
433 }
434
435 pub fn merge_hull(&self) -> IntervalSetGeneric<T> {
441 let mut new_intervals: Vec<Range<T>> = Vec::new();
442 let mut new_ids: Vec<Vec<u32>> = Vec::new();
443 let mut it = self.intervals.iter().zip(self.ids.iter()).peekable();
444 while let Some(this_element) = it.next() {
445 let mut this_iv = this_element.0.start..this_element.0.end;
446 let mut this_ids = this_element.1.clone();
447 while let Some(next) = it.peek() {
448 if next.0.start < this_iv.end {
449 if next.0.end > this_iv.end {
450 this_iv.end = next.0.end;
451 }
452 this_ids.extend_from_slice(next.1);
453 it.next(); } else {
455 break;
456 }
457 }
458 new_intervals.push(this_iv);
459 this_ids.sort();
460 new_ids.push(this_ids)
461 }
462 IntervalSetGeneric::new_presorted(new_intervals, new_ids)
463 }
464
465 pub fn merge_connected(&self) -> IntervalSetGeneric<T> {
473 let hull = self.merge_hull();
474 let mut new_intervals: Vec<Range<T>> = Vec::new();
475 let mut new_ids: Vec<Vec<u32>> = Vec::new();
476 let mut it = hull.intervals.iter().zip(hull.ids.iter()).peekable();
477 while let Some(this_element) = it.next() {
478 let mut this_iv = this_element.0.start..this_element.0.end;
479 let mut this_ids = this_element.1.clone();
480 while let Some(next) = it.peek() {
481 if next.0.start == this_iv.end {
482 if next.0.end > this_iv.end {
483 this_iv.end = next.0.end;
484 }
485 this_ids.extend_from_slice(next.1);
486 it.next(); } else {
488 break;
489 }
490 }
491 new_intervals.push(this_iv);
492 this_ids.sort();
493 new_ids.push(this_ids)
494 }
495 IntervalSetGeneric::new_presorted(new_intervals, new_ids)
496 }
497
498 pub fn merge_drop(&self) -> IntervalSetGeneric<T> {
506 let mut keep = vec![true; self.len()];
507 let mut last_stop = T::zero();
508 for ii in 0..self.len() {
509 if self.intervals[ii].start < last_stop {
510 keep[ii] = false;
511 keep[ii - 1] = false;
512 }
513 if self.intervals[ii].end > last_stop {
514 last_stop = self.intervals[ii].end;
515 }
516 }
517 self.new_filtered(&keep)
518 }
519
520 pub fn merge_split(&self) -> IntervalSetGeneric<T> {
532 #[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Debug)]
533 enum SiteKind {
534 End,
535 Start,
536 }
537 #[derive(Debug)]
538 struct Site<T> {
539 pos: T,
540 kind: SiteKind,
541 id: Vec<u32>,
542 }
543 let mut sites: Vec<Site<T>> = Vec::new();
544 for (iv, id) in self.intervals.iter().zip(self.ids.iter()) {
545 sites.push(Site {
546 pos: iv.start,
547 kind: SiteKind::Start,
548 id: id.clone(),
549 });
550 sites.push(Site {
551 pos: iv.end,
552 kind: SiteKind::End,
553 id: id.clone(),
554 });
555 }
556 sites.sort_by_key(|x| (x.pos, x.kind));
557 let mut new_intervals = Vec::new();
558 let mut new_ids: Vec<Vec<u32>> = Vec::new();
559 if !sites.is_empty() {
560 let mut it = sites.iter_mut();
561 let mut last = it.next().unwrap();
562 let mut last_ids: HashMap<u32, u32> = HashMap::new();
563 for id in &last.id {
564 last_ids.insert(*id, 1);
565 }
566 for next in it {
567 {
570 if last.pos != next.pos && !last_ids.is_empty() {
571 new_intervals.push(last.pos..next.pos);
572 let mut ids_here: Vec<u32> = last_ids.keys().cloned().collect();
573 ids_here.sort();
574 new_ids.push(ids_here);
575 }
576 }
577 match next.kind {
578 SiteKind::Start => {
579 for id in &next.id {
580 *last_ids.entry(*id).or_insert(0) += 1;
581 }
582 }
583 SiteKind::End => {
584 for id in &next.id {
585 *last_ids.get_mut(id).unwrap() -= 1;
586 }
587 last_ids.retain(|_k, v| (*v > 0));
588 }
589 }
590 last = next;
591 }
592 }
593 IntervalSetGeneric {
594 intervals: new_intervals,
595 ids: new_ids,
596 root: RefCell::new(None),
597 }
598 }
599
600 pub fn find_closest_start_left(&self, pos: T) -> Option<(Range<T>, Vec<u32>)> {
603 let first = self.intervals.upper_bound_by_key(&pos, |entry| entry.start);
604 if first == 0 {
605 return None;
606 }
607 let prev = first - 1;
608 Some((self.intervals[prev].clone(), self.ids[prev].clone()))
609 }
610
611 pub fn find_closest_start_right(&self, pos: T) -> Option<(Range<T>, Vec<u32>)> {
614 let first = self
615 .intervals
616 .upper_bound_by_key(&pos, |entry| entry.start + T::one());
617 if first == self.len() {
619 return None;
620 }
621 Some((self.intervals[first].clone(), self.ids[first].clone()))
622 }
623
624 pub fn find_closest_start(&self, pos: T) -> Option<(Range<T>, Vec<u32>)> {
629 let left = self.find_closest_start_left(pos);
630 let right = self.find_closest_start_right(pos);
631 match (left, right) {
632 (None, None) => None,
633 (Some(l), None) => Some(l),
634 (None, Some(r)) => Some(r),
635 (Some(l), Some(r)) => {
636 let distance_left = if l.0.start > pos {
639 l.0.start - pos
640 } else {
641 pos - l.0.start
642 };
643 let distance_right = if r.0.start > pos {
644 r.0.start - pos
645 } else {
646 pos - r.0.start
647 };
648
649 if distance_left <= distance_right {
650 Some(l)
651 } else {
652 Some(r)
653 }
654 }
655 }
656 }
657
658 pub fn covered_units(&self) -> T {
660 let merged = self.merge_hull();
661 let mut total = T::zero();
662 for iv in merged.intervals.iter() {
663 total = total + iv.end - iv.start;
664 }
665 total
666 }
667
668 #[allow(clippy::cast_lossless)]
670 pub fn mean_interval_size(&self) -> f64 {
671 let mut total = T::zero();
672 for iv in self.intervals.iter() {
673 total = total + iv.end - iv.start;
674 }
675 let total: f64 = total.to_f64().unwrap();
676 total / self.len() as f64
677 }
678
679 pub fn invert(&self, lower_bound: T, upper_bound: T) -> IntervalSetGeneric<T> {
690 let mut new_intervals: Vec<Range<T>> = Vec::new();
691 let mut new_ids: Vec<Vec<u32>> = Vec::new();
692
693 if self.is_empty() {
694 new_intervals.push(lower_bound..upper_bound);
695 new_ids.push(vec![0]);
696 } else {
697 let lower = min(lower_bound, self.intervals[0].start);
698 let upper = max(upper_bound, self._highest_end().unwrap());
699 let n = self.merge_hull();
700 let mut paired = vec![lower];
701 for iv in n.intervals {
702 paired.push(iv.start);
703 paired.push(iv.end);
704 }
705 paired.push(upper);
706 new_intervals.extend(
707 paired
708 .chunks(2)
709 .filter(|se| se[0] != se[1])
710 .map(|se| se[0]..se[1]),
711 );
712 new_ids.extend((0..new_intervals.len()).map(|x| vec![x as u32]));
713 }
714 IntervalSetGeneric::new_presorted(new_intervals, new_ids)
715 }
716
717 pub fn filter_to_overlapping(
719 &self,
720 other: &IntervalSetGeneric<T>,
721 ) -> IntervalSetGeneric<T> {
722 self.ensure_nclist();
727 let mut collector = TagIntervalCollector::new(self);
728 for q in other.intervals.iter() {
729 self.depth_first_search(self.root.borrow().as_ref().unwrap(), q, &mut collector);
730 }
731 self.new_filtered(&collector.hit)
732 }
733
734 pub fn filter_to_overlapping_and_split(
740 &self,
741 other: &IntervalSetGeneric<T>,
742 ) -> IntervalSetGeneric<T> {
743 let mut out_ivs = Vec::new();
744 let mut out_ids = Vec::new();
745
746 let other = other.merge_connected();
747 for (ii, iv) in self.intervals.iter().enumerate() {
748 let id = &self.ids[ii];
749 let overlapping = other.query_overlapping(iv).merge_connected();
750 for (new_iv, _) in overlapping.iter() {
751 let start = max(iv.start, new_iv.start);
753 let stop = min(iv.end, new_iv.end);
754 out_ivs.push(start..stop);
755 out_ids.push(id.clone());
756 }
757 }
758
759 IntervalSetGeneric::new_with_ids_multiple(&out_ivs, out_ids)
760 .expect("Unexpected failure in rebuilding intervals")
761 }
762
763 pub fn filter_to_non_overlapping(
765 &self,
766 other: &IntervalSetGeneric<T>,
767 ) -> IntervalSetGeneric<T> {
768 let keep: Result<Vec<bool>, NestedIntervalError> = self
773 .intervals
774 .iter()
775 .enumerate()
776 .map(|(_ii, iv)| other.has_overlap(iv))
777 .map_ok(|x| !x)
778 .collect();
779 match keep {
780 Ok(keep) => self.new_filtered(&keep),
781 Err(_) => panic!(
782 "Negative intervals encountered inside IntervalSets - check input sanity code"
783 ),
784 }
785 }
786
787 pub fn filter_to_overlapping_k_others(
789 &self,
790 others: &[&IntervalSetGeneric<T>],
791 min_k: u32,
792 ) -> IntervalSetGeneric<T> {
793 let counts = self._count_overlapping(others);
799 let mut keep = vec![false; self.len()];
800 for (ii, value) in counts.iter().enumerate() {
801 if *value >= min_k {
802 keep[ii] = true;
803 }
804 }
805 self.new_filtered(&keep)
806 }
807 pub fn filter_to_non_overlapping_k_others(
809 &self,
810 others: &[&IntervalSetGeneric<T>],
811 max_k: u32,
812 ) -> IntervalSetGeneric<T> {
813 let counts = self._count_overlapping(others);
818 let mut keep = vec![false; self.len()];
819 for (ii, value) in counts.iter().enumerate() {
820 if *value <= max_k {
821 keep[ii] = true;
822 }
823 }
824 self.new_filtered(&keep)
825 }
826
827 pub fn union(&self, others: &[&IntervalSetGeneric<T>]) -> IntervalSetGeneric<T> {
831 let mut new_intervals: Vec<Range<T>> = Vec::new();
832 new_intervals.extend_from_slice(&self.intervals);
833 for o in others.iter() {
834 new_intervals.extend_from_slice(&o.intervals);
835 }
836 IntervalSetGeneric::new(&new_intervals[..]).unwrap()
837 }
838
839 fn _highest_end(&self) -> Option<T> {
841 match self.root.borrow().as_ref() {
842 Some(root) => root
844 .children
845 .iter()
846 .map(|entry| self.intervals[entry.no as usize].end)
847 .max(),
848 None => self.intervals.iter().map(|range| range.end).max(),
850 }
851 }
852
853 fn _count_overlapping(&self, others: &[&IntervalSetGeneric<T>]) -> Vec<u32> {
854 self.ensure_nclist();
855 let mut counts: Vec<u32> = vec![0; self.len()];
856 for o in others {
857 let mut collector = TagIntervalCollector::new(self);
858 for q in o.intervals.iter() {
859 self.depth_first_search(self.root.borrow().as_ref().unwrap(), q, &mut collector);
860 }
861 for (ii, value) in collector.hit.iter().enumerate() {
862 if *value {
863 counts[ii] += 1;
864 }
865 }
866 }
867 counts
868 }
869}
870
871impl<T: Rangable + std::fmt::Debug> Eq for IntervalSetGeneric<T> {}
872impl<T: Rangable + std::fmt::Debug> PartialEq for IntervalSetGeneric<T> {
873 fn eq(&self, other: &IntervalSetGeneric<T>) -> bool {
874 (self.intervals == other.intervals) && (self.ids == other.ids)
875 }
876}
877
878pub trait RangePlus<T> {
880 fn overlaps(&self, other: &Range<T>) -> bool;
882}
883
884impl<T: Rangable> RangePlus<T> for Range<T> {
885 fn overlaps(&self, other: &Range<T>) -> bool {
886 self.start < other.end && other.start < self.end && other.start < other.end
887 }
888}
889
890#[cfg(test)]
891#[allow(dead_code)]
892#[allow(clippy::single_range_in_vec_init)]
893mod tests {
894 use crate::{IntervalSet, IntervalSetGeneric};
895 use std::ops::Range;
896 #[test]
897 fn test_has_overlap() {
898 let r = vec![0..5, 10..15];
899 let n = IntervalSet::new(&r).unwrap();
900 assert!(n.has_overlap(&(3..4)).unwrap());
901 assert!(n.has_overlap(&(5..20)).unwrap());
902 assert!(!n.has_overlap(&(6..10)).unwrap());
903 assert!(!n.has_overlap(&(100..110)).unwrap());
904 assert!(!n.has_overlap(&(3..3)).unwrap());
905
906 let r2 = vec![0..15, 0..6];
907 let n = IntervalSet::new(&r2).unwrap();
908 assert!(n.has_overlap(&(3..4)).unwrap());
909 assert!(n.has_overlap(&(5..20)).unwrap());
910 assert!(n.has_overlap(&(6..10)).unwrap());
911 assert!(!n.has_overlap(&(20..30)).unwrap());
912
913 let r2 = vec![100..150, 30..40, 200..400];
914 let n = IntervalSet::new(&r2).unwrap();
915 assert!(n.has_overlap(&(101..102)).unwrap());
916 assert!(n.has_overlap(&(149..150)).unwrap());
917 assert!(n.has_overlap(&(39..99)).unwrap());
918 assert!(n.has_overlap(&(29..99)).unwrap());
919 assert!(n.has_overlap(&(19..99)).unwrap());
920 assert!(!n.has_overlap(&(0..5)).unwrap());
921 assert!(!n.has_overlap(&(0..29)).unwrap());
922 assert!(!n.has_overlap(&(0..30)).unwrap());
923 assert!(n.has_overlap(&(0..31)).unwrap());
924 assert!(!n.has_overlap(&(40..41)).unwrap());
925 assert!(!n.has_overlap(&(40..99)).unwrap());
926 assert!(!n.has_overlap(&(40..100)).unwrap());
927 assert!(n.has_overlap(&(40..101)).unwrap());
928 assert!(n.has_overlap(&(399..400)).unwrap());
929 assert!(!n.has_overlap(&(400..4000)).unwrap());
930 }
931
932 #[test]
933 fn test_iter() {
934 let n = IntervalSet::new(&[100..150, 30..40, 200..400, 250..300]).unwrap();
935 let c: Vec<(&Range<u32>, &Vec<u32>)> = n.iter().collect();
936 assert!(!c.is_empty());
937 let c: Vec<Range<u32>> = c
938 .iter()
939 .map(|(interval, _id)| (*interval).clone())
940 .collect();
941 dbg!(&c);
942 assert!(c == vec![30..40, 100..150, 200..400, 250..300]);
943 }
944
945 #[test]
946 fn test_query() {
947 let n = IntervalSet::new(&[100..150, 30..40, 200..400, 250..300]).unwrap();
948 let c = n.query_overlapping(&(0..5));
949 assert!(c.is_empty());
950 let c = n.query_overlapping(&(0..31));
951 assert_eq!(c.intervals, vec![30..40]);
952 let c = n.query_overlapping(&(200..250));
953 assert_eq!(c.intervals, vec![200..400]);
954 let c = n.query_overlapping(&(200..251));
955 assert_eq!(c.intervals, vec![200..400, 250..300]);
956 let c = n.query_overlapping(&(0..1000));
957 dbg!(&c);
958 assert_eq!(c.intervals, vec![30..40, 100..150, 200..400, 250..300]);
959 let c = n.query_overlapping(&(401..1000));
960 assert!(c.is_empty());
961 }
962 #[test]
963 fn test_query_multiple() {
964 let n = IntervalSet::new(&[100..150, 30..40, 200..400, 250..300]).unwrap();
965 let c = n.filter_to_overlapping(&IntervalSet::new(&[0..5, 0..105]).unwrap());
966 assert_eq!(c.intervals, vec![30..40, 100..150]);
967 let c = n.filter_to_overlapping(&IntervalSet::new(&[500..600, 550..700]).unwrap());
968 assert!(c.is_empty());
969 let c = n.filter_to_overlapping(&IntervalSet::new(&[45..230]).unwrap());
970 assert_eq!(c.intervals, vec![100..150, 200..400]);
971 let c = n.filter_to_overlapping(&IntervalSet::new(&[45..101, 101..230]).unwrap());
972 assert_eq!(c.intervals, vec![100..150, 200..400]);
973 }
974
975 #[test]
976 fn test_query_multiple_non_overlapping() {
977 let n = IntervalSet::new(&[100..150, 30..40, 200..400, 250..300]).unwrap();
978 let c = n.filter_to_non_overlapping(&IntervalSet::new(&[0..5, 0..105]).unwrap());
979 assert_eq!(c.intervals, vec![200..400, 250..300]);
980 let c = n.filter_to_non_overlapping(&IntervalSet::new(&[500..600, 550..700]).unwrap());
981 assert_eq!(c.intervals, vec![30..40, 100..150, 200..400, 250..300]);
982 let c = n.filter_to_non_overlapping(&IntervalSet::new(&[0..600]).unwrap());
983 assert!(c.is_empty());
984 let c = n.filter_to_non_overlapping(&IntervalSet::new(&[45..230]).unwrap());
985 assert_eq!(c.intervals, vec![30..40, 250..300]);
986 let c = n.filter_to_non_overlapping(&IntervalSet::new(&[45..101, 101..230]).unwrap());
987 assert_eq!(c.intervals, vec![30..40, 250..300]);
988 }
989
990 #[test]
991 fn test_any_overlapping() {
992 let n = IntervalSet::new(&[100..150]).unwrap();
993 assert!(!n.any_overlapping());
994 let n = IntervalSet::new(&[100..150, 200..300]).unwrap();
995 assert!(!n.any_overlapping());
996 let n = IntervalSet::new(&[100..150, 150..300]).unwrap();
997 assert!(!n.any_overlapping());
998 let n = IntervalSet::new(&[100..151, 150..300]).unwrap();
999 assert!(n.any_overlapping());
1000 let n = IntervalSet::new(&[100..151, 105..110]).unwrap();
1001 assert!(n.any_overlapping());
1002 let n = IntervalSet::new(&[100..151, 105..110, 0..1000]).unwrap();
1003 assert!(n.any_overlapping());
1004 let n = IntervalSet::new(&[100..150, 150..210, 0..1000]).unwrap();
1005 assert!(n.any_overlapping());
1006 let n = IntervalSet::new(&[100..150, 150..210, 0..130]).unwrap();
1007 assert!(n.any_overlapping());
1008 let n = IntervalSet::new(&[100..150, 150..210, 150..250]).unwrap();
1009 assert!(n.any_overlapping());
1010 let n = IntervalSet::new(&[100..150, 150..210, 149..250]).unwrap();
1011 assert!(n.any_overlapping());
1012 let n = IntervalSet::new(&[100..150, 150..210, 209..250]).unwrap();
1013 assert!(n.any_overlapping());
1014 }
1015
1016 #[test]
1017 fn test_any_nested() {
1018 assert!(!IntervalSet::new(&[]).unwrap().any_nested());
1019 assert!(!IntervalSet::new(&[100..150]).unwrap().any_nested());
1020 assert!(!IntervalSet::new(&[100..150, 150..300])
1021 .unwrap()
1022 .any_nested());
1023 assert!(!IntervalSet::new(&[100..151, 150..300])
1024 .unwrap()
1025 .any_nested());
1026 assert!(IntervalSet::new(&[100..151, 150..300, 100..130])
1027 .unwrap()
1028 .any_nested());
1029 assert!(IntervalSet::new(&[100..151, 150..300, 0..1000])
1030 .unwrap()
1031 .any_nested());
1032 }
1033
1034 #[test]
1035 fn test_remove_duplicates() {
1036 let n = IntervalSet::new(&[100..150]).unwrap().remove_duplicates();
1037 assert!(!n.any_overlapping());
1038 assert_eq!(n.len(), 1);
1039
1040 let n = IntervalSet::new(&[30..40, 30..40, 100..150])
1041 .unwrap()
1042 .remove_duplicates();
1043 assert!(!n.any_overlapping());
1044 assert_eq!(n.len(), 2);
1045 let n = IntervalSet::new(&[30..40, 30..40, 35..150])
1046 .unwrap()
1047 .remove_duplicates();
1048 assert_eq!(n.len(), 2);
1049 let n = IntervalSet::new_with_ids(
1050 &[30..40, 30..40, 35..150, 35..150, 36..38],
1051 &[55, 56, 57, 58, 59],
1052 )
1053 .unwrap()
1054 .remove_duplicates();
1055 assert_eq!(n.len(), 3);
1056 dbg!(&n);
1057 assert_eq!(n.ids, vec![vec![55], vec![57], vec![59]]);
1058 }
1059
1060 #[test]
1061 fn test_merge_hull() {
1062 let n = IntervalSet::new(&[100..150, 120..180, 110..115, 200..201])
1063 .unwrap()
1064 .merge_hull();
1065 assert_eq!(n.intervals, vec![100..180, 200..201]);
1066 assert_eq!(n.ids, vec![vec![0, 1, 2], vec![3]]);
1067 assert!(!n.any_overlapping());
1068
1069 let n = IntervalSet::new(&[100..150]).unwrap().merge_hull();
1070 assert_eq!(n.len(), 1);
1071 assert!(!n.any_overlapping());
1072
1073 let n = IntervalSet::new(&[100..150, 120..180])
1074 .unwrap()
1075 .merge_hull();
1076 assert_eq!(n.len(), 1);
1077 assert!(!n.any_overlapping());
1078 assert_eq!(n.intervals, vec![100..180]);
1079 assert_eq!(n.ids, vec![vec![0, 1]]);
1080
1081 let n = IntervalSet::new(&[100..150, 120..180, 110..115])
1082 .unwrap()
1083 .merge_hull();
1084 assert!(n.len() == 1);
1085 assert!(!n.any_overlapping());
1086 assert_eq!(n.intervals, vec![100..180]);
1087 assert_eq!(n.ids, vec![vec![0, 1, 2]]);
1088
1089 let n =
1090 IntervalSet::new_with_ids(&[300..400, 400..450, 450..500, 510..520], &[20, 10, 30, 40])
1091 .unwrap();
1092 assert_eq!(n.intervals, vec![300..400, 400..450, 450..500, 510..520]);
1093 }
1094
1095 #[test]
1096 fn test_merge_drop() {
1097 let n = IntervalSet::new(&[]).unwrap().merge_drop();
1098 assert_eq!(n.len(), 0);
1099 assert!(!n.any_overlapping());
1100
1101 let n = IntervalSet::new(&[100..150]).unwrap().merge_drop();
1102 assert_eq!(n.len(), 1);
1103 assert!(!n.any_overlapping());
1104
1105 let n = IntervalSet::new(&[100..150, 120..180])
1106 .unwrap()
1107 .merge_drop();
1108 assert_eq!(n.len(), 0);
1109 assert!(!n.any_overlapping());
1110 assert_eq!(n.intervals, vec![]);
1111 assert_eq!(n.ids, Vec::<Vec<u32>>::new());
1112
1113 let n = IntervalSet::new(&[100..150, 120..180, 200..250])
1114 .unwrap()
1115 .merge_drop();
1116 assert_eq!(n.len(), 1);
1117 assert!(!n.any_overlapping());
1118 assert_eq!(n.intervals, vec![200..250]);
1119 assert_eq!(n.ids, vec![vec![2]]);
1120
1121 let n = IntervalSet::new(&[100..150, 120..180, 200..250, 106..110])
1122 .unwrap()
1123 .merge_drop();
1124 assert_eq!(n.len(), 1);
1125 assert!(!n.any_overlapping());
1126 assert_eq!(n.intervals, vec![200..250]);
1127 assert_eq!(n.ids, vec![vec![3]]);
1128
1129 let n = IntervalSet::new(&[100..150, 120..180, 200..250, 106..110, 80..105])
1130 .unwrap()
1131 .merge_drop();
1132 assert_eq!(n.len(), 1);
1133 assert!(!n.any_overlapping());
1134 assert_eq!(n.intervals, vec![200..250]);
1135 assert_eq!(n.ids, vec![vec![4]]);
1136
1137 let n = IntervalSet::new(&[100..150, 120..180, 200..250, 106..110, 80..105, 30..40])
1138 .unwrap()
1139 .merge_drop();
1140 assert_eq!(n.len(), 2);
1141 assert!(!n.any_overlapping());
1142 assert_eq!(n.intervals, vec![30..40, 200..250]);
1143 assert_eq!(n.ids, vec![vec![0], vec![5]]);
1144
1145 let n = IntervalSet::new(&[
1146 100..150,
1147 120..180,
1148 200..250,
1149 106..110,
1150 80..105,
1151 30..40,
1152 400..405,
1153 ])
1154 .unwrap()
1155 .merge_drop();
1156 assert_eq!(n.len(), 3);
1157 assert!(!n.any_overlapping());
1158 assert_eq!(n.intervals, vec![30..40, 200..250, 400..405]);
1159 assert_eq!(n.ids, vec![vec![0], vec![5], vec![6]]);
1160 let n = IntervalSet::new(&[0..20, 10..15, 20..35, 30..40, 40..50])
1161 .unwrap()
1162 .merge_drop();
1163 assert_eq!(n.intervals, vec![40..50]);
1164 }
1165
1166 #[test]
1167 fn test_find_closest_start_left() {
1168 let n = IntervalSet::new(&[
1169 30..40,
1170 80..105,
1171 100..150,
1172 106..110,
1173 106..120,
1174 107..125,
1175 120..180,
1176 200..250,
1177 400..405,
1178 ])
1179 .unwrap();
1180 assert!(n.find_closest_start_left(29).is_none());
1182 assert_eq!(n.find_closest_start_left(100).unwrap(), (100..150, vec![2]));
1183 assert_eq!(n.find_closest_start_left(105).unwrap(), (100..150, vec![2]));
1184 assert_eq!(n.find_closest_start_left(106).unwrap(), (106..110, vec![4]));
1185 assert_eq!(n.find_closest_start_left(109).unwrap(), (107..125, vec![5]));
1186 assert_eq!(n.find_closest_start_left(110).unwrap(), (107..125, vec![5]));
1187 assert_eq!(n.find_closest_start_left(111).unwrap(), (107..125, vec![5]));
1188 assert_eq!(n.find_closest_start_left(120).unwrap(), (120..180, vec![6]));
1189 assert_eq!(n.find_closest_start_left(121).unwrap(), (120..180, vec![6]));
1190 assert_eq!(n.find_closest_start_left(125).unwrap(), (120..180, vec![6]));
1191 assert_eq!(n.find_closest_start_left(127).unwrap(), (120..180, vec![6]));
1192 assert_eq!(
1193 n.find_closest_start_left(121000).unwrap(),
1194 (400..405, vec![8])
1195 );
1196 let n = IntervalSet::new(&[]).unwrap();
1197 assert!(n.find_closest_start_left(29).is_none());
1198 }
1199
1200 #[test]
1201 fn test_find_closest_start_right() {
1202 let n = IntervalSet::new(&[
1203 30..40,
1204 80..105,
1205 100..150,
1206 106..110,
1207 106..120,
1208 107..125,
1209 120..180,
1210 200..250,
1211 400..405,
1212 ])
1213 .unwrap();
1214 assert_eq!(n.find_closest_start_right(10).unwrap(), (30..40, vec![0]));
1216 assert_eq!(n.find_closest_start_right(29).unwrap(), (30..40, vec![0]));
1217 assert_eq!(n.find_closest_start_right(30).unwrap(), (30..40, vec![0]));
1218 assert_eq!(n.find_closest_start_right(31).unwrap(), (80..105, vec![1]));
1219 assert_eq!(n.find_closest_start_right(99).unwrap(), (100..150, vec![2]));
1220 assert_eq!(
1221 n.find_closest_start_right(100).unwrap(),
1222 (100..150, vec![2])
1223 );
1224 assert_eq!(
1225 n.find_closest_start_right(101).unwrap(),
1226 (106..120, vec![3])
1227 );
1228 assert_eq!(
1229 n.find_closest_start_right(107).unwrap(),
1230 (107..125, vec![5])
1231 );
1232 assert_eq!(
1233 n.find_closest_start_right(110).unwrap(),
1234 (120..180, vec![6])
1235 );
1236 assert_eq!(
1237 n.find_closest_start_right(111).unwrap(),
1238 (120..180, vec![6])
1239 );
1240 assert_eq!(
1241 n.find_closest_start_right(120).unwrap(),
1242 (120..180, vec![6])
1243 );
1244 assert_eq!(
1245 n.find_closest_start_right(121).unwrap(),
1246 (200..250, vec![7])
1247 );
1248 assert_eq!(
1249 n.find_closest_start_right(125).unwrap(),
1250 (200..250, vec![7])
1251 );
1252 assert_eq!(
1253 n.find_closest_start_right(127).unwrap(),
1254 (200..250, vec![7])
1255 );
1256 assert!(n.find_closest_start_right(121000).is_none());
1257 let n = IntervalSet::new(&[]).unwrap();
1258 assert!(n.find_closest_start_right(29).is_none());
1259 }
1260
1261 #[test]
1262 fn test_find_closest_start() {
1263 let n = IntervalSet::new(&[]).unwrap();
1264 assert!(n.find_closest_start(100).is_none());
1265 let n = IntervalSet::new(&[100..110, 200..300]).unwrap();
1266 assert_eq!(n.find_closest_start(0).unwrap(), (100..110, vec![0]));
1267 assert_eq!(n.find_closest_start(100).unwrap(), (100..110, vec![0]));
1268 assert_eq!(n.find_closest_start(149).unwrap(), (100..110, vec![0]));
1269 assert_eq!(n.find_closest_start(150).unwrap(), (100..110, vec![0]));
1270 assert_eq!(n.find_closest_start(151).unwrap(), (200..300, vec![1]));
1271 assert_eq!(n.find_closest_start(251).unwrap(), (200..300, vec![1]));
1272 assert_eq!(n.find_closest_start(351).unwrap(), (200..300, vec![1]));
1273
1274 let n = IntervalSet::new(&[10..11, 1000..1110]).unwrap();
1275 assert_eq!(n.find_closest_start(5).unwrap(), (10..11, vec![0]));
1276 let n = IntervalSet::new(&[
1277 566564..667063,
1278 569592..570304,
1279 713866..714288,
1280 935162..937142,
1281 1051311..1052403,
1282 1279151..1281233,
1283 1282803..1283631,
1284 1310387..1311060,
1285 1337193..1337881,
1286 1447089..1447626,
1287 ])
1288 .unwrap();
1289
1290 assert_eq!(
1291 n.find_closest_start(570000).unwrap(),
1292 (569592..570304, vec![1])
1293 );
1294 }
1295
1296 #[test]
1297 fn test_covered_units() {
1298 let n = IntervalSet::new(&[]).unwrap();
1299 assert_eq!(n.covered_units(), 0);
1300 let n = IntervalSet::new(&[10..100]).unwrap();
1301 assert_eq!(n.covered_units(), 90);
1302 let n = IntervalSet::new(&[10..100, 200..300]).unwrap();
1303 assert_eq!(n.covered_units(), 90 + 100);
1304 let n = IntervalSet::new(&[10..100, 200..300, 15..99]).unwrap();
1305 assert_eq!(n.covered_units(), 90 + 100);
1306 let n = IntervalSet::new(&[10..100, 200..300, 15..99, 15..105]).unwrap();
1307 assert_eq!(n.covered_units(), 90 + 100 + 5);
1308 }
1309
1310 #[test]
1311 fn test_mean_interval_size() {
1312 let n = IntervalSet::new(&[]).unwrap();
1313 assert!(n.mean_interval_size().is_nan());
1314 let n = IntervalSet::new(&[10..100]).unwrap();
1315 assert_eq!(n.mean_interval_size(), 90.);
1316 let n = IntervalSet::new(&[10..100, 200..300]).unwrap();
1317 assert_eq!(n.mean_interval_size(), (90 + 100) as f64 / 2.0);
1318 let n = IntervalSet::new(&[10..100, 200..300, 15..99]).unwrap();
1319 assert_eq!(n.mean_interval_size(), (90 + 100 + (99 - 15)) as f64 / 3.0);
1320 let n = IntervalSet::new(&[10..100, 200..300, 15..99, 15..105]).unwrap();
1321 assert_eq!(
1322 n.mean_interval_size(),
1323 (((100 - 10) + (300 - 200) + (99 - 15) + (105 - 15)) as f64 / 4.0)
1324 );
1325 }
1326
1327 #[test]
1328 fn test_invert() {
1329 let n = IntervalSet::new(&[]).unwrap().invert(0, 100);
1330 assert_eq!(n.intervals, vec![0..100,]);
1331 assert_eq!(n.ids, vec![vec![0]]);
1332 let n = IntervalSet::new(&[30..40]).unwrap().invert(0, 100);
1333 assert_eq!(n.intervals, vec![0..30, 40..100,]);
1334 assert_eq!(n.ids, vec![vec![0], vec![1]]);
1335 let n = IntervalSet::new(&[30..40, 35..38]).unwrap().invert(0, 100);
1336 assert_eq!(n.intervals, vec![0..30, 40..100,]);
1337 assert_eq!(n.ids, vec![vec![0], vec![1]]);
1338 let n = IntervalSet::new(&[30..40, 35..38, 35..50])
1339 .unwrap()
1340 .invert(0, 100);
1341 assert_eq!(n.intervals, vec![0..30, 50..100,]);
1342 assert_eq!(n.ids, vec![vec![0], vec![1]]);
1343 let n = IntervalSet::new(&[30..40, 35..38, 35..50])
1344 .unwrap()
1345 .invert(40, 100);
1346 assert_eq!(n.intervals, vec![50..100,]);
1347 assert_eq!(n.ids, vec![vec![0]]);
1348 let n = IntervalSet::new(&[30..40, 35..38, 35..50, 55..60])
1349 .unwrap()
1350 .invert(40, 40);
1351 assert_eq!(n.intervals, vec![50..55]);
1352 assert_eq!(n.ids, vec![vec![0]]);
1353 let n = IntervalSet::new(&[30..40, 35..38, 35..50])
1354 .unwrap()
1355 .invert(40, 40);
1356
1357 assert!(n.intervals.is_empty());
1358 assert!(n.ids.is_empty());
1359
1360 let n = IntervalSet::new(&[10..20, 35..38, 40..50]).unwrap();
1362 let ni = n.invert(0, 50);
1363 let nii = ni.invert(0, 50);
1364 assert_eq!(n.intervals, nii.intervals);
1365
1366 let n = IntervalSet::new(&[10..36, 35..38, 40..50]).unwrap();
1367 let ni = n.invert(0, 100);
1368 let n2 = n.union(&[&ni]).merge_connected();
1369 assert_eq!(n2.intervals, vec![0..100]);
1370
1371 let n = IntervalSet::new(&[]).unwrap();
1372 let ni = n.invert(0, 100);
1373 assert_eq!(ni.intervals, vec![0..100]);
1374 }
1375
1376 #[test]
1377 fn test_union() {
1378 let n = IntervalSet::new(&[])
1379 .unwrap()
1380 .union(&[&IntervalSet::new(&[0..100]).unwrap()]);
1381 assert_eq!(n.intervals, vec![0..100]);
1382
1383 let n = IntervalSet::new(&[0..10])
1384 .unwrap()
1385 .union(&[&IntervalSet::new(&[0..100]).unwrap()]);
1386 assert_eq!(n.intervals, vec![0..100, 0..10]);
1387
1388 let n = IntervalSet::new(&[0..10])
1389 .unwrap()
1390 .union(&[&IntervalSet::new(&[0..100, 200..300]).unwrap()]);
1391 assert_eq!(n.intervals, vec![0..100, 0..10, 200..300]);
1392 assert_eq!(n.ids, vec![vec![0], vec![1], vec![2]]);
1393
1394 let n = IntervalSet::new(&[0..10])
1395 .unwrap()
1396 .union(&[&IntervalSet::new(&[]).unwrap()]);
1397 assert_eq!(n.intervals, vec![0..10]);
1398 let n = IntervalSet::new(&[0..10]).unwrap().union(&[
1399 &IntervalSet::new(&[0..100]).unwrap(),
1400 &IntervalSet::new(&[200..300]).unwrap(),
1401 ]);
1402 assert_eq!(n.intervals, vec![0..100, 0..10, 200..300]);
1403 assert_eq!(n.ids, vec![vec![0], vec![1], vec![2]]);
1404 }
1405
1406 #[test]
1407 fn test_substract() {
1408 let n = IntervalSet::new(&[])
1409 .unwrap()
1410 .filter_to_non_overlapping(&IntervalSet::new(&[0..100]).unwrap());
1411 assert!(n.intervals.is_empty());
1412
1413 let n = IntervalSet::new(&[0..10])
1414 .unwrap()
1415 .filter_to_non_overlapping(&IntervalSet::new(&[0..100]).unwrap());
1416 assert!(n.intervals.is_empty());
1417
1418 let n = IntervalSet::new(&[0..10, 100..150])
1419 .unwrap()
1420 .filter_to_non_overlapping(&IntervalSet::new(&[0..100]).unwrap());
1421 assert_eq!(n.intervals, vec![100..150]);
1422
1423 let n = IntervalSet::new(&[0..10, 100..150, 150..300])
1424 .unwrap()
1425 .filter_to_non_overlapping(&IntervalSet::new(&[55..101]).unwrap());
1426 assert_eq!(n.intervals, vec![0..10, 150..300]);
1427 assert_eq!(n.ids, vec![vec![0], vec![2]]);
1428
1429 let n = IntervalSet::new(&[0..10, 5..6, 100..150, 150..300])
1430 .unwrap()
1431 .filter_to_non_overlapping(&IntervalSet::new(&[55..101]).unwrap());
1432 assert_eq!(n.intervals, vec![0..10, 5..6, 150..300]);
1433 assert_eq!(n.ids, vec![vec![0], vec![1], vec![3]]);
1434 }
1435
1436 #[test]
1437 fn test_filter_overlapping_multiples() {
1438 let n = IntervalSet::new(&[100..150, 30..40, 200..400, 250..300]).unwrap();
1439 let c = n.filter_to_overlapping_k_others(&[&IntervalSet::new(&[0..5, 0..105]).unwrap()], 1);
1440 assert_eq!(c.intervals, vec![30..40, 100..150]);
1441 let c = n.filter_to_overlapping_k_others(&[&IntervalSet::new(&[0..5, 0..105]).unwrap()], 0);
1442 assert_eq!(c, n);
1443 let c = n.filter_to_overlapping_k_others(&[&IntervalSet::new(&[0..5, 0..105]).unwrap()], 2);
1444 assert!(c.is_empty());
1445
1446 let c = n.filter_to_overlapping_k_others(
1447 &[
1448 &IntervalSet::new(&[0..35]).unwrap(),
1449 &IntervalSet::new(&[0..160]).unwrap(),
1450 ],
1451 2,
1452 );
1453 assert_eq!(c.intervals, vec![30..40,]);
1454 let c = n.filter_to_overlapping_k_others(
1455 &[
1456 &IntervalSet::new(&[0..35]).unwrap(),
1457 &IntervalSet::new(&[0..160]).unwrap(),
1458 ],
1459 1,
1460 );
1461 assert_eq!(c.intervals, vec![30..40, 100..150]);
1462 }
1463
1464 #[test]
1465 fn test_filter_non_overlapping_multiples() {
1466 let n = IntervalSet::new(&[100..150, 30..40, 200..400, 250..300]).unwrap();
1467 let c =
1468 n.filter_to_non_overlapping_k_others(&[&IntervalSet::new(&[0..5, 0..105]).unwrap()], 1);
1469 assert_eq!(c.intervals, vec![30..40, 100..150, 200..400, 250..300]);
1470 let c =
1471 n.filter_to_non_overlapping_k_others(&[&IntervalSet::new(&[0..5, 0..105]).unwrap()], 0);
1472 assert_eq!(c.intervals, vec![200..400, 250..300]);
1473 let c =
1474 n.filter_to_non_overlapping_k_others(&[&IntervalSet::new(&[0..5, 0..105]).unwrap()], 2);
1475 assert_eq!(c.intervals, vec![30..40, 100..150, 200..400, 250..300]);
1476
1477 let c = n.filter_to_non_overlapping_k_others(
1478 &[
1479 &IntervalSet::new(&[0..35]).unwrap(),
1480 &IntervalSet::new(&[0..160]).unwrap(),
1481 ],
1482 2,
1483 );
1484 assert_eq!(c.intervals, vec![30..40, 100..150, 200..400, 250..300]);
1485 let c = n.filter_to_non_overlapping_k_others(
1486 &[
1487 &IntervalSet::new(&[0..35]).unwrap(),
1488 &IntervalSet::new(&[0..160]).unwrap(),
1489 ],
1490 1,
1491 );
1492 assert_eq!(c.intervals, vec![100..150, 200..400, 250..300]);
1493 }
1494
1495 #[test]
1496 fn test_split() {
1497 let n = IntervalSet::new(&[0..100, 20..30]).unwrap();
1498 let c = n.merge_split();
1499 assert_eq!(c.intervals, [0..20, 20..30, 30..100]);
1500 assert_eq!(c.ids, vec![vec![0], vec![0, 1,], vec![0]]);
1501
1502 let n = IntervalSet::new(&[0..100, 0..90, 70..95, 110..150]).unwrap();
1503 let c = n.merge_split();
1504 assert_eq!(c.intervals, [0..70, 70..90, 90..95, 95..100, 110..150]);
1505 assert_eq!(
1506 c.ids,
1507 vec![vec![0, 1], vec![0, 1, 2], vec![0, 2], vec![0], vec![3]]
1508 );
1509 let n =
1510 IntervalSet::new_with_ids(&[0..100, 0..90, 70..95, 110..150], &[100, 200, 300, 400])
1511 .unwrap();
1512 let c = n.merge_split();
1513 assert_eq!(c.intervals, [0..70, 70..90, 90..95, 95..100, 110..150]);
1514 assert_eq!(
1515 c.ids,
1516 vec![
1517 vec![100, 200],
1518 vec![100, 200, 300],
1519 vec![100, 300],
1520 vec![100],
1521 vec![400]
1522 ]
1523 );
1524 let d = c.merge_split();
1525 assert_eq!(c, d);
1526
1527 let n = IntervalSet::new_with_ids(&[0..100, 5..10, 15..20], &[100, 200, 300]).unwrap();
1528 let c = n.merge_split();
1529 assert_eq!(c.intervals, [0..5, 5..10, 10..15, 15..20, 20..100]);
1530 assert_eq!(
1531 c.ids,
1532 vec![
1533 vec![100],
1534 vec![100, 200],
1535 vec![100],
1536 vec![100, 300],
1537 vec![100]
1538 ]
1539 );
1540
1541 let n = IntervalSet::new_with_ids(&[0..100, 5..50, 10..15, 25..75], &[100, 200, 300, 400])
1542 .unwrap();
1543 let c = n.merge_split();
1544 assert_eq!(
1545 c.intervals,
1546 [0..5, 5..10, 10..15, 15..25, 25..50, 50..75, 75..100]
1547 );
1548 assert_eq!(
1549 c.ids,
1550 vec![
1551 vec![100], vec![100, 200], vec![100, 200, 300], vec![100, 200], vec![100, 200, 400], vec![100, 400], vec![100] ]
1559 );
1560 }
1561
1562 #[test]
1563 fn test_example() {
1564 let intervals = vec![0..20, 15..30, 50..100];
1565 let interval_set = IntervalSet::new(&intervals).unwrap();
1566 assert_eq!(interval_set.ids, vec![vec![0], vec![1], vec![2]]); let hits = interval_set.query_overlapping(&(10..16));
1568 assert_eq!(hits.intervals, [0..20, 15..30]);
1569 let merged = hits.merge_hull();
1570 assert_eq!(merged.intervals, [0..30]);
1571 assert_eq!(merged.ids, vec![vec![0, 1]]);
1572 }
1573
1574 #[test]
1575 fn test_new_with_ids_sorting() {
1576 let n = IntervalSet::new_with_ids(&[300..400, 30..40], &[20, 10]).unwrap();
1577 assert_eq!(n.intervals, [30..40, 300..400]);
1578 assert_eq!(n.ids, vec![vec![10], vec![20]]);
1579 }
1580
1581 #[test]
1582 fn test_merge_connected() {
1583 let n = IntervalSet::new_with_ids(&[300..400, 400..450, 450..500], &[20, 10, 30]).unwrap();
1584 assert_eq!(n.merge_hull().intervals, vec![300..400, 400..450, 450..500]);
1585 let n = n.merge_connected();
1586 assert_eq!(n.intervals, [300..500]);
1587 assert_eq!(n.ids, vec![vec![10, 20, 30],]);
1588
1589 let n = IntervalSet::new_with_ids(&[300..400, 400..450, 451..500], &[20, 10, 30])
1590 .unwrap()
1591 .merge_connected();
1592 assert_eq!(n.intervals, [300..450, 451..500]);
1593 assert_eq!(n.ids, vec![vec![10, 20], vec![30]]);
1594 let n =
1595 IntervalSet::new_with_ids(&[300..400, 400..450, 451..500, 350..355], &[20, 10, 30, 40])
1596 .unwrap()
1597 .merge_connected();
1598 assert_eq!(n.intervals, [300..450, 451..500]);
1599 assert_eq!(n.ids, vec![vec![10, 20, 40], vec![30]]);
1600 let n =
1601 IntervalSet::new_with_ids(&[300..400, 400..450, 450..500, 510..520], &[20, 10, 30, 40])
1602 .unwrap()
1603 .merge_connected();
1604 assert_eq!(n.intervals, vec![300..500, 510..520]);
1605 }
1606
1607 #[test]
1608 fn test_clone() {
1609 let n = IntervalSet::new_with_ids(&[300..400, 400..450, 450..500], &[20, 10, 30]).unwrap();
1610 let n2 = n.clone();
1611 assert_eq!(n.intervals, n2.intervals);
1612 assert_eq!(n.ids, n2.ids);
1613 assert!(n2.root.borrow().is_none());
1614 n.has_overlap(&(0..1)).unwrap();
1615 assert!(n2.root.borrow().is_none());
1616 let n2 = n.clone();
1617 assert!(n2.root.borrow().is_some());
1618 }
1619
1620 #[test]
1621 fn test_split_repeated_ids() {
1622 let intervals = vec![
1623 10736170..10736283,
1624 10939387..10939423,
1625 10940596..10940707,
1626 10941690..10941780,
1627 10944966..10945053,
1628 10947303..10947418,
1629 10949211..10949269,
1630 10950048..10950174,
1631 10959066..10959136,
1632 10961282..10961338,
1633 10939423..10940596,
1634 10940707..10941690,
1635 10941780..10944966,
1636 10945053..10947303,
1637 10947418..10949211,
1638 10949269..10950048,
1639 10950174..10959066,
1640 10959136..10961282,
1641 11066417..11066515,
1642 11067984..11068174,
1643 11066515..11067984,
1644 11124336..11124379,
1645 11124507..11125705,
1646 11124379..11124507,
1647 11249808..11249959,
1648 12602465..12602527,
1649 12604669..12604739,
1650 12615408..12615534,
1651 12616313..12616371,
1652 12618170..12618289,
1653 12620837..12620938,
1654 12624241..12624331,
1655 12625316..12625428,
1656 12626606..12626642,
1657 12602527..12604669,
1658 12604739..12615408,
1659 12615534..12616313,
1660 12616371..12618170,
1661 12618289..12620837,
1662 12620938..12624241,
1663 12624331..12625316,
1664 12625428..12626606,
1665 15273854..15273961,
1666 15282556..15288670,
1667 15290717..15290836,
1668 15290994..15291024,
1669 15295331..15295410,
1670 15295729..15295832,
1671 15297156..15297196,
1672 15290836..15290994,
1673 15291024..15295331,
1674 15295410..15295729,
1675 15295832..15297156,
1676 15298377..15304556,
1677 15326036..15327756,
1678 15342359..15342426,
1679 15342944..15343065,
1680 15327756..15342359,
1681 15342426..15342944,
1682 15349466..15350043,
1683 15489989..15490131,
1684 15490871..15490947,
1685 15492485..15492564,
1686 15490131..15490871,
1687 15490947..15492485,
1688 15489989..15490131,
1689 15490871..15490947,
1690 15492485..15492564,
1691 15541435..15541607,
1692 15558445..15558626,
1693 15575611..15575786,
1694 15577966..15578006,
1695 15541607..15558445,
1696 15558626..15575611,
1697 15575786..15577966,
1698 15550903..15552887,
1699 15553210..15553586,
1700 15553690..15553838,
1701 15552887..15553210,
1702 15553586..15553690,
1703 15557576..15558591,
1704 15560155..15560264,
1705 15560524..15560694,
1706 15558591..15560155,
1707 15560264..15560524,
1708 15562016..15564276,
1709 15572088..15573265,
1710 15588360..15588478,
1711 15600907..15602514,
1712 15604051..15604133,
1713 15604841..15604882,
1714 15602514..15604051,
1715 15604133..15604841,
1716 15611758..15613096,
1717 15615401..15615578,
1718 15622600..15622712,
1719 15623986..15624071,
1720 15624538..15624674,
1721 15624955..15625094,
1722 ];
1723 let ids = vec![
1724 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10, 10, 5, 5, 9, 5, 5, 9, 6, 5,
1725 5, 5, 5, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9, 9, 9, 9, 5, 6, 5, 5, 5, 5, 5, 9, 9, 9, 9, 6, 6,
1726 6, 6, 10, 10, 5, 5, 5, 5, 9, 9, 5, 5, 5, 5, 5, 5, 5, 9, 9, 9, 6, 6, 6, 10, 10, 6, 6, 6,
1727 10, 10, 6, 6, 5, 6, 6, 6, 10, 10, 6, 6, 6, 6, 6, 6,
1728 ];
1729
1730 let n = IntervalSet::new_with_ids(&intervals, &ids).unwrap();
1731 n.merge_split();
1732 }
1733
1734 #[test]
1735 fn test_merge_split_non_remains_overlapping() {
1736 let intervals = vec![
1737 15489989..15490131,
1738 15490871..15490947,
1739 15492485..15492564,
1740 15490131..15490871,
1741 15490947..15492485,
1742 15489989..15490131,
1743 15490871..15490947,
1744 15492485..15492564,
1745 15541435..15541607,
1746 15558445..15558626,
1747 ];
1748 let ids = vec![5, 5, 5, 9, 9, 5, 5, 5, 5, 5];
1749 let n = IntervalSet::new_with_ids(&intervals, &ids)
1750 .unwrap()
1751 .merge_split();
1752 assert!(!n.any_overlapping());
1753 }
1754
1755 #[test]
1756 fn test_overlap_status() {
1757 let intervals = vec![
1758 0..100,
1759 50..150,
1760 150..200,
1761 201..230,
1762 230..250,
1763 249..500,
1764 550..600,
1765 ];
1766 let n = IntervalSet::new(&intervals).unwrap();
1767 assert_eq!(
1768 n.overlap_status(),
1769 vec![true, true, false, false, true, true, false]
1770 );
1771 }
1772
1773 #[test]
1774 fn test_has_overlap_u64() {
1775 let r = vec![0..5, 10..15];
1776 let n = IntervalSetGeneric::<u64>::new(&r).unwrap();
1777 assert!(n.has_overlap(&(3..4)).unwrap());
1778 assert!(n.has_overlap(&(5..20)).unwrap());
1779 assert!(!n.has_overlap(&(6..10)).unwrap());
1780 assert!(!n.has_overlap(&(100..110)).unwrap());
1781 assert!(!n.has_overlap(&(3..3)).unwrap());
1782 }
1783
1784 #[test]
1785 fn test_has_overlap_i128() {
1786 let r = vec![-50..-40, 10..15];
1787 let n = IntervalSetGeneric::<i128>::new(&r).unwrap();
1788 assert!(n.has_overlap(&(-45..46)).unwrap());
1789 assert!(n.has_overlap(&(-50..-49)).unwrap());
1790 assert!(!n.has_overlap(&(-39..-38)).unwrap());
1791 assert!(!n.has_overlap(&(-40..-39)).unwrap());
1792 assert!(n.has_overlap(&(5..20)).unwrap());
1793 assert!(!n.has_overlap(&(6..10)).unwrap());
1794 assert!(!n.has_overlap(&(100..110)).unwrap());
1795 assert!(!n.has_overlap(&(3..3)).unwrap());
1796 }
1797
1798 #[test]
1799 fn test_filter_overlapping_split_basic() {
1800 let a = vec![0..100];
1801 let ids = [1];
1802 let iv_a = IntervalSet::new_with_ids(&a, &ids).unwrap();
1803 let b = vec![0..100];
1804 let other = IntervalSet::new(&b).unwrap();
1805 let c = iv_a.filter_to_overlapping_and_split(&other);
1806 assert_eq!(c.intervals, vec![0..100]);
1807 assert_eq!(c.ids, vec![vec![1]]);
1808 }
1809
1810 #[test]
1811 fn test_filter_overlapping_split_basic2() {
1812 let a = vec![0..100];
1813 let ids = [1];
1814 let iv_a = IntervalSet::new_with_ids(&a, &ids).unwrap();
1815 let b = vec![25..50];
1816 let other = IntervalSet::new(&b).unwrap();
1817 let c = iv_a.filter_to_overlapping_and_split(&other);
1818 assert_eq!(c.intervals, vec![25..50]);
1819 assert_eq!(c.ids, vec![vec![1]]);
1820 }
1821 #[test]
1822 fn test_filter_overlapping_split() {
1823 let a = vec![0..100, 200..300, 300..450, 490..510];
1824 let ids = [1, 2, 3, 4];
1825 let iv_a = IntervalSet::new_with_ids(&a, &ids).unwrap();
1826 let b = vec![0..50, 50..60, 75..99, 350..500];
1827 let other = IntervalSet::new(&b).unwrap();
1828 let c = iv_a.filter_to_overlapping_and_split(&other);
1829 assert_eq!(c.intervals, vec![0..60, 75..99, 350..450, 490..500]);
1830 assert_eq!(
1831 c.ids,
1832 vec![vec![1], vec![1], vec![3], vec![4]]
1833 );
1834 }
1835
1836 #[test]
1837 fn test_filter_overlapping_split_2() {
1838 let a = vec![0..100, 100..300, 300..450, 490..510];
1839 let ids = [1, 2, 3, 4];
1840 let iv_a = IntervalSet::new_with_ids(&a, &ids).unwrap();
1841 let b = vec![0..50, 50..60, 75..110, 350..500];
1842 let other = IntervalSet::new(&b).unwrap();
1843 let c = iv_a.filter_to_overlapping_and_split(&other);
1844 assert_eq!(c.intervals, vec![0..60, 75..100, 100..110, 350..450, 490..500]);
1845 assert_eq!(
1846 c.ids,
1847 vec![vec![1], vec![1], vec![2], vec![3], vec![4]]
1848 );
1849 }
1850
1851 #[test]
1852 fn test_filter_overlapping_split_doc() {
1853 let a = vec![10..20];
1854 let ids = [99];
1855 let iv_a = IntervalSet::new_with_ids(&a, &ids).unwrap();
1856 let b = vec![5..11, 15..17, 18..30];
1857 let other = IntervalSet::new(&b).unwrap();
1858 let c = iv_a.filter_to_overlapping_and_split(&other);
1859 assert_eq!(c.intervals, vec![10..11, 15..17, 18..20]);
1860 assert_eq!(
1861 c.ids,
1862 vec![vec![99], vec![99], vec![99]]
1863 );
1864 }
1865 #[test]
1866 fn test_identical_starts () {
1867
1868 let a = vec![
1869 (1.. 10),
1870 (20.. 30),
1871 (20.. 40),
1872 (50.. 100),
1873 ];
1874 let iv_a = IntervalSet::new(&a).unwrap();
1875 for range in a {
1876 println!("{:?}", range);
1877 println!("{}",iv_a.has_overlap(&(range.start..range.end)).unwrap());
1878 assert!(iv_a.has_overlap(&(range.start..range.end)).unwrap());
1879 }
1880
1881 assert!(iv_a.query_overlapping(&(1.. 10)).intervals.len() == 1);
1882 assert!(iv_a.query_overlapping(&(20.. 30)).intervals.len() == 2);
1883 assert!(iv_a.query_overlapping(&(20.. 40)).intervals.len() == 2);
1884 assert!(iv_a.query_overlapping(&(30.. 32)).intervals.len() == 1);
1885 assert!(iv_a.query_overlapping(&(29.. 32)).intervals.len() == 2);
1886 }
1887
1888 #[test]
1889 fn test_identical_intervals () {
1890 let a = vec![
1891 (266854.. 266855),
1892 (268816.. 268818),
1893 (268816.. 268818),
1894 (297502.. 297503),
1895 ];
1896 let iv_a = IntervalSet::new(&a).unwrap();
1897 for range in a {
1898 dbg!(&range);
1899 assert!(iv_a.has_overlap(&(range.start..range.end)).unwrap());
1900 }
1901 assert!(iv_a.query_overlapping(&(266854.. 266855)).intervals.len() == 1);
1902 assert!(iv_a.query_overlapping(&(268816.. 268818)).intervals.len() == 2);
1903 assert!(iv_a.query_overlapping(&(297502.. 297503)).intervals.len() == 1);
1904
1905 }
1906
1907 #[test]
1908 fn test_nested () {
1909 let a = vec![
1910 (10.. 30),
1911 (15.. 25),
1912 (25.. 28),
1913 ];
1914 let iv_a = IntervalSet::new(&a).unwrap();
1915 for range in a {
1916 assert!(iv_a.has_overlap(&(range.start..range.end)).unwrap());
1917 }
1918 }
1919
1920
1921}