coalesced_intervals/
lib.rs

1use core::ops::Bound;
2use std::collections::BTreeMap;
3
4/// This is a conceptually simple data structure designed for the case where you have intervals
5/// that you'd like to coalesce into maximal contiguous runs.
6///
7/// For example, if I add `[0, 1)` and then I add `[1, 2)` I should observe a single contiguous
8/// interval `[0, 2)` in the data structure.
9///
10/// Implementation note: we use two btrees, one with the starts as the keys and one with limits as
11/// the keys.
12pub struct CoalescedIntervals<T> {
13    start_to_limit: BTreeMap<T, T>,
14    limit_to_start: BTreeMap<T, T>,
15}
16
17impl<T: Copy + std::cmp::Ord + std::fmt::Debug> CoalescedIntervals<T> {
18    /// Creates a new (empty) set of maximally coalesced intervals.
19    pub fn new() -> Self {
20        CoalescedIntervals {
21            start_to_limit: BTreeMap::new(),
22            limit_to_start: BTreeMap::new(),
23        }
24    }
25
26    /// Checks interval invariants for this data structure -- panics via `assert!` if there are
27    /// internal inconsistencies.
28    pub fn check_invariants(&self) {
29        // There should be no empty-sized intervals, and data should be reflected symmetrically in
30        // both maps.
31        for (start, limit) in self.start_to_limit.iter() {
32            assert!(start != limit);
33            assert!(self.limit_to_start[&limit] == *start);
34        }
35        for (limit, start) in self.limit_to_start.iter() {
36            assert!(start != limit);
37            assert!(self.start_to_limit[&start] == *limit);
38        }
39    }
40
41    /// To be dominated by this interval the candidate_start must be >= start and candidate_limit
42    /// must be <= limit.
43    fn remove_intervals_dominated_by(&mut self, start: T, limit: T) {
44        assert!(start <= limit);
45        let mut dominated = vec![];
46        for (candidate_start, candidate_limit) in self
47            .start_to_limit
48            .range((Bound::Included(start), Bound::Excluded(limit)))
49        {
50            if *candidate_limit <= limit {
51                dominated.push((*candidate_start, *candidate_limit));
52            } else {
53                // candidate_limit > limit, so we can stop looking
54                break;
55            }
56        }
57        for (s, l) in dominated {
58            self.start_to_limit.remove(&s);
59            self.limit_to_start.remove(&l);
60        }
61    }
62
63    fn is_dominated_by_existing(&self, start: T, limit: T) -> bool {
64        assert!(start <= limit);
65        // Look at the first interval that ends at-or-after limit to see if it dominates.
66        for (_existing_limit, existing_start) in self
67            .limit_to_start
68            .range((Bound::Included(limit), Bound::Unbounded))
69        {
70            if *existing_start <= start {
71                return true;
72            } else {
73                break;
74            }
75        }
76        // Look at the first interval that start at-or-before start to see if it dominates.
77        for (_existing_start, existing_limit) in self
78            .start_to_limit
79            .range((Bound::Unbounded, Bound::Included(start)))
80        {
81            if *existing_limit >= limit {
82                return true;
83            } else {
84                break;
85            }
86        }
87        return false;
88    }
89
90    /// Inserts the `[start, limit)` interval into both underlying mappings.
91    fn insert_record(&mut self, start: T, limit: T) {
92        assert!(start <= limit);
93        log::debug!("inserting record: {:?}, {:?}", start, limit);
94        self.start_to_limit.insert(start, limit);
95        self.limit_to_start.insert(limit, start);
96    }
97
98    /// Removes the interval from both mappings that has a start at `value` -- panics if no such
99    /// interval exists.
100    fn remove_with_start_at(&mut self, value: T) -> T {
101        if let Some((start, limit)) = self.start_to_limit.remove_entry(&value) {
102            self.limit_to_start.remove(&limit);
103            log::debug!("removed: {:?}, {:?}", start, limit);
104            limit
105        } else {
106            panic!("Attempted to remove start that was not present in map");
107        }
108    }
109
110    fn remove_with_limit_at(&mut self, value: T) -> T {
111        if let Some((limit, start)) = self.limit_to_start.remove_entry(&value) {
112            self.start_to_limit.remove(&start);
113            log::debug!("removed: {:?}, {:?}", start, limit);
114            start
115        } else {
116            panic!("Attempted to remove limit that was not present in map");
117        }
118    }
119
120    /// Finds any collision with the left edge of the interval; e.g. where the limit of another
121    /// interval is contained within this interval; i.e.
122    ///
123    /// `start <= other.limit <= limit`
124    fn find_collision_left(&self, start: T, limit: T) -> Option<(T, T)> {
125        assert!(start <= limit);
126        for (other_limit, other_start) in self
127            .limit_to_start
128            .range((Bound::Included(start), Bound::Included(limit)))
129        {
130            if start <= *other_limit && *other_limit <= limit {
131                return Some((*other_start, *other_limit));
132            }
133        }
134        return None;
135    }
136
137    /// Finds any collision with the right edge of the interval; e.g. where the start of another
138    /// interval is contained within this interval; i.e.
139    ///
140    /// `start <= other.start <= limit`
141    fn find_collision_right(&self, start: T, limit: T) -> Option<(T, T)> {
142        assert!(start <= limit);
143        for (other_start, other_limit) in self
144            .start_to_limit
145            .range((Bound::Included(start), Bound::Included(limit)))
146        {
147            if start <= *other_start && *other_start <= limit {
148                return Some((*other_start, *other_limit));
149            }
150        }
151        return None;
152    }
153
154    /// Adds the interval `[start, limit)` to the current interval set.
155    pub fn add(&mut self, start: T, limit: T) {
156        assert!(start <= limit);
157        // Ignore empty intervals.
158        if start == limit {
159            return;
160        }
161
162        // No change necessary if there's already an interval in there that dominates this one.
163        if self.is_dominated_by_existing(start, limit) {
164            return;
165        }
166
167        self.remove_intervals_dominated_by(start, limit);
168
169        // If our start is another interval's limit, or our limit is another interval's start, we
170        // coalesce them. Note that both may be true simultaneously. We're maximally coalesced as
171        // an invariant, so we don' thave to look for additional things that coalesce or are
172        // dominated by this new larger block, they would have been colliding which would break the
173        // invariant.
174
175        let collision_left: Option<(T, T)> = self.find_collision_left(start, limit);
176        let collision_right: Option<(T, T)> = self.find_collision_right(start, limit);
177
178        log::debug!("considering: {:?}, {:?}", start, limit);
179        log::debug!("  collision left:  {:?}", collision_left);
180        log::debug!("  collision right: {:?}", collision_right);
181
182        match (collision_left, collision_right) {
183            (None, None) => {
184                self.insert_record(start, limit);
185            }
186            // Collision on the right edge.
187            (None, Some((collided_start, collided_limit))) => {
188                self.remove_with_start_at(collided_start);
189                assert!(collided_limit > limit);
190                self.insert_record(start, collided_limit);
191            }
192            // Collision on the left edge.
193            (Some((collided_start, _collided_limit)), None) => {
194                self.remove_with_start_at(collided_start);
195                assert!(collided_start < start);
196                self.insert_record(collided_start, limit);
197            }
198            // Collision on both edges.
199            (Some((left_start, _)), Some((_, right_limit))) => {
200                self.remove_with_start_at(left_start);
201                self.remove_with_limit_at(right_limit);
202                assert!(left_start < start);
203                assert!(limit < right_limit);
204                self.insert_record(left_start, right_limit);
205            }
206        }
207    }
208
209    /// Returns the interval that contains `value`, or `None` if there is none in the current
210    /// interval set.
211    ///
212    /// Note that limits are exclusive, so with the interval set with a single interval `[0, 1)`
213    /// the value `1` is not contained.
214    pub fn get_interval_containing(&self, value: T) -> Option<(T, T)> {
215        // We look at the first interval whose limit is after `value` to see if it overlaps.
216        for (limit, start) in self
217            .limit_to_start
218            .range((Bound::Excluded(value), Bound::Unbounded))
219        {
220            if *start <= value {
221                assert!(*limit > value);
222                return Some((*start, *limit));
223            } else {
224                break;
225            }
226        }
227
228        // We look at the first interval whose start is before `value` to see if it overlaps.
229        for (start, limit) in self
230            .start_to_limit
231            .range((Bound::Unbounded, Bound::Included(value)))
232        {
233            if *limit > value {
234                assert!(*start <= value);
235                return Some((*start, *limit));
236            } else {
237                break;
238            }
239        }
240
241        None
242    }
243
244    /// Returns the first interval whose start is >= `value`.
245    ///
246    /// If there is no such interval, `None` is returned.
247    pub fn get_first_start_from(&self, value: T) -> Option<(T, T)> {
248        for (start, limit) in self
249            .start_to_limit
250            .range((Bound::Included(value), Bound::Unbounded))
251        {
252            return Some((*start, *limit));
253        }
254        None
255    }
256
257    /// Returns the first interval whose limit is < `value`.
258    ///
259    /// If there is no such interval, `None` is returned.
260    pub fn get_first_limit_before(&self, value: T) -> Option<(T, T)> {
261        for (limit, start) in self
262            .limit_to_start
263            .range((Bound::Unbounded, Bound::Excluded(value)))
264        {
265            return Some((*start, *limit));
266        }
267        None
268    }
269
270    /// Returns whether there is a partial overlap in the interval `[start, limit)`.
271    ///
272    /// That is, returns true iff there is any intersection between `[start, limit)`
273    /// and the coalesced intervals held in this data structure.
274    ///
275    /// When the interval given is empty, this function returns whether the point is
276    /// contained within any interval in this data structure.
277    pub fn contains_partial(&self, start: T, limit: T) -> bool {
278        assert!(start <= limit);
279        if start == limit {
280            return self.get_interval_containing(start).is_some();
281        }
282
283        if self.is_dominated_by_existing(start, limit) {
284            return true;
285        }
286
287        match self.get_first_start_from(start) {
288            Some((next_start, _next_limit)) => {
289                if next_start < limit {
290                    return true;
291                }
292            }
293            None => {}
294        }
295
296        match self.get_first_limit_before(limit) {
297            Some((_prev_start, prev_limit)) => {
298                if prev_limit > start {
299                    return true;
300                }
301            }
302            None => {}
303        }
304
305        return false;
306    }
307
308    /// Converts the current interval set to a vector of `[start, limit)` in sorted (ascending)
309    /// order.
310    pub fn to_vec(&self) -> Vec<(T, T)> {
311        let mut v: Vec<(T, T)> = Vec::with_capacity(self.start_to_limit.len());
312        for (start, limit) in self.start_to_limit.iter() {
313            v.push((*start, *limit));
314        }
315        v
316    }
317}
318
319#[cfg(test)]
320mod tests {
321    use super::*;
322
323    #[test]
324    fn empty() {
325        let ivals = CoalescedIntervals::<i64>::new();
326        assert_eq!(ivals.to_vec(), []);
327    }
328
329    /// Adding a single interval with no area.
330    #[test]
331    fn with_empty_range() {
332        let mut ivals = CoalescedIntervals::<i64>::new();
333        ivals.add(0, 0);
334        assert_eq!(ivals.to_vec(), []);
335        assert!(ivals.get_interval_containing(0).is_none());
336        assert!(ivals.get_first_start_from(0).is_none());
337    }
338
339    /// Adding a single interval (that has area in it).
340    #[test]
341    fn one_interval_range() {
342        let mut ivals = CoalescedIntervals::<i64>::new();
343        ivals.add(0, 1);
344        assert_eq!(ivals.to_vec(), [(0, 1)]);
345
346        assert_eq!(ivals.get_first_start_from(-1), Some((0, 1)));
347
348        assert_eq!(ivals.get_interval_containing(0), Some((0, 1)));
349        assert_eq!(ivals.get_first_start_from(0), Some((0, 1)));
350
351        assert!(ivals.get_interval_containing(1).is_none());
352        assert!(ivals.get_first_start_from(1).is_none());
353    }
354
355    /// Adding two intervals that coalesce.
356    #[test]
357    fn two_interval_abutted() {
358        let mut ivals = CoalescedIntervals::<i64>::new();
359        ivals.add(0, 1);
360        assert!(ivals.get_interval_containing(1).is_none());
361        ivals.add(1, 2);
362        assert_eq!(ivals.to_vec(), [(0, 2)]);
363        assert_eq!(ivals.get_interval_containing(1), Some((0, 2)));
364
365        // The coalesced interval starts from 0 so this gives us `None`.
366        assert!(ivals.get_first_start_from(1).is_none());
367    }
368
369    /// Adding three intervals that coalesce when third one shows up.
370    #[test]
371    fn three_interval_abutted() {
372        let _ = env_logger::try_init();
373        let mut ivals = CoalescedIntervals::<i64>::new();
374        ivals.add(0, 1);
375        ivals.add(2, 3);
376        assert_eq!(ivals.to_vec(), [(0, 1), (2, 3)]);
377        assert_eq!(ivals.get_first_start_from(1), Some((2, 3)));
378        ivals.add(1, 2);
379        assert_eq!(ivals.to_vec(), [(0, 3)]);
380        assert_eq!(ivals.get_first_start_from(1), None);
381    }
382
383    /// Adding a smaller interval when a larger interval is already present with the same start.
384    #[test]
385    fn test_smaller_on_larger() {
386        let mut ivals = CoalescedIntervals::<i64>::new();
387        ivals.add(0, 3);
388        assert_eq!(ivals.to_vec(), [(0, 3)]);
389        ivals.add(0, 1);
390        assert_eq!(ivals.to_vec(), [(0, 3)]);
391    }
392
393    /// Partial overlap between earlier and subsequent.
394    #[test]
395    fn test_partial_overlap() {
396        let _ = env_logger::try_init();
397        let mut ivals = CoalescedIntervals::<i64>::new();
398        ivals.add(0, 3);
399        assert_eq!(ivals.to_vec(), [(0, 3)]);
400        ivals.add(2, 4);
401        assert_eq!(ivals.to_vec(), [(0, 4)]);
402    }
403
404    /// Adding an interval `[MIN, MIN+1)` and seeing if we can find it by passing the `MIN` value
405    /// to `get_first_start_from`. The `MIN` value shouldn't hold any surprises in this regard.
406    #[test]
407    fn test_get_first_start_from_min_value() {
408        let _ = env_logger::try_init();
409        let mut ivals = CoalescedIntervals::<i8>::new();
410        ivals.add(i8::MIN, i8::MIN + 1);
411        assert_eq!(
412            ivals.get_first_start_from(i8::MIN),
413            Some((i8::MIN, i8::MIN + 1))
414        );
415    }
416
417    #[test]
418    fn test_contains_partial() {
419        let mut ivals = CoalescedIntervals::<i64>::new();
420        ivals.add(0, 3);
421        assert!(ivals.contains_partial(0, 1));
422        assert!(ivals.contains_partial(1, 2));
423        assert!(ivals.contains_partial(2, 3));
424        assert!(ivals.contains_partial(1, 3));
425        assert!(ivals.contains_partial(0, 2));
426        assert!(ivals.contains_partial(2, 4));
427        assert!(ivals.contains_partial(-1, 1));
428
429        assert!(!ivals.contains_partial(3, 3));
430        assert!(!ivals.contains_partial(3, 4));
431        assert!(!ivals.contains_partial(-1, 0));
432    }
433}