interval_heap/
lib.rs

1// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! A double-ended priority queue implemented with an interval heap.
12//!
13//! An `IntervalHeap` can be used wherever a [`BinaryHeap`][bh] can, but has the ability to
14//! efficiently access the heap's smallest item and accepts custom comparators. If you only need
15//! access to either the smallest item or the greatest item, `BinaryHeap` is more efficient.
16//!
17//! Insertion has amortized `O(log n)` time complexity. Popping the smallest or greatest item is
18//! `O(log n)`. Retrieving the smallest or greatest item is `O(1)`.
19//!
20//! [bh]: https://doc.rust-lang.org/stable/std/collections/struct.BinaryHeap.html
21
22extern crate compare;
23#[cfg(test)] extern crate rand;
24
25use std::fmt::{self, Debug};
26use std::iter;
27use std::slice;
28use std::vec;
29
30use compare::{Compare, Natural, natural};
31
32// An interval heap is a binary tree structure with the following properties:
33//
34// (1) Each node (except possibly the last leaf) contains two values
35//     where the first one is less than or equal to the second one.
36// (2) Each node represents a closed interval.
37// (3) A child node's interval is completely contained in the parent node's
38//     interval.
39//
40// This implies that the min and max items are always in the root node.
41//
42// This interval heap implementation stores its nodes in a linear array
43// using a Vec. Here's an example of the layout of a tree with 13 items
44// (7 nodes) where the numbers represent the *offsets* in the array:
45//
46//          (0 1)
47//         /     \
48//    (2 3)       (4 5)
49//    /   \       /    \
50//  (6 7)(8 9)(10 11)(12 --)
51//
52// Even indices are used for the "left" item of a node while odd indices
53// are used for the "right" item of a node. Note: the last node may not
54// have a "right" item.
55
56// FIXME: There may be a better algorithm for turning a vector into an
57// interval heap. Right now, this takes O(n log n) time, I think.
58
59fn is_root(x: usize) -> bool { x < 2 }
60
61/// Set LSB to zero for the "left" item index of a node.
62fn left(x: usize) -> usize { x & !1 }
63
64/// Returns index of "left" item of parent node.
65fn parent_left(x: usize) -> usize {
66    debug_assert!(!is_root(x));
67    left((x - 2) / 2)
68}
69
70/// The first `v.len() - 1` items are considered a valid interval heap
71/// and the last item is to be inserted.
72fn interval_heap_push<T, C: Compare<T>>(v: &mut [T], cmp: &C) {
73    debug_assert!(v.len() > 0);
74    // Start with the last new/modified node and work our way to
75    // the root if necessary...
76    let mut node_max = v.len() - 1;
77    let mut node_min = left(node_max);
78    // The reason for using two variables instead of one is to
79    // get around the special case of the last node only containing
80    // one item (node_min == node_max).
81    if cmp.compares_gt(&v[node_min], &v[node_max]) { v.swap(node_min, node_max); }
82    while !is_root(node_min) {
83        let par_min = parent_left(node_min);
84        let par_max = par_min + 1;
85        if cmp.compares_lt(&v[node_min], &v[par_min]) {
86            v.swap(par_min, node_min);
87        } else if cmp.compares_lt(&v[par_max], &v[node_max]) {
88            v.swap(par_max, node_max);
89        } else {
90            return; // nothing to do anymore
91        }
92        debug_assert!(cmp.compares_le(&v[node_min], &v[node_max]));
93        node_min = par_min;
94        node_max = par_max;
95    }
96}
97
98/// The min item in the root node of an otherwise valid interval heap
99/// has been been replaced with some other value without violating rule (1)
100/// for the root node. This function restores the interval heap properties.
101fn update_min<T, C: Compare<T>>(v: &mut [T], cmp: &C) {
102    // Starting at the root, we go down the tree...
103    debug_assert!(cmp.compares_le(&v[0], &v[1]));
104    let mut left = 0;
105    loop {
106        let c1 = left * 2 + 2; // index of 1st child's left item
107        let c2 = left * 2 + 4; // index of 2nd child's left item
108        if v.len() <= c1 { return; } // No children. We're done.
109        // Pick child with lowest min
110        let ch = if v.len() <= c2 || cmp.compares_lt(&v[c1], &v[c2]) { c1 }
111                 else { c2 };
112        if cmp.compares_lt(&v[ch], &v[left]) {
113            v.swap(ch, left);
114            left = ch;
115            let right = left + 1;
116            if right < v.len() {
117                if cmp.compares_gt(&v[left], &v[right]) { v.swap(left, right); }
118            }
119        } else {
120            break;
121        }
122    }
123}
124
125/// The max item in the root node of an otherwise valid interval heap
126/// has been been replaced with some other value without violating rule (1)
127/// for the root node. This function restores the interval heap properties.
128fn update_max<T, C: Compare<T>>(v: &mut [T], cmp: &C) {
129    debug_assert!(cmp.compares_le(&v[0], &v[1]));
130    // Starting at the root, we go down the tree...
131    let mut right = 1;
132    loop {
133        let c1 = right * 2 + 1; // index of 1st child's right item
134        let c2 = right * 2 + 3; // index of 2nd child's right item
135        if v.len() <= c1 { return; } // No children. We're done.
136        // Pick child with greatest max
137        let ch = if v.len() <= c2 || cmp.compares_gt(&v[c1], &v[c2]) { c1 }
138                 else { c2 };
139        if cmp.compares_gt(&v[ch], &v[right]) {
140            v.swap(ch, right);
141            right = ch;
142            let left = right - 1; // always exists
143            if cmp.compares_gt(&v[left], &v[right]) { v.swap(left, right); }
144        } else {
145            break;
146        }
147    }
148}
149
150/// A double-ended priority queue implemented with an interval heap.
151///
152/// It is a logic error for an item to be modified in such a way that the
153/// item's ordering relative to any other item, as determined by the heap's
154/// comparator, changes while it is in the heap. This is normally only
155/// possible through `Cell`, `RefCell`, global state, I/O, or unsafe code.
156#[derive(Clone)]
157pub struct IntervalHeap<T, C: Compare<T> = Natural<T>> {
158    data: Vec<T>,
159    cmp: C,
160}
161
162impl<T, C: Compare<T> + Default> Default for IntervalHeap<T, C> {
163    #[inline]
164    fn default() -> IntervalHeap<T, C> {
165        Self::with_comparator(C::default())
166    }
167}
168
169impl<T: Ord> IntervalHeap<T> {
170    /// Returns an empty heap ordered according to the natural order of its items.
171    ///
172    /// # Examples
173    ///
174    /// ```
175    /// use interval_heap::IntervalHeap;
176    ///
177    /// let heap = IntervalHeap::<u32>::new();
178    /// assert!(heap.is_empty());
179    /// ```
180    pub fn new() -> IntervalHeap<T> { Self::with_comparator(natural()) }
181
182    /// Returns an empty heap with the given capacity and ordered according to the
183    /// natural order of its items.
184    ///
185    /// The heap will be able to hold exactly `capacity` items without reallocating.
186    ///
187    /// # Examples
188    ///
189    /// ```
190    /// use interval_heap::IntervalHeap;
191    ///
192    /// let heap = IntervalHeap::<u32>::with_capacity(5);
193    /// assert!(heap.is_empty());
194    /// assert!(heap.capacity() >= 5);
195    /// ```
196    pub fn with_capacity(capacity: usize) -> IntervalHeap<T> {
197        Self::with_capacity_and_comparator(capacity, natural())
198    }
199}
200
201impl<T: Ord> From<Vec<T>> for IntervalHeap<T> {
202    /// Returns a heap containing all the items of the given vector and ordered
203    /// according to the natural order of its items.
204    ///
205    /// # Examples
206    ///
207    /// ```
208    /// use interval_heap::IntervalHeap;
209    ///
210    /// let heap = IntervalHeap::from(vec![5, 1, 6, 4]);
211    /// assert_eq!(heap.len(), 4);
212    /// assert_eq!(heap.min_max(), Some((&1, &6)));
213    /// ```
214    fn from(vec: Vec<T>) -> IntervalHeap<T> {
215        Self::from_vec_and_comparator(vec, natural())
216    }
217}
218
219impl<T, C: Compare<T>> IntervalHeap<T, C> {
220    /// Returns an empty heap ordered according to the given comparator.
221    pub fn with_comparator(cmp: C) -> IntervalHeap<T, C> {
222        IntervalHeap { data: vec![], cmp: cmp }
223    }
224
225    /// Returns an empty heap with the given capacity and ordered according to the given
226    /// comparator.
227    pub fn with_capacity_and_comparator(capacity: usize, cmp: C) -> IntervalHeap<T, C> {
228        IntervalHeap { data: Vec::with_capacity(capacity), cmp: cmp }
229    }
230
231    /// Returns a heap containing all the items of the given vector and ordered
232    /// according to the given comparator.
233    pub fn from_vec_and_comparator(mut vec: Vec<T>, cmp: C) -> IntervalHeap<T, C> {
234        for to in 2 .. vec.len() + 1 {
235            interval_heap_push(&mut vec[..to], &cmp);
236        }
237        let heap = IntervalHeap { data: vec, cmp: cmp };
238        debug_assert!(heap.is_valid());
239        heap
240    }
241
242    /// Returns an iterator visiting all items in the heap in arbitrary order.
243    pub fn iter(&self) -> Iter<T> {
244        debug_assert!(self.is_valid());
245        Iter(self.data.iter())
246    }
247
248    /// Returns a reference to the smallest item in the heap.
249    ///
250    /// Returns `None` if the heap is empty.
251    pub fn min(&self) -> Option<&T> {
252        debug_assert!(self.is_valid());
253        match self.data.len() {
254            0 => None,
255            _ => Some(&self.data[0]),
256        }
257    }
258
259    /// Returns a reference to the greatest item in the heap.
260    ///
261    /// Returns `None` if the heap is empty.
262    pub fn max(&self) -> Option<&T> {
263        debug_assert!(self.is_valid());
264        match self.data.len() {
265            0 => None,
266            1 => Some(&self.data[0]),
267            _ => Some(&self.data[1]),
268        }
269    }
270
271    /// Returns references to the smallest and greatest items in the heap.
272    ///
273    /// Returns `None` if the heap is empty.
274    pub fn min_max(&self) -> Option<(&T, &T)> {
275        debug_assert!(self.is_valid());
276        match self.data.len() {
277            0 => None,
278            1 => Some((&self.data[0], &self.data[0])),
279            _ => Some((&self.data[0], &self.data[1])),
280        }
281    }
282
283    /// Returns the number of items the heap can hold without reallocation.
284    pub fn capacity(&self) -> usize {
285        self.data.capacity()
286    }
287
288    /// Reserves the minimum capacity for exactly `additional` more items to be inserted into the
289    /// heap.
290    ///
291    /// Does nothing if the capacity is already sufficient.
292    ///
293    /// Note that the allocator may give the heap more space than it
294    /// requests. Therefore capacity can not be relied upon to be precisely
295    /// minimal. Prefer `reserve` if future insertions are expected.
296    pub fn reserve_exact(&mut self, additional: usize) {
297        self.data.reserve_exact(additional);
298    }
299
300    /// Reserves capacity for at least `additional` more items to be inserted into the heap.
301    ///
302    /// The heap may reserve more space to avoid frequent reallocations.
303    pub fn reserve(&mut self, additional: usize) {
304        self.data.reserve(additional);
305    }
306
307    /// Discards as much additional capacity from the heap as possible.
308    pub fn shrink_to_fit(&mut self) {
309        self.data.shrink_to_fit()
310    }
311
312    /// Removes the smallest item from the heap and returns it.
313    ///
314    /// Returns `None` if the heap was empty.
315    pub fn pop_min(&mut self) -> Option<T> {
316        debug_assert!(self.is_valid());
317        let min = match self.data.len() {
318            0 => None,
319            1...2 => Some(self.data.swap_remove(0)),
320            _ => {
321                let res = self.data.swap_remove(0);
322                update_min(&mut self.data, &self.cmp);
323                Some(res)
324            }
325        };
326        debug_assert!(self.is_valid());
327        min
328    }
329
330    /// Removes the greatest item from the heap and returns it.
331    ///
332    /// Returns `None` if the heap was empty.
333    pub fn pop_max(&mut self) -> Option<T> {
334        debug_assert!(self.is_valid());
335        let max = match self.data.len() {
336            0...2 => self.data.pop(),
337            _ => {
338                let res = self.data.swap_remove(1);
339                update_max(&mut self.data, &self.cmp);
340                Some(res)
341            }
342        };
343        debug_assert!(self.is_valid());
344        max
345    }
346
347    /// Pushes an item onto the heap.
348    pub fn push(&mut self, item: T) {
349        debug_assert!(self.is_valid());
350        self.data.push(item);
351        interval_heap_push(&mut self.data, &self.cmp);
352        debug_assert!(self.is_valid());
353    }
354
355    /// Consumes the heap and returns its items as a vector in arbitrary order.
356    pub fn into_vec(self) -> Vec<T> { self.data }
357
358    /// Consumes the heap and returns its items as a vector in sorted (ascending) order.
359    pub fn into_sorted_vec(self) -> Vec<T> {
360        let mut vec = self.data;
361        for hsize in (2..vec.len()).rev() {
362            vec.swap(1, hsize);
363            update_max(&mut vec[..hsize], &self.cmp);
364        }
365        vec
366    }
367
368    /// Returns the number of items in the heap.
369    pub fn len(&self) -> usize {
370        self.data.len()
371    }
372
373    /// Returns `true` if the heap contains no items.
374    pub fn is_empty(&self) -> bool {
375        self.data.is_empty()
376    }
377
378    /// Removes all items from the heap.
379    pub fn clear(&mut self) {
380        self.data.clear();
381    }
382
383    /// Clears the heap, returning an iterator over the removed items in arbitrary order.
384    #[cfg(feature = "drain")]
385    pub fn drain(&mut self) -> Drain<T> {
386        Drain(self.data.drain(..))
387    }
388
389    /// Checks if the heap is valid.
390    ///
391    /// The heap is valid if:
392    ///
393    /// 1. It has fewer than two items, OR
394    /// 2a. Each node's left item is less than or equal to its right item, AND
395    /// 2b. Each node's left item is greater than or equal to the left item of the
396    ///     node's parent, AND
397    /// 2c. Each node's right item is less than or equal to the right item of the
398    ///     node's parent
399    fn is_valid(&self) -> bool {
400        let mut nodes = self.data.chunks(2);
401
402        match nodes.next() {
403            Some(chunk) if chunk.len() == 2 => {
404                let l = &chunk[0];
405                let r = &chunk[1];
406
407                self.cmp.compares_le(l, r) && // 2a
408                nodes.enumerate().all(|(i, node)| {
409                    let p = i & !1;
410                    let l = &node[0];
411                    let r = node.last().unwrap();
412
413                    self.cmp.compares_le(l, r) &&              // 2a
414                    self.cmp.compares_ge(l, &self.data[p]) &&  // 2b
415                    self.cmp.compares_le(r, &self.data[p + 1]) // 2c
416                })
417            }
418            _ => true, // 1
419        }
420    }
421}
422
423impl<T: Debug, C: Compare<T>> Debug for IntervalHeap<T, C> {
424    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
425        f.debug_list().entries(self).finish()
426    }
427}
428
429impl<T, C: Compare<T> + Default> iter::FromIterator<T> for IntervalHeap<T, C> {
430    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> IntervalHeap<T, C> {
431        IntervalHeap::from_vec_and_comparator(iter.into_iter().collect(), C::default())
432    }
433}
434
435impl<T, C: Compare<T>> Extend<T> for IntervalHeap<T, C> {
436    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
437        let iter = iter.into_iter();
438        let (lower, _) = iter.size_hint();
439        self.reserve(lower);
440        for elem in iter {
441            self.push(elem);
442        }
443    }
444}
445
446impl<'a, T: 'a + Copy, C: Compare<T>> Extend<&'a T> for IntervalHeap<T, C> {
447    fn extend<I: IntoIterator<Item = &'a T>>(&mut self, iter: I) {
448        self.extend(iter.into_iter().map(|&item| item));
449    }
450}
451
452/// An iterator over an `IntervalHeap` in arbitrary order.
453///
454/// Acquire through [`IntervalHeap::iter`](struct.IntervalHeap.html#method.iter).
455pub struct Iter<'a, T: 'a>(slice::Iter<'a, T>);
456
457impl<'a, T> Clone for Iter<'a, T> {
458    fn clone(&self) -> Iter<'a, T> { Iter(self.0.clone()) }
459}
460
461impl<'a, T> Iterator for Iter<'a, T> {
462    type Item = &'a T;
463    #[inline] fn next(&mut self) -> Option<&'a T> { self.0.next() }
464    #[inline] fn size_hint(&self) -> (usize, Option<usize>) { self.0.size_hint() }
465}
466
467impl<'a, T> DoubleEndedIterator for Iter<'a, T> {
468    fn next_back(&mut self) -> Option<&'a T> { self.0.next_back() }
469}
470
471impl<'a, T> ExactSizeIterator for Iter<'a, T> {}
472
473/// A consuming iterator over an `IntervalHeap` in arbitrary order.
474///
475/// Acquire through [`IntoIterator::into_iter`](
476/// https://doc.rust-lang.org/stable/std/iter/trait.IntoIterator.html#tymethod.into_iter).
477pub struct IntoIter<T>(vec::IntoIter<T>);
478
479impl<T> Iterator for IntoIter<T> {
480    type Item = T;
481    fn next(&mut self) -> Option<T> { self.0.next() }
482    fn size_hint(&self) -> (usize, Option<usize>) { self.0.size_hint() }
483}
484
485impl<T> DoubleEndedIterator for IntoIter<T> {
486    fn next_back(&mut self) -> Option<T> { self.0.next_back() }
487}
488
489impl<T> ExactSizeIterator for IntoIter<T> {}
490
491/// An iterator that drains an `IntervalHeap` in arbitrary oder.
492///
493/// Acquire through [`IntervalHeap::drain`](struct.IntervalHeap.html#method.drain).
494#[cfg(feature = "drain")]
495pub struct Drain<'a, T: 'a>(vec::Drain<'a, T>);
496
497#[cfg(feature = "drain")]
498impl<'a, T: 'a> Iterator for Drain<'a, T> {
499    type Item = T;
500    fn next(&mut self) -> Option<T> { self.0.next() }
501    fn size_hint(&self) -> (usize, Option<usize>) { self.0.size_hint() }
502}
503
504#[cfg(feature = "drain")]
505impl<'a, T: 'a> DoubleEndedIterator for Drain<'a, T> {
506    fn next_back(&mut self) -> Option<T> { self.0.next_back() }
507}
508
509#[cfg(feature = "drain")]
510impl<'a, T: 'a> ExactSizeIterator for Drain<'a, T> {}
511
512impl<T, C: Compare<T>> IntoIterator for IntervalHeap<T, C> {
513    type Item = T;
514    type IntoIter = IntoIter<T>;
515    fn into_iter(self) -> IntoIter<T> { IntoIter(self.data.into_iter()) }
516}
517
518impl<'a, T, C: Compare<T>> IntoIterator for &'a IntervalHeap<T, C> {
519    type Item = &'a T;
520    type IntoIter = Iter<'a, T>;
521    fn into_iter(self) -> Iter<'a, T> { self.iter() }
522}
523
524#[cfg(test)]
525mod test {
526    use rand::{thread_rng, Rng};
527    use super::IntervalHeap;
528
529    #[test]
530    fn fuzz_push_into_sorted_vec() {
531        let mut rng = thread_rng();
532        let mut tmp = Vec::with_capacity(100);
533        for _ in 0..100 {
534            tmp.clear();
535            let mut ih = IntervalHeap::from(tmp);
536            for _ in 0..100 {
537                ih.push(rng.next_u32());
538            }
539            tmp = ih.into_sorted_vec();
540            for pair in tmp.windows(2) {
541                assert!(pair[0] <= pair[1]);
542            }
543        }
544    }
545
546    #[test]
547    fn fuzz_pop_min() {
548        let mut rng = thread_rng();
549        let mut tmp = Vec::with_capacity(100);
550        for _ in 0..100 {
551            tmp.clear();
552            let mut ih = IntervalHeap::from(tmp);
553            for _ in 0..100 {
554                ih.push(rng.next_u32());
555            }
556            let mut tmpx: Option<u32> = None;
557            loop {
558                let tmpy = ih.pop_min();
559                match (tmpx, tmpy) {
560                    (_, None) => break,
561                    (Some(x), Some(y)) => assert!(x <= y),
562                    _ => ()
563                }
564                tmpx = tmpy;
565            }
566            tmp = ih.into_vec();
567        }
568    }
569
570    #[test]
571    fn fuzz_pop_max() {
572        let mut rng = thread_rng();
573        let mut tmp = Vec::with_capacity(100);
574        for _ in 0..100 {
575            tmp.clear();
576            let mut ih = IntervalHeap::from(tmp);
577            for _ in 0..100 {
578                ih.push(rng.next_u32());
579            }
580            let mut tmpx: Option<u32> = None;
581            loop {
582                let tmpy = ih.pop_max();
583                match (tmpx, tmpy) {
584                    (_, None) => break,
585                    (Some(x), Some(y)) => assert!(x >= y),
586                    _ => ()
587                }
588                tmpx = tmpy;
589            }
590            tmp = ih.into_vec();
591        }
592    }
593
594    #[test]
595    fn test_from_vec() {
596        let heap = IntervalHeap::<i32>::from(vec![]);
597        assert_eq!(heap.min_max(), None);
598
599        let heap = IntervalHeap::from(vec![2]);
600        assert_eq!(heap.min_max(), Some((&2, &2)));
601
602        let heap = IntervalHeap::from(vec![2, 1]);
603        assert_eq!(heap.min_max(), Some((&1, &2)));
604
605        let heap = IntervalHeap::from(vec![2, 1, 3]);
606        assert_eq!(heap.min_max(), Some((&1, &3)));
607    }
608
609    #[test]
610    fn test_is_valid() {
611        fn new(data: Vec<i32>) -> IntervalHeap<i32> {
612            IntervalHeap { data: data, cmp: ::compare::natural() }
613        }
614
615        assert!(new(vec![]).is_valid());
616        assert!(new(vec![1]).is_valid());
617        assert!(new(vec![1, 1]).is_valid());
618        assert!(new(vec![1, 5]).is_valid());
619        assert!(new(vec![1, 5, 1]).is_valid());
620        assert!(new(vec![1, 5, 1, 1]).is_valid());
621        assert!(new(vec![1, 5, 5]).is_valid());
622        assert!(new(vec![1, 5, 5, 5]).is_valid());
623        assert!(new(vec![1, 5, 2, 4]).is_valid());
624        assert!(new(vec![1, 5, 2, 4, 3]).is_valid());
625        assert!(new(vec![1, 5, 2, 4, 3, 3]).is_valid());
626
627        assert!(!new(vec![2, 1]).is_valid());       // violates 2a
628        assert!(!new(vec![1, 5, 4, 3]).is_valid()); // violates 2a
629        assert!(!new(vec![1, 5, 0]).is_valid());    // violates 2b
630        assert!(!new(vec![1, 5, 0, 5]).is_valid()); // violates 2b
631        assert!(!new(vec![1, 5, 6]).is_valid());    // violates 2c
632        assert!(!new(vec![1, 5, 1, 6]).is_valid()); // violates 2c
633        assert!(!new(vec![1, 5, 0, 6]).is_valid()); // violates 2b and 2c
634    }
635}