leonardo_heap/
lib.rs

1// Copyright 2016 Ben Mather <bwhmather@bwhmather.com>
2// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
3// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
4// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
5// option. This file may not be copied, modified, or distributed
6// except according to those terms.
7
8//! A binary heap structure supporting fast in-place partial sorting.
9//!
10//! This is structure is the core of Dijkstra's Smoothsort algorithm.
11#[cfg(test)]
12extern crate rand;
13
14mod leonardo;
15mod subheap;
16mod layout;
17
18use std::fmt::Debug;
19
20use subheap::SubHeapMut;
21
22/// Recursively move a new top element down the heap to restore heap order
23/// within a subheap.
24fn sift_down<T: Ord + Debug>(heap: &mut SubHeapMut<T>) {
25    let (mut this_value, mut children) = heap.destructure_mut();
26
27    loop {
28        // No children.  We have reached the bottom of the heap.
29        if children.is_none() {
30            break;
31        }
32
33        let (fst_child, snd_child) = children.unwrap();
34
35        // Find the largest child.  Prefer the furthest child if both children
36        // are the same as doing so makes the array slightly more sorted.
37        let mut next_heap = if fst_child.value() > snd_child.value() {
38            fst_child
39        } else {
40            snd_child
41        };
42
43        // The heap property is satisfied.  No need to do anything else.
44        if &*this_value >= next_heap.value() {
45            break;
46        }
47
48        // Swap the value of the parent with the value of the largest child.
49        std::mem::swap(this_value, next_heap.value_mut());
50
51        // TODO there has to be a better pattern for unpacking to existing vars
52        match next_heap.into_components() {
53            (v, n) => {
54                this_value = v;
55                children = n;
56            }
57        }
58    }
59}
60
61/// This function will restore the string property (the property that the head
62/// of each each top-level subheap should contain a value greater than the head
63/// of the next subheap down after the head of the first subheap has been
64/// changed.  It assumes that the heap property alread holds for all subheaps.
65fn restring<T : Ord + Debug>(mut subheap_iter: layout::IterMut<T>) {
66    if let Some(mut this_subheap) = subheap_iter.next() {
67        for mut next_subheap in subheap_iter {
68            if next_subheap.value() <= this_subheap.value() {
69                break;
70            }
71
72            std::mem::swap(next_subheap.value_mut(), this_subheap.value_mut());
73
74            // The head of `next_subheap` is now lower than it was previously
75            // and so may need to be moved down.  As the new value at the head
76            // of `this_subheap` was larger than the old value it will already
77            // be in heap order so there is no need to do the same for
78            // `this_subheap`.
79            sift_down(&mut next_subheap);
80
81            this_subheap = next_subheap;
82        }
83    }
84}
85
86/// Restores the heap property of the first subheap and the string property of
87/// the heap as a whole after a push.
88fn balance_after_push<T: Ord + Debug>(
89    heap_data: &mut [T], layout: &layout::Layout,
90) {
91    assert_eq!(heap_data.len(), layout.len());
92
93    // Move the highest value in the first subheap to the top.
94    sift_down(&mut layout.iter(heap_data).next().unwrap());
95
96    // Swap it down through the other top-level subheaps until the string
97    // property is restored.
98    restring(layout.iter(heap_data));
99}
100
101/// Restores the string property after a pop.
102fn balance_after_pop<T: Ord + Debug>(
103    heap_data: &mut [T], layout: &layout::Layout,
104) {
105    {
106        let mut subheap_iter = layout.iter(heap_data);
107        match (subheap_iter.next(), subheap_iter.next()) {
108            (Some(fst), Some(snd)) => {
109                if snd.order - fst.order != 1 {
110                    return
111                }
112            }
113            _ => {
114                return;
115            }
116        }
117    }
118
119    {
120        let mut subheaps_from_snd = layout.iter(heap_data);
121        // Consume the first subheap.
122        subheaps_from_snd.next();
123
124        restring(subheaps_from_snd);
125    }
126
127    {
128        let subheaps_from_fst = layout.iter(heap_data);
129        restring(subheaps_from_fst);
130    }
131}
132
133
134#[derive(Debug)]
135struct Iter<'a, T: 'a> {
136    heap_data: &'a mut [T],
137    layout: layout::Layout,
138}
139
140
141impl<'a, T : Ord + Debug> Iterator for Iter<'a, T>
142{
143    type Item = &'a T;
144
145    fn next(&mut self) -> Option<&'a T> {
146        self.layout.pop();
147
148        if self.heap_data.len() != 0 {
149            // In order to avoid having more than one mutable reference to the
150            // heap at any one time,we have to temporarily replace it in self
151            // with a placeholder value.
152            let heap_data = std::mem::replace(&mut self.heap_data, &mut []);
153
154            let (result, rest_data) = heap_data.split_last_mut().unwrap();
155
156            // Store what's left of the heap back in self.
157            self.heap_data = rest_data;
158
159            balance_after_pop(self.heap_data, &self.layout);
160
161            Some(&*result)
162        } else {
163            None
164        }
165    }
166
167    fn size_hint(&self) -> (usize, Option<usize>) {
168        (self.heap_data.len(), Some(self.heap_data.len()))
169    }
170}
171
172
173impl<'a, T : Ord + Debug> ExactSizeIterator for Iter<'a, T> {}
174
175
176#[derive(Debug)]
177struct Drain<'a, T: 'a> {
178    heap: &'a mut LeonardoHeap<T>,
179}
180
181
182impl<'a, T: Ord + Debug> Iterator for Drain<'a, T>
183{
184    type Item = T;
185
186    fn next(&mut self) -> Option<T> {
187        self.heap.pop()
188    }
189
190    fn size_hint(&self) -> (usize, Option<usize>) {
191        (self.heap.len(), Some(self.heap.len()))
192    }
193}
194
195
196impl<'a, T : Ord + Debug> ExactSizeIterator for Drain<'a, T> {}
197
198
199#[derive(Debug)]
200pub struct LeonardoHeap<T> {
201    data: Vec<T>,
202    layout: layout::Layout,
203}
204
205
206impl<T: Ord + Debug> LeonardoHeap<T> {
207    /// Creates a new, empty `LeonardoHeap<T>`
208    pub fn new() -> Self {
209        LeonardoHeap {
210            data: Vec::new(),
211            layout: layout::Layout::new(),
212        }
213    }
214
215    /// Creates a new `LeonardoHeap<T>` with space allocated for at least
216    /// `capacity` elements.
217    pub fn with_capacity(capacity: usize) -> Self {
218        LeonardoHeap {
219            data: Vec::with_capacity(capacity),
220            layout: layout::Layout::new(),
221        }
222    }
223
224    /// Returns the number of elements for which space has been allocated.
225    pub fn capacity(&self) -> usize {
226        self.data.capacity()
227    }
228
229    /// Reserve at least enough space for `additional` elements to be pushed
230    /// on to the heap.
231    pub fn reserve(&mut self, additional: usize) {
232        self.data.reserve(additional)
233    }
234
235    /// Reserves the minimum capacity for exactly `additional` elements to be
236    /// pushed onto the heap.
237    pub fn reserve_exact(&mut self, additional: usize) {
238        self.data.reserve_exact(additional)
239    }
240
241    /// Shrinks the capacity of the underlying storage to free up as much space
242    /// as possible.
243    pub fn shrink_to_fit(&mut self) {
244        self.data.shrink_to_fit()
245    }
246
247    /// Removes all elements from the heap that do not match a predicate.
248    pub fn retain<F>(&mut self, f: F)
249        where F: FnMut(&T) -> bool
250    {
251        // TODO there is a much more interesting implementation
252        self.data.retain(f);
253
254        self.heapify();
255    }
256
257    /// Removes all elements from the heap.
258    pub fn clear(&mut self) {
259        self.data.clear();
260        self.layout = layout::Layout::new();
261    }
262
263    /// Returns the number of elements in the heap.
264    pub fn len(&self) -> usize {
265        self.data.len()
266    }
267
268    /// Returns `true` if the heap contains no elements, `false` otherwise.
269    pub fn is_empty(&self) -> bool {
270        self.data.is_empty()
271    }
272
273    /// Removes duplicate elements from the heap, preserving heap order.
274    pub fn dedup(&mut self) {
275        self.sort();
276        self.data.dedup();
277        self.heapify();
278    }
279
280    fn heapify(&mut self) {
281        let mut layout = layout::Layout::new();
282
283        // TODO harmless off-by-one error
284        for i in 0..self.data.len() {
285            balance_after_push(&mut self.data[0..i], &layout);
286            layout.push();
287        }
288    }
289
290    /// Forces sorting of the entire underlying array.  The sorted array is
291    /// still a valid leonardo heap.
292    pub fn sort(&mut self) {
293        let mut layout = self.layout.clone();
294
295        // TODO harmless off-by-one error
296        for i in (0..self.data.len()).rev() {
297            layout.pop();
298            balance_after_pop(&mut self.data[0..i], &layout);
299        }
300    }
301
302    /// Adds a new element to the heap.  The heap will be rebalanced to
303    /// maintain the string and heap properties.
304    ///
305    /// Elements pushed more than once will not be deduplicated.
306    pub fn push(&mut self, item: T) {
307        self.data.push(item);
308        self.layout.push();
309
310        balance_after_push(self.data.as_mut_slice(), &self.layout);
311    }
312
313    /// Returns a reference to the largest element in the heap without removing
314    /// it.
315    pub fn peek(&self) -> Option<&T> {
316        self.data.get(self.data.len())
317    }
318
319    /// Removes and returns the largest element in the heap.  If the heap is
320    /// empty, returns `None`.
321    pub fn pop(&mut self) -> Option<T> {
322        let result = self.data.pop();
323        self.layout.pop();
324
325        balance_after_pop(self.data.as_mut_slice(), &self.layout);
326
327        result
328    }
329
330    /// Returns a *sorted* iterator over the elements in the heap.
331    ///
332    /// Will lazily sort the top elements of the heap in-place as it is
333    /// consumed.
334    pub fn iter(
335        &mut self
336    ) -> impl Iterator<Item = &T> + ExactSizeIterator<Item = &T> {
337        Iter {
338            heap_data: self.data.as_mut_slice(),
339            layout: self.layout.clone(),
340        }
341    }
342
343    /// Returns an iterator that removes and returns elements from the top of
344    /// the heap.
345    pub fn drain<'a>(
346        &'a mut self
347    ) -> impl Iterator<Item = T> + ExactSizeIterator<Item = T> + 'a {
348        // TODO should drain clear the heap if not fully consumed
349        Drain {
350            heap: self,
351        }
352    }
353}
354
355
356#[cfg(test)]
357mod tests {
358    use rand;
359    use rand::Rng;
360
361    use layout;
362    use subheap::SubHeapMut;
363    use {LeonardoHeap, sift_down, balance_after_push, balance_after_pop};
364
365    #[test]
366    fn test_sift_down_zero() {
367        let mut subheap_data = [1];
368        sift_down(&mut SubHeapMut::new(&mut subheap_data, 0));
369        assert_eq!(subheap_data, [1]);
370    }
371
372    #[test]
373    fn test_sift_down_one() {
374        let mut subheap_data = [1];
375        sift_down(&mut SubHeapMut::new(&mut subheap_data, 1));
376        assert_eq!(subheap_data, [1]);
377    }
378
379    #[test]
380    fn test_sift_down_two() {
381        let mut subheap_data = [3, 2, 1];
382        sift_down(&mut SubHeapMut::new(&mut subheap_data, 2));
383        assert_eq!(subheap_data, [1, 2, 3]);
384
385        let mut subheap_data = [3, 5, 4];
386        sift_down(&mut SubHeapMut::new(&mut subheap_data, 2));
387        assert_eq!(subheap_data, [3, 4, 5]);
388
389        let mut subheap_data = [6, 7, 8];
390        sift_down(&mut SubHeapMut::new(&mut subheap_data, 2));
391        assert_eq!(subheap_data, [6, 7, 8]);
392    }
393
394    #[test]
395    fn test_sift_down_three() {
396        let mut subheap_data = [1, 2, 3, 4, 5];
397        sift_down(&mut SubHeapMut::new(&mut subheap_data, 3));
398        assert_eq!(subheap_data, [1, 2, 3, 4, 5]);
399
400        let mut subheap_data = [1, 2, 3, 5, 4];
401        sift_down(&mut SubHeapMut::new(&mut subheap_data, 3));
402        assert_eq!(subheap_data, [1, 2, 3, 4, 5]);
403
404        let mut subheap_data = [1, 2, 5, 4, 3];
405        sift_down(&mut SubHeapMut::new(&mut subheap_data, 3));
406        assert_eq!(subheap_data, [1, 2, 3, 4, 5]);
407
408        let mut subheap_data = [2, 3, 5, 4, 1];
409        sift_down(&mut SubHeapMut::new(&mut subheap_data, 3));
410        assert_eq!(subheap_data, [2, 1, 3, 4, 5]);
411
412        let mut subheap_data = [3, 2, 5, 4, 1];
413        sift_down(&mut SubHeapMut::new(&mut subheap_data, 3));
414        assert_eq!(subheap_data, [1, 2, 3, 4, 5]);
415    }
416
417    #[test]
418    fn test_sift_down_sorting() {
419        let mut subheap_data = [5, 5, 4];
420        sift_down(&mut SubHeapMut::new(&mut subheap_data, 2));
421        assert_eq!(subheap_data, [4, 5, 5]);
422
423        let mut subheap_data = [1, 2, 4, 4, 3];
424        sift_down(&mut SubHeapMut::new(&mut subheap_data, 3));
425        assert_eq!(subheap_data, [1, 2, 3, 4, 4]);
426    }
427
428    #[test]
429    #[should_panic]
430    fn test_sift_down_wrong_order() {
431        let mut subheap_data : [i32; 0] = [];
432        sift_down(&mut SubHeapMut::new(&mut subheap_data, 0));
433    }
434
435    #[test]
436    fn test_balance_after_push_first() {
437        let mut subheap_data = [1];
438        balance_after_push(
439            &mut subheap_data, &layout::Layout::new_from_len(1),
440        );
441        assert_eq!(subheap_data, [1]);
442    }
443
444    #[test]
445    fn test_balance_after_push_second() {
446        let mut subheap_data = [1, 2];
447        balance_after_push(
448            &mut subheap_data, &layout::Layout::new_from_len(2),
449        );
450        assert_eq!(subheap_data, [1, 2]);
451
452        let mut subheap_data = [2, 1];
453        balance_after_push(
454            &mut subheap_data, &layout::Layout::new_from_len(2),
455        );
456        assert_eq!(subheap_data, [1, 2]);
457    }
458
459    #[test]
460    fn test_balance_after_push_merge() {
461        let mut subheap_data = [1, 2, 3];
462        balance_after_push(
463            &mut subheap_data, &layout::Layout::new_from_len(3),
464        );
465        assert_eq!(subheap_data, [1, 2, 3]);
466
467        let mut subheap_data = [1, 3, 2];
468        balance_after_push(
469            &mut subheap_data, &layout::Layout::new_from_len(3),
470        );
471        assert_eq!(subheap_data, [1, 2, 3]);
472    }
473
474    #[test]
475    #[should_panic]
476    fn test_balance_after_push_mismatched_lengths() {
477        let mut subheap_data = [1, 2, 3, 4];
478        balance_after_push(
479            &mut subheap_data, &layout::Layout::new_from_len(12),
480        );
481    }
482
483    #[test]
484    fn test_balance_after_pop_empty() {
485        let mut subheap_data : [i32; 0]= [];
486        balance_after_pop(&mut subheap_data, &layout::Layout::new_from_len(0));
487        assert_eq!(subheap_data, []);
488    }
489
490    #[test]
491    fn test_balance_after_pop_one() {
492        let mut heap_data = [1];
493        balance_after_pop(&mut heap_data, &layout::Layout::new_from_len(1));
494        assert_eq!(heap_data, [1]);
495    }
496
497    #[test]
498    fn test_balance_after_pop_two() {
499        let mut heap_data = [1, 2];
500        balance_after_pop(&mut heap_data, &layout::Layout::new_from_len(2));
501        assert_eq!(heap_data, [1, 2]);
502
503        let mut heap_data = [2, 1];
504        balance_after_pop(&mut heap_data, &layout::Layout::new_from_len(2));
505        assert_eq!(heap_data, [1, 2]);
506    }
507
508    #[test]
509    fn test_balance_after_pop_split_heaps() {
510        let mut heap_data = [1, 2, 3, 4, 5, 6, 7];
511        balance_after_pop(&mut heap_data, &layout::Layout::new_from_len(7));
512        assert_eq!(heap_data, [1, 2, 3, 4, 5, 6, 7]);
513
514        let mut heap_data = [1, 2, 3, 4, 5, 7, 6];
515        balance_after_pop(&mut heap_data, &layout::Layout::new_from_len(7));
516        assert_eq!(heap_data, [1, 2, 3, 4, 5, 6, 7]);
517
518        let mut heap_data = [1, 2, 3, 4, 6, 5, 7];
519        balance_after_pop(&mut heap_data, &layout::Layout::new_from_len(7));
520        assert_eq!(heap_data, [1, 2, 3, 4, 5, 6, 7]);
521
522        let mut heap_data = [1, 2, 3, 4, 7, 5, 6];
523        balance_after_pop(&mut heap_data, &layout::Layout::new_from_len(7));
524        assert_eq!(heap_data, [1, 2, 3, 4, 5, 6, 7]);
525
526        let mut heap_data = [1, 2, 3, 4, 6, 7, 5];
527        balance_after_pop(&mut heap_data, &layout::Layout::new_from_len(7));
528        assert_eq!(heap_data, [1, 2, 3, 4, 5, 6, 7]);
529
530        let mut heap_data = [1, 2, 3, 4, 7, 6, 5];
531        balance_after_pop(&mut heap_data, &layout::Layout::new_from_len(7));
532        assert_eq!(heap_data, [1, 2, 3, 4, 5, 6, 7]);
533    }
534
535    #[test]
536    fn test_balance_after_pop_restring_after_sift() {
537        let mut heap_data = [
538            1, 2, 3, 4, 5, 6, 10, 11, 12,
539            9, 7, 13,
540            8
541        ];
542        balance_after_pop(&mut heap_data, &layout::Layout::new_from_len(13));
543        assert_eq!(heap_data, [
544            1, 2, 3, 4, 5, 6, 9, 10, 11,
545            8, 7, 12,
546            13,
547        ]);
548    }
549
550    #[test]
551    fn test_balance_after_pop_mutiple_layers() {
552        let mut heap_data = [
553            3, 0, 5, 1, 9, 2, 6, 7, 10,
554            4,
555            8,
556        ];
557        balance_after_pop(&mut heap_data, &layout::Layout::new_from_len(11));
558        assert_eq!(heap_data, [
559            3, 0, 4, 1, 5, 2, 6, 7, 8,
560            9,
561            10,
562        ]);
563    }
564
565    #[test]
566    #[should_panic]
567    fn test_balance_after_pop_mismatched_lengths() {
568        let mut subheap_data = [1, 2, 3, 4];
569        balance_after_pop(
570            &mut subheap_data, &layout::Layout::new_from_len(12),
571        );
572    }
573
574    #[test]
575    fn test_push_pop() {
576        let mut heap = LeonardoHeap::new();
577        heap.push(4);
578        heap.push(1);
579        heap.push(2);
580        heap.push(3);
581
582        assert_eq!(heap.pop(), Some(4));
583        assert_eq!(heap.pop(), Some(3));
584        assert_eq!(heap.pop(), Some(2));
585        assert_eq!(heap.pop(), Some(1));
586    }
587
588    #[test]
589    fn test_random() {
590        let mut rng = rand::thread_rng();
591
592        let mut inputs : Vec<i32> = (0..200).collect();
593
594        let mut expected = inputs.clone();
595        expected.sort_by(|a, b| b.cmp(a));
596
597        rng.shuffle(inputs.as_mut_slice());
598
599        let mut heap = LeonardoHeap::new();
600        for input in inputs {
601            heap.push(input.clone());
602        }
603
604        let mut outputs: Vec<i32> = Vec::new();
605        while let Some(output) = heap.pop() {
606            outputs.push(output);
607        }
608
609        assert_eq!(outputs, expected);
610    }
611
612    #[test]
613    fn test_sort_random() {
614        let mut rng = rand::thread_rng();
615
616        let mut inputs : Vec<i32> = (0..200).collect();
617
618        let mut expected = inputs.clone();
619        expected.sort();
620
621        rng.shuffle(inputs.as_mut_slice());
622
623        let mut heap = LeonardoHeap::new();
624        for input in &inputs {
625            heap.push(input.clone());
626        }
627
628        heap.sort();
629
630        assert_eq!(heap.data, expected);
631    }
632
633    #[test]
634    fn test_iter() {
635        let mut heap = LeonardoHeap::new();
636        heap.push(4);
637        heap.push(1);
638        heap.push(2);
639        heap.push(3);
640
641        let mut heap_iter = heap.iter();
642
643        let mut var = 4;
644        assert_eq!(heap_iter.next(), Some(&var));
645        var = 3;
646        assert_eq!(heap_iter.next(), Some(&var));
647        var = 2;
648        assert_eq!(heap_iter.next(), Some(&var));
649        var = 1;
650        assert_eq!(heap_iter.next(), Some(&var));
651    }
652}