timely_sort/lib.rs
1//! Types and traits for radix sorting.
2//!
3//! The current offerings are
4//!
5//! * LSBRadixSorter: A least-significant byte radix sorter.
6//! * MSBRadixSorter: A most-significant byte radix sorter.
7//! * LSBSWCRadixSorter: A least-significant byte radix sorter with software write combining.
8//!
9//! There should probably be a `MSBSWCRadixSorter` in the future, because I like both of those things.
10
11mod lsb;
12mod lsb_swc;
13mod msb;
14mod stash;
15mod batched_vec;
16
17pub use lsb::Sorter as LSBRadixSorter;
18pub use lsb_swc::Sorter as LSBSWCRadixSorter;
19pub use msb::Sorter as MSBRadixSorter;
20
21/// An unsigned integer fit for use as a radix key.
22pub trait Unsigned : Ord {
23 fn bytes() -> usize;
24 fn as_u64(&self) -> u64;
25}
26
27impl Unsigned for u8 { #[inline]fn bytes() -> usize { 1 } #[inline] fn as_u64(&self) -> u64 { *self as u64 } }
28impl Unsigned for u16 { #[inline]fn bytes() -> usize { 2 } #[inline] fn as_u64(&self) -> u64 { *self as u64 } }
29impl Unsigned for u32 { #[inline]fn bytes() -> usize { 4 } #[inline] fn as_u64(&self) -> u64 { *self as u64 } }
30impl Unsigned for u64 { #[inline]fn bytes() -> usize { 8 } #[inline] fn as_u64(&self) -> u64 { *self as u64 } }
31impl Unsigned for usize { #[inline]fn bytes() -> usize { ::std::mem::size_of::<usize>() } #[inline]fn as_u64(&self) -> u64 { *self as u64 } }
32
33/// Functionality provided by a radix sorter.
34///
35/// These radix sorters allow you to specify a radix key function with each method call, so that
36/// the sorter does not need to reflect this (hard to name) type in its own type. This means you
37/// need to take some care and not abuse this.
38///
39/// Internally the sorters manage data using blocks `Vec<T>`, and are delighted to consume data
40/// in block form (they will re-use the blocks), and will return results as a sequence of blocks
41/// such that if they are traversed in-order they are appropriately sorted.
42///
43/// Most of the sorters manage a stash of buffers (ideally: 256) that they use when sorting the
44/// data. If this stash goes dry the sorters will allocate, but they much prefer to re-use buffers
45/// whenever possible, and if having consulted your sorted results you would like to return the
46/// buffers (using either `recycle` or `rebalance`) you should! The `rebalance` method allows you
47/// to control just how many buffers the sorter is sitting on.
48pub trait RadixSorter<T, U: Unsigned> : RadixSorterBase<T> {
49
50 /// Pushes a single element using the supplied radix key function.
51 fn push<F: Fn(&T)->U>(&mut self, element: T, key: &F);
52 /// Pushes a batch of elements using the supplied radix key function.
53 fn push_batch<F: Fn(&T)->U>(&mut self, batch: Vec<T>, key: &F);
54 /// Pushes a sequence of elements using the supplied radix key function.
55 fn extend<F: Fn(&T)->U, I: Iterator<Item=T>>(&mut self, iterator: I, key: &F) {
56 for element in iterator {
57 self.push(element, key)
58 }
59 }
60 /// Completes the sorting session and returns the sorted results.
61 fn finish<F: Fn(&T)->U>(&mut self, key: &F) -> Vec<Vec<T>> {
62 let mut result = Vec::new();
63 self.finish_into(&mut result, key);
64 result
65 }
66 /// Completes the sorting session and puts the sorted results into `target`.
67 fn finish_into<F: Fn(&T)->U>(&mut self, target: &mut Vec<Vec<T>>, key: &F);
68 /// Sorts batched data using the supplied radix key function.
69 fn sort<F: Fn(&T)->U>(&mut self, batches: &mut Vec<Vec<T>>, key: &F) {
70 for batch in batches.drain(..) {
71 self.push_batch(batch, key);
72 }
73 self.finish_into(batches, key);
74 }
75}
76
77/// Functionality independent of the type `U` used to sort.
78pub trait RadixSorterBase<T> {
79 /// Allocates a new instance of the radix sorter.
80 fn new() -> Self;
81 /// Provides empty buffers for the radix sorter to use.
82 fn recycle(&mut self, buffers: &mut Vec<Vec<T>>) {
83 self.rebalance(buffers, usize::max_value());
84 }
85 /// Provides empty buffers for the radix sorter to use, with the intent that it should own at most `intended`.
86 fn rebalance(&mut self, buffers: &mut Vec<Vec<T>>, intended: usize);
87}