1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
//! Types and traits for radix sorting.
//!
//! The current offerings are
//!
//! * LSBRadixSorter: A least-significant byte radix sorter.
//! * MSBRadixSorter: A most-significant byte radix sorter.
//! * LSBSWCRadixSorter: A least-significant byte radix sorter with software write combining.
//! 
//! There should probably be a `MSBSWCRadixSorter` in the future, because I like both of those things.

mod lsb;
mod lsb_swc;
mod msb;
mod stash;
mod batched_vec;

pub use lsb::Sorter as LSBRadixSorter;
pub use lsb_swc::Sorter as LSBSWCRadixSorter;
pub use msb::Sorter as MSBRadixSorter;

/// An unsigned integer fit for use as a radix key.
pub trait Unsigned : Ord {
    fn bytes() -> usize;
    fn as_u64(&self) -> u64;
}

impl Unsigned for  u8 { #[inline]fn bytes() -> usize { 1 } #[inline] fn as_u64(&self) -> u64 { *self as u64 } }
impl Unsigned for u16 { #[inline]fn bytes() -> usize { 2 } #[inline] fn as_u64(&self) -> u64 { *self as u64 } }
impl Unsigned for u32 { #[inline]fn bytes() -> usize { 4 } #[inline] fn as_u64(&self) -> u64 { *self as u64 } }
impl Unsigned for u64 { #[inline]fn bytes() -> usize { 8 } #[inline] fn as_u64(&self) -> u64 { *self as u64 } }
impl Unsigned for usize { #[inline]fn bytes() -> usize { ::std::mem::size_of::<usize>() } #[inline]fn as_u64(&self) -> u64 { *self as u64 } }

/// Functionality provided by a radix sorter.
///
/// These radix sorters allow you to specify a radix key function with each method call, so that 
/// the sorter does not need to reflect this (hard to name) type in its own type. This means you
/// need to take some care and not abuse this.
///
/// Internally the sorters manage data using blocks `Vec<T>`, and are delighted to consume data 
/// in block form (they will re-use the blocks), and will return results as a sequence of blocks
/// such that if they are traversed in-order they are appropriately sorted.
///
/// Most of the sorters manage a stash of buffers (ideally: 256) that they use when sorting the 
/// data. If this stash goes dry the sorters will allocate, but they much prefer to re-use buffers
/// whenever possible, and if having consulted your sorted results you would like to return the
/// buffers (using either `recycle` or `rebalance`) you should! The `rebalance` method allows you
/// to control just how many buffers the sorter is sitting on. 
pub trait RadixSorter<T, U: Unsigned> : RadixSorterBase<T> {

    /// Pushes a single element using the supplied radix key function.
    fn push<F: Fn(&T)->U>(&mut self, element: T, key: &F);
    /// Pushes a batch of elements using the supplied radix key function.
    fn push_batch<F: Fn(&T)->U>(&mut self, batch: Vec<T>, key: &F);
    /// Pushes a sequence of elements using the supplied radix key function.
    fn extend<F: Fn(&T)->U, I: Iterator<Item=T>>(&mut self, iterator: I, key: &F) {
    	for element in iterator {
    		self.push(element, key)
    	}
    }
    /// Completes the sorting session and returns the sorted results.
    fn finish<F: Fn(&T)->U>(&mut self, key: &F) -> Vec<Vec<T>> {
        let mut result = Vec::new();
        self.finish_into(&mut result, key);
        result    	
    }
    /// Completes the sorting session and puts the sorted results into `target`.
    fn finish_into<F: Fn(&T)->U>(&mut self, target: &mut Vec<Vec<T>>, key: &F);
    /// Sorts batched data using the supplied radix key function.
    fn sort<F: Fn(&T)->U>(&mut self, batches: &mut Vec<Vec<T>>, key: &F) {
        for batch in batches.drain(..) { 
            self.push_batch(batch, key); 
        }
        self.finish_into(batches, key);    	
    }
}

/// Functionality independent of the type `U` used to sort.
pub trait RadixSorterBase<T> {
	/// Allocates a new instance of the radix sorter.
    fn new() -> Self;
    /// Provides empty buffers for the radix sorter to use.
    fn recycle(&mut self, buffers: &mut Vec<Vec<T>>) {
        self.rebalance(buffers, usize::max_value());
	}
	/// Provides empty buffers for the radix sorter to use, with the intent that it should own at most `intended`.
    fn rebalance(&mut self, buffers: &mut Vec<Vec<T>>, intended: usize);	
}