1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
use super::super::algo::k_way_merge::k_way_merge;
use super::super::algo::verge_sort_heuristic::{
    explore_simple_forward, verge_sort_preprocessing, Orientation,
};
use super::super::{RadixKey, Radixable};
use super::counting_sort::counting_sort;
use super::msd_sort::msd_radixsort_rec;
use super::ska_sort::ska_swap;
use super::utils::{get_histogram, prefix_sums, Params};

/// Recursive core of the Voracious sort: an in-place MSD radix sort derived
/// from Ska sort, with diversion fallbacks for small inputs and a cheap
/// orientation probe aimed at skewed (Zipf-like) distributions.
///
/// * `arr` — slice to sort at the current radix level.
/// * `p` — radix parameters for this level (level index, radix width, mask
///   offset, max level — see `Params`).
/// * `zipf_heuristic_count` — remaining budget for the ascending/descending
///   probe on sub-buckets; decremented on each recursion that used the probe.
pub fn voracious_sort_rec<T: Radixable<K>, K: RadixKey>(
    arr: &mut [T],
    p: Params,
    zipf_heuristic_count: usize,
) {
    // Small optimization, use PDQ sort (sort implemented in Std Rust Unstable)
    // instead of insertion sort for small size array.
    if arr.len() <= 128 {
        arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
        return;
    }
    // Main optimization is here: better diversion handling.
    // Maybe it seems not important, but, really, it is very important.
    // What is important is to have a sort that is as fast as possible
    // for small size array. msd_radixsort has been designed that way.
    // It is unusual to have an out of place msd radix sort (usually msd radix
    // sort are in place).
    // The threshold has been found by experimental tests.
    if arr.len() <= 30_000 {
        msd_radixsort_rec(arr, p);
        return;
    }

    // Any element works as a witness to derive the mask/shift for this level;
    // the slice is non-empty here (len > 30_000).
    let dummy = arr[0];
    let (mask, shift) = dummy.get_mask_and_shift_from_left(&p);
    let histogram = get_histogram(arr, &p, mask, shift);
    let (p_sums, mut heads, tails) = prefix_sums(&histogram);

    // Ska-sort style in-place partitioning: swap elements into their buckets.
    ska_swap(arr, &mut heads, &tails, mask, shift);

    // Walk the buckets front to back, carving each one off `rest` and
    // recursing into it for the next radix level.
    let mut rest = arr;
    if p.level < p.max_level - 1 {
        for i in 0..(p.radix_range) {
            // NOTE: despite the name, this is the bucket *length*
            // (difference of consecutive prefix sums), used as the split
            // point within `rest`.
            let bucket_end = p_sums[i + 1] - p_sums[i];
            let (first_part, second_part) = rest.split_at_mut(bucket_end);
            rest = second_part;
            if histogram[i] > 1 {
                // Skip buckets with at most 1 item (already sorted).
                let new_params = p.new_level(p.level + 1);
                // Other optimization, it costs almost nothing to perform this
                // check, and it allows to gain time on some data distributions.
                if zipf_heuristic_count > 0 {
                    match explore_simple_forward(first_part) {
                        // Already ascending: nothing to do.
                        Orientation::IsAsc => (),
                        // Descending: a reverse makes it sorted.
                        Orientation::IsDesc => {
                            first_part.reverse();
                        }
                        // All-equal plateau: already sorted.
                        Orientation::IsPlateau => (),
                        // No exploitable pattern: recurse, spending one
                        // unit of the probe budget.
                        Orientation::IsNone => {
                            voracious_sort_rec(
                                first_part,
                                new_params,
                                zipf_heuristic_count - 1,
                            );
                        }
                    }
                } else {
                    voracious_sort_rec(first_part, new_params, 0);
                }
            }
        }
    }
}

/// Entry point shared by [`voracious_sort`] and [`voracious_sort_heu`]:
/// computes the radix parameters for `arr`, then dispatches either to a
/// counting sort (when `heuristic` is on and the key is shallow) or to the
/// recursive voracious sort.
///
/// * `radix` — number of bits consumed per radix pass.
/// * `heuristic` — enables the counting-sort diversion for 1- and 2-level keys.
/// * `min_cs2` — minimum slice length for the 2-level counting-sort diversion.
fn voracious_sort_aux<T: Radixable<K>, K: RadixKey>(
    arr: &mut [T],
    radix: usize,
    heuristic: bool,
    min_cs2: usize,
) {
    // Tiny inputs: PDQ sort (std unstable sort) beats any radix setup cost.
    if arr.len() <= 128 {
        arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
        return;
    }

    // The slice is non-empty here, so any element serves as a witness to
    // derive the offset and the number of radix levels.
    let witness = arr[0];
    let (offset, _) = witness.compute_offset(arr, radix);
    let levels = witness.compute_max_level(offset, radix);

    // Zero levels means there is nothing left to discriminate on.
    if levels == 0 {
        return;
    }

    let params = Params::new(0, radix, offset, levels);

    // With the heuristic enabled, shallow keys go through a counting sort;
    // this stays MSD-style, so no additional memory requirement is added.
    match (heuristic, levels) {
        (true, 1) => counting_sort(arr, 8),
        (true, 2) if arr.len() >= min_cs2 => counting_sort(arr, 16),
        _ => voracious_sort_rec(arr, params, 2),
    }
}

/// # Voracious sort
///
/// An in-place unstable radix sort, built as an improvement of the
/// [Ska sort](https://probablydance.com/2016/12/27/i-wrote-a-faster-sorting-algorithm/)
/// algorithm, with several diversions:
///
/// * PDQ sort (Rust's unstable standard sort) replaces insertion sort as the
///   fallback for very small inputs (at most 128 elements).
/// * Arrays of at most 30_000 elements divert to this crate's MSD sort — the
///   fastest algorithm here for "small" inputs. That MSD sort is out of
///   place, but since the threshold is a constant the overall algorithm is
///   still in place.
/// * Additional heuristics, plus the Verge sort pre-processing pass, which
///   detects monotonic runs up front and merges everything at the end.
pub fn voracious_sort<T, K>(arr: &mut [T], radix: usize)
where
    T: Radixable<K>,
    K: RadixKey,
{
    // Tiny inputs go straight to PDQ sort.
    if arr.len() <= 128 {
        arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
        return;
    }

    // Verge-sort pre-processing: sorted/reversed runs are detected, the
    // remaining chunks are radix-sorted (heuristic disabled), and the
    // resulting runs are merged back together.
    let fallback = |chunk: &mut [T], r: usize| voracious_sort_aux(chunk, r, false, 0);
    let mut separators = verge_sort_preprocessing(arr, radix, &fallback);
    k_way_merge(arr, &mut separators);
}

/// Heuristic variant of [`voracious_sort`]: identical pipeline, but the
/// counting-sort diversion for shallow keys is enabled, with `min_cs2` as the
/// minimum slice length for the 2-level counting sort.
pub fn voracious_sort_heu<T, K>(arr: &mut [T], radix: usize, min_cs2: usize)
where
    T: Radixable<K>,
    K: RadixKey,
{
    // Tiny inputs go straight to PDQ sort.
    if arr.len() <= 128 {
        arr.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
        return;
    }

    // Same Verge sort run detection + k-way merge as voracious_sort, with
    // the heuristic flag turned on for the unsorted chunks.
    let fallback = |chunk: &mut [T], r: usize| voracious_sort_aux(chunk, r, true, min_cs2);
    let mut separators = verge_sort_preprocessing(arr, radix, &fallback);
    k_way_merge(arr, &mut separators);
}