pdqselect/
lib.rs

1//! Pattern-defeating quickselect
2//!
3//! The algorithm is based on pattern-defeating quicksort by Orson Peters, published at:
4//! <https://github.com/orlp/pdqsort>
5//! It is also heavily adapted from the Rust implementation of pdqsort
6//! (<https://github.com/stjepang/pdqsort>) and Rust's own [`slice::sort_unstable`].
7//!
8//! NOTE: you may want to use Rust's official version of this algorithm,
9//! [`slice::select_nth_unstable_by`]
10//!
11//! # Properties
12//!
13//! - Best-case running time is `O(n)`.
14//! - Worst-case running time is `O(n log n)`.
15//! - Does not allocate additional memory.
16//! - Uses `#![no_std]`.
17//!
18//! # Examples
19//!
20//! ```
21//! let mut v = [-5i32, 4, 1, -3, 2];
22//! let k = 3;
23//!
24//! pdqselect::select(&mut v, k);
25//! let kth = v[k];
26//! assert!(v[..k].iter().all(|&x| x <= kth));
27//! assert!(v[k+1..].iter().all(|&x| x >= kth));
28//!
29//! pdqselect::select_by(&mut v, k, |a, b| b.cmp(a));
30//! let kth = v[k];
31//! assert!(v[..k].iter().all(|&x| x >= kth));
32//! assert!(v[k+1..].iter().all(|&x| x <= kth));
33//!
34//! pdqselect::select_by_key(&mut v, k, |k| k.abs());
35//! let kth = v[k].abs();
36//! assert!(v[..k].iter().all(|&x| x.abs() <= kth));
37//! assert!(v[k+1..].iter().all(|&x| x.abs() >= kth));
38//! ```
39
40#![no_std]
41
42use core::cmp::{self, Ordering};
43use core::mem;
44use core::ptr;
45
46/// When dropped, copies from `src` into `dest`.
47struct CopyOnDrop<T> {
48    src: *mut T,
49    dest: *mut T,
50}
51
52impl<T> Drop for CopyOnDrop<T> {
53    fn drop(&mut self) {
54        unsafe { ptr::copy_nonoverlapping(self.src, self.dest, 1); }
55    }
56}
57
58/// Shifts the first element to the right until it encounters a greater or equal element.
59fn shift_head<T, F>(v: &mut [T], is_less: &mut F)
60    where F: FnMut(&T, &T) -> bool
61{
62    let len = v.len();
63    unsafe {
64        // If the first two elements are out-of-order...
65        if len >= 2 && is_less(v.get_unchecked(1), v.get_unchecked(0)) {
66            // Read the first element into a stack-allocated variable. If a following comparison
67            // operation panics, `hole` will get dropped and automatically write the element back
68            // into the slice.
69            let mut tmp = mem::ManuallyDrop::new(ptr::read(v.get_unchecked(0)));
70            let mut hole = CopyOnDrop {
71                src: &mut *tmp,
72                dest: v.get_unchecked_mut(1),
73            };
74            ptr::copy_nonoverlapping(v.get_unchecked(1), v.get_unchecked_mut(0), 1);
75
76            for i in 2..len {
77                if !is_less(v.get_unchecked(i), &*tmp) {
78                    break;
79                }
80
81                // Move `i`-th element one place to the left, thus shifting the hole to the right.
82                ptr::copy_nonoverlapping(v.get_unchecked(i), v.get_unchecked_mut(i - 1), 1);
83                hole.dest = v.get_unchecked_mut(i);
84            }
85            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
86        }
87    }
88}
89
90/// Shifts the last element to the left until it encounters a smaller or equal element.
91fn shift_tail<T, F>(v: &mut [T], is_less: &mut F)
92    where F: FnMut(&T, &T) -> bool
93{
94    let len = v.len();
95    unsafe {
96        // If the last two elements are out-of-order...
97        if len >= 2 && is_less(v.get_unchecked(len - 1), v.get_unchecked(len - 2)) {
98            // Read the last element into a stack-allocated variable. If a following comparison
99            // operation panics, `hole` will get dropped and automatically write the element back
100            // into the slice.
101            let mut tmp = mem::ManuallyDrop::new(ptr::read(v.get_unchecked(len - 1)));
102            let mut hole = CopyOnDrop {
103                src: &mut *tmp,
104                dest: v.get_unchecked_mut(len - 2),
105            };
106            ptr::copy_nonoverlapping(v.get_unchecked(len - 2), v.get_unchecked_mut(len - 1), 1);
107
108            for i in (0..len-2).rev() {
109                if !is_less(&*tmp, v.get_unchecked(i)) {
110                    break;
111                }
112
113                // Move `i`-th element one place to the right, thus shifting the hole to the left.
114                ptr::copy_nonoverlapping(v.get_unchecked(i), v.get_unchecked_mut(i + 1), 1);
115                hole.dest = v.get_unchecked_mut(i);
116            }
117            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
118        }
119    }
120}
121
122/// Partially sorts a slice by shifting several out-of-order elements around.
123///
124/// Returns `true` if the slice is sorted at the end. This function is `O(n)` worst-case.
125#[cold]
126fn partial_insertion_sort<T, F>(v: &mut [T], is_less: &mut F) -> bool
127    where F: FnMut(&T, &T) -> bool
128{
129    // Maximum number of adjacent out-of-order pairs that will get shifted.
130    const MAX_STEPS: usize = 5;
131    // If the slice is shorter than this, don't shift any elements.
132    const SHORTEST_SHIFTING: usize = 50;
133
134    let len = v.len();
135    let mut i = 1;
136
137    for _ in 0..MAX_STEPS {
138        unsafe {
139            // Find the next pair of adjacent out-of-order elements.
140            while i < len && !is_less(v.get_unchecked(i), v.get_unchecked(i - 1)) {
141                i += 1;
142            }
143        }
144
145        // Are we done?
146        if i == len {
147            return true;
148        }
149
150        // Don't shift elements on short arrays, that has a performance cost.
151        if len < SHORTEST_SHIFTING {
152            return false;
153        }
154
155        // Swap the found pair of elements. This puts them in correct order.
156        v.swap(i - 1, i);
157
158        // Shift the smaller element to the left.
159        // shift_tail(unsafe{v.get_unchecked_mut(..i)}, is_less);
160        shift_tail(&mut v[..i], is_less);
161        // Shift the greater element to the right.
162        // shift_head(unsafe{v.get_unchecked_mut(i..)}, is_less);
163        shift_head(&mut v[i..], is_less);
164    }
165
166    // Didn't manage to sort the slice in the limited number of steps.
167    false
168}
169
170/// Sorts a slice using insertion sort, which is `O(n^2)` worst-case.
171fn insertion_sort<T, F>(v: &mut [T], is_less: &mut F)
172    where F: FnMut(&T, &T) -> bool
173{
174    for i in 1..v.len() {
175        // shift_tail(unsafe{v.get_unchecked_mut(..i+1)}, is_less);
176        shift_tail(&mut v[..i+1], is_less);
177    }
178}
179
180/// Sorts `v` using heapsort, which guarantees `O(n log n)` worst-case.
181#[cold]
182pub fn heapsort<T, F>(v: &mut [T], is_less: &mut F)
183    where F: FnMut(&T, &T) -> bool
184{
185    // This binary heap respects the invariant `parent >= child`.
186    let mut sift_down = |v: &mut [T], mut node| {
187        loop {
188            // Children of `node`:
189            let left = 2 * node + 1;
190            let right = 2 * node + 2;
191
192            // Choose the greater child.
193            /*let greater = unsafe { if right < v.len() &&
194                    is_less(v.get_unchecked(left), v.get_unchecked(right))
195                {
196                    right
197                } else {
198                    left
199                }
200            };*/
201
202            let greater = if right < v.len() && is_less(&v[left], &v[right]) {
203                right
204            } else {
205                left
206            };
207
208            // Stop if the invariant holds at `node`.
209            /*unsafe {
210                if greater >= v.len() ||
211                    !is_less(v.get_unchecked(node), v.get_unchecked(greater))
212                {
213                    break;
214                }
215            }*/
216            if greater >= v.len() || !is_less(&v[node], &v[greater]) {
217                break;
218            }
219
220            // Swap `node` with the greater child, move one step down, and continue sifting.
221            v.swap(node, greater);
222            node = greater;
223        }
224    };
225
226    // Build the heap in linear time.
227    for i in (0 .. v.len() / 2).rev() {
228        sift_down(v, i);
229    }
230
231    // Pop maximal elements from the heap.
232    for i in (1 .. v.len()).rev() {
233        v.swap(0, i);
234        // sift_down(unsafe { v.get_unchecked_mut(..i) }, 0);
235        sift_down(&mut v[..i], 0);
236    }
237}
238
239/// Partitions `v` into elements smaller than `pivot`, followed by elements greater than or equal
240/// to `pivot`.
241///
242/// Returns the number of elements smaller than `pivot`.
243///
244/// Partitioning is performed block-by-block in order to minimize the cost of branching operations.
245/// This idea is presented in the [`BlockQuicksort`][pdf] paper.
246///
247/// [pdf]: http://drops.dagstuhl.de/opus/volltexte/2016/6389/pdf/LIPIcs-ESA-2016-38.pdf
248fn partition_in_blocks<T, F>(v: &mut [T], pivot: &T, is_less: &mut F) -> usize
249    where F: FnMut(&T, &T) -> bool
250{
251    // Number of elements in a typical block.
252    const BLOCK: usize = 128;
253
254    // The partitioning algorithm repeats the following steps until completion:
255    //
256    // 1. Trace a block from the left side to identify elements greater than or equal to the pivot.
257    // 2. Trace a block from the right side to identify elements smaller than the pivot.
258    // 3. Exchange the identified elements between the left and right side.
259    //
260    // We keep the following variables for a block of elements:
261    //
262    // 1. `block` - Number of elements in the block.
263    // 2. `start` - Start pointer into the `offsets` array.
264    // 3. `end` - End pointer into the `offsets` array.
265    // 4. `offsets - Indices of out-of-order elements within the block.
266
267    // The current block on the left side (from `l` to `l.offset(block_l)`).
268    let mut l = v.as_mut_ptr();
269    let mut block_l = BLOCK;
270    let mut start_l = ptr::null_mut();
271    let mut end_l = ptr::null_mut();
272    let mut offsets_l: [u8; BLOCK] = unsafe { mem::uninitialized() };
273
274    // The current block on the right side (from `r.offset(-block_r)` to `r`).
275    let mut r = unsafe { l.offset(v.len() as isize) };
276    let mut block_r = BLOCK;
277    let mut start_r = ptr::null_mut();
278    let mut end_r = ptr::null_mut();
279    let mut offsets_r: [u8; BLOCK] = unsafe { mem::uninitialized() };
280
281    // FIXME: When we get VLAs, try creating one array of length `min(v.len(), 2 * BLOCK)` rather
282    // than two fixed-size arrays of length `BLOCK`. VLAs might be more cache-efficient.
283
284    // Returns the number of elements between pointers `l` (inclusive) and `r` (exclusive).
285    fn width<T>(l: *mut T, r: *mut T) -> usize {
286        assert!(mem::size_of::<T>() > 0); // already done, no?
287        (r as usize - l as usize) / mem::size_of::<T>()
288    }
289
290    loop {
291        // We are done with partitioning block-by-block when `l` and `r` get very close. Then we do
292        // some patch-up work in order to partition the remaining elements in between.
293        let is_done = width(l, r) <= 2 * BLOCK;
294
295        if is_done {
296            // Number of remaining elements (still not compared to the pivot).
297            let mut rem = width(l, r);
298            if start_l < end_l || start_r < end_r {
299                rem -= BLOCK;
300            }
301
302            // Adjust block sizes so that the left and right block don't overlap, but get perfectly
303            // aligned to cover the whole remaining gap.
304            if start_l < end_l {
305                block_r = rem;
306            } else if start_r < end_r {
307                block_l = rem;
308            } else {
309                block_l = rem / 2;
310                block_r = rem - block_l;
311            }
312            debug_assert!(block_l <= BLOCK && block_r <= BLOCK);
313            debug_assert_eq!(width(l, r), block_l + block_r);
314        }
315
316        if start_l == end_l {
317            // Trace `block_l` elements from the left side.
318            start_l = offsets_l.as_mut_ptr();
319            end_l = offsets_l.as_mut_ptr();
320            let mut elem = l;
321
322            for i in 0..block_l {
323                unsafe {
324                    // Branchless comparison.
325                    *end_l = i as u8;
326                    end_l = end_l.offset(!is_less(&*elem, pivot) as isize);
327                    elem = elem.offset(1);
328                }
329            }
330        }
331
332        if start_r == end_r {
333            // Trace `block_r` elements from the right side.
334            start_r = offsets_r.as_mut_ptr();
335            end_r = offsets_r.as_mut_ptr();
336            let mut elem = r;
337
338            for i in 0..block_r {
339                unsafe {
340                    // Branchless comparison.
341                    elem = elem.offset(-1);
342                    *end_r = i as u8;
343                    end_r = end_r.offset(is_less(&*elem, pivot) as isize);
344                }
345            }
346        }
347
348        // Number of out-of-order elements to swap between the left and right side.
349        let count = cmp::min(width(start_l, end_l), width(start_r, end_r));
350
351        if count > 0 {
352            macro_rules! left { () => { l.offset(*start_l as isize) } }
353            macro_rules! right { () => { r.offset(-(*start_r as isize) - 1) } }
354
355            // Instead of swapping one pair at the time, it is more efficient to perform a cyclic
356            // permutation. This is not strictly equivalent to swapping, but produces a similar
357            // result using fewer memory operations.
358            unsafe {
359                let tmp = ptr::read(left!());
360                ptr::copy_nonoverlapping(right!(), left!(), 1);
361
362                for _ in 1..count {
363                    start_l = start_l.offset(1);
364                    ptr::copy_nonoverlapping(left!(), right!(), 1);
365                    start_r = start_r.offset(1);
366                    ptr::copy_nonoverlapping(right!(), left!(), 1);
367                }
368
369                ptr::copy_nonoverlapping(&tmp, right!(), 1);
370                mem::forget(tmp);
371                start_l = start_l.offset(1);
372                start_r = start_r.offset(1);
373            }
374        }
375
376        if start_l == end_l {
377            // All out-of-order elements in the left block were moved. Move to the next block.
378            l = unsafe { l.offset(block_l as isize) };
379        }
380
381        if start_r == end_r {
382            // All out-of-order elements in the right block were moved. Move to the previous block.
383            r = unsafe { r.offset(-(block_r as isize)) };
384        }
385
386        if is_done {
387            break;
388        }
389    }
390
391    // All that remains now is at most one block (either the left or the right) with out-of-order
392    // elements that need to be moved. Such remaining elements can be simply shifted to the end
393    // within their block.
394
395    if start_l < end_l {
396        // The left block remains.
397        // Move its remaining out-of-order elements to the far right.
398        debug_assert_eq!(width(l, r), block_l);
399        while start_l < end_l {
400            unsafe {
401                end_l = end_l.offset(-1);
402                ptr::swap(l.offset(*end_l as isize), r.offset(-1));
403                r = r.offset(-1);
404            }
405        }
406        width(v.as_mut_ptr(), r)
407    } else if start_r < end_r {
408        // The right block remains.
409        // Move its remaining out-of-order elements to the far left.
410        debug_assert_eq!(width(l, r), block_r);
411        while start_r < end_r {
412            unsafe {
413                end_r = end_r.offset(-1);
414                ptr::swap(l, r.offset(-(*end_r as isize) - 1));
415                l = l.offset(1);
416            }
417        }
418        width(v.as_mut_ptr(), l)
419    } else {
420        // Nothing else to do, we're done.
421        width(v.as_mut_ptr(), l)
422    }
423}
424
425/// Partitions `v` into elements smaller than `v[pivot]`, followed by elements greater than or
426/// equal to `v[pivot]`.
427///
428/// Returns a tuple of:
429///
430/// 1. Number of elements smaller than `v[pivot]`.
431/// 2. True if `v` was already partitioned.
432fn partition<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> (usize, bool)
433    where F: FnMut(&T, &T) -> bool
434{
435    let (mid, was_partitioned) = {
436        // Place the pivot at the beginning of slice.
437        v.swap(0, pivot);
438        // let (pivot, v) = v.split_first_mut().unwrap();
439        let (pivot, v) = v.split_at_mut(1);
440        let pivot = &mut pivot[0];
441        // could prob do `swap_remove`?
442
443        // Read the pivot into a stack-allocated variable for efficiency. If a following comparison
444        // operation panics, the pivot will be automatically written back into the slice.
445        let mut tmp = mem::ManuallyDrop::new(unsafe { ptr::read(pivot) });
446        let _pivot_guard = CopyOnDrop {
447            src: &mut *tmp,
448            dest: pivot,
449        };
450        let pivot = &*tmp;
451
452        // Find the first pair of out-of-order elements.
453        let mut l = 0;
454        let mut r = v.len();
455        unsafe {
456            // Find the first element greater then or equal to the pivot.
457            while l < r && is_less(v.get_unchecked(l), pivot) {
458                l += 1;
459            }
460
461            // Find the last element smaller that the pivot.
462            while l < r && !is_less(v.get_unchecked(r - 1), pivot) {
463                r -= 1;
464            }
465        }
466
467        // (l + partition_in_blocks(unsafe { v.get_unchecked_mut(l..r) }, pivot, is_less), l >= r)
468        (l + partition_in_blocks(&mut v[l..r], pivot, is_less), l >= r)
469
470        // `_pivot_guard` goes out of scope and writes the pivot (which is a stack-allocated
471        // variable) back into the slice where it originally was. This step is critical in ensuring
472        // safety!
473    };
474
475    // Place the pivot between the two partitions.
476    v.swap(0, mid);
477
478    (mid, was_partitioned)
479}
480
481/// Partitions `v` into elements equal to `v[pivot]` followed by elements greater than `v[pivot]`.
482///
483/// Returns the number of elements equal to the pivot. It is assumed that `v` does not contain
484/// elements smaller than the pivot.
485fn partition_equal<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> usize
486    where F: FnMut(&T, &T) -> bool
487{
488    // Place the pivot at the beginning of slice.
489    v.swap(0, pivot);
490    let (pivot, v) = v.split_at_mut(1);
491    let pivot = &mut pivot[0];
492    // let (pivot, v) = v.split_first_mut().unwrap();
493
494    // Read the pivot into a stack-allocated variable for efficiency. If a following comparison
495    // operation panics, the pivot will be automatically written back into the slice.
496    let mut tmp = mem::ManuallyDrop::new(unsafe { ptr::read(pivot) });
497    let _pivot_guard = CopyOnDrop {
498        src: &mut *tmp,
499        dest: pivot,
500    };
501    let pivot = &*tmp;
502
503    // Now partition the slice.
504    let mut l = 0;
505    let mut r = v.len();
506    loop {
507        unsafe {
508            // Find the first element greater that the pivot.
509            while l < r && !is_less(pivot, v.get_unchecked(l)) {
510                l += 1;
511            }
512
513            // Find the last element equal to the pivot.
514            while l < r && is_less(pivot, v.get_unchecked(r - 1)) {
515                r -= 1;
516            }
517
518            // Are we done?
519            if l >= r {
520                break;
521            }
522
523            // Swap the found pair of out-of-order elements.
524            r -= 1;
525            ptr::swap(v.get_unchecked_mut(l), v.get_unchecked_mut(r));
526            l += 1;
527        }
528    }
529
530    // We found `l` elements equal to the pivot. Add 1 to account for the pivot itself.
531    l + 1
532
533    // `_pivot_guard` goes out of scope and writes the pivot (which is a stack-allocated variable)
534    // back into the slice where it originally was. This step is critical in ensuring safety!
535}
536
537/// Scatters some elements around in an attempt to break patterns that might cause imbalanced
538/// partitions in quickselect.
539#[cold]
540fn break_patterns<T>(v: &mut [T]) {
541    let len = v.len();
542    if len >= 8 {
543        // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
544        let mut random = len as u32;
545        // TODO: bench this vs the Xoroshiro128 core
546        let mut gen_u32 = || {
547            random ^= random << 13;
548            random ^= random >> 17;
549            random ^ random << 5
550        };
551        let mut gen_usize = || {
552            if mem::size_of::<usize>() <= 4 {
553                gen_u32() as usize
554            } else {
555                (((gen_u32() as u64) << 32) | (gen_u32() as u64)) as usize
556                // ((u64::from(gen_u32()) << 32) | u64::from(gen_u32())) as usize
557            }
558        };
559
560        // Take random numbers modulo this number.
561        // The number fits into `usize` because `len` is not greater than `isize::MAX`.
562        let modulus = len.next_power_of_two();
563
564        // Some pivot candidates will be in the nearby of this index. Let's randomize them.
565        let pos = len / 4 * 2;
566
567        for i in 0..3 {
568            // Generate a random number modulo `len`. However, in order to avoid costly operations
569            // we first take it modulo a power of two, and then decrease by `len` until it fits
570            // into the range `[0, len - 1]`.
571            let mut other = gen_usize() & (modulus - 1);
572
573            // `other` is guaranteed to be less than `2 * len`.
574            if other >= len {
575                other -= len;
576            }
577
578            v.swap(pos - 1 + i, other);
579        }
580    }
581}
582
583/*#[cold]
584fn break_xoroshiro<T>(v: &mut [T]) {
585    let len = v.len();
586    if len < 8 {
587        return;
588    }
589
590    // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
591    let mut first = len as u64;
592    let mut second = {
593        let mut z = first + 0x9E37_79B9_7F4A_7C15_u64;
594        // first = z;
595        z = (z ^ (z >> 30)) * 0xBF58_476D_1CE4_E5B9_u64;
596        z = (z ^ (z >> 27)) * 0x94D0_49BB_1331_11EB_u64;
597        z ^ (z >> 31)
598    };
599
600    let mut gen_u64 = || {
601        #[inline]
602        fn rotl(x: u64, k: i32) -> u64 {
603            (x << k) | (x >> (64 - k))
604        }
605
606        let s0 = first;
607        let mut s1 = second;
608        let result = s0 + s1;
609
610        s1 ^= s0;
611        first = rotl(s0, 55) ^ s1 ^ (s1 << 14);
612        second = rotl(s1, 36);
613
614        result
615    };
616
617    let mut gen_usize = || {
618        if mem::size_of::<usize>() == 4 {
619            gen_u64() as u32 as usize
620        } else {
621            gen_u64() as usize
622        }
623    };
624
625    // Take random numbers modulo this number.
626    // The number fits into `usize` because `len` is not greater than `isize::MAX`.
627    let modulus = len.next_power_of_two();
628
629    // Some pivot candidates will be in the nearby of this index. Let's randomize them.
630    let pos = len / 4 * 2;
631
632    for i in 0..3 {
633        // Generate a random number modulo `len`. However, in order to avoid costly operations
634        // we first take it modulo a power of two, and then decrease by `len` until it fits
635        // into the range `[0, len - 1]`.
636        let mut other = gen_usize() & (modulus - 1);
637
638        // `other` is guaranteed to be less than `2 * len`.
639        if other >= len {
640            other -= len;
641        }
642
643        v.swap(pos - 1 + i, other);
644    }
645}*/
646
647/*fn break_xoroshiro_alpha<T>(v: &mut [T]) {
648    let len = v.len();
649    if len < 8 {
650        return;
651    }
652
653    // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
654    let mut first = len as u64;
655    let mut second = {
656        let mut z = first + 0x9E37_79B9_7F4A_7C15_u64;
657        first = z;
658        z = (z ^ (z >> 30)) * 0xBF58_476D_1CE4_E5B9_u64;
659        z = (z ^ (z >> 27)) * 0x94D0_49BB_1331_11EB_u64;
660        z ^ (z >> 31)
661    };
662
663    // Take random numbers modulo this number.
664    // The number fits into `usize` because `len` is not greater than `isize::MAX`.
665    let modulus = len.next_power_of_two();
666
667    // Some pivot candidates will be in the nearby of this index. Let's randomize them.
668    let pos = len / 4 * 2;
669
670    for i in 0..2 {
671        let mut gen_u64 = || {
672            #[inline]
673            fn rotl(x: u64, k: i32) -> u64 {
674                (x << k) | (x >> (64 - k))
675            }
676
677            let s0 = first;
678            let mut s1 = second;
679            let result = s0 + s1;
680
681            s1 ^= s0;
682            first = rotl(s0, 55) ^ s1 ^ (s1 << 14);
683            second = rotl(s1, 36);
684
685            result
686        };
687
688        let mut gen_usize = || {
689            if mem::size_of::<usize>() == 4 {
690                gen_u64() as u32 as usize
691            } else {
692                gen_u64() as usize
693            }
694        };
695
696        // Generate a random number modulo `len`. However, in order to avoid costly operations
697        // we first take it modulo a power of two, and then decrease by `len` until it fits
698        // into the range `[0, len - 1]`.
699        let mut other = gen_usize() & (modulus - 1);
700
701        // `other` is guaranteed to be less than `2 * len`.
702        if other >= len {
703            other -= len;
704        }
705
706        v.swap(pos - 1 + i, other);
707    }
708    let next = first + second;
709    let next_usize = if mem::size_of::<usize>() == 4 {
710        next as u32 as usize
711    } else {
712        next as usize
713    };
714    let mut other = next_usize & (modulus - 1);
715    if other >= len {
716        other -= len;
717    }
718    v.swap(pos - 1 + 2, other);
719}
720
721// #[cold]
722fn break_xoroshiro_pure<T>(v: &mut [T]) {
723    use self::std::num::Wrapping as w;
724
725    let len = v.len();
726    if len >= 8 {
727        // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
728        let mut first = len as u64;
729        let mut second = len as u64;
730
731        let mut gen_u64 = || {
732            #[inline]
733            fn rotl(x: u64, k: i32) -> u64 {
734                (x << k) | (x >> (64 - k))
735            }
736
737            let s0 = w(first);
738            let mut s1 = w(second);
739            let result = s0 + s1;
740
741            s1 ^= s0;
742            first = (w(s0.0.rotate_left(55)) ^ s1 ^ (s1 << 14)).0;
743            second = s1.0.rotate_left(36);
744
745            result.0
746        };
747
748        let mut gen_usize = || {
749            if mem::size_of::<usize>() == 4 {
750                gen_u64() as u32 as usize
751            } else {
752                gen_u64() as usize
753            }
754        };
755
756        // Take random numbers modulo this number.
757        // The number fits into `usize` because `len` is not greater than `isize::MAX`.
758        let modulus = len.next_power_of_two();
759
760        // Some pivot candidates will be in the nearby of this index. Let's randomize them.
761        let pos = len / 4 * 2;
762
763        for i in 0..3 {
764            // Generate a random number modulo `len`. However, in order to avoid costly operations
765            // we first take it modulo a power of two, and then decrease by `len` until it fits
766            // into the range `[0, len - 1]`.
767            let mut other = gen_usize() & (modulus - 1);
768
769            // `other` is guaranteed to be less than `2 * len`.
770            if other >= len {
771                other -= len;
772            }
773
774            v.swap(pos - 1 + i, other);
775        }
776    }
777}
778
779fn break_xorshift_star<T>(v: &mut [T]) {
780    let len = v.len();
781    if len < 8 {
782        return;
783    }
784
785    // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
786    let mut random = len as u64;
787
788    let mut gen_u64 = || {
789        random ^= random >> 12; // a
790        random ^= random << 25; // b
791        random ^= random >> 27; // c
792        random * 0x2545_F491_4F6C_DD1D
793    };
794
795    let mut gen_usize = || {
796        if mem::size_of::<usize>() == 4 {
797            gen_u64() as u32 as usize
798        } else {
799            gen_u64() as usize
800        }
801    };
802
803    // Take random numbers modulo this number.
804    // The number fits into `usize` because `len` is not greater than `isize::MAX`.
805    let modulus = len.next_power_of_two();
806
807    // Some pivot candidates will be in the nearby of this index. Let's randomize them.
808    let pos = len / 4 * 2;
809
810    for i in 0..3 {
811        // Generate a random number modulo `len`. However, in order to avoid costly operations
812        // we first take it modulo a power of two, and then decrease by `len` until it fits
813        // into the range `[0, len - 1]`.
814        let mut other = gen_usize() & (modulus - 1);
815
816        // `other` is guaranteed to be less than `2 * len`.
817        if other >= len {
818            other -= len;
819        }
820
821        v.swap(pos - 1 + i, other);
822    }
823}
824
825fn break_xorshift_plus<T>(v: &mut [T]) {
826    let len = v.len();
827    if len < 8 {
828        return;
829    }
830
831    // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
832    let mut random = len as u64;
833
834    let mut gen_u64 = || {
835        random ^= random >> 12; // a
836        random ^= random << 25; // b
837        random ^= random >> 27; // c
838        random * 0x2545_F491_4F6C_DD1D
839    };
840
841    let mut gen_usize = || {
842        if mem::size_of::<usize>() == 4 {
843            gen_u64() as u32 as usize
844        } else {
845            gen_u64() as usize
846        }
847    };
848
849    // Take random numbers modulo this number.
850    // The number fits into `usize` because `len` is not greater than `isize::MAX`.
851    let modulus = len.next_power_of_two();
852
853    // Some pivot candidates will be in the nearby of this index. Let's randomize them.
854    let pos = len / 4 * 2;
855
856    for i in 0..3 {
857        // Generate a random number modulo `len`. However, in order to avoid costly operations
858        // we first take it modulo a power of two, and then decrease by `len` until it fits
859        // into the range `[0, len - 1]`.
860        let mut other = gen_usize() & (modulus - 1);
861
862        // `other` is guaranteed to be less than `2 * len`.
863        if other >= len {
864            other -= len;
865        }
866
867        v.swap(pos - 1 + i, other);
868    }
869}
870
871// #[cold]
872fn break_splitmix<T>(v: &mut [T]) {
873    use std::num::Wrapping as w;
874
875    let len = v.len();
876    if len >= 8 {
877        // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
878        let mut random = len as u64;
879
880        let mut gen_u64 = || {
881            let mut z = w(random) + w(0x9E37_79B9_7F4A_7C15_u64);
882            random = z.0;
883            z = (z ^ (z >> 30)) * w(0xBF58_476D_1CE4_E5B9_u64);
884            z = (z ^ (z >> 27)) * w(0x94D0_49BB_1331_11EB_u64);
885            (z ^ (z >> 31)).0
886        };
887
888        let mut gen_usize = || {
889            if mem::size_of::<usize>() == 4 {
890                gen_u64() as u32 as usize
891            } else {
892                gen_u64() as usize
893            }
894        };
895
896        // Take random numbers modulo this number.
897        // The number fits into `usize` because `len` is not greater than `isize::MAX`.
898        let modulus = len.next_power_of_two();
899
900        // Some pivot candidates will be in the nearby of this index. Let's randomize them.
901        let pos = len / 4 * 2;
902
903        for i in 0..3 {
904            // Generate a random number modulo `len`. However, in order to avoid costly operations
905            // we first take it moduluso a power of two, and then decrease by `len` until it fits
906            // into the range `[0, len - 1]`.
907            let mut other = gen_usize() & (modulus - 1);
908
909            // `other` is guaranteed to be less than `2 * len`.
910            if other >= len {
911                other -= len;
912            }
913
914            v.swap(pos - 1 + i, other);
915        }
916    }
917}*/
918
919/// Chooses a pivot in `v` and returns the index and `true` if the slice is likely already sorted.
920///
921/// Elements in `v` might be reordered in the process.
922fn choose_pivot<T, F>(v: &mut [T], is_less: &mut F) -> (usize, bool)
923    where F: FnMut(&T, &T) -> bool
924{
925    // Minimum length to choose the median-of-medians method.
926    // Shorter slices use the simple median-of-three method.
927    const SHORTEST_MEDIAN_OF_MEDIANS: usize = 50;
928    // Maximum number of swaps that can be performed in this function.
929    const MAX_SWAPS: usize = 4 * 3;
930
931    let len = v.len();
932
933    // Three indices near which we are going to choose a pivot.
934    let mut a = len / 4/* * 1*/;
935    let mut b = len / 4 * 2;
936    let mut c = len / 4 * 3;
937
938    // Counts the total number of swaps we are about to perform while sorting indices.
939    let mut swaps = 0;
940
941    if len >= 8 {
942        // Swaps indices so that `v[a] <= v[b]`.
943        let mut sort2 = |a: &mut usize, b: &mut usize| unsafe {
944            if is_less(v.get_unchecked(*b), v.get_unchecked(*a)) {
945                ptr::swap(a, b);
946                swaps += 1;
947            }
948        };
949
950        // Swaps indices so that `v[a] <= v[b] <= v[c]`.
951        let mut sort3 = |a: &mut usize, b: &mut usize, c: &mut usize| {
952            sort2(a, b);
953            sort2(b, c);
954            sort2(a, b);
955        };
956
957        if len >= SHORTEST_MEDIAN_OF_MEDIANS {
958            // Finds the median of `v[a - 1], v[a], v[a + 1]` and stores the index into `a`.
959            let mut sort_adjacent = |a: &mut usize| {
960                let tmp = *a;
961                sort3(&mut (tmp - 1), a, &mut (tmp + 1));
962            };
963
964            // Find medians in the neighborhoods of `a`, `b`, and `c`.
965            sort_adjacent(&mut a);
966            sort_adjacent(&mut b);
967            sort_adjacent(&mut c);
968        }
969
970        // Find the median among `a`, `b`, and `c`.
971        sort3(&mut a, &mut b, &mut c);
972    }
973
974    if swaps < MAX_SWAPS {
975        (b, swaps == 0)
976    } else {
977        // The maximum number of swaps was performed. Chances are the slice is descending or mostly
978        // descending, so reversing will probably help sort it faster.
979        v.reverse();
980        (len - 1 - b, true)
981    }
982}
983
984/// Sorts `v` recursively.
985///
986/// If the slice had a predecessor in the original array, it is specified as `pred`.
987///
988/// `limit` is the number of allowed imbalanced partitions before switching to `heapsort`. If zero,
989/// this function will immediately switch to heapsort.
990fn recurse<'a, T, F>(mut v: &'a mut [T], mut k: usize, is_less: &mut F/*, mut pred: Option<&'a T>*//*b*/)
991    where F: FnMut(&T, &T) -> bool,
992{
993    // Slices of up to this length get sorted using insertion sort.
994    const MAX_INSERTION: usize = 20;
995
996    // `limit` is the number of allowed imbalanced partitions before switching to `heapsort`. If zero,
997    // this function will immediately switch to heapsort.
998    let mut limit = mem::size_of::<usize>() * 8 - v.len().leading_zeros() as usize;
999
1000    // True if the last partitioning was reasonably balanced.
1001    let mut was_balanced = true;
1002    // True if the last partitioning didn't shuffle elements (the slice was already partitioned).
1003    let mut was_partitioned = true;
1004
1005    let mut pred = None;
1006
1007    loop {
1008        let len = v.len();
1009
1010        // Very short slices get sorted using insertion sort.
1011        if len <= MAX_INSERTION {
1012            insertion_sort(v, is_less);
1013            return;
1014        }
1015        // if len <= 1 { return }
1016
1017        // println!("v {:?}", v);
1018        // println!("k {:?}", k);
1019
1020        // If too many bad pivot choices were made, simply fall back to heapsort in order to
1021        // guarantee `O(n log n)` worst-case.
1022        if limit == 0 {
1023            heapsort(v, is_less);
1024            return;
1025        }
1026
1027        // If the last partitioning was imbalanced, try breaking patterns in the slice by shuffling
1028        // some elements around. Hopefully we'll choose a better pivot this time.
1029        if !was_balanced {
1030            // println!("break");
1031            break_patterns(v);
1032            limit -= 1;
1033        }
1034
1035        // Choose a pivot and try guessing whether the slice is already sorted.
1036        let (pivot, likely_sorted) = choose_pivot(v, is_less);
1037
1038        // println!("pivot {:?}, val: {:?}", pivot, v[pivot]);
1039
1040        // If the last partitioning was decently balanced and didn't shuffle elements, and if pivot
1041        // selection predicts the slice is likely already sorted...
1042        if was_balanced && was_partitioned && likely_sorted {
1043            // Try identifying several out-of-order elements and shifting them to correct
1044            // positions. If the slice ends up being completely sorted, we're done.
1045            // println!("partial");
1046            if partial_insertion_sort(v, is_less) {
1047                return;
1048            }
1049        }
1050
1051        // If the chosen pivot is equal to the predecessor, then it's the smallest element in the
1052        // slice. Partition the slice into elements equal to and elements greater than the pivot.
1053        // This case is usually hit when the slice contains many duplicate elements.
1054        if let Some(p) = pred {
1055            if !is_less(p, &v[pivot]) {
1056                // println!("equal");
1057                let mid = partition_equal(v, pivot, is_less);
1058                // mid is where the greaters start
1059                // If there are more than k items smaller than the pivot and
1060                // they are partitioned, we can exit.
1061                // unsure about this
1062
1063                // `mid` items are smaller than v[mid] and are to the left
1064                // if `k` is smaller than `mid`, we have partitioned enough items
1065                // now just
1066                if mid > k {
1067                    return;
1068                }
1069
1070                // Continue sorting elements greater than the pivot.
1071                v = &mut {v}[mid..];
1072                k -= mid;
1073                continue;
1074            }
1075        }
1076
1077        // Partition the slice.
1078        let (mid, was_p) = partition(v, pivot, is_less);
1079
1080        // println!("partition {:?}", v);
1081        // println!("mid {:?}", mid);
1082
1083        // If the pivot is at `k`, then the `k` smallest items are properly partitioned.  Unsure
1084        // if this means that v[k] is in its final sorted position.  If that assumption is true, we
1085        // are done.
1086        if mid == k {
1087            return;
1088        }
1089
1090        was_balanced = cmp::min(mid, len - mid) >= len / 8;
1091        was_partitioned = was_p;
1092
1093        // Split the slice into `left`, `pivot`, and `right`.
1094        let (left, right) = {v}.split_at_mut(mid);
1095        let (pivot, right) = right.split_at_mut(1);
1096        let pivot = &pivot[0];
1097
1098        // If k is to the right of the partition, partition the right
1099        if mid < k {
1100            // println!("right");
1101            // let (_, right) = {v}.split_at_mut(mid);
1102            // let (pivot, right) = right.split_first_mut().unwrap();
1103            pred = Some(pivot);
1104            v = right;
1105            k -= mid + 1; // select the `k - mid` smallest items of the new `v`
1106        } else { // Otherwise k is to the left of the partition, partition the left
1107            // println!("left");
1108            // let (left, _) = {v}.split_at_mut(mid);
1109            v = left;
1110        }
1111    }
1112}
1113
1114/// Sorts `v` using pattern-defeating quickselect, which is `O(n log n)` worst-case.
1115fn quickselect<T, F>(v: &mut [T], k: usize, mut is_less: F)
1116    where F: FnMut(&T, &T) -> bool,
1117{
1118    // Sorting has no meaningful behavior on zero-sized types.
1119    if mem::size_of::<T>() == 0 {
1120        return;
1121    }
1122
1123    // TODO: impl the cheaper `iter::min` function when k is 1
1124
1125    recurse(v, k, &mut is_less);
1126}
1127
1128/// Partially sorts a slice and puts the `k`th smallest item in place.
1129///
1130/// This sort is in-place, unstable, and `O(n log n)` worst-case.
1131///
1132/// The implementation is based on Orson Peters' pattern-defeating quickselect.
1133///
1134/// # Examples
1135///
1136/// ```
1137/// let mut v = [-5, 4, 1, -3, 2];
1138/// let k = 2;
1139/// pdqselect::select(&mut v, k);
1140/// assert!(v[..k].iter().all(|&x| x <= v[k]));
1141/// assert!(v[k+1..].iter().all(|&x| x >= v[k]));
1142/// ```
1143pub fn select<T>(v: &mut [T], k: usize)
1144    where T: Ord,
1145{
1146    quickselect(v, k, |a, b| a.lt(b));
1147}
1148
1149/// Partially sorts a slice using `compare` to compare elements and puts the `k`th smallest
1150/// item in place.
1151///
1152/// This sort is in-place, unstable, and `O(n log n)` worst-case.
1153///
1154/// The implementation is based on Orson Peters' pattern-defeating quickselect.
1155///
1156/// # Examples
1157///
1158/// ```
1159/// let mut v = [5, 4, 1, 3, 2];
1160/// let k = 2;
1161/// pdqselect::select_by(&mut v, k, |a, b| a.cmp(b));
1162/// assert!(v[..k].iter().all(|&x| x <= v[k]));
1163/// assert!(v[k+1..].iter().all(|&x| x >= v[k]));
1164///
1165/// // reverse sorting
1166/// pdqselect::select_by(&mut v, k, |a, b| b.cmp(a));
1167/// assert!(v[..k].iter().all(|&x| x >= v[k]));
1168/// assert!(v[k+1..].iter().all(|&x| x <= v[k]));
1169/// ```
1170pub fn select_by<T, F>(v: &mut [T], k: usize, mut compare: F)
1171    where F: FnMut(&T, &T) -> Ordering,
1172{
1173    quickselect(v, k, |a, b| compare(a, b) == Ordering::Less);
1174}
1175
1176/// Partially sorts a slice using `f` to extract a key to compare elements by and puts the `k`th
1177/// smallest item in place.
1178///
1179/// This sort is in-place, unstable, and `O(n log n)` worst-case.
1180///
1181/// The implementation is based on Orson Peters' pattern-defeating quicksort.
1182///
1183/// # Examples
1184///
1185/// ```
1186/// let mut v = [-5i32, 4, 1, -3, 2];
1187/// let k = 3;
1188/// pdqselect::select_by_key(&mut v, k, |x| x.abs());
1189/// assert!(v[..k].iter().all(|&x| x.abs() <= v[k].abs()));
1190/// assert!(v[k+1..].iter().all(|&x| x.abs() >= v[k].abs()));
1191/// ```
1192pub fn select_by_key<T, B, F>(v: &mut [T], k: usize, mut f: F)
1193    where F: FnMut(&T) -> B,
1194          B: Ord,
1195{
1196    quickselect(v, k, |a, b| f(a).lt(&f(b)));
1197}
pdqselect/lib.rs

pdqselect/
lib.rs