pdqselect/lib.rs
1//! Pattern-defeating quickselect
2//!
3//! The algorithm is based on pattern-defeating quicksort by Orson Peters, published at:
4//! <https://github.com/orlp/pdqsort>
5//! It is also heavily adapted from the Rust implementation of pdqsort
6//! (<https://github.com/stjepang/pdqsort>) and Rust's own [`slice::sort_unstable`].
7//!
8//! NOTE: you may want to use Rust's official version of this algorithm,
9//! [`slice::select_nth_unstable_by`]
10//!
11//! # Properties
12//!
13//! - Best-case running time is `O(n)`.
14//! - Worst-case running time is `O(n log n)`.
15//! - Does not allocate additional memory.
16//! - Uses `#![no_std]`.
17//!
18//! # Examples
19//!
20//! ```
21//! let mut v = [-5i32, 4, 1, -3, 2];
22//! let k = 3;
23//!
24//! pdqselect::select(&mut v, k);
25//! let kth = v[k];
26//! assert!(v[..k].iter().all(|&x| x <= kth));
27//! assert!(v[k+1..].iter().all(|&x| x >= kth));
28//!
29//! pdqselect::select_by(&mut v, k, |a, b| b.cmp(a));
30//! let kth = v[k];
31//! assert!(v[..k].iter().all(|&x| x >= kth));
32//! assert!(v[k+1..].iter().all(|&x| x <= kth));
33//!
34//! pdqselect::select_by_key(&mut v, k, |k| k.abs());
35//! let kth = v[k].abs();
36//! assert!(v[..k].iter().all(|&x| x.abs() <= kth));
37//! assert!(v[k+1..].iter().all(|&x| x.abs() >= kth));
38//! ```
39
40#![no_std]
41
42use core::cmp::{self, Ordering};
43use core::mem;
44use core::ptr;
45
46/// When dropped, copies from `src` into `dest`.
47struct CopyOnDrop<T> {
48 src: *mut T,
49 dest: *mut T,
50}
51
52impl<T> Drop for CopyOnDrop<T> {
53 fn drop(&mut self) {
54 unsafe { ptr::copy_nonoverlapping(self.src, self.dest, 1); }
55 }
56}
57
58/// Shifts the first element to the right until it encounters a greater or equal element.
59fn shift_head<T, F>(v: &mut [T], is_less: &mut F)
60 where F: FnMut(&T, &T) -> bool
61{
62 let len = v.len();
63 unsafe {
64 // If the first two elements are out-of-order...
65 if len >= 2 && is_less(v.get_unchecked(1), v.get_unchecked(0)) {
66 // Read the first element into a stack-allocated variable. If a following comparison
67 // operation panics, `hole` will get dropped and automatically write the element back
68 // into the slice.
69 let mut tmp = mem::ManuallyDrop::new(ptr::read(v.get_unchecked(0)));
70 let mut hole = CopyOnDrop {
71 src: &mut *tmp,
72 dest: v.get_unchecked_mut(1),
73 };
74 ptr::copy_nonoverlapping(v.get_unchecked(1), v.get_unchecked_mut(0), 1);
75
76 for i in 2..len {
77 if !is_less(v.get_unchecked(i), &*tmp) {
78 break;
79 }
80
81 // Move `i`-th element one place to the left, thus shifting the hole to the right.
82 ptr::copy_nonoverlapping(v.get_unchecked(i), v.get_unchecked_mut(i - 1), 1);
83 hole.dest = v.get_unchecked_mut(i);
84 }
85 // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
86 }
87 }
88}
89
90/// Shifts the last element to the left until it encounters a smaller or equal element.
91fn shift_tail<T, F>(v: &mut [T], is_less: &mut F)
92 where F: FnMut(&T, &T) -> bool
93{
94 let len = v.len();
95 unsafe {
96 // If the last two elements are out-of-order...
97 if len >= 2 && is_less(v.get_unchecked(len - 1), v.get_unchecked(len - 2)) {
98 // Read the last element into a stack-allocated variable. If a following comparison
99 // operation panics, `hole` will get dropped and automatically write the element back
100 // into the slice.
101 let mut tmp = mem::ManuallyDrop::new(ptr::read(v.get_unchecked(len - 1)));
102 let mut hole = CopyOnDrop {
103 src: &mut *tmp,
104 dest: v.get_unchecked_mut(len - 2),
105 };
106 ptr::copy_nonoverlapping(v.get_unchecked(len - 2), v.get_unchecked_mut(len - 1), 1);
107
108 for i in (0..len-2).rev() {
109 if !is_less(&*tmp, v.get_unchecked(i)) {
110 break;
111 }
112
113 // Move `i`-th element one place to the right, thus shifting the hole to the left.
114 ptr::copy_nonoverlapping(v.get_unchecked(i), v.get_unchecked_mut(i + 1), 1);
115 hole.dest = v.get_unchecked_mut(i);
116 }
117 // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
118 }
119 }
120}
121
122/// Partially sorts a slice by shifting several out-of-order elements around.
123///
124/// Returns `true` if the slice is sorted at the end. This function is `O(n)` worst-case.
125#[cold]
126fn partial_insertion_sort<T, F>(v: &mut [T], is_less: &mut F) -> bool
127 where F: FnMut(&T, &T) -> bool
128{
129 // Maximum number of adjacent out-of-order pairs that will get shifted.
130 const MAX_STEPS: usize = 5;
131 // If the slice is shorter than this, don't shift any elements.
132 const SHORTEST_SHIFTING: usize = 50;
133
134 let len = v.len();
135 let mut i = 1;
136
137 for _ in 0..MAX_STEPS {
138 unsafe {
139 // Find the next pair of adjacent out-of-order elements.
140 while i < len && !is_less(v.get_unchecked(i), v.get_unchecked(i - 1)) {
141 i += 1;
142 }
143 }
144
145 // Are we done?
146 if i == len {
147 return true;
148 }
149
150 // Don't shift elements on short arrays, that has a performance cost.
151 if len < SHORTEST_SHIFTING {
152 return false;
153 }
154
155 // Swap the found pair of elements. This puts them in correct order.
156 v.swap(i - 1, i);
157
158 // Shift the smaller element to the left.
159 // shift_tail(unsafe{v.get_unchecked_mut(..i)}, is_less);
160 shift_tail(&mut v[..i], is_less);
161 // Shift the greater element to the right.
162 // shift_head(unsafe{v.get_unchecked_mut(i..)}, is_less);
163 shift_head(&mut v[i..], is_less);
164 }
165
166 // Didn't manage to sort the slice in the limited number of steps.
167 false
168}
169
170/// Sorts a slice using insertion sort, which is `O(n^2)` worst-case.
171fn insertion_sort<T, F>(v: &mut [T], is_less: &mut F)
172 where F: FnMut(&T, &T) -> bool
173{
174 for i in 1..v.len() {
175 // shift_tail(unsafe{v.get_unchecked_mut(..i+1)}, is_less);
176 shift_tail(&mut v[..i+1], is_less);
177 }
178}
179
180/// Sorts `v` using heapsort, which guarantees `O(n log n)` worst-case.
181#[cold]
182pub fn heapsort<T, F>(v: &mut [T], is_less: &mut F)
183 where F: FnMut(&T, &T) -> bool
184{
185 // This binary heap respects the invariant `parent >= child`.
186 let mut sift_down = |v: &mut [T], mut node| {
187 loop {
188 // Children of `node`:
189 let left = 2 * node + 1;
190 let right = 2 * node + 2;
191
192 // Choose the greater child.
193 /*let greater = unsafe { if right < v.len() &&
194 is_less(v.get_unchecked(left), v.get_unchecked(right))
195 {
196 right
197 } else {
198 left
199 }
200 };*/
201
202 let greater = if right < v.len() && is_less(&v[left], &v[right]) {
203 right
204 } else {
205 left
206 };
207
208 // Stop if the invariant holds at `node`.
209 /*unsafe {
210 if greater >= v.len() ||
211 !is_less(v.get_unchecked(node), v.get_unchecked(greater))
212 {
213 break;
214 }
215 }*/
216 if greater >= v.len() || !is_less(&v[node], &v[greater]) {
217 break;
218 }
219
220 // Swap `node` with the greater child, move one step down, and continue sifting.
221 v.swap(node, greater);
222 node = greater;
223 }
224 };
225
226 // Build the heap in linear time.
227 for i in (0 .. v.len() / 2).rev() {
228 sift_down(v, i);
229 }
230
231 // Pop maximal elements from the heap.
232 for i in (1 .. v.len()).rev() {
233 v.swap(0, i);
234 // sift_down(unsafe { v.get_unchecked_mut(..i) }, 0);
235 sift_down(&mut v[..i], 0);
236 }
237}
238
239/// Partitions `v` into elements smaller than `pivot`, followed by elements greater than or equal
240/// to `pivot`.
241///
242/// Returns the number of elements smaller than `pivot`.
243///
244/// Partitioning is performed block-by-block in order to minimize the cost of branching operations.
245/// This idea is presented in the [`BlockQuicksort`][pdf] paper.
246///
247/// [pdf]: http://drops.dagstuhl.de/opus/volltexte/2016/6389/pdf/LIPIcs-ESA-2016-38.pdf
248fn partition_in_blocks<T, F>(v: &mut [T], pivot: &T, is_less: &mut F) -> usize
249 where F: FnMut(&T, &T) -> bool
250{
251 // Number of elements in a typical block.
252 const BLOCK: usize = 128;
253
254 // The partitioning algorithm repeats the following steps until completion:
255 //
256 // 1. Trace a block from the left side to identify elements greater than or equal to the pivot.
257 // 2. Trace a block from the right side to identify elements smaller than the pivot.
258 // 3. Exchange the identified elements between the left and right side.
259 //
260 // We keep the following variables for a block of elements:
261 //
262 // 1. `block` - Number of elements in the block.
263 // 2. `start` - Start pointer into the `offsets` array.
264 // 3. `end` - End pointer into the `offsets` array.
265 // 4. `offsets - Indices of out-of-order elements within the block.
266
267 // The current block on the left side (from `l` to `l.offset(block_l)`).
268 let mut l = v.as_mut_ptr();
269 let mut block_l = BLOCK;
270 let mut start_l = ptr::null_mut();
271 let mut end_l = ptr::null_mut();
272 let mut offsets_l: [u8; BLOCK] = unsafe { mem::uninitialized() };
273
274 // The current block on the right side (from `r.offset(-block_r)` to `r`).
275 let mut r = unsafe { l.offset(v.len() as isize) };
276 let mut block_r = BLOCK;
277 let mut start_r = ptr::null_mut();
278 let mut end_r = ptr::null_mut();
279 let mut offsets_r: [u8; BLOCK] = unsafe { mem::uninitialized() };
280
281 // FIXME: When we get VLAs, try creating one array of length `min(v.len(), 2 * BLOCK)` rather
282 // than two fixed-size arrays of length `BLOCK`. VLAs might be more cache-efficient.
283
284 // Returns the number of elements between pointers `l` (inclusive) and `r` (exclusive).
285 fn width<T>(l: *mut T, r: *mut T) -> usize {
286 assert!(mem::size_of::<T>() > 0); // already done, no?
287 (r as usize - l as usize) / mem::size_of::<T>()
288 }
289
290 loop {
291 // We are done with partitioning block-by-block when `l` and `r` get very close. Then we do
292 // some patch-up work in order to partition the remaining elements in between.
293 let is_done = width(l, r) <= 2 * BLOCK;
294
295 if is_done {
296 // Number of remaining elements (still not compared to the pivot).
297 let mut rem = width(l, r);
298 if start_l < end_l || start_r < end_r {
299 rem -= BLOCK;
300 }
301
302 // Adjust block sizes so that the left and right block don't overlap, but get perfectly
303 // aligned to cover the whole remaining gap.
304 if start_l < end_l {
305 block_r = rem;
306 } else if start_r < end_r {
307 block_l = rem;
308 } else {
309 block_l = rem / 2;
310 block_r = rem - block_l;
311 }
312 debug_assert!(block_l <= BLOCK && block_r <= BLOCK);
313 debug_assert_eq!(width(l, r), block_l + block_r);
314 }
315
316 if start_l == end_l {
317 // Trace `block_l` elements from the left side.
318 start_l = offsets_l.as_mut_ptr();
319 end_l = offsets_l.as_mut_ptr();
320 let mut elem = l;
321
322 for i in 0..block_l {
323 unsafe {
324 // Branchless comparison.
325 *end_l = i as u8;
326 end_l = end_l.offset(!is_less(&*elem, pivot) as isize);
327 elem = elem.offset(1);
328 }
329 }
330 }
331
332 if start_r == end_r {
333 // Trace `block_r` elements from the right side.
334 start_r = offsets_r.as_mut_ptr();
335 end_r = offsets_r.as_mut_ptr();
336 let mut elem = r;
337
338 for i in 0..block_r {
339 unsafe {
340 // Branchless comparison.
341 elem = elem.offset(-1);
342 *end_r = i as u8;
343 end_r = end_r.offset(is_less(&*elem, pivot) as isize);
344 }
345 }
346 }
347
348 // Number of out-of-order elements to swap between the left and right side.
349 let count = cmp::min(width(start_l, end_l), width(start_r, end_r));
350
351 if count > 0 {
352 macro_rules! left { () => { l.offset(*start_l as isize) } }
353 macro_rules! right { () => { r.offset(-(*start_r as isize) - 1) } }
354
355 // Instead of swapping one pair at the time, it is more efficient to perform a cyclic
356 // permutation. This is not strictly equivalent to swapping, but produces a similar
357 // result using fewer memory operations.
358 unsafe {
359 let tmp = ptr::read(left!());
360 ptr::copy_nonoverlapping(right!(), left!(), 1);
361
362 for _ in 1..count {
363 start_l = start_l.offset(1);
364 ptr::copy_nonoverlapping(left!(), right!(), 1);
365 start_r = start_r.offset(1);
366 ptr::copy_nonoverlapping(right!(), left!(), 1);
367 }
368
369 ptr::copy_nonoverlapping(&tmp, right!(), 1);
370 mem::forget(tmp);
371 start_l = start_l.offset(1);
372 start_r = start_r.offset(1);
373 }
374 }
375
376 if start_l == end_l {
377 // All out-of-order elements in the left block were moved. Move to the next block.
378 l = unsafe { l.offset(block_l as isize) };
379 }
380
381 if start_r == end_r {
382 // All out-of-order elements in the right block were moved. Move to the previous block.
383 r = unsafe { r.offset(-(block_r as isize)) };
384 }
385
386 if is_done {
387 break;
388 }
389 }
390
391 // All that remains now is at most one block (either the left or the right) with out-of-order
392 // elements that need to be moved. Such remaining elements can be simply shifted to the end
393 // within their block.
394
395 if start_l < end_l {
396 // The left block remains.
397 // Move its remaining out-of-order elements to the far right.
398 debug_assert_eq!(width(l, r), block_l);
399 while start_l < end_l {
400 unsafe {
401 end_l = end_l.offset(-1);
402 ptr::swap(l.offset(*end_l as isize), r.offset(-1));
403 r = r.offset(-1);
404 }
405 }
406 width(v.as_mut_ptr(), r)
407 } else if start_r < end_r {
408 // The right block remains.
409 // Move its remaining out-of-order elements to the far left.
410 debug_assert_eq!(width(l, r), block_r);
411 while start_r < end_r {
412 unsafe {
413 end_r = end_r.offset(-1);
414 ptr::swap(l, r.offset(-(*end_r as isize) - 1));
415 l = l.offset(1);
416 }
417 }
418 width(v.as_mut_ptr(), l)
419 } else {
420 // Nothing else to do, we're done.
421 width(v.as_mut_ptr(), l)
422 }
423}
424
425/// Partitions `v` into elements smaller than `v[pivot]`, followed by elements greater than or
426/// equal to `v[pivot]`.
427///
428/// Returns a tuple of:
429///
430/// 1. Number of elements smaller than `v[pivot]`.
431/// 2. True if `v` was already partitioned.
432fn partition<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> (usize, bool)
433 where F: FnMut(&T, &T) -> bool
434{
435 let (mid, was_partitioned) = {
436 // Place the pivot at the beginning of slice.
437 v.swap(0, pivot);
438 // let (pivot, v) = v.split_first_mut().unwrap();
439 let (pivot, v) = v.split_at_mut(1);
440 let pivot = &mut pivot[0];
441 // could prob do `swap_remove`?
442
443 // Read the pivot into a stack-allocated variable for efficiency. If a following comparison
444 // operation panics, the pivot will be automatically written back into the slice.
445 let mut tmp = mem::ManuallyDrop::new(unsafe { ptr::read(pivot) });
446 let _pivot_guard = CopyOnDrop {
447 src: &mut *tmp,
448 dest: pivot,
449 };
450 let pivot = &*tmp;
451
452 // Find the first pair of out-of-order elements.
453 let mut l = 0;
454 let mut r = v.len();
455 unsafe {
456 // Find the first element greater then or equal to the pivot.
457 while l < r && is_less(v.get_unchecked(l), pivot) {
458 l += 1;
459 }
460
461 // Find the last element smaller that the pivot.
462 while l < r && !is_less(v.get_unchecked(r - 1), pivot) {
463 r -= 1;
464 }
465 }
466
467 // (l + partition_in_blocks(unsafe { v.get_unchecked_mut(l..r) }, pivot, is_less), l >= r)
468 (l + partition_in_blocks(&mut v[l..r], pivot, is_less), l >= r)
469
470 // `_pivot_guard` goes out of scope and writes the pivot (which is a stack-allocated
471 // variable) back into the slice where it originally was. This step is critical in ensuring
472 // safety!
473 };
474
475 // Place the pivot between the two partitions.
476 v.swap(0, mid);
477
478 (mid, was_partitioned)
479}
480
481/// Partitions `v` into elements equal to `v[pivot]` followed by elements greater than `v[pivot]`.
482///
483/// Returns the number of elements equal to the pivot. It is assumed that `v` does not contain
484/// elements smaller than the pivot.
485fn partition_equal<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> usize
486 where F: FnMut(&T, &T) -> bool
487{
488 // Place the pivot at the beginning of slice.
489 v.swap(0, pivot);
490 let (pivot, v) = v.split_at_mut(1);
491 let pivot = &mut pivot[0];
492 // let (pivot, v) = v.split_first_mut().unwrap();
493
494 // Read the pivot into a stack-allocated variable for efficiency. If a following comparison
495 // operation panics, the pivot will be automatically written back into the slice.
496 let mut tmp = mem::ManuallyDrop::new(unsafe { ptr::read(pivot) });
497 let _pivot_guard = CopyOnDrop {
498 src: &mut *tmp,
499 dest: pivot,
500 };
501 let pivot = &*tmp;
502
503 // Now partition the slice.
504 let mut l = 0;
505 let mut r = v.len();
506 loop {
507 unsafe {
508 // Find the first element greater that the pivot.
509 while l < r && !is_less(pivot, v.get_unchecked(l)) {
510 l += 1;
511 }
512
513 // Find the last element equal to the pivot.
514 while l < r && is_less(pivot, v.get_unchecked(r - 1)) {
515 r -= 1;
516 }
517
518 // Are we done?
519 if l >= r {
520 break;
521 }
522
523 // Swap the found pair of out-of-order elements.
524 r -= 1;
525 ptr::swap(v.get_unchecked_mut(l), v.get_unchecked_mut(r));
526 l += 1;
527 }
528 }
529
530 // We found `l` elements equal to the pivot. Add 1 to account for the pivot itself.
531 l + 1
532
533 // `_pivot_guard` goes out of scope and writes the pivot (which is a stack-allocated variable)
534 // back into the slice where it originally was. This step is critical in ensuring safety!
535}
536
537/// Scatters some elements around in an attempt to break patterns that might cause imbalanced
538/// partitions in quickselect.
539#[cold]
540fn break_patterns<T>(v: &mut [T]) {
541 let len = v.len();
542 if len >= 8 {
543 // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
544 let mut random = len as u32;
545 // TODO: bench this vs the Xoroshiro128 core
546 let mut gen_u32 = || {
547 random ^= random << 13;
548 random ^= random >> 17;
549 random ^ random << 5
550 };
551 let mut gen_usize = || {
552 if mem::size_of::<usize>() <= 4 {
553 gen_u32() as usize
554 } else {
555 (((gen_u32() as u64) << 32) | (gen_u32() as u64)) as usize
556 // ((u64::from(gen_u32()) << 32) | u64::from(gen_u32())) as usize
557 }
558 };
559
560 // Take random numbers modulo this number.
561 // The number fits into `usize` because `len` is not greater than `isize::MAX`.
562 let modulus = len.next_power_of_two();
563
564 // Some pivot candidates will be in the nearby of this index. Let's randomize them.
565 let pos = len / 4 * 2;
566
567 for i in 0..3 {
568 // Generate a random number modulo `len`. However, in order to avoid costly operations
569 // we first take it modulo a power of two, and then decrease by `len` until it fits
570 // into the range `[0, len - 1]`.
571 let mut other = gen_usize() & (modulus - 1);
572
573 // `other` is guaranteed to be less than `2 * len`.
574 if other >= len {
575 other -= len;
576 }
577
578 v.swap(pos - 1 + i, other);
579 }
580 }
581}
582
583/*#[cold]
584fn break_xoroshiro<T>(v: &mut [T]) {
585 let len = v.len();
586 if len < 8 {
587 return;
588 }
589
590 // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
591 let mut first = len as u64;
592 let mut second = {
593 let mut z = first + 0x9E37_79B9_7F4A_7C15_u64;
594 // first = z;
595 z = (z ^ (z >> 30)) * 0xBF58_476D_1CE4_E5B9_u64;
596 z = (z ^ (z >> 27)) * 0x94D0_49BB_1331_11EB_u64;
597 z ^ (z >> 31)
598 };
599
600 let mut gen_u64 = || {
601 #[inline]
602 fn rotl(x: u64, k: i32) -> u64 {
603 (x << k) | (x >> (64 - k))
604 }
605
606 let s0 = first;
607 let mut s1 = second;
608 let result = s0 + s1;
609
610 s1 ^= s0;
611 first = rotl(s0, 55) ^ s1 ^ (s1 << 14);
612 second = rotl(s1, 36);
613
614 result
615 };
616
617 let mut gen_usize = || {
618 if mem::size_of::<usize>() == 4 {
619 gen_u64() as u32 as usize
620 } else {
621 gen_u64() as usize
622 }
623 };
624
625 // Take random numbers modulo this number.
626 // The number fits into `usize` because `len` is not greater than `isize::MAX`.
627 let modulus = len.next_power_of_two();
628
629 // Some pivot candidates will be in the nearby of this index. Let's randomize them.
630 let pos = len / 4 * 2;
631
632 for i in 0..3 {
633 // Generate a random number modulo `len`. However, in order to avoid costly operations
634 // we first take it modulo a power of two, and then decrease by `len` until it fits
635 // into the range `[0, len - 1]`.
636 let mut other = gen_usize() & (modulus - 1);
637
638 // `other` is guaranteed to be less than `2 * len`.
639 if other >= len {
640 other -= len;
641 }
642
643 v.swap(pos - 1 + i, other);
644 }
645}*/
646
647/*fn break_xoroshiro_alpha<T>(v: &mut [T]) {
648 let len = v.len();
649 if len < 8 {
650 return;
651 }
652
653 // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
654 let mut first = len as u64;
655 let mut second = {
656 let mut z = first + 0x9E37_79B9_7F4A_7C15_u64;
657 first = z;
658 z = (z ^ (z >> 30)) * 0xBF58_476D_1CE4_E5B9_u64;
659 z = (z ^ (z >> 27)) * 0x94D0_49BB_1331_11EB_u64;
660 z ^ (z >> 31)
661 };
662
663 // Take random numbers modulo this number.
664 // The number fits into `usize` because `len` is not greater than `isize::MAX`.
665 let modulus = len.next_power_of_two();
666
667 // Some pivot candidates will be in the nearby of this index. Let's randomize them.
668 let pos = len / 4 * 2;
669
670 for i in 0..2 {
671 let mut gen_u64 = || {
672 #[inline]
673 fn rotl(x: u64, k: i32) -> u64 {
674 (x << k) | (x >> (64 - k))
675 }
676
677 let s0 = first;
678 let mut s1 = second;
679 let result = s0 + s1;
680
681 s1 ^= s0;
682 first = rotl(s0, 55) ^ s1 ^ (s1 << 14);
683 second = rotl(s1, 36);
684
685 result
686 };
687
688 let mut gen_usize = || {
689 if mem::size_of::<usize>() == 4 {
690 gen_u64() as u32 as usize
691 } else {
692 gen_u64() as usize
693 }
694 };
695
696 // Generate a random number modulo `len`. However, in order to avoid costly operations
697 // we first take it modulo a power of two, and then decrease by `len` until it fits
698 // into the range `[0, len - 1]`.
699 let mut other = gen_usize() & (modulus - 1);
700
701 // `other` is guaranteed to be less than `2 * len`.
702 if other >= len {
703 other -= len;
704 }
705
706 v.swap(pos - 1 + i, other);
707 }
708 let next = first + second;
709 let next_usize = if mem::size_of::<usize>() == 4 {
710 next as u32 as usize
711 } else {
712 next as usize
713 };
714 let mut other = next_usize & (modulus - 1);
715 if other >= len {
716 other -= len;
717 }
718 v.swap(pos - 1 + 2, other);
719}
720
721// #[cold]
722fn break_xoroshiro_pure<T>(v: &mut [T]) {
723 use self::std::num::Wrapping as w;
724
725 let len = v.len();
726 if len >= 8 {
727 // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
728 let mut first = len as u64;
729 let mut second = len as u64;
730
731 let mut gen_u64 = || {
732 #[inline]
733 fn rotl(x: u64, k: i32) -> u64 {
734 (x << k) | (x >> (64 - k))
735 }
736
737 let s0 = w(first);
738 let mut s1 = w(second);
739 let result = s0 + s1;
740
741 s1 ^= s0;
742 first = (w(s0.0.rotate_left(55)) ^ s1 ^ (s1 << 14)).0;
743 second = s1.0.rotate_left(36);
744
745 result.0
746 };
747
748 let mut gen_usize = || {
749 if mem::size_of::<usize>() == 4 {
750 gen_u64() as u32 as usize
751 } else {
752 gen_u64() as usize
753 }
754 };
755
756 // Take random numbers modulo this number.
757 // The number fits into `usize` because `len` is not greater than `isize::MAX`.
758 let modulus = len.next_power_of_two();
759
760 // Some pivot candidates will be in the nearby of this index. Let's randomize them.
761 let pos = len / 4 * 2;
762
763 for i in 0..3 {
764 // Generate a random number modulo `len`. However, in order to avoid costly operations
765 // we first take it modulo a power of two, and then decrease by `len` until it fits
766 // into the range `[0, len - 1]`.
767 let mut other = gen_usize() & (modulus - 1);
768
769 // `other` is guaranteed to be less than `2 * len`.
770 if other >= len {
771 other -= len;
772 }
773
774 v.swap(pos - 1 + i, other);
775 }
776 }
777}
778
779fn break_xorshift_star<T>(v: &mut [T]) {
780 let len = v.len();
781 if len < 8 {
782 return;
783 }
784
785 // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
786 let mut random = len as u64;
787
788 let mut gen_u64 = || {
789 random ^= random >> 12; // a
790 random ^= random << 25; // b
791 random ^= random >> 27; // c
792 random * 0x2545_F491_4F6C_DD1D
793 };
794
795 let mut gen_usize = || {
796 if mem::size_of::<usize>() == 4 {
797 gen_u64() as u32 as usize
798 } else {
799 gen_u64() as usize
800 }
801 };
802
803 // Take random numbers modulo this number.
804 // The number fits into `usize` because `len` is not greater than `isize::MAX`.
805 let modulus = len.next_power_of_two();
806
807 // Some pivot candidates will be in the nearby of this index. Let's randomize them.
808 let pos = len / 4 * 2;
809
810 for i in 0..3 {
811 // Generate a random number modulo `len`. However, in order to avoid costly operations
812 // we first take it modulo a power of two, and then decrease by `len` until it fits
813 // into the range `[0, len - 1]`.
814 let mut other = gen_usize() & (modulus - 1);
815
816 // `other` is guaranteed to be less than `2 * len`.
817 if other >= len {
818 other -= len;
819 }
820
821 v.swap(pos - 1 + i, other);
822 }
823}
824
825fn break_xorshift_plus<T>(v: &mut [T]) {
826 let len = v.len();
827 if len < 8 {
828 return;
829 }
830
831 // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
832 let mut random = len as u64;
833
834 let mut gen_u64 = || {
835 random ^= random >> 12; // a
836 random ^= random << 25; // b
837 random ^= random >> 27; // c
838 random * 0x2545_F491_4F6C_DD1D
839 };
840
841 let mut gen_usize = || {
842 if mem::size_of::<usize>() == 4 {
843 gen_u64() as u32 as usize
844 } else {
845 gen_u64() as usize
846 }
847 };
848
849 // Take random numbers modulo this number.
850 // The number fits into `usize` because `len` is not greater than `isize::MAX`.
851 let modulus = len.next_power_of_two();
852
853 // Some pivot candidates will be in the nearby of this index. Let's randomize them.
854 let pos = len / 4 * 2;
855
856 for i in 0..3 {
857 // Generate a random number modulo `len`. However, in order to avoid costly operations
858 // we first take it modulo a power of two, and then decrease by `len` until it fits
859 // into the range `[0, len - 1]`.
860 let mut other = gen_usize() & (modulus - 1);
861
862 // `other` is guaranteed to be less than `2 * len`.
863 if other >= len {
864 other -= len;
865 }
866
867 v.swap(pos - 1 + i, other);
868 }
869}
870
871// #[cold]
872fn break_splitmix<T>(v: &mut [T]) {
873 use std::num::Wrapping as w;
874
875 let len = v.len();
876 if len >= 8 {
877 // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
878 let mut random = len as u64;
879
880 let mut gen_u64 = || {
881 let mut z = w(random) + w(0x9E37_79B9_7F4A_7C15_u64);
882 random = z.0;
883 z = (z ^ (z >> 30)) * w(0xBF58_476D_1CE4_E5B9_u64);
884 z = (z ^ (z >> 27)) * w(0x94D0_49BB_1331_11EB_u64);
885 (z ^ (z >> 31)).0
886 };
887
888 let mut gen_usize = || {
889 if mem::size_of::<usize>() == 4 {
890 gen_u64() as u32 as usize
891 } else {
892 gen_u64() as usize
893 }
894 };
895
896 // Take random numbers modulo this number.
897 // The number fits into `usize` because `len` is not greater than `isize::MAX`.
898 let modulus = len.next_power_of_two();
899
900 // Some pivot candidates will be in the nearby of this index. Let's randomize them.
901 let pos = len / 4 * 2;
902
903 for i in 0..3 {
904 // Generate a random number modulo `len`. However, in order to avoid costly operations
905 // we first take it moduluso a power of two, and then decrease by `len` until it fits
906 // into the range `[0, len - 1]`.
907 let mut other = gen_usize() & (modulus - 1);
908
909 // `other` is guaranteed to be less than `2 * len`.
910 if other >= len {
911 other -= len;
912 }
913
914 v.swap(pos - 1 + i, other);
915 }
916 }
917}*/
918
919/// Chooses a pivot in `v` and returns the index and `true` if the slice is likely already sorted.
920///
921/// Elements in `v` might be reordered in the process.
922fn choose_pivot<T, F>(v: &mut [T], is_less: &mut F) -> (usize, bool)
923 where F: FnMut(&T, &T) -> bool
924{
925 // Minimum length to choose the median-of-medians method.
926 // Shorter slices use the simple median-of-three method.
927 const SHORTEST_MEDIAN_OF_MEDIANS: usize = 50;
928 // Maximum number of swaps that can be performed in this function.
929 const MAX_SWAPS: usize = 4 * 3;
930
931 let len = v.len();
932
933 // Three indices near which we are going to choose a pivot.
934 let mut a = len / 4/* * 1*/;
935 let mut b = len / 4 * 2;
936 let mut c = len / 4 * 3;
937
938 // Counts the total number of swaps we are about to perform while sorting indices.
939 let mut swaps = 0;
940
941 if len >= 8 {
942 // Swaps indices so that `v[a] <= v[b]`.
943 let mut sort2 = |a: &mut usize, b: &mut usize| unsafe {
944 if is_less(v.get_unchecked(*b), v.get_unchecked(*a)) {
945 ptr::swap(a, b);
946 swaps += 1;
947 }
948 };
949
950 // Swaps indices so that `v[a] <= v[b] <= v[c]`.
951 let mut sort3 = |a: &mut usize, b: &mut usize, c: &mut usize| {
952 sort2(a, b);
953 sort2(b, c);
954 sort2(a, b);
955 };
956
957 if len >= SHORTEST_MEDIAN_OF_MEDIANS {
958 // Finds the median of `v[a - 1], v[a], v[a + 1]` and stores the index into `a`.
959 let mut sort_adjacent = |a: &mut usize| {
960 let tmp = *a;
961 sort3(&mut (tmp - 1), a, &mut (tmp + 1));
962 };
963
964 // Find medians in the neighborhoods of `a`, `b`, and `c`.
965 sort_adjacent(&mut a);
966 sort_adjacent(&mut b);
967 sort_adjacent(&mut c);
968 }
969
970 // Find the median among `a`, `b`, and `c`.
971 sort3(&mut a, &mut b, &mut c);
972 }
973
974 if swaps < MAX_SWAPS {
975 (b, swaps == 0)
976 } else {
977 // The maximum number of swaps was performed. Chances are the slice is descending or mostly
978 // descending, so reversing will probably help sort it faster.
979 v.reverse();
980 (len - 1 - b, true)
981 }
982}
983
984/// Sorts `v` recursively.
985///
986/// If the slice had a predecessor in the original array, it is specified as `pred`.
987///
988/// `limit` is the number of allowed imbalanced partitions before switching to `heapsort`. If zero,
989/// this function will immediately switch to heapsort.
990fn recurse<'a, T, F>(mut v: &'a mut [T], mut k: usize, is_less: &mut F/*, mut pred: Option<&'a T>*//*b*/)
991 where F: FnMut(&T, &T) -> bool,
992{
993 // Slices of up to this length get sorted using insertion sort.
994 const MAX_INSERTION: usize = 20;
995
996 // `limit` is the number of allowed imbalanced partitions before switching to `heapsort`. If zero,
997 // this function will immediately switch to heapsort.
998 let mut limit = mem::size_of::<usize>() * 8 - v.len().leading_zeros() as usize;
999
1000 // True if the last partitioning was reasonably balanced.
1001 let mut was_balanced = true;
1002 // True if the last partitioning didn't shuffle elements (the slice was already partitioned).
1003 let mut was_partitioned = true;
1004
1005 let mut pred = None;
1006
1007 loop {
1008 let len = v.len();
1009
1010 // Very short slices get sorted using insertion sort.
1011 if len <= MAX_INSERTION {
1012 insertion_sort(v, is_less);
1013 return;
1014 }
1015 // if len <= 1 { return }
1016
1017 // println!("v {:?}", v);
1018 // println!("k {:?}", k);
1019
1020 // If too many bad pivot choices were made, simply fall back to heapsort in order to
1021 // guarantee `O(n log n)` worst-case.
1022 if limit == 0 {
1023 heapsort(v, is_less);
1024 return;
1025 }
1026
1027 // If the last partitioning was imbalanced, try breaking patterns in the slice by shuffling
1028 // some elements around. Hopefully we'll choose a better pivot this time.
1029 if !was_balanced {
1030 // println!("break");
1031 break_patterns(v);
1032 limit -= 1;
1033 }
1034
1035 // Choose a pivot and try guessing whether the slice is already sorted.
1036 let (pivot, likely_sorted) = choose_pivot(v, is_less);
1037
1038 // println!("pivot {:?}, val: {:?}", pivot, v[pivot]);
1039
1040 // If the last partitioning was decently balanced and didn't shuffle elements, and if pivot
1041 // selection predicts the slice is likely already sorted...
1042 if was_balanced && was_partitioned && likely_sorted {
1043 // Try identifying several out-of-order elements and shifting them to correct
1044 // positions. If the slice ends up being completely sorted, we're done.
1045 // println!("partial");
1046 if partial_insertion_sort(v, is_less) {
1047 return;
1048 }
1049 }
1050
1051 // If the chosen pivot is equal to the predecessor, then it's the smallest element in the
1052 // slice. Partition the slice into elements equal to and elements greater than the pivot.
1053 // This case is usually hit when the slice contains many duplicate elements.
1054 if let Some(p) = pred {
1055 if !is_less(p, &v[pivot]) {
1056 // println!("equal");
1057 let mid = partition_equal(v, pivot, is_less);
1058 // mid is where the greaters start
1059 // If there are more than k items smaller than the pivot and
1060 // they are partitioned, we can exit.
1061 // unsure about this
1062
1063 // `mid` items are smaller than v[mid] and are to the left
1064 // if `k` is smaller than `mid`, we have partitioned enough items
1065 // now just
1066 if mid > k {
1067 return;
1068 }
1069
1070 // Continue sorting elements greater than the pivot.
1071 v = &mut {v}[mid..];
1072 k -= mid;
1073 continue;
1074 }
1075 }
1076
1077 // Partition the slice.
1078 let (mid, was_p) = partition(v, pivot, is_less);
1079
1080 // println!("partition {:?}", v);
1081 // println!("mid {:?}", mid);
1082
1083 // If the pivot is at `k`, then the `k` smallest items are properly partitioned. Unsure
1084 // if this means that v[k] is in its final sorted position. If that assumption is true, we
1085 // are done.
1086 if mid == k {
1087 return;
1088 }
1089
1090 was_balanced = cmp::min(mid, len - mid) >= len / 8;
1091 was_partitioned = was_p;
1092
1093 // Split the slice into `left`, `pivot`, and `right`.
1094 let (left, right) = {v}.split_at_mut(mid);
1095 let (pivot, right) = right.split_at_mut(1);
1096 let pivot = &pivot[0];
1097
1098 // If k is to the right of the partition, partition the right
1099 if mid < k {
1100 // println!("right");
1101 // let (_, right) = {v}.split_at_mut(mid);
1102 // let (pivot, right) = right.split_first_mut().unwrap();
1103 pred = Some(pivot);
1104 v = right;
1105 k -= mid + 1; // select the `k - mid` smallest items of the new `v`
1106 } else { // Otherwise k is to the left of the partition, partition the left
1107 // println!("left");
1108 // let (left, _) = {v}.split_at_mut(mid);
1109 v = left;
1110 }
1111 }
1112}
1113
1114/// Sorts `v` using pattern-defeating quickselect, which is `O(n log n)` worst-case.
1115fn quickselect<T, F>(v: &mut [T], k: usize, mut is_less: F)
1116 where F: FnMut(&T, &T) -> bool,
1117{
1118 // Sorting has no meaningful behavior on zero-sized types.
1119 if mem::size_of::<T>() == 0 {
1120 return;
1121 }
1122
1123 // TODO: impl the cheaper `iter::min` function when k is 1
1124
1125 recurse(v, k, &mut is_less);
1126}
1127
1128/// Partially sorts a slice and puts the `k`th smallest item in place.
1129///
1130/// This sort is in-place, unstable, and `O(n log n)` worst-case.
1131///
1132/// The implementation is based on Orson Peters' pattern-defeating quickselect.
1133///
1134/// # Examples
1135///
1136/// ```
1137/// let mut v = [-5, 4, 1, -3, 2];
1138/// let k = 2;
1139/// pdqselect::select(&mut v, k);
1140/// assert!(v[..k].iter().all(|&x| x <= v[k]));
1141/// assert!(v[k+1..].iter().all(|&x| x >= v[k]));
1142/// ```
1143pub fn select<T>(v: &mut [T], k: usize)
1144 where T: Ord,
1145{
1146 quickselect(v, k, |a, b| a.lt(b));
1147}
1148
1149/// Partially sorts a slice using `compare` to compare elements and puts the `k`th smallest
1150/// item in place.
1151///
1152/// This sort is in-place, unstable, and `O(n log n)` worst-case.
1153///
1154/// The implementation is based on Orson Peters' pattern-defeating quickselect.
1155///
1156/// # Examples
1157///
1158/// ```
1159/// let mut v = [5, 4, 1, 3, 2];
1160/// let k = 2;
1161/// pdqselect::select_by(&mut v, k, |a, b| a.cmp(b));
1162/// assert!(v[..k].iter().all(|&x| x <= v[k]));
1163/// assert!(v[k+1..].iter().all(|&x| x >= v[k]));
1164///
1165/// // reverse sorting
1166/// pdqselect::select_by(&mut v, k, |a, b| b.cmp(a));
1167/// assert!(v[..k].iter().all(|&x| x >= v[k]));
1168/// assert!(v[k+1..].iter().all(|&x| x <= v[k]));
1169/// ```
1170pub fn select_by<T, F>(v: &mut [T], k: usize, mut compare: F)
1171 where F: FnMut(&T, &T) -> Ordering,
1172{
1173 quickselect(v, k, |a, b| compare(a, b) == Ordering::Less);
1174}
1175
1176/// Partially sorts a slice using `f` to extract a key to compare elements by and puts the `k`th
1177/// smallest item in place.
1178///
1179/// This sort is in-place, unstable, and `O(n log n)` worst-case.
1180///
1181/// The implementation is based on Orson Peters' pattern-defeating quicksort.
1182///
1183/// # Examples
1184///
1185/// ```
1186/// let mut v = [-5i32, 4, 1, -3, 2];
1187/// let k = 3;
1188/// pdqselect::select_by_key(&mut v, k, |x| x.abs());
1189/// assert!(v[..k].iter().all(|&x| x.abs() <= v[k].abs()));
1190/// assert!(v[k+1..].iter().all(|&x| x.abs() >= v[k].abs()));
1191/// ```
1192pub fn select_by_key<T, B, F>(v: &mut [T], k: usize, mut f: F)
1193 where F: FnMut(&T) -> B,
1194 B: Ord,
1195{
1196 quickselect(v, k, |a, b| f(a).lt(&f(b)));
1197}