par_iter/slice/mod.rs
//! Parallel iterator types for [slices][std::slice]
//!
//! You will rarely need to interact with this module directly unless you need
//! to name one of the iterator types.
//!
//! [std::slice]: https://doc.rust-lang.org/stable/std/slice/

mod chunk_by;
mod chunks;
mod rchunks;
mod sort;

mod test;

use std::{
    cmp::Ordering,
    fmt::{self, Debug},
    mem,
};

use self::sort::{par_mergesort, par_quicksort};
pub use self::{
    chunk_by::{ChunkBy, ChunkByMut},
    chunks::{Chunks, ChunksExact, ChunksExactMut, ChunksMut},
    rchunks::{RChunks, RChunksExact, RChunksExactMut, RChunksMut},
};
use crate::{
    iter::{plumbing::*, *},
    split_producer::*,
};

/// Parallel extensions for slices.
pub trait ParallelSlice<T: Sync> {
    /// Returns a plain slice, which is used to implement the rest of the
    /// parallel methods.
    fn as_parallel_slice(&self) -> &[T];

    /// Returns a parallel iterator over subslices separated by elements that
    /// match the separator.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let products: Vec<_> = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9]
    ///     .par_split(|i| *i == 0)
    ///     .map(|numbers| numbers.iter().product::<i32>())
    ///     .collect();
    /// assert_eq!(products, [6, 64, 162]);
    /// ```
    fn par_split<P>(&self, separator: P) -> Split<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        Split {
            slice: self.as_parallel_slice(),
            separator,
        }
    }

    /// Returns a parallel iterator over subslices separated by elements that
    /// match the separator, including the matched part as a terminator.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let lengths: Vec<_> = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9]
    ///     .par_split_inclusive(|i| *i == 0)
    ///     .map(|numbers| numbers.len())
    ///     .collect();
    /// assert_eq!(lengths, [4, 4, 3]);
    /// ```
    fn par_split_inclusive<P>(&self, separator: P) -> SplitInclusive<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        SplitInclusive {
            slice: self.as_parallel_slice(),
            separator,
        }
    }

    /// Returns a parallel iterator over all contiguous windows of length
    /// `window_size`. The windows overlap.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let windows: Vec<_> = [1, 2, 3].par_windows(2).collect();
    /// assert_eq!(vec![[1, 2], [2, 3]], windows);
    /// ```
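    ///
    /// If the slice is shorter than `window_size`, the iterator produces no
    /// windows at all (mirroring `std::slice::windows`):
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let windows: Vec<&[i32]> = [1, 2, 3].par_windows(4).collect();
    /// assert!(windows.is_empty());
    /// ```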
    fn par_windows(&self, window_size: usize) -> Windows<'_, T> {
        Windows {
            window_size,
            slice: self.as_parallel_slice(),
        }
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of
    /// `self` at a time. The chunks do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks(2).collect();
    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4], &[5]]);
    /// ```
    #[track_caller]
    fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        Chunks::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over `chunk_size` elements of
    /// `self` at a time. The chunks do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks_exact(2).collect();
    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4]]);
    /// ```
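    ///
    /// The leftover elements can also be inspected up front; a small sketch,
    /// assuming this crate's `ChunksExact` exposes them through a rayon-style
    /// `remainder` accessor:
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let slice = [1, 2, 3, 4, 5];
    /// assert_eq!(slice.par_chunks_exact(2).remainder(), &[5]);
    /// ```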
    #[track_caller]
    fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksExact::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of `self`
    /// at a time, starting at the end. The chunks do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks(2).collect();
    /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3], &[1]]);
    /// ```
    #[track_caller]
    fn par_rchunks(&self, chunk_size: usize) -> RChunks<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunks::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over `chunk_size` elements of `self` at a
    /// time, starting at the end. The chunks do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks_exact(2).collect();
    /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3]]);
    /// ```
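    ///
    /// For this reversed variant the leftover elements sit at the front of
    /// the slice; again assuming a rayon-style `remainder` accessor:
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let slice = [1, 2, 3, 4, 5];
    /// assert_eq!(slice.par_rchunks_exact(2).remainder(), &[1]);
    /// ```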
    #[track_caller]
    fn par_rchunks_exact(&self, chunk_size: usize) -> RChunksExact<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksExact::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over the slice producing non-overlapping
    /// runs of elements using the predicate to separate them.
    ///
    /// The predicate is called for every pair of consecutive elements,
    /// meaning it is called on `slice[0]` and `slice[1]`,
    /// then on `slice[1]` and `slice[2]`, and so on.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 2, 3, 3, 3].par_chunk_by(|&x, &y| x == y).collect();
    /// assert_eq!(chunks[0], &[1]);
    /// assert_eq!(chunks[1], &[2, 2]);
    /// assert_eq!(chunks[2], &[3, 3, 3]);
    /// ```
    fn par_chunk_by<F>(&self, pred: F) -> ChunkBy<'_, T, F>
    where
        F: Fn(&T, &T) -> bool + Send + Sync,
    {
        ChunkBy::new(self.as_parallel_slice(), pred)
    }
}

impl<T: Sync> ParallelSlice<T> for [T] {
    #[inline]
    fn as_parallel_slice(&self) -> &[T] {
        self
    }
}

/// Parallel extensions for mutable slices.
pub trait ParallelSliceMut<T: Send> {
    /// Returns a plain mutable slice, which is used to implement the rest of
    /// the parallel methods.
    fn as_parallel_slice_mut(&mut self) -> &mut [T];

    /// Returns a parallel iterator over mutable subslices separated by
    /// elements that match the separator.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let mut array = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9];
    /// array.par_split_mut(|i| *i == 0)
    ///     .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [3, 2, 1, 0, 8, 4, 2, 0, 9, 6, 3]);
    /// ```
    fn par_split_mut<P>(&mut self, separator: P) -> SplitMut<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        SplitMut {
            slice: self.as_parallel_slice_mut(),
            separator,
        }
    }

    /// Returns a parallel iterator over mutable subslices separated by elements
    /// that match the separator, including the matched part as a terminator.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let mut array = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9];
    /// array.par_split_inclusive_mut(|i| *i == 0)
    ///     .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [0, 3, 2, 1, 0, 8, 4, 2, 9, 6, 3]);
    /// ```
    fn par_split_inclusive_mut<P>(&mut self, separator: P) -> SplitInclusiveMut<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        SplitInclusiveMut {
            slice: self.as_parallel_slice_mut(),
            separator,
        }
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of
    /// `self` at a time. The chunks are mutable and do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_chunks_mut(2)
    ///     .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [2, 1, 4, 3, 5]);
    /// ```
    #[track_caller]
    fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over `chunk_size` elements of
    /// `self` at a time. The chunks are mutable and do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_chunks_exact_mut(3)
    ///     .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [3, 2, 1, 4, 5]);
    /// ```
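    ///
    /// The leftover elements remain mutable; a sketch, assuming the iterator
    /// hands them back through `into_remainder` as rayon's `ChunksExactMut`
    /// does:
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_chunks_exact_mut(3).into_remainder().reverse();
    /// assert_eq!(array, [1, 2, 3, 5, 4]);
    /// ```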
    #[track_caller]
    fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of `self`
    /// at a time, starting at the end. The chunks are mutable and do not
    /// overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_rchunks_mut(2)
    ///     .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [1, 3, 2, 5, 4]);
    /// ```
    #[track_caller]
    fn par_rchunks_mut(&mut self, chunk_size: usize) -> RChunksMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over `chunk_size` elements of `self` at a
    /// time, starting at the end. The chunks are mutable and do not
    /// overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_rchunks_exact_mut(3)
    ///     .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [1, 2, 5, 4, 3]);
    /// ```
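    ///
    /// Here the leftover elements are at the front of the slice; the same
    /// `into_remainder` assumption as for `par_chunks_exact_mut` applies:
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_rchunks_exact_mut(3).into_remainder().reverse();
    /// assert_eq!(array, [2, 1, 3, 4, 5]);
    /// ```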
    #[track_caller]
    fn par_rchunks_exact_mut(&mut self, chunk_size: usize) -> RChunksExactMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Sorts the slice in parallel.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n*
    /// \* log(*n*)) worst-case.
    ///
    /// When applicable, unstable sorting is preferred because it is generally
    /// faster than stable sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable`](#method.par_sort_unstable).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly
    /// sorted, or consists of two or more sorted sequences concatenated one
    /// after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for
    /// very short slices a non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into
    /// smaller chunks and all chunks are sorted in parallel. Then, adjacent
    /// chunks that together form non-descending or descending runs are
    /// concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    ///
    /// let mut v = [-5, 4, 1, -3, 2];
    ///
    /// v.par_sort();
    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
    /// ```
    fn par_sort(&mut self)
    where
        T: Ord,
    {
        par_mergesort(self.as_parallel_slice_mut(), T::lt);
    }

    /// Sorts the slice in parallel with a comparator function.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n*
    /// \* log(*n*)) worst-case.
    ///
    /// The comparator function must define a total ordering for the elements in
    /// the slice. If the ordering is not total, the order of the elements
    /// is unspecified. An order is a total order if it is (for all `a`, `b`
    /// and `c`):
    ///
    /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b`
    ///   is true, and
    /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold
    ///   for both `==` and `>`.
    ///
    /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN !=
    /// NaN`, we can use `partial_cmp` as our sort function when we know the
    /// slice doesn't contain a `NaN`.
    ///
    /// ```
    /// use par_iter::prelude::*;
    ///
    /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
    /// floats.par_sort_by(|a, b| a.partial_cmp(b).unwrap());
    /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
    /// ```
    ///
    /// When applicable, unstable sorting is preferred because it is generally
    /// faster than stable sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable_by`](#method.par_sort_unstable_by).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly
    /// sorted, or consists of two or more sorted sequences concatenated one
    /// after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for
    /// very short slices a non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into
    /// smaller chunks and all chunks are sorted in parallel. Then, adjacent
    /// chunks that together form non-descending or descending runs are
    /// concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    ///
    /// let mut v = [5, 4, 1, 3, 2];
    /// v.par_sort_by(|a, b| a.cmp(b));
    /// assert_eq!(v, [1, 2, 3, 4, 5]);
    ///
    /// // reverse sorting
    /// v.par_sort_by(|a, b| b.cmp(a));
    /// assert_eq!(v, [5, 4, 3, 2, 1]);
    /// ```
    fn par_sort_by<F>(&mut self, compare: F)
    where
        F: Fn(&T, &T) -> Ordering + Sync,
    {
        par_mergesort(self.as_parallel_slice_mut(), |a, b| {
            compare(a, b) == Ordering::Less
        });
    }

    /// Sorts the slice in parallel with a key extraction function.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m*
    /// \* *n* \* log(*n*)) worst-case, where the key function is *O*(*m*).
    ///
    /// For expensive key functions (e.g. functions that are not simple property
    /// accesses or basic operations),
    /// [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) is likely to
    /// be significantly faster, as it does not recompute element keys.
    ///
    /// When applicable, unstable sorting is preferred because it is generally
    /// faster than stable sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable_by_key`](#method.par_sort_unstable_by_key).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly
    /// sorted, or consists of two or more sorted sequences concatenated one
    /// after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for
    /// very short slices a non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into
    /// smaller chunks and all chunks are sorted in parallel. Then, adjacent
    /// chunks that together form non-descending or descending runs are
    /// concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 1, -3, 2];
    ///
    /// v.par_sort_by_key(|k| k.abs());
    /// assert_eq!(v, [1, 2, -3, 4, -5]);
    /// ```
    fn par_sort_by_key<K, F>(&mut self, f: F)
    where
        K: Ord,
        F: Fn(&T) -> K + Sync,
    {
        par_mergesort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
    }

    /// Sorts the slice in parallel with a key extraction function.
    ///
    /// During sorting, the key function is called at most once per element, by
    /// using temporary storage to remember the results of key evaluation.
    /// The key function is called in parallel, so the order of calls is
    /// completely unspecified.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m*
    /// \* *n* + *n* \* log(*n*)) worst-case, where the key function is
    /// *O*(*m*).
    ///
    /// For simple key functions (e.g., functions that are property accesses or
    /// basic operations), [`par_sort_by_key`](#method.par_sort_by_key) is
    /// likely to be faster.
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort]
    /// by Orson Peters, which combines the fast average case of randomized
    /// quicksort with the fast worst case of heapsort, while achieving
    /// linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always
    /// provide deterministic behavior.
    ///
    /// In the worst case, the algorithm allocates temporary storage in a
    /// `Vec<(K, usize)>` the length of the slice.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed
    /// by recursive calls. The partitioning phase is sequential, but the
    /// two recursive calls are performed in parallel. Finally, after
    /// sorting the cached keys, the item positions are updated sequentially.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 32, -3, 2];
    ///
    /// v.par_sort_by_cached_key(|k| k.to_string());
    /// assert!(v == [-3, -5, 2, 32, 4]);
    /// ```
    fn par_sort_by_cached_key<K, F>(&mut self, f: F)
    where
        F: Fn(&T) -> K + Sync,
        K: Ord + Send,
    {
        let slice = self.as_parallel_slice_mut();
        let len = slice.len();
        if len < 2 {
            return;
        }

        // Helper macro for indexing our vector by the smallest possible type, to reduce
        // allocation.
        macro_rules! sort_by_key {
            ($t:ty) => {{
                let mut indices: Vec<_> = slice
                    .par_iter_mut()
                    .enumerate()
                    .map(|(i, x)| (f(&*x), i as $t))
                    .collect();
                // The elements of `indices` are unique, as they are indexed, so any sort will
                // be stable with respect to the original slice. We use `sort_unstable`
                // here because it requires less memory allocation.
                indices.par_sort_unstable();
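                // Apply the sorted permutation with in-place swaps. Positions
                // before `i` are already final, so if a recorded source index
                // points into that prefix, the element has been swapped away;
                // follow the chain of updated indices to its current slot.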
                for i in 0..len {
                    let mut index = indices[i].1;
                    while (index as usize) < i {
                        index = indices[index as usize].1;
                    }
                    indices[i].1 = index;
                    slice.swap(i, index as usize);
                }
            }};
        }

        let sz_u8 = mem::size_of::<(K, u8)>();
        let sz_u16 = mem::size_of::<(K, u16)>();
        let sz_u32 = mem::size_of::<(K, u32)>();
        let sz_usize = mem::size_of::<(K, usize)>();

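        // Pick the narrowest index type that can address every element, but
        // only when it actually shrinks the `(K, index)` pair: alignment
        // padding from `K` can make a smaller index type save nothing.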
        if sz_u8 < sz_u16 && len <= (u8::MAX as usize) {
            return sort_by_key!(u8);
        }
        if sz_u16 < sz_u32 && len <= (u16::MAX as usize) {
            return sort_by_key!(u16);
        }
        if sz_u32 < sz_usize && len <= (u32::MAX as usize) {
            return sort_by_key!(u32);
        }
        sort_by_key!(usize)
    }

    /// Sorts the slice in parallel, but might not preserve the order of equal
    /// elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort]
    /// by Orson Peters, which combines the fast average case of randomized
    /// quicksort with the fast worst case of heapsort, while achieving
    /// linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always
    /// provide deterministic behavior.
    ///
    /// It is typically faster than stable sorting, except in a few special
    /// cases, e.g., when the slice consists of several concatenated sorted
    /// sequences.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed
    /// by recursive calls. The partitioning phase is sequential, but the
    /// two recursive calls are performed in parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    ///
    /// let mut v = [-5, 4, 1, -3, 2];
    ///
    /// v.par_sort_unstable();
    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
    /// ```
    fn par_sort_unstable(&mut self)
    where
        T: Ord,
    {
        par_quicksort(self.as_parallel_slice_mut(), T::lt);
    }

    /// Sorts the slice in parallel with a comparator function, but might not
    /// preserve the order of equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// The comparator function must define a total ordering for the elements in
    /// the slice. If the ordering is not total, the order of the elements
    /// is unspecified. An order is a total order if it is (for all `a`, `b`
    /// and `c`):
    ///
    /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b`
    ///   is true, and
    /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold
    ///   for both `==` and `>`.
    ///
    /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN !=
    /// NaN`, we can use `partial_cmp` as our sort function when we know the
    /// slice doesn't contain a `NaN`.
    ///
    /// ```
    /// use par_iter::prelude::*;
    ///
    /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
    /// floats.par_sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
    /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
    /// ```
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort]
    /// by Orson Peters, which combines the fast average case of randomized
    /// quicksort with the fast worst case of heapsort, while achieving
    /// linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always
    /// provide deterministic behavior.
    ///
    /// It is typically faster than stable sorting, except in a few special
    /// cases, e.g., when the slice consists of several concatenated sorted
    /// sequences.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed
    /// by recursive calls. The partitioning phase is sequential, but the
    /// two recursive calls are performed in parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    ///
    /// let mut v = [5, 4, 1, 3, 2];
    /// v.par_sort_unstable_by(|a, b| a.cmp(b));
    /// assert_eq!(v, [1, 2, 3, 4, 5]);
    ///
    /// // reverse sorting
    /// v.par_sort_unstable_by(|a, b| b.cmp(a));
    /// assert_eq!(v, [5, 4, 3, 2, 1]);
    /// ```
    fn par_sort_unstable_by<F>(&mut self, compare: F)
    where
        F: Fn(&T, &T) -> Ordering + Sync,
    {
        par_quicksort(self.as_parallel_slice_mut(), |a, b| {
            compare(a, b) == Ordering::Less
        });
    }

    /// Sorts the slice in parallel with a key extraction function, but might
    /// not preserve the order of equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*m* \* *n* \* log(*n*)) worst-case,
    /// where the key function is *O*(*m*).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort]
    /// by Orson Peters, which combines the fast average case of randomized
    /// quicksort with the fast worst case of heapsort, while achieving
    /// linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always
    /// provide deterministic behavior.
    ///
    /// Due to its key calling strategy, `par_sort_unstable_by_key` is likely to
    /// be slower than [`par_sort_by_cached_key`](#method.par_sort_by_cached_key)
    /// in cases where the key function is expensive.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed
    /// by recursive calls. The partitioning phase is sequential, but the
    /// two recursive calls are performed in parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 1, -3, 2];
    ///
    /// v.par_sort_unstable_by_key(|k| k.abs());
    /// assert_eq!(v, [1, 2, -3, 4, -5]);
    /// ```
    fn par_sort_unstable_by_key<K, F>(&mut self, f: F)
    where
        K: Ord,
        F: Fn(&T) -> K + Sync,
    {
        par_quicksort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
    }

    /// Returns a parallel iterator over the slice producing non-overlapping
    /// mutable runs of elements using the predicate to separate them.
    ///
    /// The predicate is called for every pair of consecutive elements,
    /// meaning it is called on `slice[0]` and `slice[1]`,
    /// then on `slice[1]` and `slice[2]`, and so on.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let mut xs = [1, 2, 2, 3, 3, 3];
    /// let chunks: Vec<_> = xs.par_chunk_by_mut(|&x, &y| x == y).collect();
    /// assert_eq!(chunks[0], &mut [1]);
    /// assert_eq!(chunks[1], &mut [2, 2]);
    /// assert_eq!(chunks[2], &mut [3, 3, 3]);
    /// ```
    fn par_chunk_by_mut<F>(&mut self, pred: F) -> ChunkByMut<'_, T, F>
    where
        F: Fn(&T, &T) -> bool + Send + Sync,
    {
        ChunkByMut::new(self.as_parallel_slice_mut(), pred)
    }
}

impl<T: Send> ParallelSliceMut<T> for [T] {
    #[inline]
    fn as_parallel_slice_mut(&mut self) -> &mut [T] {
        self
    }
}

impl<'data, T: Sync + 'data> IntoParallelIterator for &'data [T] {
    type Item = &'data T;
    type Iter = Iter<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        Iter { slice: self }
    }
}

impl<'data, T: Send + 'data> IntoParallelIterator for &'data mut [T] {
    type Item = &'data mut T;
    type Iter = IterMut<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        IterMut { slice: self }
    }
}

/// Parallel iterator over immutable items in a slice
#[derive(Debug)]
pub struct Iter<'data, T: Sync> {
    slice: &'data [T],
}

impl<'data, T: Sync> Clone for Iter<'data, T> {
    fn clone(&self) -> Self {
        Iter { ..*self }
    }
}

impl<'data, T: Sync + 'data> ParallelIterator for Iter<'data, T> {
    type Item = &'data T;

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

impl<'data, T: Sync + 'data> IndexedParallelIterator for Iter<'data, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
        self.slice.len()
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        callback.callback(IterProducer { slice: self.slice })
    }
}

struct IterProducer<'data, T: Sync> {
    slice: &'data [T],
}

impl<'data, T: 'data + Sync> Producer for IterProducer<'data, T> {
    type IntoIter = ::std::slice::Iter<'data, T>;
    type Item = &'data T;

    fn into_iter(self) -> Self::IntoIter {
        self.slice.iter()
    }

    fn split_at(self, index: usize) -> (Self, Self) {
        let (left, right) = self.slice.split_at(index);
        (IterProducer { slice: left }, IterProducer { slice: right })
    }
}

/// Parallel iterator over immutable overlapping windows of a slice
#[derive(Debug)]
pub struct Windows<'data, T: Sync> {
    window_size: usize,
    slice: &'data [T],
}

impl<'data, T: Sync> Clone for Windows<'data, T> {
    fn clone(&self) -> Self {
        Windows { ..*self }
    }
}

impl<'data, T: Sync + 'data> ParallelIterator for Windows<'data, T> {
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

impl<'data, T: Sync + 'data> IndexedParallelIterator for Windows<'data, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
        assert!(self.window_size >= 1);
        self.slice.len().saturating_sub(self.window_size - 1)
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        callback.callback(WindowsProducer {
            window_size: self.window_size,
            slice: self.slice,
        })
    }
}

struct WindowsProducer<'data, T: Sync> {
    window_size: usize,
    slice: &'data [T],
}

impl<'data, T: 'data + Sync> Producer for WindowsProducer<'data, T> {
    type IntoIter = ::std::slice::Windows<'data, T>;
    type Item = &'data [T];

    fn into_iter(self) -> Self::IntoIter {
        self.slice.windows(self.window_size)
    }

    fn split_at(self, index: usize) -> (Self, Self) {
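        // The left side keeps the first `index` windows, which reach up to
        // element `index + window_size - 1` (clamped to the slice length).
        // The right side starts at `index`, so the two slices deliberately
        // overlap by `window_size - 1` elements and no window is lost.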
        let left_index = Ord::min(self.slice.len(), index + (self.window_size - 1));
        let left = &self.slice[..left_index];
        let right = &self.slice[index..];
        (
            WindowsProducer {
                window_size: self.window_size,
                slice: left,
            },
            WindowsProducer {
                window_size: self.window_size,
                slice: right,
            },
        )
    }
}

/// Parallel iterator over mutable items in a slice
#[derive(Debug)]
pub struct IterMut<'data, T: Send> {
    slice: &'data mut [T],
}

impl<'data, T: Send + 'data> ParallelIterator for IterMut<'data, T> {
    type Item = &'data mut T;

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

impl<'data, T: Send + 'data> IndexedParallelIterator for IterMut<'data, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
        self.slice.len()
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        callback.callback(IterMutProducer { slice: self.slice })
    }
}

struct IterMutProducer<'data, T: Send> {
    slice: &'data mut [T],
}

impl<'data, T: 'data + Send> Producer for IterMutProducer<'data, T> {
    type IntoIter = ::std::slice::IterMut<'data, T>;
    type Item = &'data mut T;

    fn into_iter(self) -> Self::IntoIter {
        self.slice.iter_mut()
    }

    fn split_at(self, index: usize) -> (Self, Self) {
        let (left, right) = self.slice.split_at_mut(index);
        (
            IterMutProducer { slice: left },
            IterMutProducer { slice: right },
        )
    }
}

/// Parallel iterator over slices separated by a predicate
pub struct Split<'data, T, P> {
    slice: &'data [T],
    separator: P,
}

impl<'data, T, P: Clone> Clone for Split<'data, T, P> {
    fn clone(&self) -> Self {
        Split {
            separator: self.separator.clone(),
            ..*self
        }
    }
}

impl<'data, T: Debug, P> Debug for Split<'data, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("Split").field("slice", &self.slice).finish()
    }
}

impl<'data, T, P> ParallelIterator for Split<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Sync,
{
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitProducer::new(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Parallel iterator over slices separated by a predicate,
/// including the matched part as a terminator.
pub struct SplitInclusive<'data, T, P> {
    slice: &'data [T],
    separator: P,
}

impl<'data, T, P: Clone> Clone for SplitInclusive<'data, T, P> {
    fn clone(&self) -> Self {
        SplitInclusive {
            separator: self.separator.clone(),
            ..*self
        }
    }
}

impl<'data, T: Debug, P> Debug for SplitInclusive<'data, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("SplitInclusive")
            .field("slice", &self.slice)
            .finish()
    }
}

impl<'data, T, P> ParallelIterator for SplitInclusive<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Sync,
{
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitInclusiveProducer::new_incl(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Implement support for `SplitProducer`.
impl<'data, T, P> Fissile<P> for &'data [T]
where
    P: Fn(&T) -> bool,
{
    fn length(&self) -> usize {
        self.len()
    }

    fn midpoint(&self, end: usize) -> usize {
        end / 2
    }

    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
        self[start..end].iter().position(separator)
    }

    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
        self[..end].iter().rposition(separator)
    }

    fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
        if INCL {
            // include the separator in the left side
            self.split_at(index + 1)
        } else {
            let (left, right) = self.split_at(index);
            (left, &right[1..]) // skip the separator
        }
    }

    fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
    where
        F: Folder<Self>,
        Self: Send,
    {
        if INCL {
            debug_assert!(!skip_last);
            folder.consume_iter(self.split_inclusive(separator))
        } else {
            let mut split = self.split(separator);
            if skip_last {
                split.next_back();
            }
            folder.consume_iter(split)
        }
    }
}

/// Parallel iterator over mutable slices separated by a predicate
pub struct SplitMut<'data, T, P> {
    slice: &'data mut [T],
    separator: P,
}

impl<'data, T: Debug, P> Debug for SplitMut<'data, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("SplitMut")
            .field("slice", &self.slice)
            .finish()
    }
}

impl<'data, T, P> ParallelIterator for SplitMut<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Send,
{
    type Item = &'data mut [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitProducer::new(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Parallel iterator over mutable slices separated by a predicate,
/// including the matched part as a terminator.
pub struct SplitInclusiveMut<'data, T, P> {
    slice: &'data mut [T],
    separator: P,
}

impl<'data, T: Debug, P> Debug for SplitInclusiveMut<'data, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("SplitInclusiveMut")
            .field("slice", &self.slice)
            .finish()
    }
}

impl<'data, T, P> ParallelIterator for SplitInclusiveMut<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Send,
{
    type Item = &'data mut [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitInclusiveProducer::new_incl(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Implement support for `SplitProducer`.
impl<'data, T, P> Fissile<P> for &'data mut [T]
where
    P: Fn(&T) -> bool,
{
    fn length(&self) -> usize {
        self.len()
    }

    fn midpoint(&self, end: usize) -> usize {
        end / 2
    }

    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
        self[start..end].iter().position(separator)
    }

    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
        self[..end].iter().rposition(separator)
    }

    fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
        if INCL {
            // include the separator in the left side
            self.split_at_mut(index + 1)
        } else {
            let (left, right) = self.split_at_mut(index);
            (left, &mut right[1..]) // skip the separator
        }
    }

    fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
    where
        F: Folder<Self>,
        Self: Send,
    {
        if INCL {
            debug_assert!(!skip_last);
            folder.consume_iter(self.split_inclusive_mut(separator))
        } else {
            let mut split = self.split_mut(separator);
            if skip_last {
                split.next_back();
            }
            folder.consume_iter(split)
        }
    }
}