// par_iter/slice/mod.rs

//! Parallel iterator types for [slices][std::slice]
//!
//! You will rarely need to interact with this module directly unless you need
//! to name one of the iterator types.
//!
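//! For example, storing one of these iterators in a struct requires naming
//! its type. A minimal sketch, assuming this module is exposed as
//! `par_iter::slice` (the `Smoother` struct itself is hypothetical):
//!
//! ```
//! use par_iter::slice::Windows;
//!
//! // Holds a parallel iterator over overlapping windows of samples.
//! struct Smoother<'a> {
//!     windows: Windows<'a, f64>,
//! }
//! ```
//!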
//! [std::slice]: https://doc.rust-lang.org/stable/std/slice/

mod chunk_by;
mod chunks;
mod rchunks;
mod sort;

mod test;

use std::{
    cmp::Ordering,
    fmt::{self, Debug},
    mem,
};

use self::sort::{par_mergesort, par_quicksort};
pub use self::{
    chunk_by::{ChunkBy, ChunkByMut},
    chunks::{Chunks, ChunksExact, ChunksExactMut, ChunksMut},
    rchunks::{RChunks, RChunksExact, RChunksExactMut, RChunksMut},
};
use crate::{
    iter::{plumbing::*, *},
    split_producer::*,
};

/// Parallel extensions for slices.
pub trait ParallelSlice<T: Sync> {
    /// Returns a plain slice, which is used to implement the rest of the
    /// parallel methods.
    fn as_parallel_slice(&self) -> &[T];

    /// Returns a parallel iterator over subslices separated by elements that
    /// match the separator.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let products: Vec<_> = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9]
    ///     .par_split(|i| *i == 0)
    ///     .map(|numbers| numbers.iter().product::<i32>())
    ///     .collect();
    /// assert_eq!(products, [6, 64, 162]);
    /// ```
    fn par_split<P>(&self, separator: P) -> Split<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        Split {
            slice: self.as_parallel_slice(),
            separator,
        }
    }

    /// Returns a parallel iterator over subslices separated by elements that
    /// match the separator, including the matched part as a terminator.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let lengths: Vec<_> = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9]
    ///     .par_split_inclusive(|i| *i == 0)
    ///     .map(|numbers| numbers.len())
    ///     .collect();
    /// assert_eq!(lengths, [4, 4, 3]);
    /// ```
    fn par_split_inclusive<P>(&self, separator: P) -> SplitInclusive<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        SplitInclusive {
            slice: self.as_parallel_slice(),
            separator,
        }
    }

    /// Returns a parallel iterator over all contiguous windows of length
    /// `window_size`. The windows overlap.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let windows: Vec<_> = [1, 2, 3].par_windows(2).collect();
    /// assert_eq!(vec![[1, 2], [2, 3]], windows);
    /// ```
    fn par_windows(&self, window_size: usize) -> Windows<'_, T> {
        Windows {
            window_size,
            slice: self.as_parallel_slice(),
        }
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of
    /// `self` at a time. The chunks do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks(2).collect();
    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4], &[5]]);
    /// ```
    #[track_caller]
    fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        Chunks::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over `chunk_size` elements of
    /// `self` at a time. The chunks do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks_exact(2).collect();
    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4]]);
    /// ```
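    ///
    /// If you need those leftover elements, the returned iterator exposes
    /// them. A sketch, assuming this crate mirrors rayon's
    /// `ChunksExact::remainder`:
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let data = [1, 2, 3, 4, 5];
    /// // The final `5` does not fill a chunk of 2, so it lands in the
    /// // remainder instead of being yielded.
    /// let chunks = data.par_chunks_exact(2);
    /// assert_eq!(chunks.remainder(), &[5]);
    /// ```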
    #[track_caller]
    fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksExact::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of `self`
    /// at a time, starting at the end. The chunks do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks(2).collect();
    /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3], &[1]]);
    /// ```
    #[track_caller]
    fn par_rchunks(&self, chunk_size: usize) -> RChunks<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunks::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over `chunk_size` elements of `self` at a
    /// time, starting at the end. The chunks do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks_exact(2).collect();
    /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3]]);
    /// ```
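    ///
    /// Here the skipped elements sit at the front of the slice. A sketch,
    /// assuming this crate mirrors rayon's `RChunksExact::remainder`:
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let data = [1, 2, 3, 4, 5];
    /// // Chunking [1, 2, 3, 4, 5] by 2 from the back leaves `1` unyielded.
    /// let chunks = data.par_rchunks_exact(2);
    /// assert_eq!(chunks.remainder(), &[1]);
    /// ```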
    #[track_caller]
    fn par_rchunks_exact(&self, chunk_size: usize) -> RChunksExact<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksExact::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over the slice producing non-overlapping
    /// runs of elements using the predicate to separate them.
    ///
    /// The predicate is called on pairs of consecutive elements: first on
    /// `slice[0]` and `slice[1]`, then on `slice[1]` and `slice[2]`, and so
    /// on.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 2, 3, 3, 3].par_chunk_by(|&x, &y| x == y).collect();
    /// assert_eq!(chunks[0], &[1]);
    /// assert_eq!(chunks[1], &[2, 2]);
    /// assert_eq!(chunks[2], &[3, 3, 3]);
    /// ```
    fn par_chunk_by<F>(&self, pred: F) -> ChunkBy<'_, T, F>
    where
        F: Fn(&T, &T) -> bool + Send + Sync,
    {
        ChunkBy::new(self.as_parallel_slice(), pred)
    }
}

impl<T: Sync> ParallelSlice<T> for [T] {
    #[inline]
    fn as_parallel_slice(&self) -> &[T] {
        self
    }
}

/// Parallel extensions for mutable slices.
pub trait ParallelSliceMut<T: Send> {
    /// Returns a plain mutable slice, which is used to implement the rest of
    /// the parallel methods.
    fn as_parallel_slice_mut(&mut self) -> &mut [T];

    /// Returns a parallel iterator over mutable subslices separated by
    /// elements that match the separator.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let mut array = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9];
    /// array.par_split_mut(|i| *i == 0)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [3, 2, 1, 0, 8, 4, 2, 0, 9, 6, 3]);
    /// ```
    fn par_split_mut<P>(&mut self, separator: P) -> SplitMut<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        SplitMut {
            slice: self.as_parallel_slice_mut(),
            separator,
        }
    }

    /// Returns a parallel iterator over mutable subslices separated by elements
    /// that match the separator, including the matched part as a terminator.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let mut array = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9];
    /// array.par_split_inclusive_mut(|i| *i == 0)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [0, 3, 2, 1, 0, 8, 4, 2, 9, 6, 3]);
    /// ```
    fn par_split_inclusive_mut<P>(&mut self, separator: P) -> SplitInclusiveMut<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        SplitInclusiveMut {
            slice: self.as_parallel_slice_mut(),
            separator,
        }
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of
    /// `self` at a time. The chunks are mutable and do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_chunks_mut(2)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [2, 1, 4, 3, 5]);
    /// ```
    #[track_caller]
    fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over `chunk_size` elements of
    /// `self` at a time. The chunks are mutable and do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_chunks_exact_mut(3)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [3, 2, 1, 4, 5]);
    /// ```
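    ///
    /// The leftover elements remain reachable, mutably. A sketch, assuming
    /// this crate mirrors rayon's `ChunksExactMut::into_remainder`:
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// // One chunk of 3 is yielded; `[4, 5]` is the mutable remainder.
    /// let rem = array.par_chunks_exact_mut(3).into_remainder();
    /// assert_eq!(rem, &mut [4, 5]);
    /// ```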
    #[track_caller]
    fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of `self`
    /// at a time, starting at the end. The chunks are mutable and do not
    /// overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_rchunks_mut(2)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [1, 3, 2, 5, 4]);
    /// ```
    #[track_caller]
    fn par_rchunks_mut(&mut self, chunk_size: usize) -> RChunksMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over `chunk_size` elements of `self` at a
    /// time, starting at the end. The chunks are mutable and do not
    /// overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_rchunks_exact_mut(3)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [1, 2, 5, 4, 3]);
    /// ```
    #[track_caller]
    fn par_rchunks_exact_mut(&mut self, chunk_size: usize) -> RChunksExactMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Sorts the slice in parallel.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n*
    /// \* log(*n*)) worst-case.
    ///
    /// When applicable, unstable sorting is preferred because it is generally
    /// faster than stable sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable`](#method.par_sort_unstable).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly
    /// sorted, or consists of two or more sorted sequences concatenated one
    /// after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for
    /// very short slices a non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into
    /// smaller chunks and all chunks are sorted in parallel. Then, adjacent
    /// chunks that together form non-descending or descending runs are
    /// concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and a parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    ///
    /// let mut v = [-5, 4, 1, -3, 2];
    ///
    /// v.par_sort();
    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
    /// ```
    fn par_sort(&mut self)
    where
        T: Ord,
    {
        par_mergesort(self.as_parallel_slice_mut(), T::lt);
    }

    /// Sorts the slice in parallel with a comparator function.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n*
    /// \* log(*n*)) worst-case.
    ///
    /// The comparator function must define a total ordering for the elements in
    /// the slice. If the ordering is not total, the order of the elements
    /// is unspecified. An order is a total order if it is (for all `a`, `b`
    /// and `c`):
    ///
    /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b`
    ///   is true, and
    /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold
    ///   for both `==` and `>`.
    ///
    /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN !=
    /// NaN`, we can use `partial_cmp` as our sort function when we know the
    /// slice doesn't contain a `NaN`.
    ///
    /// ```
    /// use par_iter::prelude::*;
    ///
    /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
    /// floats.par_sort_by(|a, b| a.partial_cmp(b).unwrap());
    /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
    /// ```
    ///
    /// When applicable, unstable sorting is preferred because it is generally
    /// faster than stable sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable_by`](#method.par_sort_unstable_by).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly
    /// sorted, or consists of two or more sorted sequences concatenated one
    /// after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for
    /// very short slices a non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into
    /// smaller chunks and all chunks are sorted in parallel. Then, adjacent
    /// chunks that together form non-descending or descending runs are
    /// concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and a parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    ///
    /// let mut v = [5, 4, 1, 3, 2];
    /// v.par_sort_by(|a, b| a.cmp(b));
    /// assert_eq!(v, [1, 2, 3, 4, 5]);
    ///
    /// // reverse sorting
    /// v.par_sort_by(|a, b| b.cmp(a));
    /// assert_eq!(v, [5, 4, 3, 2, 1]);
    /// ```
    fn par_sort_by<F>(&mut self, compare: F)
    where
        F: Fn(&T, &T) -> Ordering + Sync,
    {
        par_mergesort(self.as_parallel_slice_mut(), |a, b| {
            compare(a, b) == Ordering::Less
        });
    }

    /// Sorts the slice in parallel with a key extraction function.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m*
    /// \* *n* \* log(*n*)) worst-case, where the key function is *O*(*m*).
    ///
    /// For expensive key functions (e.g. functions that are not simple property
    /// accesses or basic operations),
    /// [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) is likely to
    /// be significantly faster, as it does not recompute element keys.
    ///
    /// When applicable, unstable sorting is preferred because it is generally
    /// faster than stable sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable_by_key`](#method.par_sort_unstable_by_key).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly
    /// sorted, or consists of two or more sorted sequences concatenated one
    /// after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for
    /// very short slices a non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into
    /// smaller chunks and all chunks are sorted in parallel. Then, adjacent
    /// chunks that together form non-descending or descending runs are
    /// concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and a parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 1, -3, 2];
    ///
    /// v.par_sort_by_key(|k| k.abs());
    /// assert_eq!(v, [1, 2, -3, 4, -5]);
    /// ```
    fn par_sort_by_key<K, F>(&mut self, f: F)
    where
        K: Ord,
        F: Fn(&T) -> K + Sync,
    {
        par_mergesort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
    }

    /// Sorts the slice in parallel with a key extraction function.
    ///
    /// During sorting, the key function is called at most once per element, by
    /// using temporary storage to remember the results of key evaluation.
    /// The key function is called in parallel, so the order of calls is
    /// completely unspecified.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m*
    /// \* *n* + *n* \* log(*n*)) worst-case, where the key function is
    /// *O*(*m*).
    ///
    /// For simple key functions (e.g., functions that are property accesses or
    /// basic operations), [`par_sort_by_key`](#method.par_sort_by_key) is
    /// likely to be faster.
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort]
    /// by Orson Peters, which combines the fast average case of randomized
    /// quicksort with the fast worst case of heapsort, while achieving
    /// linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always
    /// provide deterministic behavior.
    ///
    /// In the worst case, the algorithm allocates temporary storage in a
    /// `Vec<(K, usize)>` the length of the slice.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed
    /// by recursive calls. The partitioning phase is sequential, but the
    /// two recursive calls are performed in parallel. Finally, after
    /// sorting the cached keys, the item positions are updated sequentially.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 32, -3, 2];
    ///
    /// v.par_sort_by_cached_key(|k| k.to_string());
    /// assert!(v == [-3, -5, 2, 32, 4]);
    /// ```
    fn par_sort_by_cached_key<K, F>(&mut self, f: F)
    where
        F: Fn(&T) -> K + Sync,
        K: Ord + Send,
    {
        let slice = self.as_parallel_slice_mut();
        let len = slice.len();
        if len < 2 {
            return;
        }

        // Helper macro for indexing our vector by the smallest possible type, to reduce
        // allocation.
        macro_rules! sort_by_key {
            ($t:ty) => {{
                let mut indices: Vec<_> = slice
                    .par_iter_mut()
                    .enumerate()
                    .map(|(i, x)| (f(&*x), i as $t))
                    .collect();
                // The elements of `indices` are unique, as they are indexed, so any sort will
                // be stable with respect to the original slice. We use `sort_unstable`
                // here because it requires less memory allocation.
                indices.par_sort_unstable();
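                // Apply the permutation in place: `indices[i].1` starts as
                // the original position of the element ranked `i`. Earlier
                // swaps may have displaced that element; each finished step
                // `j` records in `indices[j].1` where the element sitting at
                // `j` was moved, so chase that chain to the element's current
                // position before swapping it into place.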
                for i in 0..len {
                    let mut index = indices[i].1;
                    while (index as usize) < i {
                        index = indices[index as usize].1;
                    }
                    indices[i].1 = index;
                    slice.swap(i, index as usize);
                }
            }};
        }

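        // Pick the narrowest index type that actually shrinks the `(K, idx)`
        // pair; `K`'s alignment may pad a smaller index out to the same total
        // size, in which case nothing is gained by using it.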
        let sz_u8 = mem::size_of::<(K, u8)>();
        let sz_u16 = mem::size_of::<(K, u16)>();
        let sz_u32 = mem::size_of::<(K, u32)>();
        let sz_usize = mem::size_of::<(K, usize)>();

        if sz_u8 < sz_u16 && len <= (u8::MAX as usize) {
            return sort_by_key!(u8);
        }
        if sz_u16 < sz_u32 && len <= (u16::MAX as usize) {
            return sort_by_key!(u16);
        }
        if sz_u32 < sz_usize && len <= (u32::MAX as usize) {
            return sort_by_key!(u32);
        }
        sort_by_key!(usize)
    }

    /// Sorts the slice in parallel, but might not preserve the order of equal
    /// elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort]
    /// by Orson Peters, which combines the fast average case of randomized
    /// quicksort with the fast worst case of heapsort, while achieving
    /// linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always
    /// provide deterministic behavior.
    ///
    /// It is typically faster than stable sorting, except in a few special
    /// cases, e.g., when the slice consists of several concatenated sorted
    /// sequences.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed
    /// by recursive calls. The partitioning phase is sequential, but the
    /// two recursive calls are performed in parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    ///
    /// let mut v = [-5, 4, 1, -3, 2];
    ///
    /// v.par_sort_unstable();
    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
    /// ```
    fn par_sort_unstable(&mut self)
    where
        T: Ord,
    {
        par_quicksort(self.as_parallel_slice_mut(), T::lt);
    }

    /// Sorts the slice in parallel with a comparator function, but might not
    /// preserve the order of equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// The comparator function must define a total ordering for the elements in
    /// the slice. If the ordering is not total, the order of the elements
    /// is unspecified. An order is a total order if it is (for all `a`, `b`
    /// and `c`):
    ///
    /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b`
    ///   is true, and
    /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold
    ///   for both `==` and `>`.
    ///
    /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN !=
    /// NaN`, we can use `partial_cmp` as our sort function when we know the
    /// slice doesn't contain a `NaN`.
    ///
    /// ```
    /// use par_iter::prelude::*;
    ///
    /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
    /// floats.par_sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
    /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
    /// ```
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort]
    /// by Orson Peters, which combines the fast average case of randomized
    /// quicksort with the fast worst case of heapsort, while achieving
    /// linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always
    /// provide deterministic behavior.
    ///
    /// It is typically faster than stable sorting, except in a few special
    /// cases, e.g., when the slice consists of several concatenated sorted
    /// sequences.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed
    /// by recursive calls. The partitioning phase is sequential, but the
    /// two recursive calls are performed in parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    ///
    /// let mut v = [5, 4, 1, 3, 2];
    /// v.par_sort_unstable_by(|a, b| a.cmp(b));
    /// assert_eq!(v, [1, 2, 3, 4, 5]);
    ///
    /// // reverse sorting
    /// v.par_sort_unstable_by(|a, b| b.cmp(a));
    /// assert_eq!(v, [5, 4, 3, 2, 1]);
    /// ```
    fn par_sort_unstable_by<F>(&mut self, compare: F)
    where
        F: Fn(&T, &T) -> Ordering + Sync,
    {
        par_quicksort(self.as_parallel_slice_mut(), |a, b| {
            compare(a, b) == Ordering::Less
        });
    }

    /// Sorts the slice in parallel with a key extraction function, but might
    /// not preserve the order of equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*m* \* *n* \* log(*n*)) worst-case,
    /// where the key function is *O*(*m*).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort]
    /// by Orson Peters, which combines the fast average case of randomized
    /// quicksort with the fast worst case of heapsort, while achieving
    /// linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always
    /// provide deterministic behavior.
    ///
    /// Due to its key calling strategy, `par_sort_unstable_by_key` is likely
    /// to be slower than
    /// [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) in cases
    /// where the key function is expensive.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed
    /// by recursive calls. The partitioning phase is sequential, but the
    /// two recursive calls are performed in parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 1, -3, 2];
    ///
    /// v.par_sort_unstable_by_key(|k| k.abs());
    /// assert_eq!(v, [1, 2, -3, 4, -5]);
    /// ```
    fn par_sort_unstable_by_key<K, F>(&mut self, f: F)
    where
        K: Ord,
        F: Fn(&T) -> K + Sync,
    {
        par_quicksort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
    }

    /// Returns a parallel iterator over the slice producing non-overlapping
    /// mutable runs of elements using the predicate to separate them.
    ///
    /// The predicate is called on pairs of consecutive elements: first on
    /// `slice[0]` and `slice[1]`, then on `slice[1]` and `slice[2]`, and so
    /// on.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let mut xs = [1, 2, 2, 3, 3, 3];
    /// let chunks: Vec<_> = xs.par_chunk_by_mut(|&x, &y| x == y).collect();
    /// assert_eq!(chunks[0], &mut [1]);
    /// assert_eq!(chunks[1], &mut [2, 2]);
    /// assert_eq!(chunks[2], &mut [3, 3, 3]);
    /// ```
    fn par_chunk_by_mut<F>(&mut self, pred: F) -> ChunkByMut<'_, T, F>
    where
        F: Fn(&T, &T) -> bool + Send + Sync,
    {
        ChunkByMut::new(self.as_parallel_slice_mut(), pred)
    }
}

impl<T: Send> ParallelSliceMut<T> for [T] {
    #[inline]
    fn as_parallel_slice_mut(&mut self) -> &mut [T] {
        self
    }
}

impl<'data, T: Sync + 'data> IntoParallelIterator for &'data [T] {
    type Item = &'data T;
    type Iter = Iter<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        Iter { slice: self }
    }
}

impl<'data, T: Send + 'data> IntoParallelIterator for &'data mut [T] {
    type Item = &'data mut T;
    type Iter = IterMut<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        IterMut { slice: self }
    }
}

/// Parallel iterator over immutable items in a slice
#[derive(Debug)]
pub struct Iter<'data, T: Sync> {
    slice: &'data [T],
}

impl<'data, T: Sync> Clone for Iter<'data, T> {
    fn clone(&self) -> Self {
        Iter { ..*self }
    }
}

impl<'data, T: Sync + 'data> ParallelIterator for Iter<'data, T> {
    type Item = &'data T;

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

impl<'data, T: Sync + 'data> IndexedParallelIterator for Iter<'data, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
        self.slice.len()
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        callback.callback(IterProducer { slice: self.slice })
    }
}

struct IterProducer<'data, T: Sync> {
    slice: &'data [T],
}

impl<'data, T: 'data + Sync> Producer for IterProducer<'data, T> {
    type IntoIter = ::std::slice::Iter<'data, T>;
    type Item = &'data T;

    fn into_iter(self) -> Self::IntoIter {
        self.slice.iter()
    }

    fn split_at(self, index: usize) -> (Self, Self) {
        let (left, right) = self.slice.split_at(index);
        (IterProducer { slice: left }, IterProducer { slice: right })
    }
}

/// Parallel iterator over immutable overlapping windows of a slice
#[derive(Debug)]
pub struct Windows<'data, T: Sync> {
    window_size: usize,
    slice: &'data [T],
}

impl<'data, T: Sync> Clone for Windows<'data, T> {
    fn clone(&self) -> Self {
        Windows { ..*self }
    }
}

impl<'data, T: Sync + 'data> ParallelIterator for Windows<'data, T> {
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

impl<'data, T: Sync + 'data> IndexedParallelIterator for Windows<'data, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
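        // A slice of length `n` contains `n - (window_size - 1)` complete
        // windows; `saturating_sub` yields zero when the slice is shorter
        // than a single window.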
        assert!(self.window_size >= 1);
        self.slice.len().saturating_sub(self.window_size - 1)
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        callback.callback(WindowsProducer {
            window_size: self.window_size,
            slice: self.slice,
        })
    }
}

struct WindowsProducer<'data, T: Sync> {
    window_size: usize,
    slice: &'data [T],
}

impl<'data, T: 'data + Sync> Producer for WindowsProducer<'data, T> {
    type IntoIter = ::std::slice::Windows<'data, T>;
    type Item = &'data [T];

    fn into_iter(self) -> Self::IntoIter {
        self.slice.windows(self.window_size)
    }

    fn split_at(self, index: usize) -> (Self, Self) {
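        // Windows overlap, so the two halves must share `window_size - 1`
        // elements: the left side keeps them past `index` so that its final
        // windows are complete, while the right side starts at `index`.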
        let left_index = Ord::min(self.slice.len(), index + (self.window_size - 1));
        let left = &self.slice[..left_index];
        let right = &self.slice[index..];
        (
            WindowsProducer {
                window_size: self.window_size,
                slice: left,
            },
            WindowsProducer {
                window_size: self.window_size,
                slice: right,
            },
        )
    }
}

/// Parallel iterator over mutable items in a slice
#[derive(Debug)]
pub struct IterMut<'data, T: Send> {
    slice: &'data mut [T],
}

impl<'data, T: Send + 'data> ParallelIterator for IterMut<'data, T> {
    type Item = &'data mut T;

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

impl<'data, T: Send + 'data> IndexedParallelIterator for IterMut<'data, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
        self.slice.len()
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        callback.callback(IterMutProducer { slice: self.slice })
    }
}

struct IterMutProducer<'data, T: Send> {
    slice: &'data mut [T],
}

impl<'data, T: 'data + Send> Producer for IterMutProducer<'data, T> {
    type IntoIter = ::std::slice::IterMut<'data, T>;
    type Item = &'data mut T;

    fn into_iter(self) -> Self::IntoIter {
        self.slice.iter_mut()
    }

    fn split_at(self, index: usize) -> (Self, Self) {
        let (left, right) = self.slice.split_at_mut(index);
        (
            IterMutProducer { slice: left },
            IterMutProducer { slice: right },
        )
    }
}

/// Parallel iterator over slices separated by a predicate
pub struct Split<'data, T, P> {
    slice: &'data [T],
    separator: P,
}

impl<'data, T, P: Clone> Clone for Split<'data, T, P> {
    fn clone(&self) -> Self {
        Split {
            separator: self.separator.clone(),
            ..*self
        }
    }
}

impl<'data, T: Debug, P> Debug for Split<'data, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("Split").field("slice", &self.slice).finish()
    }
}

impl<'data, T, P> ParallelIterator for Split<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Sync,
{
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitProducer::new(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Parallel iterator over slices separated by a predicate,
/// including the matched part as a terminator.
pub struct SplitInclusive<'data, T, P> {
    slice: &'data [T],
    separator: P,
}

impl<'data, T, P: Clone> Clone for SplitInclusive<'data, T, P> {
    fn clone(&self) -> Self {
        SplitInclusive {
            separator: self.separator.clone(),
            ..*self
        }
    }
}

impl<'data, T: Debug, P> Debug for SplitInclusive<'data, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("SplitInclusive")
            .field("slice", &self.slice)
            .finish()
    }
}

impl<'data, T, P> ParallelIterator for SplitInclusive<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Sync,
{
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitInclusiveProducer::new_incl(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Implement support for `SplitProducer`.
impl<'data, T, P> Fissile<P> for &'data [T]
where
    P: Fn(&T) -> bool,
{
    fn length(&self) -> usize {
        self.len()
    }

    fn midpoint(&self, end: usize) -> usize {
        end / 2
    }

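    // Finds the next separator in `self[start..end]`; the returned position
    // is relative to `start`, since `position` runs on the subslice.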
    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
        self[start..end].iter().position(separator)
    }

    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
        self[..end].iter().rposition(separator)
    }

    fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
        if INCL {
            // include the separator in the left side
            self.split_at(index + 1)
        } else {
            let (left, right) = self.split_at(index);
            (left, &right[1..]) // skip the separator
        }
    }

    fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
    where
        F: Folder<Self>,
        Self: Send,
    {
        if INCL {
            debug_assert!(!skip_last);
            folder.consume_iter(self.split_inclusive(separator))
        } else {
            let mut split = self.split(separator);
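            // The producer sometimes handles the final fragment itself
            // (signaled by `skip_last`); drop it here so it is not yielded
            // twice.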
            if skip_last {
                split.next_back();
            }
            folder.consume_iter(split)
        }
    }
}

/// Parallel iterator over mutable slices separated by a predicate
pub struct SplitMut<'data, T, P> {
    slice: &'data mut [T],
    separator: P,
}

impl<'data, T: Debug, P> Debug for SplitMut<'data, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("SplitMut")
            .field("slice", &self.slice)
            .finish()
    }
}

impl<'data, T, P> ParallelIterator for SplitMut<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Send,
{
    type Item = &'data mut [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitProducer::new(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Parallel iterator over mutable slices separated by a predicate,
/// including the matched part as a terminator.
pub struct SplitInclusiveMut<'data, T, P> {
    slice: &'data mut [T],
    separator: P,
}

impl<'data, T: Debug, P> Debug for SplitInclusiveMut<'data, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("SplitInclusiveMut")
            .field("slice", &self.slice)
            .finish()
    }
}

impl<'data, T, P> ParallelIterator for SplitInclusiveMut<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Send,
{
    type Item = &'data mut [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitInclusiveProducer::new_incl(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Implement support for `SplitProducer`.
impl<'data, T, P> Fissile<P> for &'data mut [T]
where
    P: Fn(&T) -> bool,
{
    fn length(&self) -> usize {
        self.len()
    }

    fn midpoint(&self, end: usize) -> usize {
        end / 2
    }

    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
        self[start..end].iter().position(separator)
    }

    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
        self[..end].iter().rposition(separator)
    }

    fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
        if INCL {
            // include the separator in the left side
            self.split_at_mut(index + 1)
        } else {
            let (left, right) = self.split_at_mut(index);
            (left, &mut right[1..]) // skip the separator
        }
    }

    fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
    where
        F: Folder<Self>,
        Self: Send,
    {
        if INCL {
            debug_assert!(!skip_last);
            folder.consume_iter(self.split_inclusive_mut(separator))
        } else {
            let mut split = self.split_mut(separator);
            if skip_last {
                split.next_back();
            }
            folder.consume_iter(split)
        }
    }
}