1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
use crate::{ChunkSize, Fallible, NumThreads, ParCollectInto, Params};
use orx_split_vec::{Recursive, SplitVec};
use std::{cmp::Ordering, ops::Add};

/// An iterator used to define a computation that can be executed in parallel.
/// An iterator used to define a computation that can be executed in parallel.
pub trait Par
where
    Self: Sized,
{
    /// Type of the items that the iterator yields.
    type Item: Send + Sync;

    /// Parameters of the parallel computation which can be set by `num_threads` and `chunk_size` methods.
    fn params(&self) -> Params;

    /// Transforms the parallel computation with a new one with the given `num_threads`.
    ///
    /// See [`crate::NumThreads`] for details.
    ///
    /// `num_threads` represents the degree of parallelization. It is possible to define an upper bound on the number of threads to be used for the parallel computation.
    /// When set to **1**, the computation will be executed sequentially without any overhead.
    /// In this sense, parallel iterators defined in this crate are a union of sequential and parallel execution.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    /// use std::num::NonZeroUsize;
    ///
    /// let expected = (0..(1 << 10)).sum();
    ///
    /// // unset/default -> NumThreads::Auto
    /// let sum = (0..(1 << 10)).par().sum();
    /// assert_eq!(sum, expected);
    ///
    /// // A: NumThreads::Auto
    /// let sum = (0..(1 << 10)).par().num_threads(0).sum();
    /// assert_eq!(sum, expected);
    ///
    /// let sum = (0..(1 << 10)).par().num_threads(NumThreads::Auto).sum();
    /// assert_eq!(sum, expected);
    ///
    /// // B: with a limit on the number of threads
    /// let sum = (0..(1 << 10)).par().num_threads(4).sum();
    /// assert_eq!(sum, expected);
    ///
    /// let sum = (0..(1 << 10)).par().num_threads(NumThreads::Max(NonZeroUsize::new(4).unwrap())).sum();
    /// assert_eq!(sum, expected);
    ///
    /// // C: sequential execution
    /// let sum = (0..(1 << 10)).par().num_threads(1).sum();
    /// assert_eq!(sum, expected);
    ///
    /// let sum = (0..(1 << 10)).par().num_threads(NumThreads::sequential()).sum();
    /// assert_eq!(sum, expected);
    /// ```
    ///
    /// # Rules of Thumb / Guidelines
    ///
    /// It is recommended to set this parameter to its default value, `NumThreads::Auto`.
    /// This setting assumes that it can use all available threads; however, the computation will spawn new threads only when required.
    /// In other words, when we can dynamically decide that the task is not large enough to justify spawning a new thread, the parallel execution will avoid it.
    ///
    /// A special case is `NumThreads::Max(NonZeroUsize::new(1).unwrap())`, or equivalently `NumThreads::sequential()`.
    /// This will lead to a sequential execution of the defined computation on the main thread.
    /// Both in terms of used resources and computation time, this mode is not similar but **identical** to a sequential execution using the regular sequential `Iterator`s.
    ///
    /// Lastly, `NumThreads::Max(t)` where `t >= 2` can be used in the following scenarios:
    /// * We have a strict limit on the resources that we can use for this computation, even if the hardware has more resources.
    /// Parallel execution will ensure that `t` will never be exceeded.
    /// * We have a computation which is extremely time-critical and our benchmarks show that `t` outperforms the `NumThreads::Auto` on the corresponding system.
    fn num_threads(self, num_threads: impl Into<NumThreads>) -> Self;

    /// Transforms the parallel computation with a new one with the given `chunk_size`.
    ///
    /// See [`crate::ChunkSize`] for details.
    ///
    /// `chunk_size` represents the batch size of elements each thread will pull from the main iterator once it becomes idle again.
    /// It is possible to define a minimum or exact chunk size.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    /// use std::num::NonZeroUsize;
    ///
    /// let expected = (0..(1 << 10)).sum();
    ///
    /// // unset/default -> ChunkSize::Auto
    /// let sum = (0..(1 << 10)).par().sum();
    /// assert_eq!(sum, expected);
    ///
    /// // A: ChunkSize::Auto
    /// let sum = (0..(1 << 10)).par().chunk_size(0).sum();
    /// assert_eq!(sum, expected);
    ///
    /// let sum = (0..(1 << 10)).par().chunk_size(ChunkSize::Auto).sum();
    /// assert_eq!(sum, expected);
    ///
    /// // B: with an exact chunk size
    /// let sum = (0..(1 << 10)).par().chunk_size(1024).sum();
    /// assert_eq!(sum, expected);
    ///
    /// let sum = (0..(1 << 10)).par().chunk_size(ChunkSize::Exact(NonZeroUsize::new(1024).unwrap())).sum();
    /// assert_eq!(sum, expected);
    ///
    /// // C: with lower bound on the chunk size, execution may increase chunk size whenever it improves performance
    /// let sum = (0..(1 << 10)).par().chunk_size(ChunkSize::Min(NonZeroUsize::new(1024).unwrap())).sum();
    /// assert_eq!(sum, expected);
    /// ```
    ///
    /// # Rules of Thumb / Guidelines
    ///
    /// The objective of this parameter is to balance the overhead of parallelization and cost of heterogeneity of tasks.
    ///
    /// In order to illustrate, assume that there exist 8 elements to process, or 8 jobs to execute, and we will use 2 threads for this computation.
    /// Two extreme strategies can be defined as follows.
    ///
    /// * **Perfect Sharing of Tasks**
    ///   * Setting chunk size to 4 provides a perfect division of tasks in terms of quantity.
    /// Each thread will retrieve 4 elements at once in one pull and process them.
    /// This *one pull* per thread can be considered as the parallelization overhead and this is the best/minimum we can achieve.
    ///   * Drawback of this approach, on the other hand, is observed when the execution time of each job is significantly different; i.e., when we have heterogeneous tasks.
    ///   * Assume, for instance, that the first element requires 7 units of time while all remaining elements require 1 unit of time.
    ///   * Roughly, the parallel execution with a chunk size of 4 would complete in 10 units of time, which is the execution time of the first thread (7 + 3*1).
    ///   * The second thread will complete its 4 tasks in 4 units of time and will remain idle for 6 units of time.
    /// * **Perfect Handling of Heterogeneity**
    ///   * Setting chunk size to 1 provides a perfect way to deal with heterogeneous tasks, minimizing the idle time of threads.
    /// Each thread will retrieve elements one by one whenever they become idle.
    ///   * Considering the heterogeneous example above, the parallel execution with a chunk size of 1 would complete around 7 units of time.
    ///     * This is again the execution time of the first thread, which will only execute the first element.
    ///     * The second thread will execute the remaining 7 elements, again in 7 units of time.
    ///   * None of the threads will be idle, which is the best we can achieve.
    ///   * Drawback of this approach is the parallelization overhead due to *pull*s.
    ///   * Chunk size being 1, this setting will lead to a total of 8 pull operations (1 pull by the first thread, 7 pulls by the second thread).
    ///   * This leads to the maximum/worst parallelization overhead in this scenario.
    ///
    /// The objective then is to find a chunk size which is:
    /// * large enough that total time spent for the pulls is insignificant, while
    /// * small enough not to suffer from the impact of heterogeneity.
    ///
    /// Note that this decision is data dependent, and hence, can be tuned for the input when the operation is extremely time-critical.
    ///
    /// In these cases, the following rule of thumb helps to find a good chunk size.
    /// We can set the chunk size to the smallest value which would make the overhead of pulls insignificant:
    /// * The larger each individual task, the less significant the parallelization overhead. A small chunk size would do.
    /// * The smaller each individual task, the more significant the parallelization overhead. We require a larger chunk size while being careful not to suffer from idle times of threads due to heterogeneity.
    ///
    /// In general, it is recommended to set this parameter to its default value, `ChunkSize::Auto`.
    /// This library will try to solve the tradeoff explained above depending on the input data to minimize execution time and idle thread time.
    ///
    /// For more critical operations, the `ChunkSize::Exact` and `ChunkSize::Min` options can be used to tune the execution for the class of the relevant input data.
    fn chunk_size(self, chunk_size: impl Into<ChunkSize>) -> Self;

    // transform

    /// Takes the closure `map` and creates an iterator which calls that closure on each element.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    ///
    /// let doubles = (0..5).par().map(|x| x * 2).collect_vec();
    /// assert_eq!(&doubles[..], &[0, 2, 4, 6, 8]);
    /// ```
    fn map<O, M>(self, map: M) -> impl Par<Item = O>
    where
        O: Send + Sync,
        M: Fn(Self::Item) -> O + Send + Sync + Clone;

    /// Takes the closure `fmap` and creates an iterator which calls that closure on each element and flattens the result.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    ///
    /// let numbers = (0..5).par().flat_map(|x| vec![x; x]).collect_vec();
    /// assert_eq!(&numbers[..], &[1, 2, 2, 3, 3, 3, 4, 4, 4, 4]);
    /// ```
    fn flat_map<O, OI, FM>(self, flat_map: FM) -> impl Par<Item = O>
    where
        O: Send + Sync,
        OI: IntoIterator<Item = O>,
        FM: Fn(Self::Item) -> OI + Send + Sync + Clone;

    /// Creates an iterator which uses the closure `filter` to determine if an element should be yielded.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    ///
    /// let evens = (0..10).par().filter(|x| x % 2 == 0).collect_vec();
    /// assert_eq!(&evens[..], &[0, 2, 4, 6, 8]);
    /// ```
    fn filter<F>(self, filter: F) -> impl Par<Item = Self::Item>
    where
        F: Fn(&Self::Item) -> bool + Send + Sync + Clone;

    /// Creates an iterator that both filters and maps.
    ///
    /// The returned iterator yields only the values for which the supplied closure returns a successful value of the fallible type such as:
    /// * `Some` variant for `Option`,
    /// * `Ok` variant for `Result`, etc.
    ///
    /// See [`crate::Fallible`] trait for details of the fallible types and extending.
    ///
    /// `filter_map` can be used to make chains of filter and map more concise.
    /// The example below shows how a map().filter().map() can be shortened to a single call to filter_map.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use orx_parallel::*;
    ///
    /// let a = ["1", "two", "NaN", "four", "5"];
    ///
    /// let numbers = a.par().filter_map(|s| s.parse::<u64>()).collect_vec();
    /// assert_eq!(numbers, [1, 5]);
    /// ```
    ///
    /// Here's the same example, but with [`crate::Par::filter`] and [`crate::Par::map`]:
    ///
    /// ```
    /// use orx_parallel::*;
    ///
    /// let a = ["1", "two", "NaN", "four", "5"];
    ///
    /// let numbers = a
    ///    .par()
    ///    .map(|s| s.parse::<u64>())
    ///    .filter(|x| x.is_ok())
    ///    .map(|x| x.unwrap())
    ///    .collect_vec();
    /// assert_eq!(numbers, [1, 5]);
    /// ```
    fn filter_map<O, FO, FM>(self, filter_map: FM) -> impl Par<Item = O>
    where
        O: Send + Sync,
        FO: Fallible<O> + Send + Sync,
        FM: Fn(Self::Item) -> FO + Send + Sync + Clone;

    // reduce

    /// Reduces the elements to a single one, by repeatedly applying the `reduce` operation.
    ///
    /// If the iterator is empty, returns None; otherwise, returns the result of the reduction.
    ///
    /// The reducing function is a closure with two arguments: an ‘accumulator’, and an element.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    ///
    /// let reduced = (1..10).par().reduce(|acc, e| acc + e);
    /// assert_eq!(reduced, Some(45));
    ///
    /// let reduced = (1..10).par().filter(|x| *x > 10).reduce(|acc, e| acc + e);
    /// assert_eq!(reduced, None);
    /// ```
    fn reduce<R>(self, reduce: R) -> Option<Self::Item>
    where
        R: Fn(Self::Item, Self::Item) -> Self::Item + Send + Sync + Clone;

    /// Calls a closure on each element of an iterator.
    ///
    /// Unlike the for_each operation on a sequential iterator, parallel for_each method might apply the closure on the elements in different orders in every execution.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    ///
    /// (0..100).par().for_each(|x| println!("{:?}", x));
    /// ```
    ///
    /// For a more detailed use case, see below which involves a complex computation and writing the results to the database.
    /// In addition, a concurrent bag is used to collect some information while applying the closure.
    ///
    /// ```rust
    /// use orx_parallel::*;
    /// use orx_concurrent_bag::*;
    ///
    /// struct Input(usize);
    ///
    /// struct Output(String);
    ///
    /// fn computation(input: Input) -> Output {
    ///     Output(input.0.to_string())
    /// }
    ///
    /// fn write_output_to_db(_output: Output) -> Result<(), &'static str> {
    ///     Ok(())
    /// }
    ///
    /// let results_bag = ConcurrentBag::new();
    /// let inputs = (0..1024).map(|x| Input(x));
    ///
    /// inputs.par().for_each(|input| {
    ///     let output = computation(input);
    ///     let result = write_output_to_db(output);
    ///     results_bag.push(result);
    /// });
    ///
    /// let results = results_bag.into_inner();
    /// assert_eq!(1024, results.len());
    /// assert!(results.iter().all(|x| x.is_ok()));
    /// ```
    fn for_each<F>(self, f: F)
    where
        F: Fn(Self::Item) + Send + Sync + Clone,
    {
        // `f` already satisfies the `map` bounds with `O = ()`;
        // the mapped iterator is drained via `count` for its side effects.
        _ = self.map(f).count();
    }

    /// Consumes the iterator, counting the number of iterations and returning it.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    ///
    /// let evens = (0..10).par().filter(|x| x % 2 == 0);
    /// assert_eq!(evens.count(), 5);
    /// ```
    fn count(self) -> usize;

    /// Returns true if any of the elements of the iterator satisfies the given `predicate`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    ///
    /// let mut a: Vec<_> = (0..4242).map(|x| 2 * x).collect();
    ///
    /// let any_odd = a.par().any(|x| *x % 2 == 1);
    /// assert!(!any_odd);
    ///
    /// a.push(7);
    /// let any_odd = a.par().any(|x| *x % 2 == 1);
    /// assert!(any_odd);
    /// ```
    fn any<P>(self, predicate: P) -> bool
    where
        P: Fn(&Self::Item) -> bool + Send + Sync + Clone,
    {
        self.find(predicate).is_some()
    }

    /// Returns true if all of the elements of the iterator satisfies the given `predicate`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    ///
    /// let mut a: Vec<_> = (0..4242).map(|x| 2 * x).collect();
    ///
    /// let all_even = a.par().all(|x| *x % 2 == 0);
    /// assert!(all_even);
    ///
    /// a.push(7);
    /// let all_even = a.par().all(|x| *x % 2 == 0);
    /// assert!(!all_even);
    /// ```
    fn all<P>(self, predicate: P) -> bool
    where
        P: Fn(&Self::Item) -> bool + Send + Sync + Clone,
    {
        // all(p) holds exactly when no element violates p.
        let negated_predicate = |x: &Self::Item| !predicate(x);
        self.find(negated_predicate).is_none()
    }

    // find

    /// Returns the first element of the iterator satisfying the given `predicate`; returns None if no element satisfies the predicate.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    ///
    /// fn firstfac(x: usize) -> usize {
    ///     if x % 2 == 0 {
    ///         return 2;
    ///     };
    ///     for n in (1..).map(|m| 2 * m + 1).take_while(|m| m * m <= x) {
    ///         if x % n == 0 {
    ///             return n;
    ///         };
    ///     }
    ///     x
    /// }
    ///
    /// fn is_prime(n: &usize) -> bool {
    ///     match n {
    ///         0 | 1 => false,
    ///         _ => firstfac(*n) == *n,
    ///     }
    /// }
    ///
    /// let first_prime = (21..100).par().find(is_prime);
    /// assert_eq!(first_prime, Some(23));
    ///
    /// let first_prime = (24..28).par().find(is_prime);
    /// assert_eq!(first_prime, None);
    /// ```
    fn find<P>(self, predicate: P) -> Option<Self::Item>
    where
        P: Fn(&Self::Item) -> bool + Send + Sync + Clone;

    /// Returns the first element of the iterator; returns None if the iterator is empty.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    ///
    /// fn firstfac(x: usize) -> usize {
    ///     if x % 2 == 0 {
    ///         return 2;
    ///     };
    ///     for n in (1..).map(|m| 2 * m + 1).take_while(|m| m * m <= x) {
    ///         if x % n == 0 {
    ///             return n;
    ///         };
    ///     }
    ///     x
    /// }
    ///
    /// fn is_prime(n: &usize) -> bool {
    ///     match n {
    ///         0 | 1 => false,
    ///         _ => firstfac(*n) == *n,
    ///     }
    /// }
    ///
    /// let first_prime = (21..100).par().filter(is_prime).first();
    /// assert_eq!(first_prime, Some(23));
    ///
    /// let first_prime = (24..28).par().filter(is_prime).first();
    /// assert_eq!(first_prime, None);
    /// ```
    fn first(self) -> Option<Self::Item>;

    // collect

    /// Transforms the iterator into a collection.
    ///
    /// In this case, the result is transformed into a standard vector; i.e., `std::vec::Vec`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    ///
    /// let evens = (0..10).par().filter(|x| x % 2 == 0).collect_vec();
    /// assert_eq!(evens, vec![0, 2, 4, 6, 8]);
    /// ```
    fn collect_vec(self) -> Vec<Self::Item>;

    /// Transforms the iterator into a collection.
    ///
    /// In this case, the result is transformed into the split vector which is the underlying [`PinnedVec`](https://crates.io/crates/orx-pinned-vec) used to collect the results concurrently;
    /// i.e., [`SplitVec`](https://crates.io/crates/orx-split-vec).
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    /// use orx_split_vec::*;
    ///
    /// let evens = (0..10).par().filter(|x| x % 2 == 0).collect();
    /// assert_eq!(evens, SplitVec::from_iter([0, 2, 4, 6, 8]));
    /// ```
    fn collect(self) -> SplitVec<Self::Item>;

    /// Collects elements yielded by the iterator into the given `output` collection.
    ///
    /// Note that `output` does not need to be empty; hence, this method allows extending collections from the parallel iterator.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    /// use orx_split_vec::*;
    ///
    /// let output_vec = vec![42];
    ///
    /// let evens = (0..10).par().filter(|x| x % 2 == 0);
    /// let output_vec = evens.collect_into(output_vec);
    /// assert_eq!(output_vec, vec![42, 0, 2, 4, 6, 8]);
    ///
    /// let odds = (0..10).par().filter(|x| x % 2 == 1);
    /// let output_vec = odds.collect_into(output_vec);
    /// assert_eq!(output_vec, vec![42, 0, 2, 4, 6, 8, 1, 3, 5, 7, 9]);
    ///
    /// // alternatively, any `PinnedVec` can be used
    /// let output_vec: SplitVec<_> = [42].into_iter().collect();
    ///
    /// let evens = (0..10).par().filter(|x| x % 2 == 0);
    /// let output_vec = evens.collect_into(output_vec);
    /// assert_eq!(output_vec, vec![42, 0, 2, 4, 6, 8]);
    /// ```
    fn collect_into<C: ParCollectInto<Self::Item>>(self, output: C) -> C;

    /// Transforms the iterator into a collection, where the results are collected in arbitrary order.
    /// This method can be used when preserving the order is not critical.
    /// In certain scenarios, this might improve the performance.
    ///
    /// In this case, the result is transformed into the split vector with recursive growth [`SplitVec<Self::Item, Recursive>`](https://docs.rs/orx-split-vec/latest/orx_split_vec/struct.Recursive.html):
    /// * Note that the `SplitVec` returned by the `collect` method uses the `Doubling` growth which allows for efficient constant time random access.
    /// On the other hand, `Recursive` growth does not allow for constant time random access.
    /// * On the other hand, `Recursive` growth allows for zero cost `append` method to append another vector to the end.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    /// use orx_split_vec::*;
    ///
    /// let output = (0..5).par().flat_map(|x| vec![x; x]).collect_x();
    /// let mut sorted_output = output.to_vec();
    /// sorted_output.sort(); // WIP: PinnedVec::sort(&mut self)
    /// assert_eq!(sorted_output, vec![1, 2, 2, 3, 3, 3, 4, 4, 4, 4]);
    /// ```
    fn collect_x(self) -> SplitVec<Self::Item, Recursive> {
        self.collect().into()
    }

    // reduce - provided

    /// Folds the elements to a single one, by repeatedly applying the `fold` operation starting from the `identity`.
    ///
    /// If the iterator is empty, returns back the `identity`; otherwise, returns the result of the fold.
    ///
    /// The fold function is a closure with two arguments: an ‘accumulator’, and an element.
    ///
    /// Note that, unlike its sequential counterpart, parallel fold requires the `identity` and `fold` to satisfy the following:
    /// * `fold(a, b)` is equal to `fold(b, a)`,
    /// * `fold(a, fold(b, c))` is equal to `fold(fold(a, b), c)`,
    /// * `fold(identity, a)` is equal to `a`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    ///
    /// let fold = (1..10).par().fold(|| 0, |acc, e| acc + e);
    /// assert_eq!(fold, 45);
    ///
    /// let fold = (1..10).par().filter(|x| *x > 10).fold(|| 1, |acc, e| acc * e);
    /// assert_eq!(fold, 1);
    /// ```
    fn fold<Id, F>(self, identity: Id, fold: F) -> Self::Item
    where
        Id: Fn() -> Self::Item,
        F: Fn(Self::Item, Self::Item) -> Self::Item + Send + Sync + Clone,
    {
        // `identity` is only evaluated when the iterator turns out to be empty.
        self.reduce(fold).unwrap_or_else(identity)
    }

    /// Sums up the items in the iterator.
    ///
    /// Note that the order in which the items will be reduced is not specified, so if the + operator is not truly associative (as is the case for floating point numbers), then the results are not fully deterministic.
    ///
    /// Basically equivalent to `self.fold(|| 0, |a, b| a + b)`, except that the type of 0 and the + operation may vary depending on the type of value being produced.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    ///
    /// let sum = (1..10).par().sum();
    /// assert_eq!(sum, 45);
    ///
    /// let sum = (1..10).par().map(|x| x as f32).sum();
    /// assert!((sum - 45.0).abs() < f32::EPSILON);
    ///
    /// let sum = (1..10).par().filter(|x| *x > 10).sum();
    /// assert_eq!(sum, 0);
    /// ```
    fn sum(self) -> Self::Item
    where
        Self::Item: Default + Add<Output = Self::Item>,
    {
        // `unwrap_or_default` constructs the default value lazily, only for an empty iterator.
        self.reduce(|x, y| x + y).unwrap_or_default()
    }

    /// Computes the minimum of all the items in the iterator. If the iterator is empty, None is returned; otherwise, Some(min) is returned.
    ///
    /// Note that the order in which the items will be reduced is not specified, so if the Ord impl is not truly associative, then the results are not deterministic.
    ///
    /// Basically equivalent to `self.reduce(|a, b| Ord::min(a, b))`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    ///
    /// let min = (1..10).par().filter(|x| *x > 6).min();
    /// assert_eq!(min, Some(7));
    ///
    /// let min = (1..10).par().filter(|x| *x > 10).min();
    /// assert_eq!(min, None);
    /// ```
    fn min(self) -> Option<Self::Item>
    where
        Self::Item: Ord,
    {
        self.reduce(Ord::min)
    }

    /// Computes the maximum of all the items in the iterator. If the iterator is empty, None is returned; otherwise, Some(max) is returned.
    ///
    /// Note that the order in which the items will be reduced is not specified, so if the Ord impl is not truly associative, then the results are not deterministic.
    ///
    /// Basically equivalent to `self.reduce(|a, b| Ord::max(a, b))`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    ///
    /// let max = (1..10).par().filter(|x| *x < 6).max();
    /// assert_eq!(max, Some(5));
    ///
    /// let max = (1..10).par().filter(|x| *x > 10).max();
    /// assert_eq!(max, None);
    /// ```
    fn max(self) -> Option<Self::Item>
    where
        Self::Item: Ord,
    {
        self.reduce(Ord::max)
    }

    /// Computes the minimum of all the items in the iterator with respect to the given comparison function.
    /// If the iterator is empty, None is returned; otherwise, Some(min) is returned.
    ///
    /// Note that the order in which the items will be reduced is not specified, so if the comparison function is not associative, then the results are not deterministic.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    ///
    /// let names: Vec<_> = ["john", "doe", "adams", "jones", "grumpy"]
    ///     .map(String::from)
    ///     .into_iter()
    ///     .collect();
    ///
    /// let min = names.par().min_by(|a, b| a.len().cmp(&b.len()));
    /// assert_eq!(min.map(|x| x.as_ref()), Some("doe"));
    ///
    /// let min = names
    ///     .par()
    ///     .filter(|x| x.starts_with('x'))
    ///     .min_by(|a, b| a.len().cmp(&b.len()));
    /// assert_eq!(min, None);
    /// ```
    fn min_by<F>(self, compare: F) -> Option<Self::Item>
    where
        F: Fn(&Self::Item, &Self::Item) -> Ordering + Sync,
    {
        // on ties, the left-hand side of the reduction wins
        self.reduce(|x, y| match compare(&x, &y) {
            Ordering::Less | Ordering::Equal => x,
            Ordering::Greater => y,
        })
    }

    /// Computes the maximum of all the items in the iterator with respect to the given comparison function.
    /// If the iterator is empty, None is returned; otherwise, Some(max) is returned.
    ///
    /// Note that the order in which the items will be reduced is not specified, so if the comparison function is not associative, then the results are not deterministic.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    ///
    /// let names: Vec<_> = ["john", "doe", "adams", "jones", "grumpy"]
    ///     .map(String::from)
    ///     .into_iter()
    ///     .collect();
    ///
    /// let max = names.par().max_by(|a, b| a.len().cmp(&b.len()));
    /// assert_eq!(max.map(|x| x.as_ref()), Some("grumpy"));
    ///
    /// let max = names
    ///     .par()
    ///     .filter(|x| x.starts_with('x'))
    ///     .max_by(|a, b| a.len().cmp(&b.len()));
    /// assert_eq!(max, None);
    /// ```
    fn max_by<F>(self, compare: F) -> Option<Self::Item>
    where
        F: Fn(&Self::Item, &Self::Item) -> Ordering + Sync,
    {
        // on ties, the left-hand side of the reduction wins
        self.reduce(|x, y| match compare(&x, &y) {
            Ordering::Greater | Ordering::Equal => x,
            Ordering::Less => y,
        })
    }

    /// Computes the item that yields the minimum value for the given function. If the iterator is empty, None is returned; otherwise, Some(min) is returned.
    ///
    /// Note that the order in which the items will be reduced is not specified, so if the Ord impl is not truly associative, then the results are not deterministic.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    ///
    /// let names: Vec<_> = ["john", "doe", "adams", "jones", "grumpy"]
    ///     .map(String::from)
    ///     .into_iter()
    ///     .collect();
    ///
    /// let min = names.par().min_by_key(|x| x.len());
    /// assert_eq!(min.map(|x| x.as_ref()), Some("doe"));
    ///
    /// let min = names
    ///     .par()
    ///     .filter(|x| x.starts_with('x'))
    ///     .min_by_key(|x| x.len());
    /// assert_eq!(min, None);
    /// ```
    fn min_by_key<B, F>(self, get_key: F) -> Option<Self::Item>
    where
        B: Ord,
        F: Fn(&Self::Item) -> B + Sync,
    {
        self.reduce(|x, y| match get_key(&x).cmp(&get_key(&y)) {
            Ordering::Less | Ordering::Equal => x,
            Ordering::Greater => y,
        })
    }

    /// Computes the item that yields the maximum value for the given function. If the iterator is empty, None is returned; otherwise, Some(max) is returned.
    ///
    /// Note that the order in which the items will be reduced is not specified, so if the Ord impl is not truly associative, then the results are not deterministic.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use orx_parallel::*;
    ///
    /// let names: Vec<_> = ["john", "doe", "adams", "jones", "grumpy"]
    ///     .map(String::from)
    ///     .into_iter()
    ///     .collect();
    ///
    /// let max = names.par().max_by_key(|x| x.len());
    /// assert_eq!(max.map(|x| x.as_ref()), Some("grumpy"));
    ///
    /// let max = names
    ///     .par()
    ///     .filter(|x| x.starts_with('x'))
    ///     .max_by_key(|x| x.len());
    /// assert_eq!(max, None);
    /// ```
    fn max_by_key<B, F>(self, get_key: F) -> Option<Self::Item>
    where
        B: Ord,
        F: Fn(&Self::Item) -> B + Sync,
    {
        self.reduce(|x, y| match get_key(&x).cmp(&get_key(&y)) {
            Ordering::Greater | Ordering::Equal => x,
            Ordering::Less => y,
        })
    }
}