par_iter/
str.rs

1//! Parallel iterator types for [strings][std::str]
2//!
3//! You will rarely need to interact with this module directly unless you need
4//! to name one of the iterator types.
5//!
//! Note: [`ParallelString::par_split()`], [`par_split_terminator()`] and the
//! other pattern-taking methods of `ParallelString` reference a `Pattern`
//! trait which is not visible outside this crate.
8//! This trait is intentionally kept private, for use only by Rayon itself.
9//! It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
10//! and any function or closure `F: Fn(char) -> bool + Sync + Send`.
11//!
12//! [`ParallelString::par_split()`]: trait.ParallelString.html#method.par_split
13//! [`par_split_terminator()`]: trait.ParallelString.html#method.par_split_terminator
14//!
15//! [std::str]: https://doc.rust-lang.org/stable/std/str/
16
17use crate::{
18    iter::{plumbing::*, *},
19    split_producer::*,
20};
21
/// Reports whether `b` can begin a UTF-8 encoded character.
/// (same rule as the byte test inside `str::is_char_boundary`)
#[inline]
fn is_char_boundary(b: u8) -> bool {
    // Continuation bytes have the form 0b10xx_xxxx (128..=191); every other
    // byte value starts a character.
    (b & 0b1100_0000) != 0b1000_0000
}
29
30/// Find the index of a character boundary near the midpoint.
31#[inline]
32fn find_char_midpoint(chars: &str) -> usize {
33    let mid = chars.len() / 2;
34
35    // We want to split near the midpoint, but we need to find an actual
36    // character boundary.  So we look at the raw bytes, first scanning
37    // forward from the midpoint for a boundary, then trying backward.
38    let (left, right) = chars.as_bytes().split_at(mid);
39    match right.iter().copied().position(is_char_boundary) {
40        Some(i) => mid + i,
41        None => left
42            .iter()
43            .copied()
44            .rposition(is_char_boundary)
45            .unwrap_or(0),
46    }
47}
48
49/// Try to split a string near the midpoint.
50#[inline]
51fn split(chars: &str) -> Option<(&str, &str)> {
52    let index = find_char_midpoint(chars);
53    if index > 0 {
54        Some(chars.split_at(index))
55    } else {
56        None
57    }
58}
59
/// Parallel extensions for strings.
///
/// Implementors only provide [`as_parallel_string`][`ParallelString::as_parallel_string`];
/// every other method has a default body built on the string slice it returns.
pub trait ParallelString {
    /// Returns a plain string slice, which is used to implement the rest of
    /// the parallel methods.
    fn as_parallel_string(&self) -> &str;

    /// Returns a parallel iterator over the characters of a string.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let max = "hello".par_chars().max_by_key(|c| *c as i32);
    /// assert_eq!(Some('o'), max);
    /// ```
    fn par_chars(&self) -> Chars<'_> {
        Chars {
            chars: self.as_parallel_string(),
        }
    }

    /// Returns a parallel iterator over the characters of a string, with their
    /// positions.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let min = "hello".par_char_indices().min_by_key(|&(_i, c)| c as i32);
    /// assert_eq!(Some((1, 'e')), min);
    /// ```
    fn par_char_indices(&self) -> CharIndices<'_> {
        CharIndices {
            chars: self.as_parallel_string(),
        }
    }

    /// Returns a parallel iterator over the bytes of a string.
    ///
    /// Note that multi-byte sequences (for code points greater than `U+007F`)
    /// are produced as separate items, but will not be split across threads.
    /// If you would prefer an indexed iterator without that guarantee, consider
    /// `string.as_bytes().par_iter().copied()` instead.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let max = "hello".par_bytes().max();
    /// assert_eq!(Some(b'o'), max);
    /// ```
    fn par_bytes(&self) -> Bytes<'_> {
        Bytes {
            chars: self.as_parallel_string(),
        }
    }

    /// Returns a parallel iterator over a string encoded as UTF-16.
    ///
    /// Note that surrogate pairs (for code points greater than `U+FFFF`) are
    /// produced as separate items, but will not be split across threads.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    ///
    /// let max = "hello".par_encode_utf16().max();
    /// assert_eq!(Some(b'o' as u16), max);
    ///
    /// let text = "Zażółć gęślą jaźń";
    /// let utf8_len = text.len();
    /// let utf16_len = text.par_encode_utf16().count();
    /// assert!(utf16_len <= utf8_len);
    /// ```
    fn par_encode_utf16(&self) -> EncodeUtf16<'_> {
        EncodeUtf16 {
            chars: self.as_parallel_string(),
        }
    }

    /// Returns a parallel iterator over substrings separated by a
    /// given character or predicate, similar to `str::split`.
    ///
    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let total = "1, 2, buckle, 3, 4, door"
    ///    .par_split(',')
    ///    .filter_map(|s| s.trim().parse::<i32>().ok())
    ///    .sum();
    /// assert_eq!(10, total);
    /// ```
    fn par_split<P: Pattern>(&self, separator: P) -> Split<'_, P> {
        Split::new(self.as_parallel_string(), separator)
    }

    /// Returns a parallel iterator over substrings separated by a
    /// given character or predicate, keeping the matched part as a terminator
    /// of the substring, similar to `str::split_inclusive`.
    ///
    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let lines: Vec<_> = "Mary had a little lamb\nlittle lamb\nlittle lamb."
    ///    .par_split_inclusive('\n')
    ///    .collect();
    /// assert_eq!(lines, ["Mary had a little lamb\n", "little lamb\n", "little lamb."]);
    /// ```
    fn par_split_inclusive<P: Pattern>(&self, separator: P) -> SplitInclusive<'_, P> {
        SplitInclusive::new(self.as_parallel_string(), separator)
    }

    /// Returns a parallel iterator over substrings terminated by a
    /// given character or predicate, similar to `str::split_terminator`.
    /// It's equivalent to `par_split`, except it doesn't produce an empty
    /// substring after a trailing terminator.
    ///
    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let parts: Vec<_> = "((1 + 3) * 2)"
    ///     .par_split_terminator(|c| c == '(' || c == ')')
    ///     .collect();
    /// assert_eq!(vec!["", "", "1 + 3", " * 2"], parts);
    /// ```
    fn par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P> {
        SplitTerminator::new(self.as_parallel_string(), terminator)
    }

    /// Returns a parallel iterator over the lines of a string, ending with an
    /// optional carriage return and with a newline (`\r\n` or just `\n`).
    /// The final line ending is optional, and line endings are not included in
    /// the output strings.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let lengths: Vec<_> = "hello world\nfizbuzz"
    ///     .par_lines()
    ///     .map(|l| l.len())
    ///     .collect();
    /// assert_eq!(vec![11, 7], lengths);
    /// ```
    fn par_lines(&self) -> Lines<'_> {
        Lines(self.as_parallel_string())
    }

    /// Returns a parallel iterator over the sub-slices of a string that are
    /// separated by any amount of whitespace.
    ///
    /// As with `str::split_whitespace`, 'whitespace' is defined according to
    /// the terms of the Unicode Derived Core Property `White_Space`.
    /// If you only want to split on ASCII whitespace instead, use
    /// [`par_split_ascii_whitespace`][`ParallelString::par_split_ascii_whitespace`].
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let longest = "which is the longest word?"
    ///     .par_split_whitespace()
    ///     .max_by_key(|word| word.len());
    /// assert_eq!(Some("longest"), longest);
    /// ```
    ///
    /// All kinds of whitespace are considered:
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let words: Vec<&str> = " Mary   had\ta\u{2009}little  \n\t lamb"
    ///     .par_split_whitespace()
    ///     .collect();
    /// assert_eq!(words, ["Mary", "had", "a", "little", "lamb"]);
    /// ```
    ///
    /// If the string is empty or all whitespace, the iterator yields no string
    /// slices:
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// assert_eq!("".par_split_whitespace().count(), 0);
    /// assert_eq!("   ".par_split_whitespace().count(), 0);
    /// ```
    fn par_split_whitespace(&self) -> SplitWhitespace<'_> {
        SplitWhitespace(self.as_parallel_string())
    }

    /// Returns a parallel iterator over the sub-slices of a string that are
    /// separated by any amount of ASCII whitespace.
    ///
    /// To split by Unicode `White_Space` instead, use
    /// [`par_split_whitespace`][`ParallelString::par_split_whitespace`].
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let longest = "which is the longest word?"
    ///     .par_split_ascii_whitespace()
    ///     .max_by_key(|word| word.len());
    /// assert_eq!(Some("longest"), longest);
    /// ```
    ///
    /// All kinds of ASCII whitespace are considered, but not Unicode
    /// `White_Space`:
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let words: Vec<&str> = " Mary   had\ta\u{2009}little  \n\t lamb"
    ///     .par_split_ascii_whitespace()
    ///     .collect();
    /// assert_eq!(words, ["Mary", "had", "a\u{2009}little", "lamb"]);
    /// ```
    ///
    /// If the string is empty or all ASCII whitespace, the iterator yields no
    /// string slices:
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// assert_eq!("".par_split_ascii_whitespace().count(), 0);
    /// assert_eq!("   ".par_split_ascii_whitespace().count(), 0);
    /// ```
    fn par_split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> {
        SplitAsciiWhitespace(self.as_parallel_string())
    }

    /// Returns a parallel iterator over substrings that match a
    /// given character or predicate, similar to `str::matches`.
    ///
    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let total = "1, 2, buckle, 3, 4, door"
    ///    .par_matches(char::is_numeric)
    ///    .map(|s| s.parse::<i32>().expect("digit"))
    ///    .sum();
    /// assert_eq!(10, total);
    /// ```
    fn par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P> {
        Matches {
            chars: self.as_parallel_string(),
            pattern,
        }
    }

    /// Returns a parallel iterator over substrings that match a given character
    /// or predicate, with their positions, similar to `str::match_indices`.
    ///
    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
    ///
    /// # Examples
    ///
    /// ```
    /// use par_iter::prelude::*;
    /// let digits: Vec<_> = "1, 2, buckle, 3, 4, door"
    ///    .par_match_indices(char::is_numeric)
    ///    .collect();
    /// assert_eq!(digits, vec![(0, "1"), (3, "2"), (14, "3"), (17, "4")]);
    /// ```
    fn par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P> {
        MatchIndices {
            chars: self.as_parallel_string(),
            pattern,
        }
    }
}
350
/// `str` is its own parallel string: all default `par_*` methods operate
/// directly on the slice.
impl ParallelString for str {
    /// Returns `self` unchanged.
    #[inline]
    fn as_parallel_string(&self) -> &str {
        self
    }
}
357
358// /////////////////////////////////////////////////////////////////////////
359
/// We hide the `Pattern` trait in a private module, as its API is not meant
/// for general consumption.  If we could have privacy on trait items, then it
/// would be nicer to have its basic existence and implementors public while
/// keeping all of the methods private.
mod private {
    use crate::iter::plumbing::Folder;

    /// Pattern-matching trait for `ParallelString`, somewhat like a mix of
    /// `std::str::pattern::{Pattern, Searcher}`.
    ///
    /// Implementing this trait is not permitted outside of `rayon`.
    pub trait Pattern: Sized + Sync + Send {
        // NOTE(review): `private_decl!` is defined elsewhere in the crate;
        // presumably it declares the hidden item that seals this trait.
        private_decl! {}
        /// Byte index of the first match in `haystack`, if any.
        fn find_in(&self, haystack: &str) -> Option<usize>;
        /// Byte index of the last match in `haystack`, if any.
        fn rfind_in(&self, haystack: &str) -> Option<usize>;
        /// Whether `haystack` ends with a match of this pattern.
        fn is_suffix_of(&self, haystack: &str) -> bool;
        /// Feeds the pieces of `haystack` separated by this pattern to
        /// `folder`; when `skip_last` is set the final piece is dropped.
        fn fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F
        where
            F: Folder<&'ch str>;
        /// Like `fold_splits`, but each piece keeps its trailing separator
        /// (as with `str::split_inclusive`).
        fn fold_inclusive_splits<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>;
        /// Feeds every match of this pattern in `haystack` to `folder`.
        fn fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>;
        /// Feeds every match together with its byte index to `folder`;
        /// `base` is added to each index found within `haystack`.
        fn fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F
        where
            F: Folder<(usize, &'ch str)>;
    }
}
390use self::private::Pattern;
391
/// Builds a mapper that shifts the index half of an `(index, value)` pair
/// by `base`, leaving the value untouched.
#[inline]
fn offset<T>(base: usize) -> impl Fn((usize, T)) -> (usize, T) {
    move |pair| {
        let (relative, value) = pair;
        (base + relative, value)
    }
}
396
// Expands to the full body of a `Pattern` impl.
//
// `$self` is the receiver identifier and `$pattern` is an expression
// (usually written in terms of `$self`) that the `str` search methods below
// accept as their pattern argument; every method simply delegates to the
// matching `str` method.
macro_rules! impl_pattern {
    (&$self:ident => $pattern:expr) => {
        // NOTE(review): `private_impl!` is defined elsewhere in the crate;
        // presumably the counterpart of `private_decl!` in `Pattern`.
        private_impl! {}

        #[inline]
        fn find_in(&$self, chars: &str) -> Option<usize> {
            chars.find($pattern)
        }

        #[inline]
        fn rfind_in(&$self, chars: &str) -> Option<usize> {
            chars.rfind($pattern)
        }

        #[inline]
        fn is_suffix_of(&$self, chars: &str) -> bool {
            chars.ends_with($pattern)
        }

        fn fold_splits<'ch, F>(&$self, chars: &'ch str, folder: F, skip_last: bool) -> F
        where
            F: Folder<&'ch str>,
        {
            let mut split = chars.split($pattern);
            if skip_last {
                // Discard the final piece before handing the rest to the folder.
                split.next_back();
            }
            folder.consume_iter(split)
        }

        fn fold_inclusive_splits<'ch, F>(&$self, chars: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>,
        {
            folder.consume_iter(chars.split_inclusive($pattern))
        }

        fn fold_matches<'ch, F>(&$self, chars: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>,
        {
            folder.consume_iter(chars.matches($pattern))
        }

        fn fold_match_indices<'ch, F>(&$self, chars: &'ch str, folder: F, base: usize) -> F
        where
            F: Folder<(usize, &'ch str)>,
        {
            // Indices from `match_indices` are relative to `chars`; shift
            // them by `base`.
            folder.consume_iter(chars.match_indices($pattern).map(offset(base)))
        }
    }
}
449
/// A single `char` matches itself.
impl Pattern for char {
    impl_pattern!(&self => *self);
}
453
/// A slice of `char`s is passed to the `str` search methods as-is.
impl Pattern for &[char] {
    impl_pattern!(&self => *self);
}
457
458// TODO (MSRV 1.75): use `*self` for array patterns too.
459// - Needs `DoubleEndedSearcher` so `split.next_back()` works.
460
/// An array of `char`s is searched through its slice view.
impl<const N: usize> Pattern for [char; N] {
    impl_pattern!(&self => self.as_slice());
}
464
/// A borrowed array of `char`s is searched through its slice view.
impl<const N: usize> Pattern for &[char; N] {
    impl_pattern!(&self => self.as_slice());
}
468
/// Any thread-safe `Fn(char) -> bool` predicate is passed to the `str`
/// search methods by reference.
impl<FN: Sync + Send + Fn(char) -> bool> Pattern for FN {
    impl_pattern!(&self => self);
}
472
473// /////////////////////////////////////////////////////////////////////////
474
/// Parallel iterator over the characters of a string
///
/// Created by [`ParallelString::par_chars`].
#[derive(Debug, Clone)]
pub struct Chars<'ch> {
    // The string slice whose characters are yielded.
    chars: &'ch str,
}
480
/// Splittable producer behind [`Chars`]; holds the piece of the string that
/// has not been split off yet.
struct CharsProducer<'ch> {
    chars: &'ch str,
}
484
485impl<'ch> ParallelIterator for Chars<'ch> {
486    type Item = char;
487
488    fn drive_unindexed<C>(self, consumer: C) -> C::Result
489    where
490        C: UnindexedConsumer<Self::Item>,
491    {
492        bridge_unindexed(CharsProducer { chars: self.chars }, consumer)
493    }
494}
495
496impl<'ch> UnindexedProducer for CharsProducer<'ch> {
497    type Item = char;
498
499    fn split(self) -> (Self, Option<Self>) {
500        match split(self.chars) {
501            Some((left, right)) => (
502                CharsProducer { chars: left },
503                Some(CharsProducer { chars: right }),
504            ),
505            None => (self, None),
506        }
507    }
508
509    fn fold_with<F>(self, folder: F) -> F
510    where
511        F: Folder<Self::Item>,
512    {
513        folder.consume_iter(self.chars.chars())
514    }
515}
516
517// /////////////////////////////////////////////////////////////////////////
518
/// Parallel iterator over the characters of a string, with their positions
///
/// Created by [`ParallelString::par_char_indices`].
#[derive(Debug, Clone)]
pub struct CharIndices<'ch> {
    // The string slice whose characters are yielded.
    chars: &'ch str,
}
524
/// Splittable producer behind [`CharIndices`].
struct CharIndicesProducer<'ch> {
    // Byte offset of `chars` within the original string; added to every
    // index this producer yields.
    index: usize,
    // The piece of the string this producer is responsible for.
    chars: &'ch str,
}
529
530impl<'ch> ParallelIterator for CharIndices<'ch> {
531    type Item = (usize, char);
532
533    fn drive_unindexed<C>(self, consumer: C) -> C::Result
534    where
535        C: UnindexedConsumer<Self::Item>,
536    {
537        let producer = CharIndicesProducer {
538            index: 0,
539            chars: self.chars,
540        };
541        bridge_unindexed(producer, consumer)
542    }
543}
544
545impl<'ch> UnindexedProducer for CharIndicesProducer<'ch> {
546    type Item = (usize, char);
547
548    fn split(self) -> (Self, Option<Self>) {
549        match split(self.chars) {
550            Some((left, right)) => (
551                CharIndicesProducer {
552                    chars: left,
553                    ..self
554                },
555                Some(CharIndicesProducer {
556                    chars: right,
557                    index: self.index + left.len(),
558                }),
559            ),
560            None => (self, None),
561        }
562    }
563
564    fn fold_with<F>(self, folder: F) -> F
565    where
566        F: Folder<Self::Item>,
567    {
568        let base = self.index;
569        folder.consume_iter(self.chars.char_indices().map(offset(base)))
570    }
571}
572
573// /////////////////////////////////////////////////////////////////////////
574
/// Parallel iterator over the bytes of a string
///
/// Created by [`ParallelString::par_bytes`].
#[derive(Debug, Clone)]
pub struct Bytes<'ch> {
    // The string slice whose bytes are yielded.
    chars: &'ch str,
}
580
/// Splittable producer behind [`Bytes`]; kept as a `&str` so that splits
/// land on character boundaries.
struct BytesProducer<'ch> {
    chars: &'ch str,
}
584
585impl<'ch> ParallelIterator for Bytes<'ch> {
586    type Item = u8;
587
588    fn drive_unindexed<C>(self, consumer: C) -> C::Result
589    where
590        C: UnindexedConsumer<Self::Item>,
591    {
592        bridge_unindexed(BytesProducer { chars: self.chars }, consumer)
593    }
594}
595
596impl<'ch> UnindexedProducer for BytesProducer<'ch> {
597    type Item = u8;
598
599    fn split(self) -> (Self, Option<Self>) {
600        match split(self.chars) {
601            Some((left, right)) => (
602                BytesProducer { chars: left },
603                Some(BytesProducer { chars: right }),
604            ),
605            None => (self, None),
606        }
607    }
608
609    fn fold_with<F>(self, folder: F) -> F
610    where
611        F: Folder<Self::Item>,
612    {
613        folder.consume_iter(self.chars.bytes())
614    }
615}
616
617// /////////////////////////////////////////////////////////////////////////
618
/// Parallel iterator over a string encoded as UTF-16
///
/// Created by [`ParallelString::par_encode_utf16`].
#[derive(Debug, Clone)]
pub struct EncodeUtf16<'ch> {
    // The string slice that is re-encoded.
    chars: &'ch str,
}
624
/// Splittable producer behind [`EncodeUtf16`]; holds the piece of the string
/// that has not been split off yet.
struct EncodeUtf16Producer<'ch> {
    chars: &'ch str,
}
628
629impl<'ch> ParallelIterator for EncodeUtf16<'ch> {
630    type Item = u16;
631
632    fn drive_unindexed<C>(self, consumer: C) -> C::Result
633    where
634        C: UnindexedConsumer<Self::Item>,
635    {
636        bridge_unindexed(EncodeUtf16Producer { chars: self.chars }, consumer)
637    }
638}
639
640impl<'ch> UnindexedProducer for EncodeUtf16Producer<'ch> {
641    type Item = u16;
642
643    fn split(self) -> (Self, Option<Self>) {
644        match split(self.chars) {
645            Some((left, right)) => (
646                EncodeUtf16Producer { chars: left },
647                Some(EncodeUtf16Producer { chars: right }),
648            ),
649            None => (self, None),
650        }
651    }
652
653    fn fold_with<F>(self, folder: F) -> F
654    where
655        F: Folder<Self::Item>,
656    {
657        folder.consume_iter(self.chars.encode_utf16())
658    }
659}
660
661// /////////////////////////////////////////////////////////////////////////
662
/// Parallel iterator over substrings separated by a pattern
///
/// Created by [`ParallelString::par_split`].
#[derive(Debug, Clone)]
pub struct Split<'ch, P: Pattern> {
    // The string being split.
    chars: &'ch str,
    // The pattern that separates the yielded substrings.
    separator: P,
}
669
670impl<'ch, P: Pattern> Split<'ch, P> {
671    fn new(chars: &'ch str, separator: P) -> Self {
672        Split { chars, separator }
673    }
674}
675
676impl<'ch, P: Pattern> ParallelIterator for Split<'ch, P> {
677    type Item = &'ch str;
678
679    fn drive_unindexed<C>(self, consumer: C) -> C::Result
680    where
681        C: UnindexedConsumer<Self::Item>,
682    {
683        let producer = SplitProducer::new(self.chars, &self.separator);
684        bridge_unindexed(producer, consumer)
685    }
686}
687
688/// Implement support for `SplitProducer`.
689impl<'ch, P: Pattern> Fissile<P> for &'ch str {
690    fn length(&self) -> usize {
691        self.len()
692    }
693
694    fn midpoint(&self, end: usize) -> usize {
695        // First find a suitable UTF-8 boundary.
696        find_char_midpoint(&self[..end])
697    }
698
699    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
700        separator.find_in(&self[start..end])
701    }
702
703    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
704        separator.rfind_in(&self[..end])
705    }
706
707    fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
708        if INCL {
709            // include the separator in the left side
710            let separator = self[index..].chars().next().unwrap();
711            self.split_at(index + separator.len_utf8())
712        } else {
713            let (left, right) = self.split_at(index);
714            let mut right_iter = right.chars();
715            right_iter.next(); // skip the separator
716            (left, right_iter.as_str())
717        }
718    }
719
720    fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
721    where
722        F: Folder<Self>,
723    {
724        if INCL {
725            debug_assert!(!skip_last);
726            separator.fold_inclusive_splits(self, folder)
727        } else {
728            separator.fold_splits(self, folder, skip_last)
729        }
730    }
731}
732
733// /////////////////////////////////////////////////////////////////////////
734
/// Parallel iterator over substrings separated by a pattern, where each
/// substring keeps the separator that terminates it
///
/// Created by [`ParallelString::par_split_inclusive`].
#[derive(Debug, Clone)]
pub struct SplitInclusive<'ch, P: Pattern> {
    // The string being split.
    chars: &'ch str,
    // The pattern that ends each yielded substring.
    separator: P,
}
741
742impl<'ch, P: Pattern> SplitInclusive<'ch, P> {
743    fn new(chars: &'ch str, separator: P) -> Self {
744        SplitInclusive { chars, separator }
745    }
746}
747
748impl<'ch, P: Pattern> ParallelIterator for SplitInclusive<'ch, P> {
749    type Item = &'ch str;
750
751    fn drive_unindexed<C>(self, consumer: C) -> C::Result
752    where
753        C: UnindexedConsumer<Self::Item>,
754    {
755        let producer = SplitInclusiveProducer::new_incl(self.chars, &self.separator);
756        bridge_unindexed(producer, consumer)
757    }
758}
759
760// /////////////////////////////////////////////////////////////////////////
761
/// Parallel iterator over substrings separated by a terminator pattern
///
/// Created by [`ParallelString::par_split_terminator`].
#[derive(Debug, Clone)]
pub struct SplitTerminator<'ch, P: Pattern> {
    // The string being split.
    chars: &'ch str,
    // The pattern that terminates each yielded substring.
    terminator: P,
}
768
/// Splittable producer behind [`SplitTerminator`]: a `SplitProducer` plus a
/// flag saying whether the final piece must be dropped.
struct SplitTerminatorProducer<'ch, 'sep, P: Pattern> {
    // The underlying splitter over the string.
    splitter: SplitProducer<'sep, P, &'ch str>,
    // Forwarded to `SplitProducer::fold_with`; set only on the producer that
    // covers the end of the original string.
    skip_last: bool,
}
773
774impl<'ch, P: Pattern> SplitTerminator<'ch, P> {
775    fn new(chars: &'ch str, terminator: P) -> Self {
776        SplitTerminator { chars, terminator }
777    }
778}
779
780impl<'ch, 'sep, P: Pattern + 'sep> SplitTerminatorProducer<'ch, 'sep, P> {
781    fn new(chars: &'ch str, terminator: &'sep P) -> Self {
782        SplitTerminatorProducer {
783            splitter: SplitProducer::new(chars, terminator),
784            skip_last: chars.is_empty() || terminator.is_suffix_of(chars),
785        }
786    }
787}
788
789impl<'ch, P: Pattern> ParallelIterator for SplitTerminator<'ch, P> {
790    type Item = &'ch str;
791
792    fn drive_unindexed<C>(self, consumer: C) -> C::Result
793    where
794        C: UnindexedConsumer<Self::Item>,
795    {
796        let producer = SplitTerminatorProducer::new(self.chars, &self.terminator);
797        bridge_unindexed(producer, consumer)
798    }
799}
800
801impl<'ch, 'sep, P: Pattern + 'sep> UnindexedProducer for SplitTerminatorProducer<'ch, 'sep, P> {
802    type Item = &'ch str;
803
804    fn split(mut self) -> (Self, Option<Self>) {
805        let (left, right) = self.splitter.split();
806        self.splitter = left;
807        let right = right.map(|right| {
808            let skip_last = self.skip_last;
809            self.skip_last = false;
810            SplitTerminatorProducer {
811                splitter: right,
812                skip_last,
813            }
814        });
815        (self, right)
816    }
817
818    fn fold_with<F>(self, folder: F) -> F
819    where
820        F: Folder<Self::Item>,
821    {
822        self.splitter.fold_with(folder, self.skip_last)
823    }
824}
825
826// /////////////////////////////////////////////////////////////////////////
827
/// Parallel iterator over lines in a string
///
/// Created by [`ParallelString::par_lines`]; wraps the string whose lines
/// are yielded.
#[derive(Debug, Clone)]
pub struct Lines<'ch>(&'ch str);
831
/// Removes a single trailing `'\r'` from `line`, if there is one.
#[inline]
fn no_carriage_return(line: &str) -> &str {
    match line.strip_suffix('\r') {
        Some(trimmed) => trimmed,
        None => line,
    }
}
836
837impl<'ch> ParallelIterator for Lines<'ch> {
838    type Item = &'ch str;
839
840    fn drive_unindexed<C>(self, consumer: C) -> C::Result
841    where
842        C: UnindexedConsumer<Self::Item>,
843    {
844        self.0
845            .par_split_terminator('\n')
846            .map(no_carriage_return)
847            .drive_unindexed(consumer)
848    }
849}
850
851// /////////////////////////////////////////////////////////////////////////
852
/// Parallel iterator over substrings separated by whitespace
///
/// Created by [`ParallelString::par_split_whitespace`]; wraps the string
/// being split.
#[derive(Debug, Clone)]
pub struct SplitWhitespace<'ch>(&'ch str);
856
/// Filter predicate that keeps only non-empty string slices.
#[inline]
fn not_empty(s: &&str) -> bool {
    !str::is_empty(s)
}
861
862impl<'ch> ParallelIterator for SplitWhitespace<'ch> {
863    type Item = &'ch str;
864
865    fn drive_unindexed<C>(self, consumer: C) -> C::Result
866    where
867        C: UnindexedConsumer<Self::Item>,
868    {
869        self.0
870            .par_split(char::is_whitespace)
871            .filter(not_empty)
872            .drive_unindexed(consumer)
873    }
874}
875
876// /////////////////////////////////////////////////////////////////////////
877
/// Parallel iterator over substrings separated by ASCII whitespace
///
/// Created by [`ParallelString::par_split_ascii_whitespace`]; wraps the
/// string being split.
#[derive(Debug, Clone)]
pub struct SplitAsciiWhitespace<'ch>(&'ch str);
881
/// By-value adapter for `char::is_ascii_whitespace`, which takes `&self`
/// and so cannot be used directly where a `Fn(char) -> bool` is expected.
#[inline]
fn is_ascii_whitespace(c: char) -> bool {
    char::is_ascii_whitespace(&c)
}
886
887impl<'ch> ParallelIterator for SplitAsciiWhitespace<'ch> {
888    type Item = &'ch str;
889
890    fn drive_unindexed<C>(self, consumer: C) -> C::Result
891    where
892        C: UnindexedConsumer<Self::Item>,
893    {
894        self.0
895            .par_split(is_ascii_whitespace)
896            .filter(not_empty)
897            .drive_unindexed(consumer)
898    }
899}
900
901// /////////////////////////////////////////////////////////////////////////
902
/// Parallel iterator over substrings that match a pattern
///
/// Created by [`ParallelString::par_matches`].
#[derive(Debug, Clone)]
pub struct Matches<'ch, P: Pattern> {
    // The string being searched.
    chars: &'ch str,
    // The pattern whose matches are yielded.
    pattern: P,
}
909
/// Splittable producer behind [`Matches`]; borrows the pattern so that both
/// halves of a split can share it.
struct MatchesProducer<'ch, 'pat, P: Pattern> {
    // The piece of the string this producer searches.
    chars: &'ch str,
    pattern: &'pat P,
}
914
915impl<'ch, P: Pattern> ParallelIterator for Matches<'ch, P> {
916    type Item = &'ch str;
917
918    fn drive_unindexed<C>(self, consumer: C) -> C::Result
919    where
920        C: UnindexedConsumer<Self::Item>,
921    {
922        let producer = MatchesProducer {
923            chars: self.chars,
924            pattern: &self.pattern,
925        };
926        bridge_unindexed(producer, consumer)
927    }
928}
929
930impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchesProducer<'ch, 'pat, P> {
931    type Item = &'ch str;
932
933    fn split(self) -> (Self, Option<Self>) {
934        match split(self.chars) {
935            Some((left, right)) => (
936                MatchesProducer {
937                    chars: left,
938                    ..self
939                },
940                Some(MatchesProducer {
941                    chars: right,
942                    ..self
943                }),
944            ),
945            None => (self, None),
946        }
947    }
948
949    fn fold_with<F>(self, folder: F) -> F
950    where
951        F: Folder<Self::Item>,
952    {
953        self.pattern.fold_matches(self.chars, folder)
954    }
955}
956
957// /////////////////////////////////////////////////////////////////////////
958
/// Parallel iterator over substrings that match a pattern, with their positions
///
/// Created by [`ParallelString::par_match_indices`].
#[derive(Debug, Clone)]
pub struct MatchIndices<'ch, P: Pattern> {
    // The string being searched.
    chars: &'ch str,
    // The pattern whose matches are yielded.
    pattern: P,
}
965
/// Splittable producer behind [`MatchIndices`].
struct MatchIndicesProducer<'ch, 'pat, P: Pattern> {
    // Byte offset of `chars` within the original string; passed as the
    // `base` for every index this producer yields.
    index: usize,
    // The piece of the string this producer searches.
    chars: &'ch str,
    pattern: &'pat P,
}
971
972impl<'ch, P: Pattern> ParallelIterator for MatchIndices<'ch, P> {
973    type Item = (usize, &'ch str);
974
975    fn drive_unindexed<C>(self, consumer: C) -> C::Result
976    where
977        C: UnindexedConsumer<Self::Item>,
978    {
979        let producer = MatchIndicesProducer {
980            index: 0,
981            chars: self.chars,
982            pattern: &self.pattern,
983        };
984        bridge_unindexed(producer, consumer)
985    }
986}
987
988impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchIndicesProducer<'ch, 'pat, P> {
989    type Item = (usize, &'ch str);
990
991    fn split(self) -> (Self, Option<Self>) {
992        match split(self.chars) {
993            Some((left, right)) => (
994                MatchIndicesProducer {
995                    chars: left,
996                    ..self
997                },
998                Some(MatchIndicesProducer {
999                    chars: right,
1000                    index: self.index + left.len(),
1001                    ..self
1002                }),
1003            ),
1004            None => (self, None),
1005        }
1006    }
1007
1008    fn fold_with<F>(self, folder: F) -> F
1009    where
1010        F: Folder<Self::Item>,
1011    {
1012        self.pattern
1013            .fold_match_indices(self.chars, folder, self.index)
1014    }
1015}