lazy_string_replace/
pattern.rs

1//! A vendored version of `core::str::pattern`, since the version in `core` is unstable.
2//!
3//! For more details, see the traits [`Pattern`], [`Searcher`],
4//! [`ReverseSearcher`], and [`DoubleEndedSearcher`].
5
6use memchr;
7use std::{cmp, fmt, str::CharIndices, usize};
8
9// Pattern
10
11/// A string pattern.
12///
13/// A `Pattern<'a>` expresses that the implementing type
14/// can be used as a string pattern for searching in a `&'a str`.
15///
16/// For example, both `'a'` and `"aa"` are patterns that
17/// would match at index `1` in the string `"baaaab"`.
18///
19/// The trait itself acts as a builder for an associated
20/// `Searcher` type, which does the actual work of finding
21/// occurrences of the pattern in a string.
22pub trait Pattern<'a>: Sized {
23    /// Associated searcher for this pattern
24    type Searcher: Searcher<'a>;
25
26    /// Constructs the associated searcher from
27    /// `self` and the `haystack` to search in.
28    fn into_searcher(self, haystack: &'a str) -> Self::Searcher;
29
30    /// Checks whether the pattern matches anywhere in the haystack
31    #[inline]
32    fn is_contained_in(self, haystack: &'a str) -> bool {
33        self.into_searcher(haystack).next_match().is_some()
34    }
35
36    /// Checks whether the pattern matches at the front of the haystack
37    #[inline]
38    fn is_prefix_of(self, haystack: &'a str) -> bool {
39        match self.into_searcher(haystack).next() {
40            SearchStep::Match(0, _) => true,
41            _ => false,
42        }
43    }
44
45    /// Checks whether the pattern matches at the back of the haystack
46    #[inline]
47    fn is_suffix_of(self, haystack: &'a str) -> bool
48    where
49        Self::Searcher: ReverseSearcher<'a>,
50    {
51        match self.into_searcher(haystack).next_back() {
52            SearchStep::Match(_, j) if haystack.len() == j => true,
53            _ => false,
54        }
55    }
56}
57
58// Searcher
59
60/// Result of calling `Searcher::next()` or `ReverseSearcher::next_back()`.
61#[derive(Copy, Clone, Eq, PartialEq, Debug)]
62pub enum SearchStep {
63    /// Expresses that a match of the pattern has been found at
64    /// `haystack[a..b]`.
65    Match(usize, usize),
66    /// Expresses that `haystack[a..b]` has been rejected as a possible match
67    /// of the pattern.
68    ///
69    /// Note that there might be more than one `Reject` between two `Match`es,
70    /// there is no requirement for them to be combined into one.
71    Reject(usize, usize),
72    /// Expresses that every byte of the haystack has been visited, ending
73    /// the iteration.
74    Done,
75}
76
77/// A searcher for a string pattern.
78///
79/// This trait provides methods for searching for non-overlapping
80/// matches of a pattern starting from the front (left) of a string.
81///
82/// It will be implemented by associated `Searcher`
83/// types of the `Pattern` trait.
84///
85/// The trait is marked unsafe because the indices returned by the
86/// `next()` methods are required to lie on valid utf8 boundaries in
87/// the haystack. This enables consumers of this trait to
88/// slice the haystack without additional runtime checks.
89pub unsafe trait Searcher<'a> {
90    /// Getter for the underlying string to be searched in
91    ///
92    /// Will always return the same `&str`
93    fn haystack(&self) -> &'a str;
94
95    /// Performs the next search step starting from the front.
96    ///
97    /// - Returns `Match(a, b)` if `haystack[a..b]` matches the pattern.
98    /// - Returns `Reject(a, b)` if `haystack[a..b]` can not match the
99    ///   pattern, even partially.
100    /// - Returns `Done` if every byte of the haystack has been visited
101    ///
102    /// The stream of `Match` and `Reject` values up to a `Done`
103    /// will contain index ranges that are adjacent, non-overlapping,
104    /// covering the whole haystack, and laying on utf8 boundaries.
105    ///
106    /// A `Match` result needs to contain the whole matched pattern,
107    /// however `Reject` results may be split up into arbitrary
108    /// many adjacent fragments. Both ranges may have zero length.
109    ///
110    /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"`
111    /// might produce the stream
112    /// `[Reject(0, 1), Reject(1, 2), Match(2, 5), Reject(5, 8)]`
113    fn next(&mut self) -> SearchStep;
114
115    /// Finds the next `Match` result. See `next()`
116    ///
117    /// Unlike next(), there is no guarantee that the returned ranges
118    /// of this and next_reject will overlap. This will return (start_match, end_match),
119    /// where start_match is the index of where the match begins, and end_match is
120    /// the index after the end of the match.
121    #[inline]
122    fn next_match(&mut self) -> Option<(usize, usize)> {
123        loop {
124            match self.next() {
125                SearchStep::Match(a, b) => return Some((a, b)),
126                SearchStep::Done => return None,
127                _ => continue,
128            }
129        }
130    }
131
132    /// Finds the next `Reject` result. See `next()` and `next_match()`
133    ///
134    /// Unlike next(), there is no guarantee that the returned ranges
135    /// of this and next_match will overlap.
136    #[inline]
137    fn next_reject(&mut self) -> Option<(usize, usize)> {
138        loop {
139            match self.next() {
140                SearchStep::Reject(a, b) => return Some((a, b)),
141                SearchStep::Done => return None,
142                _ => continue,
143            }
144        }
145    }
146}
147
148/// A reverse searcher for a string pattern.
149///
150/// This trait provides methods for searching for non-overlapping
151/// matches of a pattern starting from the back (right) of a string.
152///
153/// It will be implemented by associated `Searcher`
154/// types of the `Pattern` trait if the pattern supports searching
155/// for it from the back.
156///
157/// The index ranges returned by this trait are not required
158/// to exactly match those of the forward search in reverse.
159///
160/// For the reason why this trait is marked unsafe, see them
161/// parent trait `Searcher`.
162pub unsafe trait ReverseSearcher<'a>: Searcher<'a> {
163    /// Performs the next search step starting from the back.
164    ///
165    /// - Returns `Match(a, b)` if `haystack[a..b]` matches the pattern.
166    /// - Returns `Reject(a, b)` if `haystack[a..b]` can not match the
167    ///   pattern, even partially.
168    /// - Returns `Done` if every byte of the haystack has been visited
169    ///
170    /// The stream of `Match` and `Reject` values up to a `Done`
171    /// will contain index ranges that are adjacent, non-overlapping,
172    /// covering the whole haystack, and laying on utf8 boundaries.
173    ///
174    /// A `Match` result needs to contain the whole matched pattern,
175    /// however `Reject` results may be split up into arbitrary
176    /// many adjacent fragments. Both ranges may have zero length.
177    ///
178    /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"`
179    /// might produce the stream
180    /// `[Reject(7, 8), Match(4, 7), Reject(1, 4), Reject(0, 1)]`
181    fn next_back(&mut self) -> SearchStep;
182
183    /// Finds the next `Match` result. See `next_back()`
184    #[inline]
185    fn next_match_back(&mut self) -> Option<(usize, usize)> {
186        loop {
187            match self.next_back() {
188                SearchStep::Match(a, b) => return Some((a, b)),
189                SearchStep::Done => return None,
190                _ => continue,
191            }
192        }
193    }
194
195    /// Finds the next `Reject` result. See `next_back()`
196    #[inline]
197    fn next_reject_back(&mut self) -> Option<(usize, usize)> {
198        loop {
199            match self.next_back() {
200                SearchStep::Reject(a, b) => return Some((a, b)),
201                SearchStep::Done => return None,
202                _ => continue,
203            }
204        }
205    }
206}
207
208/// A marker trait to express that a `ReverseSearcher`
209/// can be used for a `DoubleEndedIterator` implementation.
210///
211/// For this, the impl of `Searcher` and `ReverseSearcher` need
212/// to follow these conditions:
213///
214/// - All results of `next()` need to be identical
215///   to the results of `next_back()` in reverse order.
216/// - `next()` and `next_back()` need to behave as
217///   the two ends of a range of values, that is they
218///   can not "walk past each other".
219///
220/// # Examples
221///
222/// `char::Searcher` is a `DoubleEndedSearcher` because searching for a
223/// `char` only requires looking at one at a time, which behaves the same
224/// from both ends.
225///
226/// `(&str)::Searcher` is not a `DoubleEndedSearcher` because
227/// the pattern `"aa"` in the haystack `"aaa"` matches as either
228/// `"[aa]a"` or `"a[aa]"`, depending from which side it is searched.
229pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
230
231/////////////////////////////////////////////////////////////////////////////
232// Impl for char
233/////////////////////////////////////////////////////////////////////////////
234
235/// Associated type for `<char as Pattern<'a>>::Searcher`.
236#[derive(Clone, Debug)]
237pub struct CharSearcher<'a> {
238    haystack: &'a str,
239    // safety invariant: `finger`/`finger_back` must be a valid utf8 byte index of `haystack`
240    // This invariant can be broken *within* next_match and next_match_back, however
241    // they must exit with fingers on valid code point boundaries.
242    /// `finger` is the current byte index of the forward search.
243    /// Imagine that it exists before the byte at its index, i.e.
244    /// `haystack[finger]` is the first byte of the slice we must inspect during
245    /// forward searching
246    finger: usize,
247    /// `finger_back` is the current byte index of the reverse search.
248    /// Imagine that it exists after the byte at its index, i.e.
249    /// haystack[finger_back - 1] is the last byte of the slice we must inspect during
250    /// forward searching (and thus the first byte to be inspected when calling next_back())
251    finger_back: usize,
252    /// The character being searched for
253    needle: char,
254
255    // safety invariant: `utf8_size` must be less than 5
256    /// The number of bytes `needle` takes up when encoded in utf8
257    utf8_size: usize,
258    /// A utf8 encoded copy of the `needle`
259    utf8_encoded: [u8; 4],
260}
261
262unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
263    #[inline]
264    fn haystack(&self) -> &'a str {
265        self.haystack
266    }
267    #[inline]
268    fn next(&mut self) -> SearchStep {
269        let old_finger = self.finger;
270        let slice = unsafe { self.haystack.get_unchecked(old_finger..self.finger_back) };
271        let mut iter = slice.chars();
272        let old_len = iter.size_hint().1.unwrap();
273        if let Some(ch) = iter.next() {
274            // add byte offset of current character
275            // without re-encoding as utf-8
276            self.finger += old_len - iter.size_hint().1.unwrap();
277            if ch == self.needle {
278                SearchStep::Match(old_finger, self.finger)
279            } else {
280                SearchStep::Reject(old_finger, self.finger)
281            }
282        } else {
283            SearchStep::Done
284        }
285    }
286    #[inline]
287    fn next_match(&mut self) -> Option<(usize, usize)> {
288        loop {
289            // get the haystack after the last character found
290            let bytes =
291                if let Some(slice) = self.haystack.as_bytes().get(self.finger..self.finger_back) {
292                    slice
293                } else {
294                    return None;
295                };
296            // the last byte of the utf8 encoded needle
297            let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
298            if let Some(index) = memchr::memchr(last_byte, bytes) {
299                // The new finger is the index of the byte we found,
300                // plus one, since we memchr'd for the last byte of the character.
301                //
302                // Note that this doesn't always give us a finger on a UTF8 boundary.
303                // If we *didn't* find our character
304                // we may have indexed to the non-last byte of a 3-byte or 4-byte character.
305                // We can't just skip to the next valid starting byte because a character like
306                // ꁁ (U+A041 YI SYLLABLE PA), utf-8 `EA 81 81` will have us always find
307                // the second byte when searching for the third.
308                //
309                // However, this is totally okay. While we have the invariant that
310                // self.finger is on a UTF8 boundary, this invariant is not relied upon
311                // within this method (it is relied upon in CharSearcher::next()).
312                //
313                // We only exit this method when we reach the end of the string, or if we
314                // find something. When we find something the `finger` will be set
315                // to a UTF8 boundary.
316                self.finger += index + 1;
317                if self.finger >= self.utf8_size {
318                    let found_char = self.finger - self.utf8_size;
319                    if let Some(slice) = self.haystack.as_bytes().get(found_char..self.finger) {
320                        if slice == &self.utf8_encoded[0..self.utf8_size] {
321                            return Some((found_char, self.finger));
322                        }
323                    }
324                }
325            } else {
326                // found nothing, exit
327                self.finger = self.finger_back;
328                return None;
329            }
330        }
331    }
332
333    // let next_reject use the default implementation from the Searcher trait
334}
335
336unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
337    #[inline]
338    fn next_back(&mut self) -> SearchStep {
339        let old_finger = self.finger_back;
340        let slice = unsafe { self.haystack.get_unchecked(self.finger..old_finger) };
341        let mut iter = slice.chars();
342        let old_len = iter.size_hint().1.unwrap();
343        if let Some(ch) = iter.next_back() {
344            // subtract byte offset of current character
345            // without re-encoding as utf-8
346            self.finger_back -= old_len - iter.size_hint().1.unwrap();
347            if ch == self.needle {
348                SearchStep::Match(self.finger_back, old_finger)
349            } else {
350                SearchStep::Reject(self.finger_back, old_finger)
351            }
352        } else {
353            SearchStep::Done
354        }
355    }
356    #[inline]
357    fn next_match_back(&mut self) -> Option<(usize, usize)> {
358        let haystack = self.haystack.as_bytes();
359        loop {
360            // get the haystack up to but not including the last character searched
361            let bytes = if let Some(slice) = haystack.get(self.finger..self.finger_back) {
362                slice
363            } else {
364                return None;
365            };
366            // the last byte of the utf8 encoded needle
367            let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
368            if let Some(index) = memchr::memrchr(last_byte, bytes) {
369                // we searched a slice that was offset by self.finger,
370                // add self.finger to recoup the original index
371                let index = self.finger + index;
372                // memrchr will return the index of the byte we wish to
373                // find. In case of an ASCII character, this is indeed
374                // were we wish our new finger to be ("after" the found
375                // char in the paradigm of reverse iteration). For
376                // multibyte chars we need to skip down by the number of more
377                // bytes they have than ASCII
378                let shift = self.utf8_size - 1;
379                if index >= shift {
380                    let found_char = index - shift;
381                    if let Some(slice) = haystack.get(found_char..(found_char + self.utf8_size)) {
382                        if slice == &self.utf8_encoded[0..self.utf8_size] {
383                            // move finger to before the character found (i.e., at its start index)
384                            self.finger_back = found_char;
385                            return Some((self.finger_back, self.finger_back + self.utf8_size));
386                        }
387                    }
388                }
389                // We can't use finger_back = index - size + 1 here. If we found the last char
390                // of a different-sized character (or the middle byte of a different character)
391                // we need to bump the finger_back down to `index`. This similarly makes
392                // `finger_back` have the potential to no longer be on a boundary,
393                // but this is OK since we only exit this function on a boundary
394                // or when the haystack has been searched completely.
395                //
396                // Unlike next_match this does not
397                // have the problem of repeated bytes in utf-8 because
398                // we're searching for the last byte, and we can only have
399                // found the last byte when searching in reverse.
400                self.finger_back = index;
401            } else {
402                self.finger_back = self.finger;
403                // found nothing, exit
404                return None;
405            }
406        }
407    }
408
409    // let next_reject_back use the default implementation from the Searcher trait
410}
411
412impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {}
413
414/// Searches for chars that are equal to a given char
415impl<'a> Pattern<'a> for char {
416    type Searcher = CharSearcher<'a>;
417
418    #[inline]
419    fn into_searcher(self, haystack: &'a str) -> Self::Searcher {
420        let mut utf8_encoded = [0; 4];
421        let utf8_size = self.encode_utf8(&mut utf8_encoded).len();
422        CharSearcher {
423            haystack,
424            finger: 0,
425            finger_back: haystack.len(),
426            needle: self,
427            utf8_size,
428            utf8_encoded,
429        }
430    }
431
432    #[inline]
433    fn is_contained_in(self, haystack: &'a str) -> bool {
434        if (self as u32) < 128 {
435            haystack.as_bytes().contains(&(self as u8))
436        } else {
437            let mut buffer = [0u8; 4];
438            self.encode_utf8(&mut buffer).is_contained_in(haystack)
439        }
440    }
441
442    #[inline]
443    fn is_prefix_of(self, haystack: &'a str) -> bool {
444        if let Some(ch) = haystack.chars().next() {
445            self == ch
446        } else {
447            false
448        }
449    }
450
451    #[inline]
452    fn is_suffix_of(self, haystack: &'a str) -> bool
453    where
454        Self::Searcher: ReverseSearcher<'a>,
455    {
456        if let Some(ch) = haystack.chars().next_back() {
457            self == ch
458        } else {
459            false
460        }
461    }
462}
463
464/////////////////////////////////////////////////////////////////////////////
465// Impl for a MultiCharEq wrapper
466/////////////////////////////////////////////////////////////////////////////
467
468#[doc(hidden)]
469trait MultiCharEq {
470    fn matches(&mut self, c: char) -> bool;
471}
472
473impl<F> MultiCharEq for F
474where
475    F: FnMut(char) -> bool,
476{
477    #[inline]
478    fn matches(&mut self, c: char) -> bool {
479        (*self)(c)
480    }
481}
482
483impl MultiCharEq for &[char] {
484    #[inline]
485    fn matches(&mut self, c: char) -> bool {
486        self.iter().any(|&m| m == c)
487    }
488}
489
490struct MultiCharEqPattern<C: MultiCharEq>(C);
491
492#[derive(Clone, Debug)]
493struct MultiCharEqSearcher<'a, C: MultiCharEq> {
494    char_eq: C,
495    haystack: &'a str,
496    char_indices: CharIndices<'a>,
497}
498
499impl<'a, C: MultiCharEq> Pattern<'a> for MultiCharEqPattern<C> {
500    type Searcher = MultiCharEqSearcher<'a, C>;
501
502    #[inline]
503    fn into_searcher(self, haystack: &'a str) -> MultiCharEqSearcher<'a, C> {
504        MultiCharEqSearcher {
505            haystack,
506            char_eq: self.0,
507            char_indices: haystack.char_indices(),
508        }
509    }
510}
511
512unsafe impl<'a, C: MultiCharEq> Searcher<'a> for MultiCharEqSearcher<'a, C> {
513    #[inline]
514    fn haystack(&self) -> &'a str {
515        self.haystack
516    }
517
518    #[inline]
519    fn next(&mut self) -> SearchStep {
520        let s = &mut self.char_indices;
521        // Compare lengths of the internal byte slice iterator
522        // to find length of current char
523        let pre_len = s.size_hint().1.unwrap();
524        if let Some((i, c)) = s.next() {
525            let len = s.size_hint().1.unwrap();
526            let char_len = pre_len - len;
527            if self.char_eq.matches(c) {
528                return SearchStep::Match(i, i + char_len);
529            } else {
530                return SearchStep::Reject(i, i + char_len);
531            }
532        }
533        SearchStep::Done
534    }
535}
536
537unsafe impl<'a, C: MultiCharEq> ReverseSearcher<'a> for MultiCharEqSearcher<'a, C> {
538    #[inline]
539    fn next_back(&mut self) -> SearchStep {
540        let s = &mut self.char_indices;
541        // Compare lengths of the internal byte slice iterator
542        // to find length of current char
543        let pre_len = s.size_hint().1.unwrap();
544        if let Some((i, c)) = s.next_back() {
545            let len = s.size_hint().1.unwrap();
546            let char_len = pre_len - len;
547            if self.char_eq.matches(c) {
548                return SearchStep::Match(i, i + char_len);
549            } else {
550                return SearchStep::Reject(i, i + char_len);
551            }
552        }
553        SearchStep::Done
554    }
555}
556
557impl<'a, C: MultiCharEq> DoubleEndedSearcher<'a> for MultiCharEqSearcher<'a, C> {}
558
559/////////////////////////////////////////////////////////////////////////////
560
561macro_rules! pattern_methods {
562    ($t:ty, $pmap:expr, $smap:expr) => {
563        type Searcher = $t;
564
565        #[inline]
566        fn into_searcher(self, haystack: &'a str) -> $t {
567            ($smap)(($pmap)(self).into_searcher(haystack))
568        }
569
570        #[inline]
571        fn is_contained_in(self, haystack: &'a str) -> bool {
572            ($pmap)(self).is_contained_in(haystack)
573        }
574
575        #[inline]
576        fn is_prefix_of(self, haystack: &'a str) -> bool {
577            ($pmap)(self).is_prefix_of(haystack)
578        }
579
580        #[inline]
581        fn is_suffix_of(self, haystack: &'a str) -> bool
582            where $t: ReverseSearcher<'a>
583        {
584            ($pmap)(self).is_suffix_of(haystack)
585        }
586    }
587}
588
589macro_rules! searcher_methods {
590    (forward) => {
591        #[inline]
592        fn haystack(&self) -> &'a str {
593            self.0.haystack()
594        }
595        #[inline]
596        fn next(&mut self) -> SearchStep {
597            self.0.next()
598        }
599        #[inline]
600        fn next_match(&mut self) -> Option<(usize, usize)> {
601            self.0.next_match()
602        }
603        #[inline]
604        fn next_reject(&mut self) -> Option<(usize, usize)> {
605            self.0.next_reject()
606        }
607    };
608    (reverse) => {
609        #[inline]
610        fn next_back(&mut self) -> SearchStep {
611            self.0.next_back()
612        }
613        #[inline]
614        fn next_match_back(&mut self) -> Option<(usize, usize)> {
615            self.0.next_match_back()
616        }
617        #[inline]
618        fn next_reject_back(&mut self) -> Option<(usize, usize)> {
619            self.0.next_reject_back()
620        }
621    }
622}
623
624/////////////////////////////////////////////////////////////////////////////
625// Impl for &[char]
626/////////////////////////////////////////////////////////////////////////////
627
628// Todo: Change / Remove due to ambiguity in meaning.
629
630/// Associated type for `<&[char] as Pattern<'a>>::Searcher`.
631#[derive(Clone, Debug)]
632pub struct CharSliceSearcher<'a, 'b>(<MultiCharEqPattern<&'b [char]> as Pattern<'a>>::Searcher);
633
634unsafe impl<'a, 'b> Searcher<'a> for CharSliceSearcher<'a, 'b> {
635    searcher_methods!(forward);
636}
637
638unsafe impl<'a, 'b> ReverseSearcher<'a> for CharSliceSearcher<'a, 'b> {
639    searcher_methods!(reverse);
640}
641
642impl<'a, 'b> DoubleEndedSearcher<'a> for CharSliceSearcher<'a, 'b> {}
643
644/// Searches for chars that are equal to any of the chars in the array
645impl<'a, 'b> Pattern<'a> for &'b [char] {
646    pattern_methods!(
647        CharSliceSearcher<'a, 'b>,
648        MultiCharEqPattern,
649        CharSliceSearcher
650    );
651}
652
653/////////////////////////////////////////////////////////////////////////////
654// Impl for F: FnMut(char) -> bool
655/////////////////////////////////////////////////////////////////////////////
656
657/// Associated type for `<F as Pattern<'a>>::Searcher`.
658#[derive(Clone)]
659pub struct CharPredicateSearcher<'a, F>(<MultiCharEqPattern<F> as Pattern<'a>>::Searcher)
660where
661    F: FnMut(char) -> bool;
662
663impl<F> fmt::Debug for CharPredicateSearcher<'_, F>
664where
665    F: FnMut(char) -> bool,
666{
667    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
668        f.debug_struct("CharPredicateSearcher")
669            .field("haystack", &self.0.haystack)
670            .field("char_indices", &self.0.char_indices)
671            .finish()
672    }
673}
674unsafe impl<'a, F> Searcher<'a> for CharPredicateSearcher<'a, F>
675where
676    F: FnMut(char) -> bool,
677{
678    searcher_methods!(forward);
679}
680
681unsafe impl<'a, F> ReverseSearcher<'a> for CharPredicateSearcher<'a, F>
682where
683    F: FnMut(char) -> bool,
684{
685    searcher_methods!(reverse);
686}
687
688impl<'a, F> DoubleEndedSearcher<'a> for CharPredicateSearcher<'a, F> where F: FnMut(char) -> bool {}
689
690/// Searches for chars that match the given predicate
691impl<'a, F> Pattern<'a> for F
692where
693    F: FnMut(char) -> bool,
694{
695    pattern_methods!(
696        CharPredicateSearcher<'a, F>,
697        MultiCharEqPattern,
698        CharPredicateSearcher
699    );
700}
701
702/////////////////////////////////////////////////////////////////////////////
703// Impl for &&str
704/////////////////////////////////////////////////////////////////////////////
705
706/// Delegates to the `&str` impl.
707impl<'a, 'b, 'c> Pattern<'a> for &'c &'b str {
708    pattern_methods!(StrSearcher<'a, 'b>, |&s| s, |s| s);
709}
710
711/////////////////////////////////////////////////////////////////////////////
712// Impl for &str
713/////////////////////////////////////////////////////////////////////////////
714
715/// Non-allocating substring search.
716///
717/// Will handle the pattern `""` as returning empty matches at each character
718/// boundary.
719impl<'a, 'b> Pattern<'a> for &'b str {
720    type Searcher = StrSearcher<'a, 'b>;
721
722    #[inline]
723    fn into_searcher(self, haystack: &'a str) -> StrSearcher<'a, 'b> {
724        StrSearcher::new(haystack, self)
725    }
726
727    /// Checks whether the pattern matches at the front of the haystack
728    #[inline]
729    fn is_prefix_of(self, haystack: &'a str) -> bool {
730        haystack.is_char_boundary(self.len()) && self == &haystack[..self.len()]
731    }
732
733    /// Checks whether the pattern matches at the back of the haystack
734    #[inline]
735    fn is_suffix_of(self, haystack: &'a str) -> bool {
736        self.len() <= haystack.len()
737            && haystack.is_char_boundary(haystack.len() - self.len())
738            && self == &haystack[haystack.len() - self.len()..]
739    }
740}
741
742/////////////////////////////////////////////////////////////////////////////
743// Two Way substring searcher
744/////////////////////////////////////////////////////////////////////////////
745
746#[derive(Clone, Debug)]
747/// Associated type for `<&str as Pattern<'a>>::Searcher`.
748pub struct StrSearcher<'a, 'b> {
749    haystack: &'a str,
750    needle: &'b str,
751
752    searcher: StrSearcherImpl,
753}
754
755#[derive(Clone, Debug)]
756enum StrSearcherImpl {
757    Empty(EmptyNeedle),
758    TwoWay(TwoWaySearcher),
759}
760
761#[derive(Clone, Debug)]
762struct EmptyNeedle {
763    position: usize,
764    end: usize,
765    is_match_fw: bool,
766    is_match_bw: bool,
767}
768
769impl<'a, 'b> StrSearcher<'a, 'b> {
770    fn new(haystack: &'a str, needle: &'b str) -> StrSearcher<'a, 'b> {
771        if needle.is_empty() {
772            StrSearcher {
773                haystack,
774                needle,
775                searcher: StrSearcherImpl::Empty(EmptyNeedle {
776                    position: 0,
777                    end: haystack.len(),
778                    is_match_fw: true,
779                    is_match_bw: true,
780                }),
781            }
782        } else {
783            StrSearcher {
784                haystack,
785                needle,
786                searcher: StrSearcherImpl::TwoWay(TwoWaySearcher::new(
787                    needle.as_bytes(),
788                    haystack.len(),
789                )),
790            }
791        }
792    }
793}
794
795unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> {
796    #[inline]
797    fn haystack(&self) -> &'a str {
798        self.haystack
799    }
800
801    #[inline]
802    fn next(&mut self) -> SearchStep {
803        match self.searcher {
804            StrSearcherImpl::Empty(ref mut searcher) => {
805                // empty needle rejects every char and matches every empty string between them
806                let is_match = searcher.is_match_fw;
807                searcher.is_match_fw = !searcher.is_match_fw;
808                let pos = searcher.position;
809                match self.haystack[pos..].chars().next() {
810                    _ if is_match => SearchStep::Match(pos, pos),
811                    None => SearchStep::Done,
812                    Some(ch) => {
813                        searcher.position += ch.len_utf8();
814                        SearchStep::Reject(pos, searcher.position)
815                    }
816                }
817            }
818            StrSearcherImpl::TwoWay(ref mut searcher) => {
819                // TwoWaySearcher produces valid *Match* indices that split at char boundaries
820                // as long as it does correct matching and that haystack and needle are
821                // valid UTF-8
822                // *Rejects* from the algorithm can fall on any indices, but we will walk them
823                // manually to the next character boundary, so that they are utf-8 safe.
824                if searcher.position == self.haystack.len() {
825                    return SearchStep::Done;
826                }
827                let is_long = searcher.memory == usize::MAX;
828                match searcher.next::<RejectAndMatch>(
829                    self.haystack.as_bytes(),
830                    self.needle.as_bytes(),
831                    is_long,
832                ) {
833                    SearchStep::Reject(a, mut b) => {
834                        // skip to next char boundary
835                        while !self.haystack.is_char_boundary(b) {
836                            b += 1;
837                        }
838                        searcher.position = cmp::max(b, searcher.position);
839                        SearchStep::Reject(a, b)
840                    }
841                    otherwise => otherwise,
842                }
843            }
844        }
845    }
846
847    #[inline]
848    fn next_match(&mut self) -> Option<(usize, usize)> {
849        match self.searcher {
850            StrSearcherImpl::Empty(..) => loop {
851                match self.next() {
852                    SearchStep::Match(a, b) => return Some((a, b)),
853                    SearchStep::Done => return None,
854                    SearchStep::Reject(..) => {}
855                }
856            },
857            StrSearcherImpl::TwoWay(ref mut searcher) => {
858                let is_long = searcher.memory == usize::MAX;
859                // write out `true` and `false` cases to encourage the compiler
860                // to specialize the two cases separately.
861                if is_long {
862                    searcher.next::<MatchOnly>(
863                        self.haystack.as_bytes(),
864                        self.needle.as_bytes(),
865                        true,
866                    )
867                } else {
868                    searcher.next::<MatchOnly>(
869                        self.haystack.as_bytes(),
870                        self.needle.as_bytes(),
871                        false,
872                    )
873                }
874            }
875        }
876    }
877}
878
879unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> {
880    #[inline]
881    fn next_back(&mut self) -> SearchStep {
882        match self.searcher {
883            StrSearcherImpl::Empty(ref mut searcher) => {
884                let is_match = searcher.is_match_bw;
885                searcher.is_match_bw = !searcher.is_match_bw;
886                let end = searcher.end;
887                match self.haystack[..end].chars().next_back() {
888                    _ if is_match => SearchStep::Match(end, end),
889                    None => SearchStep::Done,
890                    Some(ch) => {
891                        searcher.end -= ch.len_utf8();
892                        SearchStep::Reject(searcher.end, end)
893                    }
894                }
895            }
896            StrSearcherImpl::TwoWay(ref mut searcher) => {
897                if searcher.end == 0 {
898                    return SearchStep::Done;
899                }
900                let is_long = searcher.memory == usize::MAX;
901                match searcher.next_back::<RejectAndMatch>(
902                    self.haystack.as_bytes(),
903                    self.needle.as_bytes(),
904                    is_long,
905                ) {
906                    SearchStep::Reject(mut a, b) => {
907                        // skip to next char boundary
908                        while !self.haystack.is_char_boundary(a) {
909                            a -= 1;
910                        }
911                        searcher.end = cmp::min(a, searcher.end);
912                        SearchStep::Reject(a, b)
913                    }
914                    otherwise => otherwise,
915                }
916            }
917        }
918    }
919
920    #[inline]
921    fn next_match_back(&mut self) -> Option<(usize, usize)> {
922        match self.searcher {
923            StrSearcherImpl::Empty(..) => loop {
924                match self.next_back() {
925                    SearchStep::Match(a, b) => return Some((a, b)),
926                    SearchStep::Done => return None,
927                    SearchStep::Reject(..) => {}
928                }
929            },
930            StrSearcherImpl::TwoWay(ref mut searcher) => {
931                let is_long = searcher.memory == usize::MAX;
932                // write out `true` and `false`, like `next_match`
933                if is_long {
934                    searcher.next_back::<MatchOnly>(
935                        self.haystack.as_bytes(),
936                        self.needle.as_bytes(),
937                        true,
938                    )
939                } else {
940                    searcher.next_back::<MatchOnly>(
941                        self.haystack.as_bytes(),
942                        self.needle.as_bytes(),
943                        false,
944                    )
945                }
946            }
947        }
948    }
949}
950
951/// The internal state of the two-way substring search algorithm.
952#[derive(Clone, Debug)]
953struct TwoWaySearcher {
954    // constants
955    /// critical factorization index
956    crit_pos: usize,
957    /// critical factorization index for reversed needle
958    crit_pos_back: usize,
959    period: usize,
960    /// `byteset` is an extension (not part of the two way algorithm);
961    /// it's a 64-bit "fingerprint" where each set bit `j` corresponds
962    /// to a (byte & 63) == j present in the needle.
963    byteset: u64,
964
965    // variables
966    position: usize,
967    end: usize,
968    /// index into needle before which we have already matched
969    memory: usize,
970    /// index into needle after which we have already matched
971    memory_back: usize,
972}
973
974/*
975    This is the Two-Way search algorithm, which was introduced in the paper:
976    Crochemore, M., Perrin, D., 1991, Two-way string-matching, Journal of the ACM 38(3):651-675.
977
978    Here's some background information.
979
980    A *word* is a string of symbols. The *length* of a word should be a familiar
981    notion, and here we denote it for any word x by |x|.
982    (We also allow for the possibility of the *empty word*, a word of length zero).
983
984    If x is any non-empty word, then an integer p with 0 < p <= |x| is said to be a
985    *period* for x iff for all i with 0 <= i <= |x| - p - 1, we have x[i] == x[i+p].
986    For example, both 1 and 2 are periods for the string "aa". As another example,
987    the only period of the string "abcd" is 4.
988
989    We denote by period(x) the *smallest* period of x (provided that x is non-empty).
990    This is always well-defined since every non-empty word x has at least one period,
991    |x|. We sometimes call this *the period* of x.
992
993    If u, v and x are words such that x = uv, where uv is the concatenation of u and
994    v, then we say that (u, v) is a *factorization* of x.
995
996    Let (u, v) be a factorization for a word x. Then if w is a non-empty word such
997    that both of the following hold
998
999      - either w is a suffix of u or u is a suffix of w
1000      - either w is a prefix of v or v is a prefix of w
1001
1002    then w is said to be a *repetition* for the factorization (u, v).
1003
1004    Just to unpack this, there are four possibilities here. Let w = "abc". Then we
1005    might have:
1006
1007      - w is a suffix of u and w is a prefix of v. ex: ("lolabc", "abcde")
1008      - w is a suffix of u and v is a prefix of w. ex: ("lolabc", "ab")
1009      - u is a suffix of w and w is a prefix of v. ex: ("bc", "abchi")
1010      - u is a suffix of w and v is a prefix of w. ex: ("bc", "a")
1011
1012    Note that the word vu is a repetition for any factorization (u,v) of x = uv,
1013    so every factorization has at least one repetition.
1014
1015    If x is a string and (u, v) is a factorization for x, then a *local period* for
1016    (u, v) is an integer r such that there is some word w such that |w| = r and w is
1017    a repetition for (u, v).
1018
1019    We denote by local_period(u, v) the smallest local period of (u, v). We sometimes
1020    call this *the local period* of (u, v). Provided that x = uv is non-empty, this
1021    is well-defined (because each non-empty word has at least one factorization, as
1022    noted above).
1023
1024    It can be proven that the following is an equivalent definition of a local period
1025    for a factorization (u, v): any positive integer r such that x[i] == x[i+r] for
1026    all i such that |u| - r <= i <= |u| - 1 and such that both x[i] and x[i+r] are
1027    defined. (i.e., i > 0 and i + r < |x|).
1028
1029    Using the above reformulation, it is easy to prove that
1030
1031        1 <= local_period(u, v) <= period(uv)
1032
1033    A factorization (u, v) of x such that local_period(u,v) = period(x) is called a
1034    *critical factorization*.
1035
1036    The algorithm hinges on the following theorem, which is stated without proof:
1037
1038    **Critical Factorization Theorem** Any word x has at least one critical
1039    factorization (u, v) such that |u| < period(x).
1040
1041    The purpose of maximal_suffix is to find such a critical factorization.
1042
1043    If the period is short, compute another factorization x = u' v' to use
1044    for reverse search, chosen instead so that |v'| < period(x).
1045
1046*/
1047impl TwoWaySearcher {
1048    fn new(needle: &[u8], end: usize) -> TwoWaySearcher {
1049        let (crit_pos_false, period_false) = TwoWaySearcher::maximal_suffix(needle, false);
1050        let (crit_pos_true, period_true) = TwoWaySearcher::maximal_suffix(needle, true);
1051
1052        let (crit_pos, period) = if crit_pos_false > crit_pos_true {
1053            (crit_pos_false, period_false)
1054        } else {
1055            (crit_pos_true, period_true)
1056        };
1057
1058        // A particularly readable explanation of what's going on here can be found
1059        // in Crochemore and Rytter's book "Text Algorithms", ch 13. Specifically
1060        // see the code for "Algorithm CP" on p. 323.
1061        //
1062        // What's going on is we have some critical factorization (u, v) of the
1063        // needle, and we want to determine whether u is a suffix of
1064        // &v[..period]. If it is, we use "Algorithm CP1". Otherwise we use
1065        // "Algorithm CP2", which is optimized for when the period of the needle
1066        // is large.
1067        if &needle[..crit_pos] == &needle[period..period + crit_pos] {
1068            // short period case -- the period is exact
1069            // compute a separate critical factorization for the reversed needle
1070            // x = u' v' where |v'| < period(x).
1071            //
1072            // This is sped up by the period being known already.
1073            // Note that a case like x = "acba" may be factored exactly forwards
1074            // (crit_pos = 1, period = 3) while being factored with approximate
1075            // period in reverse (crit_pos = 2, period = 2). We use the given
1076            // reverse factorization but keep the exact period.
1077            let crit_pos_back = needle.len()
1078                - cmp::max(
1079                    TwoWaySearcher::reverse_maximal_suffix(needle, period, false),
1080                    TwoWaySearcher::reverse_maximal_suffix(needle, period, true),
1081                );
1082
1083            TwoWaySearcher {
1084                crit_pos,
1085                crit_pos_back,
1086                period,
1087                byteset: Self::byteset_create(&needle[..period]),
1088
1089                position: 0,
1090                end,
1091                memory: 0,
1092                memory_back: needle.len(),
1093            }
1094        } else {
1095            // long period case -- we have an approximation to the actual period,
1096            // and don't use memorization.
1097            //
1098            // Approximate the period by lower bound max(|u|, |v|) + 1.
1099            // The critical factorization is efficient to use for both forward and
1100            // reverse search.
1101
1102            TwoWaySearcher {
1103                crit_pos,
1104                crit_pos_back: crit_pos,
1105                period: cmp::max(crit_pos, needle.len() - crit_pos) + 1,
1106                byteset: Self::byteset_create(needle),
1107
1108                position: 0,
1109                end,
1110                memory: usize::MAX, // Dummy value to signify that the period is long
1111                memory_back: usize::MAX,
1112            }
1113        }
1114    }
1115
1116    #[inline]
1117    fn byteset_create(bytes: &[u8]) -> u64 {
1118        bytes.iter().fold(0, |a, &b| (1 << (b & 0x3f)) | a)
1119    }
1120
1121    #[inline]
1122    fn byteset_contains(&self, byte: u8) -> bool {
1123        (self.byteset >> ((byte & 0x3f) as usize)) & 1 != 0
1124    }
1125
1126    // One of the main ideas of Two-Way is that we factorize the needle into
1127    // two halves, (u, v), and begin trying to find v in the haystack by scanning
1128    // left to right. If v matches, we try to match u by scanning right to left.
1129    // How far we can jump when we encounter a mismatch is all based on the fact
1130    // that (u, v) is a critical factorization for the needle.
1131    #[inline]
1132    fn next<S>(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> S::Output
1133    where
1134        S: TwoWayStrategy,
1135    {
1136        // `next()` uses `self.position` as its cursor
1137        let old_pos = self.position;
1138        let needle_last = needle.len() - 1;
1139        'search: loop {
1140            // Check that we have room to search in
1141            // position + needle_last can not overflow if we assume slices
1142            // are bounded by isize's range.
1143            let tail_byte = match haystack.get(self.position + needle_last) {
1144                Some(&b) => b,
1145                None => {
1146                    self.position = haystack.len();
1147                    return S::rejecting(old_pos, self.position);
1148                }
1149            };
1150
1151            if S::use_early_reject() && old_pos != self.position {
1152                return S::rejecting(old_pos, self.position);
1153            }
1154
1155            // Quickly skip by large portions unrelated to our substring
1156            if !self.byteset_contains(tail_byte) {
1157                self.position += needle.len();
1158                if !long_period {
1159                    self.memory = 0;
1160                }
1161                continue 'search;
1162            }
1163
1164            // See if the right part of the needle matches
1165            let start = if long_period {
1166                self.crit_pos
1167            } else {
1168                cmp::max(self.crit_pos, self.memory)
1169            };
1170            for i in start..needle.len() {
1171                if needle[i] != haystack[self.position + i] {
1172                    self.position += i - self.crit_pos + 1;
1173                    if !long_period {
1174                        self.memory = 0;
1175                    }
1176                    continue 'search;
1177                }
1178            }
1179
1180            // See if the left part of the needle matches
1181            let start = if long_period { 0 } else { self.memory };
1182            for i in (start..self.crit_pos).rev() {
1183                if needle[i] != haystack[self.position + i] {
1184                    self.position += self.period;
1185                    if !long_period {
1186                        self.memory = needle.len() - self.period;
1187                    }
1188                    continue 'search;
1189                }
1190            }
1191
1192            // We have found a match!
1193            let match_pos = self.position;
1194
1195            // Note: add self.period instead of needle.len() to have overlapping matches
1196            self.position += needle.len();
1197            if !long_period {
1198                self.memory = 0; // set to needle.len() - self.period for overlapping matches
1199            }
1200
1201            return S::matching(match_pos, match_pos + needle.len());
1202        }
1203    }
1204
1205    // Follows the ideas in `next()`.
1206    //
1207    // The definitions are symmetrical, with period(x) = period(reverse(x))
1208    // and local_period(u, v) = local_period(reverse(v), reverse(u)), so if (u, v)
1209    // is a critical factorization, so is (reverse(v), reverse(u)).
1210    //
1211    // For the reverse case we have computed a critical factorization x = u' v'
1212    // (field `crit_pos_back`). We need |u| < period(x) for the forward case and
1213    // thus |v'| < period(x) for the reverse.
1214    //
1215    // To search in reverse through the haystack, we search forward through
1216    // a reversed haystack with a reversed needle, matching first u' and then v'.
1217    #[inline]
1218    fn next_back<S>(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> S::Output
1219    where
1220        S: TwoWayStrategy,
1221    {
1222        // `next_back()` uses `self.end` as its cursor -- so that `next()` and `next_back()`
1223        // are independent.
1224        let old_end = self.end;
1225        'search: loop {
1226            // Check that we have room to search in
1227            // end - needle.len() will wrap around when there is no more room,
1228            // but due to slice length limits it can never wrap all the way back
1229            // into the length of haystack.
1230            let front_byte = match haystack.get(self.end.wrapping_sub(needle.len())) {
1231                Some(&b) => b,
1232                None => {
1233                    self.end = 0;
1234                    return S::rejecting(0, old_end);
1235                }
1236            };
1237
1238            if S::use_early_reject() && old_end != self.end {
1239                return S::rejecting(self.end, old_end);
1240            }
1241
1242            // Quickly skip by large portions unrelated to our substring
1243            if !self.byteset_contains(front_byte) {
1244                self.end -= needle.len();
1245                if !long_period {
1246                    self.memory_back = needle.len();
1247                }
1248                continue 'search;
1249            }
1250
1251            // See if the left part of the needle matches
1252            let crit = if long_period {
1253                self.crit_pos_back
1254            } else {
1255                cmp::min(self.crit_pos_back, self.memory_back)
1256            };
1257            for i in (0..crit).rev() {
1258                if needle[i] != haystack[self.end - needle.len() + i] {
1259                    self.end -= self.crit_pos_back - i;
1260                    if !long_period {
1261                        self.memory_back = needle.len();
1262                    }
1263                    continue 'search;
1264                }
1265            }
1266
1267            // See if the right part of the needle matches
1268            let needle_end = if long_period {
1269                needle.len()
1270            } else {
1271                self.memory_back
1272            };
1273            for i in self.crit_pos_back..needle_end {
1274                if needle[i] != haystack[self.end - needle.len() + i] {
1275                    self.end -= self.period;
1276                    if !long_period {
1277                        self.memory_back = self.period;
1278                    }
1279                    continue 'search;
1280                }
1281            }
1282
1283            // We have found a match!
1284            let match_pos = self.end - needle.len();
1285            // Note: sub self.period instead of needle.len() to have overlapping matches
1286            self.end -= needle.len();
1287            if !long_period {
1288                self.memory_back = needle.len();
1289            }
1290
1291            return S::matching(match_pos, match_pos + needle.len());
1292        }
1293    }
1294
1295    // Compute the maximal suffix of `arr`.
1296    //
1297    // The maximal suffix is a possible critical factorization (u, v) of `arr`.
1298    //
1299    // Returns (`i`, `p`) where `i` is the starting index of v and `p` is the
1300    // period of v.
1301    //
1302    // `order_greater` determines if lexical order is `<` or `>`. Both
1303    // orders must be computed -- the ordering with the largest `i` gives
1304    // a critical factorization.
1305    //
1306    // For long period cases, the resulting period is not exact (it is too short).
1307    #[inline]
1308    fn maximal_suffix(arr: &[u8], order_greater: bool) -> (usize, usize) {
1309        let mut left = 0; // Corresponds to i in the paper
1310        let mut right = 1; // Corresponds to j in the paper
1311        let mut offset = 0; // Corresponds to k in the paper, but starting at 0
1312                            // to match 0-based indexing.
1313        let mut period = 1; // Corresponds to p in the paper
1314
1315        while let Some(&a) = arr.get(right + offset) {
1316            // `left` will be inbounds when `right` is.
1317            let b = arr[left + offset];
1318            if (a < b && !order_greater) || (a > b && order_greater) {
1319                // Suffix is smaller, period is entire prefix so far.
1320                right += offset + 1;
1321                offset = 0;
1322                period = right - left;
1323            } else if a == b {
1324                // Advance through repetition of the current period.
1325                if offset + 1 == period {
1326                    right += offset + 1;
1327                    offset = 0;
1328                } else {
1329                    offset += 1;
1330                }
1331            } else {
1332                // Suffix is larger, start over from current location.
1333                left = right;
1334                right += 1;
1335                offset = 0;
1336                period = 1;
1337            }
1338        }
1339        (left, period)
1340    }
1341
1342    // Compute the maximal suffix of the reverse of `arr`.
1343    //
1344    // The maximal suffix is a possible critical factorization (u', v') of `arr`.
1345    //
1346    // Returns `i` where `i` is the starting index of v', from the back;
1347    // returns immediately when a period of `known_period` is reached.
1348    //
1349    // `order_greater` determines if lexical order is `<` or `>`. Both
1350    // orders must be computed -- the ordering with the largest `i` gives
1351    // a critical factorization.
1352    //
1353    // For long period cases, the resulting period is not exact (it is too short).
1354    fn reverse_maximal_suffix(arr: &[u8], known_period: usize, order_greater: bool) -> usize {
1355        let mut left = 0; // Corresponds to i in the paper
1356        let mut right = 1; // Corresponds to j in the paper
1357        let mut offset = 0; // Corresponds to k in the paper, but starting at 0
1358                            // to match 0-based indexing.
1359        let mut period = 1; // Corresponds to p in the paper
1360        let n = arr.len();
1361
1362        while right + offset < n {
1363            let a = arr[n - (1 + right + offset)];
1364            let b = arr[n - (1 + left + offset)];
1365            if (a < b && !order_greater) || (a > b && order_greater) {
1366                // Suffix is smaller, period is entire prefix so far.
1367                right += offset + 1;
1368                offset = 0;
1369                period = right - left;
1370            } else if a == b {
1371                // Advance through repetition of the current period.
1372                if offset + 1 == period {
1373                    right += offset + 1;
1374                    offset = 0;
1375                } else {
1376                    offset += 1;
1377                }
1378            } else {
1379                // Suffix is larger, start over from current location.
1380                left = right;
1381                right += 1;
1382                offset = 0;
1383                period = 1;
1384            }
1385            if period == known_period {
1386                break;
1387            }
1388        }
1389        debug_assert!(period <= known_period);
1390        left
1391    }
1392}
1393
1394// TwoWayStrategy allows the algorithm to either skip non-matches as quickly
1395// as possible, or to work in a mode where it emits Rejects relatively quickly.
1396trait TwoWayStrategy {
1397    type Output;
1398    fn use_early_reject() -> bool;
1399    fn rejecting(a: usize, b: usize) -> Self::Output;
1400    fn matching(a: usize, b: usize) -> Self::Output;
1401}
1402
1403/// Skip to match intervals as quickly as possible
1404enum MatchOnly {}
1405
1406impl TwoWayStrategy for MatchOnly {
1407    type Output = Option<(usize, usize)>;
1408
1409    #[inline]
1410    fn use_early_reject() -> bool {
1411        false
1412    }
1413    #[inline]
1414    fn rejecting(_a: usize, _b: usize) -> Self::Output {
1415        None
1416    }
1417    #[inline]
1418    fn matching(a: usize, b: usize) -> Self::Output {
1419        Some((a, b))
1420    }
1421}
1422
1423/// Emit Rejects regularly
1424enum RejectAndMatch {}
1425
1426impl TwoWayStrategy for RejectAndMatch {
1427    type Output = SearchStep;
1428
1429    #[inline]
1430    fn use_early_reject() -> bool {
1431        true
1432    }
1433    #[inline]
1434    fn rejecting(a: usize, b: usize) -> Self::Output {
1435        SearchStep::Reject(a, b)
1436    }
1437    #[inline]
1438    fn matching(a: usize, b: usize) -> Self::Output {
1439        SearchStep::Match(a, b)
1440    }
1441}
lazy_string_replace/pattern.rs

lazy_string_replace/
pattern.rs