lazy_string_replace/pattern.rs
1//! A vendored version of `core::str::pattern`, since the version in `core` is unstable.
2//!
3//! For more details, see the traits [`Pattern`], [`Searcher`],
4//! [`ReverseSearcher`], and [`DoubleEndedSearcher`].
5
6use memchr;
7use std::{cmp, fmt, str::CharIndices, usize};
8
9// Pattern
10
11/// A string pattern.
12///
13/// A `Pattern<'a>` expresses that the implementing type
14/// can be used as a string pattern for searching in a `&'a str`.
15///
16/// For example, both `'a'` and `"aa"` are patterns that
17/// would match at index `1` in the string `"baaaab"`.
18///
19/// The trait itself acts as a builder for an associated
20/// `Searcher` type, which does the actual work of finding
21/// occurrences of the pattern in a string.
22pub trait Pattern<'a>: Sized {
23 /// Associated searcher for this pattern
24 type Searcher: Searcher<'a>;
25
26 /// Constructs the associated searcher from
27 /// `self` and the `haystack` to search in.
28 fn into_searcher(self, haystack: &'a str) -> Self::Searcher;
29
30 /// Checks whether the pattern matches anywhere in the haystack
31 #[inline]
32 fn is_contained_in(self, haystack: &'a str) -> bool {
33 self.into_searcher(haystack).next_match().is_some()
34 }
35
36 /// Checks whether the pattern matches at the front of the haystack
37 #[inline]
38 fn is_prefix_of(self, haystack: &'a str) -> bool {
39 match self.into_searcher(haystack).next() {
40 SearchStep::Match(0, _) => true,
41 _ => false,
42 }
43 }
44
45 /// Checks whether the pattern matches at the back of the haystack
46 #[inline]
47 fn is_suffix_of(self, haystack: &'a str) -> bool
48 where
49 Self::Searcher: ReverseSearcher<'a>,
50 {
51 match self.into_searcher(haystack).next_back() {
52 SearchStep::Match(_, j) if haystack.len() == j => true,
53 _ => false,
54 }
55 }
56}
57
58// Searcher
59
60/// Result of calling `Searcher::next()` or `ReverseSearcher::next_back()`.
61#[derive(Copy, Clone, Eq, PartialEq, Debug)]
62pub enum SearchStep {
63 /// Expresses that a match of the pattern has been found at
64 /// `haystack[a..b]`.
65 Match(usize, usize),
66 /// Expresses that `haystack[a..b]` has been rejected as a possible match
67 /// of the pattern.
68 ///
69 /// Note that there might be more than one `Reject` between two `Match`es,
70 /// there is no requirement for them to be combined into one.
71 Reject(usize, usize),
72 /// Expresses that every byte of the haystack has been visited, ending
73 /// the iteration.
74 Done,
75}
76
77/// A searcher for a string pattern.
78///
79/// This trait provides methods for searching for non-overlapping
80/// matches of a pattern starting from the front (left) of a string.
81///
82/// It will be implemented by associated `Searcher`
83/// types of the `Pattern` trait.
84///
85/// The trait is marked unsafe because the indices returned by the
86/// `next()` methods are required to lie on valid utf8 boundaries in
87/// the haystack. This enables consumers of this trait to
88/// slice the haystack without additional runtime checks.
89pub unsafe trait Searcher<'a> {
90 /// Getter for the underlying string to be searched in
91 ///
92 /// Will always return the same `&str`
93 fn haystack(&self) -> &'a str;
94
95 /// Performs the next search step starting from the front.
96 ///
97 /// - Returns `Match(a, b)` if `haystack[a..b]` matches the pattern.
98 /// - Returns `Reject(a, b)` if `haystack[a..b]` can not match the
99 /// pattern, even partially.
100 /// - Returns `Done` if every byte of the haystack has been visited
101 ///
102 /// The stream of `Match` and `Reject` values up to a `Done`
103 /// will contain index ranges that are adjacent, non-overlapping,
104 /// covering the whole haystack, and laying on utf8 boundaries.
105 ///
106 /// A `Match` result needs to contain the whole matched pattern,
107 /// however `Reject` results may be split up into arbitrary
108 /// many adjacent fragments. Both ranges may have zero length.
109 ///
110 /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"`
111 /// might produce the stream
112 /// `[Reject(0, 1), Reject(1, 2), Match(2, 5), Reject(5, 8)]`
113 fn next(&mut self) -> SearchStep;
114
115 /// Finds the next `Match` result. See `next()`
116 ///
117 /// Unlike next(), there is no guarantee that the returned ranges
118 /// of this and next_reject will overlap. This will return (start_match, end_match),
119 /// where start_match is the index of where the match begins, and end_match is
120 /// the index after the end of the match.
121 #[inline]
122 fn next_match(&mut self) -> Option<(usize, usize)> {
123 loop {
124 match self.next() {
125 SearchStep::Match(a, b) => return Some((a, b)),
126 SearchStep::Done => return None,
127 _ => continue,
128 }
129 }
130 }
131
132 /// Finds the next `Reject` result. See `next()` and `next_match()`
133 ///
134 /// Unlike next(), there is no guarantee that the returned ranges
135 /// of this and next_match will overlap.
136 #[inline]
137 fn next_reject(&mut self) -> Option<(usize, usize)> {
138 loop {
139 match self.next() {
140 SearchStep::Reject(a, b) => return Some((a, b)),
141 SearchStep::Done => return None,
142 _ => continue,
143 }
144 }
145 }
146}
147
148/// A reverse searcher for a string pattern.
149///
150/// This trait provides methods for searching for non-overlapping
151/// matches of a pattern starting from the back (right) of a string.
152///
153/// It will be implemented by associated `Searcher`
154/// types of the `Pattern` trait if the pattern supports searching
155/// for it from the back.
156///
157/// The index ranges returned by this trait are not required
158/// to exactly match those of the forward search in reverse.
159///
160/// For the reason why this trait is marked unsafe, see them
161/// parent trait `Searcher`.
162pub unsafe trait ReverseSearcher<'a>: Searcher<'a> {
163 /// Performs the next search step starting from the back.
164 ///
165 /// - Returns `Match(a, b)` if `haystack[a..b]` matches the pattern.
166 /// - Returns `Reject(a, b)` if `haystack[a..b]` can not match the
167 /// pattern, even partially.
168 /// - Returns `Done` if every byte of the haystack has been visited
169 ///
170 /// The stream of `Match` and `Reject` values up to a `Done`
171 /// will contain index ranges that are adjacent, non-overlapping,
172 /// covering the whole haystack, and laying on utf8 boundaries.
173 ///
174 /// A `Match` result needs to contain the whole matched pattern,
175 /// however `Reject` results may be split up into arbitrary
176 /// many adjacent fragments. Both ranges may have zero length.
177 ///
178 /// As an example, the pattern `"aaa"` and the haystack `"cbaaaaab"`
179 /// might produce the stream
180 /// `[Reject(7, 8), Match(4, 7), Reject(1, 4), Reject(0, 1)]`
181 fn next_back(&mut self) -> SearchStep;
182
183 /// Finds the next `Match` result. See `next_back()`
184 #[inline]
185 fn next_match_back(&mut self) -> Option<(usize, usize)> {
186 loop {
187 match self.next_back() {
188 SearchStep::Match(a, b) => return Some((a, b)),
189 SearchStep::Done => return None,
190 _ => continue,
191 }
192 }
193 }
194
195 /// Finds the next `Reject` result. See `next_back()`
196 #[inline]
197 fn next_reject_back(&mut self) -> Option<(usize, usize)> {
198 loop {
199 match self.next_back() {
200 SearchStep::Reject(a, b) => return Some((a, b)),
201 SearchStep::Done => return None,
202 _ => continue,
203 }
204 }
205 }
206}
207
208/// A marker trait to express that a `ReverseSearcher`
209/// can be used for a `DoubleEndedIterator` implementation.
210///
211/// For this, the impl of `Searcher` and `ReverseSearcher` need
212/// to follow these conditions:
213///
214/// - All results of `next()` need to be identical
215/// to the results of `next_back()` in reverse order.
216/// - `next()` and `next_back()` need to behave as
217/// the two ends of a range of values, that is they
218/// can not "walk past each other".
219///
220/// # Examples
221///
222/// `char::Searcher` is a `DoubleEndedSearcher` because searching for a
223/// `char` only requires looking at one at a time, which behaves the same
224/// from both ends.
225///
226/// `(&str)::Searcher` is not a `DoubleEndedSearcher` because
227/// the pattern `"aa"` in the haystack `"aaa"` matches as either
228/// `"[aa]a"` or `"a[aa]"`, depending from which side it is searched.
229pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
230
231/////////////////////////////////////////////////////////////////////////////
232// Impl for char
233/////////////////////////////////////////////////////////////////////////////
234
235/// Associated type for `<char as Pattern<'a>>::Searcher`.
236#[derive(Clone, Debug)]
237pub struct CharSearcher<'a> {
238 haystack: &'a str,
239 // safety invariant: `finger`/`finger_back` must be a valid utf8 byte index of `haystack`
240 // This invariant can be broken *within* next_match and next_match_back, however
241 // they must exit with fingers on valid code point boundaries.
242 /// `finger` is the current byte index of the forward search.
243 /// Imagine that it exists before the byte at its index, i.e.
244 /// `haystack[finger]` is the first byte of the slice we must inspect during
245 /// forward searching
246 finger: usize,
247 /// `finger_back` is the current byte index of the reverse search.
248 /// Imagine that it exists after the byte at its index, i.e.
249 /// haystack[finger_back - 1] is the last byte of the slice we must inspect during
250 /// forward searching (and thus the first byte to be inspected when calling next_back())
251 finger_back: usize,
252 /// The character being searched for
253 needle: char,
254
255 // safety invariant: `utf8_size` must be less than 5
256 /// The number of bytes `needle` takes up when encoded in utf8
257 utf8_size: usize,
258 /// A utf8 encoded copy of the `needle`
259 utf8_encoded: [u8; 4],
260}
261
262unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
263 #[inline]
264 fn haystack(&self) -> &'a str {
265 self.haystack
266 }
267 #[inline]
268 fn next(&mut self) -> SearchStep {
269 let old_finger = self.finger;
270 let slice = unsafe { self.haystack.get_unchecked(old_finger..self.finger_back) };
271 let mut iter = slice.chars();
272 let old_len = iter.size_hint().1.unwrap();
273 if let Some(ch) = iter.next() {
274 // add byte offset of current character
275 // without re-encoding as utf-8
276 self.finger += old_len - iter.size_hint().1.unwrap();
277 if ch == self.needle {
278 SearchStep::Match(old_finger, self.finger)
279 } else {
280 SearchStep::Reject(old_finger, self.finger)
281 }
282 } else {
283 SearchStep::Done
284 }
285 }
286 #[inline]
287 fn next_match(&mut self) -> Option<(usize, usize)> {
288 loop {
289 // get the haystack after the last character found
290 let bytes =
291 if let Some(slice) = self.haystack.as_bytes().get(self.finger..self.finger_back) {
292 slice
293 } else {
294 return None;
295 };
296 // the last byte of the utf8 encoded needle
297 let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
298 if let Some(index) = memchr::memchr(last_byte, bytes) {
299 // The new finger is the index of the byte we found,
300 // plus one, since we memchr'd for the last byte of the character.
301 //
302 // Note that this doesn't always give us a finger on a UTF8 boundary.
303 // If we *didn't* find our character
304 // we may have indexed to the non-last byte of a 3-byte or 4-byte character.
305 // We can't just skip to the next valid starting byte because a character like
306 // ꁁ (U+A041 YI SYLLABLE PA), utf-8 `EA 81 81` will have us always find
307 // the second byte when searching for the third.
308 //
309 // However, this is totally okay. While we have the invariant that
310 // self.finger is on a UTF8 boundary, this invariant is not relied upon
311 // within this method (it is relied upon in CharSearcher::next()).
312 //
313 // We only exit this method when we reach the end of the string, or if we
314 // find something. When we find something the `finger` will be set
315 // to a UTF8 boundary.
316 self.finger += index + 1;
317 if self.finger >= self.utf8_size {
318 let found_char = self.finger - self.utf8_size;
319 if let Some(slice) = self.haystack.as_bytes().get(found_char..self.finger) {
320 if slice == &self.utf8_encoded[0..self.utf8_size] {
321 return Some((found_char, self.finger));
322 }
323 }
324 }
325 } else {
326 // found nothing, exit
327 self.finger = self.finger_back;
328 return None;
329 }
330 }
331 }
332
333 // let next_reject use the default implementation from the Searcher trait
334}
335
336unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
337 #[inline]
338 fn next_back(&mut self) -> SearchStep {
339 let old_finger = self.finger_back;
340 let slice = unsafe { self.haystack.get_unchecked(self.finger..old_finger) };
341 let mut iter = slice.chars();
342 let old_len = iter.size_hint().1.unwrap();
343 if let Some(ch) = iter.next_back() {
344 // subtract byte offset of current character
345 // without re-encoding as utf-8
346 self.finger_back -= old_len - iter.size_hint().1.unwrap();
347 if ch == self.needle {
348 SearchStep::Match(self.finger_back, old_finger)
349 } else {
350 SearchStep::Reject(self.finger_back, old_finger)
351 }
352 } else {
353 SearchStep::Done
354 }
355 }
356 #[inline]
357 fn next_match_back(&mut self) -> Option<(usize, usize)> {
358 let haystack = self.haystack.as_bytes();
359 loop {
360 // get the haystack up to but not including the last character searched
361 let bytes = if let Some(slice) = haystack.get(self.finger..self.finger_back) {
362 slice
363 } else {
364 return None;
365 };
366 // the last byte of the utf8 encoded needle
367 let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
368 if let Some(index) = memchr::memrchr(last_byte, bytes) {
369 // we searched a slice that was offset by self.finger,
370 // add self.finger to recoup the original index
371 let index = self.finger + index;
372 // memrchr will return the index of the byte we wish to
373 // find. In case of an ASCII character, this is indeed
374 // were we wish our new finger to be ("after" the found
375 // char in the paradigm of reverse iteration). For
376 // multibyte chars we need to skip down by the number of more
377 // bytes they have than ASCII
378 let shift = self.utf8_size - 1;
379 if index >= shift {
380 let found_char = index - shift;
381 if let Some(slice) = haystack.get(found_char..(found_char + self.utf8_size)) {
382 if slice == &self.utf8_encoded[0..self.utf8_size] {
383 // move finger to before the character found (i.e., at its start index)
384 self.finger_back = found_char;
385 return Some((self.finger_back, self.finger_back + self.utf8_size));
386 }
387 }
388 }
389 // We can't use finger_back = index - size + 1 here. If we found the last char
390 // of a different-sized character (or the middle byte of a different character)
391 // we need to bump the finger_back down to `index`. This similarly makes
392 // `finger_back` have the potential to no longer be on a boundary,
393 // but this is OK since we only exit this function on a boundary
394 // or when the haystack has been searched completely.
395 //
396 // Unlike next_match this does not
397 // have the problem of repeated bytes in utf-8 because
398 // we're searching for the last byte, and we can only have
399 // found the last byte when searching in reverse.
400 self.finger_back = index;
401 } else {
402 self.finger_back = self.finger;
403 // found nothing, exit
404 return None;
405 }
406 }
407 }
408
409 // let next_reject_back use the default implementation from the Searcher trait
410}
411
412impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {}
413
414/// Searches for chars that are equal to a given char
415impl<'a> Pattern<'a> for char {
416 type Searcher = CharSearcher<'a>;
417
418 #[inline]
419 fn into_searcher(self, haystack: &'a str) -> Self::Searcher {
420 let mut utf8_encoded = [0; 4];
421 let utf8_size = self.encode_utf8(&mut utf8_encoded).len();
422 CharSearcher {
423 haystack,
424 finger: 0,
425 finger_back: haystack.len(),
426 needle: self,
427 utf8_size,
428 utf8_encoded,
429 }
430 }
431
432 #[inline]
433 fn is_contained_in(self, haystack: &'a str) -> bool {
434 if (self as u32) < 128 {
435 haystack.as_bytes().contains(&(self as u8))
436 } else {
437 let mut buffer = [0u8; 4];
438 self.encode_utf8(&mut buffer).is_contained_in(haystack)
439 }
440 }
441
442 #[inline]
443 fn is_prefix_of(self, haystack: &'a str) -> bool {
444 if let Some(ch) = haystack.chars().next() {
445 self == ch
446 } else {
447 false
448 }
449 }
450
451 #[inline]
452 fn is_suffix_of(self, haystack: &'a str) -> bool
453 where
454 Self::Searcher: ReverseSearcher<'a>,
455 {
456 if let Some(ch) = haystack.chars().next_back() {
457 self == ch
458 } else {
459 false
460 }
461 }
462}
463
464/////////////////////////////////////////////////////////////////////////////
465// Impl for a MultiCharEq wrapper
466/////////////////////////////////////////////////////////////////////////////
467
468#[doc(hidden)]
469trait MultiCharEq {
470 fn matches(&mut self, c: char) -> bool;
471}
472
473impl<F> MultiCharEq for F
474where
475 F: FnMut(char) -> bool,
476{
477 #[inline]
478 fn matches(&mut self, c: char) -> bool {
479 (*self)(c)
480 }
481}
482
483impl MultiCharEq for &[char] {
484 #[inline]
485 fn matches(&mut self, c: char) -> bool {
486 self.iter().any(|&m| m == c)
487 }
488}
489
490struct MultiCharEqPattern<C: MultiCharEq>(C);
491
492#[derive(Clone, Debug)]
493struct MultiCharEqSearcher<'a, C: MultiCharEq> {
494 char_eq: C,
495 haystack: &'a str,
496 char_indices: CharIndices<'a>,
497}
498
499impl<'a, C: MultiCharEq> Pattern<'a> for MultiCharEqPattern<C> {
500 type Searcher = MultiCharEqSearcher<'a, C>;
501
502 #[inline]
503 fn into_searcher(self, haystack: &'a str) -> MultiCharEqSearcher<'a, C> {
504 MultiCharEqSearcher {
505 haystack,
506 char_eq: self.0,
507 char_indices: haystack.char_indices(),
508 }
509 }
510}
511
512unsafe impl<'a, C: MultiCharEq> Searcher<'a> for MultiCharEqSearcher<'a, C> {
513 #[inline]
514 fn haystack(&self) -> &'a str {
515 self.haystack
516 }
517
518 #[inline]
519 fn next(&mut self) -> SearchStep {
520 let s = &mut self.char_indices;
521 // Compare lengths of the internal byte slice iterator
522 // to find length of current char
523 let pre_len = s.size_hint().1.unwrap();
524 if let Some((i, c)) = s.next() {
525 let len = s.size_hint().1.unwrap();
526 let char_len = pre_len - len;
527 if self.char_eq.matches(c) {
528 return SearchStep::Match(i, i + char_len);
529 } else {
530 return SearchStep::Reject(i, i + char_len);
531 }
532 }
533 SearchStep::Done
534 }
535}
536
537unsafe impl<'a, C: MultiCharEq> ReverseSearcher<'a> for MultiCharEqSearcher<'a, C> {
538 #[inline]
539 fn next_back(&mut self) -> SearchStep {
540 let s = &mut self.char_indices;
541 // Compare lengths of the internal byte slice iterator
542 // to find length of current char
543 let pre_len = s.size_hint().1.unwrap();
544 if let Some((i, c)) = s.next_back() {
545 let len = s.size_hint().1.unwrap();
546 let char_len = pre_len - len;
547 if self.char_eq.matches(c) {
548 return SearchStep::Match(i, i + char_len);
549 } else {
550 return SearchStep::Reject(i, i + char_len);
551 }
552 }
553 SearchStep::Done
554 }
555}
556
557impl<'a, C: MultiCharEq> DoubleEndedSearcher<'a> for MultiCharEqSearcher<'a, C> {}
558
559/////////////////////////////////////////////////////////////////////////////
560
561macro_rules! pattern_methods {
562 ($t:ty, $pmap:expr, $smap:expr) => {
563 type Searcher = $t;
564
565 #[inline]
566 fn into_searcher(self, haystack: &'a str) -> $t {
567 ($smap)(($pmap)(self).into_searcher(haystack))
568 }
569
570 #[inline]
571 fn is_contained_in(self, haystack: &'a str) -> bool {
572 ($pmap)(self).is_contained_in(haystack)
573 }
574
575 #[inline]
576 fn is_prefix_of(self, haystack: &'a str) -> bool {
577 ($pmap)(self).is_prefix_of(haystack)
578 }
579
580 #[inline]
581 fn is_suffix_of(self, haystack: &'a str) -> bool
582 where $t: ReverseSearcher<'a>
583 {
584 ($pmap)(self).is_suffix_of(haystack)
585 }
586 }
587}
588
589macro_rules! searcher_methods {
590 (forward) => {
591 #[inline]
592 fn haystack(&self) -> &'a str {
593 self.0.haystack()
594 }
595 #[inline]
596 fn next(&mut self) -> SearchStep {
597 self.0.next()
598 }
599 #[inline]
600 fn next_match(&mut self) -> Option<(usize, usize)> {
601 self.0.next_match()
602 }
603 #[inline]
604 fn next_reject(&mut self) -> Option<(usize, usize)> {
605 self.0.next_reject()
606 }
607 };
608 (reverse) => {
609 #[inline]
610 fn next_back(&mut self) -> SearchStep {
611 self.0.next_back()
612 }
613 #[inline]
614 fn next_match_back(&mut self) -> Option<(usize, usize)> {
615 self.0.next_match_back()
616 }
617 #[inline]
618 fn next_reject_back(&mut self) -> Option<(usize, usize)> {
619 self.0.next_reject_back()
620 }
621 }
622}
623
624/////////////////////////////////////////////////////////////////////////////
625// Impl for &[char]
626/////////////////////////////////////////////////////////////////////////////
627
628// Todo: Change / Remove due to ambiguity in meaning.
629
630/// Associated type for `<&[char] as Pattern<'a>>::Searcher`.
631#[derive(Clone, Debug)]
632pub struct CharSliceSearcher<'a, 'b>(<MultiCharEqPattern<&'b [char]> as Pattern<'a>>::Searcher);
633
634unsafe impl<'a, 'b> Searcher<'a> for CharSliceSearcher<'a, 'b> {
635 searcher_methods!(forward);
636}
637
638unsafe impl<'a, 'b> ReverseSearcher<'a> for CharSliceSearcher<'a, 'b> {
639 searcher_methods!(reverse);
640}
641
642impl<'a, 'b> DoubleEndedSearcher<'a> for CharSliceSearcher<'a, 'b> {}
643
644/// Searches for chars that are equal to any of the chars in the array
645impl<'a, 'b> Pattern<'a> for &'b [char] {
646 pattern_methods!(
647 CharSliceSearcher<'a, 'b>,
648 MultiCharEqPattern,
649 CharSliceSearcher
650 );
651}
652
653/////////////////////////////////////////////////////////////////////////////
654// Impl for F: FnMut(char) -> bool
655/////////////////////////////////////////////////////////////////////////////
656
657/// Associated type for `<F as Pattern<'a>>::Searcher`.
658#[derive(Clone)]
659pub struct CharPredicateSearcher<'a, F>(<MultiCharEqPattern<F> as Pattern<'a>>::Searcher)
660where
661 F: FnMut(char) -> bool;
662
663impl<F> fmt::Debug for CharPredicateSearcher<'_, F>
664where
665 F: FnMut(char) -> bool,
666{
667 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
668 f.debug_struct("CharPredicateSearcher")
669 .field("haystack", &self.0.haystack)
670 .field("char_indices", &self.0.char_indices)
671 .finish()
672 }
673}
674unsafe impl<'a, F> Searcher<'a> for CharPredicateSearcher<'a, F>
675where
676 F: FnMut(char) -> bool,
677{
678 searcher_methods!(forward);
679}
680
681unsafe impl<'a, F> ReverseSearcher<'a> for CharPredicateSearcher<'a, F>
682where
683 F: FnMut(char) -> bool,
684{
685 searcher_methods!(reverse);
686}
687
688impl<'a, F> DoubleEndedSearcher<'a> for CharPredicateSearcher<'a, F> where F: FnMut(char) -> bool {}
689
690/// Searches for chars that match the given predicate
691impl<'a, F> Pattern<'a> for F
692where
693 F: FnMut(char) -> bool,
694{
695 pattern_methods!(
696 CharPredicateSearcher<'a, F>,
697 MultiCharEqPattern,
698 CharPredicateSearcher
699 );
700}
701
702/////////////////////////////////////////////////////////////////////////////
703// Impl for &&str
704/////////////////////////////////////////////////////////////////////////////
705
706/// Delegates to the `&str` impl.
707impl<'a, 'b, 'c> Pattern<'a> for &'c &'b str {
708 pattern_methods!(StrSearcher<'a, 'b>, |&s| s, |s| s);
709}
710
711/////////////////////////////////////////////////////////////////////////////
712// Impl for &str
713/////////////////////////////////////////////////////////////////////////////
714
715/// Non-allocating substring search.
716///
717/// Will handle the pattern `""` as returning empty matches at each character
718/// boundary.
719impl<'a, 'b> Pattern<'a> for &'b str {
720 type Searcher = StrSearcher<'a, 'b>;
721
722 #[inline]
723 fn into_searcher(self, haystack: &'a str) -> StrSearcher<'a, 'b> {
724 StrSearcher::new(haystack, self)
725 }
726
727 /// Checks whether the pattern matches at the front of the haystack
728 #[inline]
729 fn is_prefix_of(self, haystack: &'a str) -> bool {
730 haystack.is_char_boundary(self.len()) && self == &haystack[..self.len()]
731 }
732
733 /// Checks whether the pattern matches at the back of the haystack
734 #[inline]
735 fn is_suffix_of(self, haystack: &'a str) -> bool {
736 self.len() <= haystack.len()
737 && haystack.is_char_boundary(haystack.len() - self.len())
738 && self == &haystack[haystack.len() - self.len()..]
739 }
740}
741
742/////////////////////////////////////////////////////////////////////////////
743// Two Way substring searcher
744/////////////////////////////////////////////////////////////////////////////
745
746#[derive(Clone, Debug)]
747/// Associated type for `<&str as Pattern<'a>>::Searcher`.
748pub struct StrSearcher<'a, 'b> {
749 haystack: &'a str,
750 needle: &'b str,
751
752 searcher: StrSearcherImpl,
753}
754
755#[derive(Clone, Debug)]
756enum StrSearcherImpl {
757 Empty(EmptyNeedle),
758 TwoWay(TwoWaySearcher),
759}
760
761#[derive(Clone, Debug)]
762struct EmptyNeedle {
763 position: usize,
764 end: usize,
765 is_match_fw: bool,
766 is_match_bw: bool,
767}
768
769impl<'a, 'b> StrSearcher<'a, 'b> {
770 fn new(haystack: &'a str, needle: &'b str) -> StrSearcher<'a, 'b> {
771 if needle.is_empty() {
772 StrSearcher {
773 haystack,
774 needle,
775 searcher: StrSearcherImpl::Empty(EmptyNeedle {
776 position: 0,
777 end: haystack.len(),
778 is_match_fw: true,
779 is_match_bw: true,
780 }),
781 }
782 } else {
783 StrSearcher {
784 haystack,
785 needle,
786 searcher: StrSearcherImpl::TwoWay(TwoWaySearcher::new(
787 needle.as_bytes(),
788 haystack.len(),
789 )),
790 }
791 }
792 }
793}
794
795unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> {
796 #[inline]
797 fn haystack(&self) -> &'a str {
798 self.haystack
799 }
800
801 #[inline]
802 fn next(&mut self) -> SearchStep {
803 match self.searcher {
804 StrSearcherImpl::Empty(ref mut searcher) => {
805 // empty needle rejects every char and matches every empty string between them
806 let is_match = searcher.is_match_fw;
807 searcher.is_match_fw = !searcher.is_match_fw;
808 let pos = searcher.position;
809 match self.haystack[pos..].chars().next() {
810 _ if is_match => SearchStep::Match(pos, pos),
811 None => SearchStep::Done,
812 Some(ch) => {
813 searcher.position += ch.len_utf8();
814 SearchStep::Reject(pos, searcher.position)
815 }
816 }
817 }
818 StrSearcherImpl::TwoWay(ref mut searcher) => {
819 // TwoWaySearcher produces valid *Match* indices that split at char boundaries
820 // as long as it does correct matching and that haystack and needle are
821 // valid UTF-8
822 // *Rejects* from the algorithm can fall on any indices, but we will walk them
823 // manually to the next character boundary, so that they are utf-8 safe.
824 if searcher.position == self.haystack.len() {
825 return SearchStep::Done;
826 }
827 let is_long = searcher.memory == usize::MAX;
828 match searcher.next::<RejectAndMatch>(
829 self.haystack.as_bytes(),
830 self.needle.as_bytes(),
831 is_long,
832 ) {
833 SearchStep::Reject(a, mut b) => {
834 // skip to next char boundary
835 while !self.haystack.is_char_boundary(b) {
836 b += 1;
837 }
838 searcher.position = cmp::max(b, searcher.position);
839 SearchStep::Reject(a, b)
840 }
841 otherwise => otherwise,
842 }
843 }
844 }
845 }
846
847 #[inline]
848 fn next_match(&mut self) -> Option<(usize, usize)> {
849 match self.searcher {
850 StrSearcherImpl::Empty(..) => loop {
851 match self.next() {
852 SearchStep::Match(a, b) => return Some((a, b)),
853 SearchStep::Done => return None,
854 SearchStep::Reject(..) => {}
855 }
856 },
857 StrSearcherImpl::TwoWay(ref mut searcher) => {
858 let is_long = searcher.memory == usize::MAX;
859 // write out `true` and `false` cases to encourage the compiler
860 // to specialize the two cases separately.
861 if is_long {
862 searcher.next::<MatchOnly>(
863 self.haystack.as_bytes(),
864 self.needle.as_bytes(),
865 true,
866 )
867 } else {
868 searcher.next::<MatchOnly>(
869 self.haystack.as_bytes(),
870 self.needle.as_bytes(),
871 false,
872 )
873 }
874 }
875 }
876 }
877}
878
879unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> {
880 #[inline]
881 fn next_back(&mut self) -> SearchStep {
882 match self.searcher {
883 StrSearcherImpl::Empty(ref mut searcher) => {
884 let is_match = searcher.is_match_bw;
885 searcher.is_match_bw = !searcher.is_match_bw;
886 let end = searcher.end;
887 match self.haystack[..end].chars().next_back() {
888 _ if is_match => SearchStep::Match(end, end),
889 None => SearchStep::Done,
890 Some(ch) => {
891 searcher.end -= ch.len_utf8();
892 SearchStep::Reject(searcher.end, end)
893 }
894 }
895 }
896 StrSearcherImpl::TwoWay(ref mut searcher) => {
897 if searcher.end == 0 {
898 return SearchStep::Done;
899 }
900 let is_long = searcher.memory == usize::MAX;
901 match searcher.next_back::<RejectAndMatch>(
902 self.haystack.as_bytes(),
903 self.needle.as_bytes(),
904 is_long,
905 ) {
906 SearchStep::Reject(mut a, b) => {
907 // skip to next char boundary
908 while !self.haystack.is_char_boundary(a) {
909 a -= 1;
910 }
911 searcher.end = cmp::min(a, searcher.end);
912 SearchStep::Reject(a, b)
913 }
914 otherwise => otherwise,
915 }
916 }
917 }
918 }
919
920 #[inline]
921 fn next_match_back(&mut self) -> Option<(usize, usize)> {
922 match self.searcher {
923 StrSearcherImpl::Empty(..) => loop {
924 match self.next_back() {
925 SearchStep::Match(a, b) => return Some((a, b)),
926 SearchStep::Done => return None,
927 SearchStep::Reject(..) => {}
928 }
929 },
930 StrSearcherImpl::TwoWay(ref mut searcher) => {
931 let is_long = searcher.memory == usize::MAX;
932 // write out `true` and `false`, like `next_match`
933 if is_long {
934 searcher.next_back::<MatchOnly>(
935 self.haystack.as_bytes(),
936 self.needle.as_bytes(),
937 true,
938 )
939 } else {
940 searcher.next_back::<MatchOnly>(
941 self.haystack.as_bytes(),
942 self.needle.as_bytes(),
943 false,
944 )
945 }
946 }
947 }
948 }
949}
950
951/// The internal state of the two-way substring search algorithm.
952#[derive(Clone, Debug)]
953struct TwoWaySearcher {
954 // constants
955 /// critical factorization index
956 crit_pos: usize,
957 /// critical factorization index for reversed needle
958 crit_pos_back: usize,
959 period: usize,
960 /// `byteset` is an extension (not part of the two way algorithm);
961 /// it's a 64-bit "fingerprint" where each set bit `j` corresponds
962 /// to a (byte & 63) == j present in the needle.
963 byteset: u64,
964
965 // variables
966 position: usize,
967 end: usize,
968 /// index into needle before which we have already matched
969 memory: usize,
970 /// index into needle after which we have already matched
971 memory_back: usize,
972}
973
974/*
975 This is the Two-Way search algorithm, which was introduced in the paper:
976 Crochemore, M., Perrin, D., 1991, Two-way string-matching, Journal of the ACM 38(3):651-675.
977
978 Here's some background information.
979
980 A *word* is a string of symbols. The *length* of a word should be a familiar
981 notion, and here we denote it for any word x by |x|.
982 (We also allow for the possibility of the *empty word*, a word of length zero).
983
984 If x is any non-empty word, then an integer p with 0 < p <= |x| is said to be a
985 *period* for x iff for all i with 0 <= i <= |x| - p - 1, we have x[i] == x[i+p].
986 For example, both 1 and 2 are periods for the string "aa". As another example,
987 the only period of the string "abcd" is 4.
988
989 We denote by period(x) the *smallest* period of x (provided that x is non-empty).
990 This is always well-defined since every non-empty word x has at least one period,
991 |x|. We sometimes call this *the period* of x.
992
993 If u, v and x are words such that x = uv, where uv is the concatenation of u and
994 v, then we say that (u, v) is a *factorization* of x.
995
996 Let (u, v) be a factorization for a word x. Then if w is a non-empty word such
997 that both of the following hold
998
999 - either w is a suffix of u or u is a suffix of w
1000 - either w is a prefix of v or v is a prefix of w
1001
1002 then w is said to be a *repetition* for the factorization (u, v).
1003
1004 Just to unpack this, there are four possibilities here. Let w = "abc". Then we
1005 might have:
1006
1007 - w is a suffix of u and w is a prefix of v. ex: ("lolabc", "abcde")
1008 - w is a suffix of u and v is a prefix of w. ex: ("lolabc", "ab")
1009 - u is a suffix of w and w is a prefix of v. ex: ("bc", "abchi")
1010 - u is a suffix of w and v is a prefix of w. ex: ("bc", "a")
1011
1012 Note that the word vu is a repetition for any factorization (u,v) of x = uv,
1013 so every factorization has at least one repetition.
1014
1015 If x is a string and (u, v) is a factorization for x, then a *local period* for
1016 (u, v) is an integer r such that there is some word w such that |w| = r and w is
1017 a repetition for (u, v).
1018
1019 We denote by local_period(u, v) the smallest local period of (u, v). We sometimes
1020 call this *the local period* of (u, v). Provided that x = uv is non-empty, this
1021 is well-defined (because each non-empty word has at least one factorization, as
1022 noted above).
1023
1024 It can be proven that the following is an equivalent definition of a local period
1025 for a factorization (u, v): any positive integer r such that x[i] == x[i+r] for
1026 all i such that |u| - r <= i <= |u| - 1 and such that both x[i] and x[i+r] are
1027 defined. (i.e., i > 0 and i + r < |x|).
1028
1029 Using the above reformulation, it is easy to prove that
1030
1031 1 <= local_period(u, v) <= period(uv)
1032
1033 A factorization (u, v) of x such that local_period(u,v) = period(x) is called a
1034 *critical factorization*.
1035
1036 The algorithm hinges on the following theorem, which is stated without proof:
1037
1038 **Critical Factorization Theorem** Any word x has at least one critical
1039 factorization (u, v) such that |u| < period(x).
1040
1041 The purpose of maximal_suffix is to find such a critical factorization.
1042
1043 If the period is short, compute another factorization x = u' v' to use
1044 for reverse search, chosen instead so that |v'| < period(x).
1045
1046*/
1047impl TwoWaySearcher {
1048 fn new(needle: &[u8], end: usize) -> TwoWaySearcher {
1049 let (crit_pos_false, period_false) = TwoWaySearcher::maximal_suffix(needle, false);
1050 let (crit_pos_true, period_true) = TwoWaySearcher::maximal_suffix(needle, true);
1051
1052 let (crit_pos, period) = if crit_pos_false > crit_pos_true {
1053 (crit_pos_false, period_false)
1054 } else {
1055 (crit_pos_true, period_true)
1056 };
1057
1058 // A particularly readable explanation of what's going on here can be found
1059 // in Crochemore and Rytter's book "Text Algorithms", ch 13. Specifically
1060 // see the code for "Algorithm CP" on p. 323.
1061 //
1062 // What's going on is we have some critical factorization (u, v) of the
1063 // needle, and we want to determine whether u is a suffix of
1064 // &v[..period]. If it is, we use "Algorithm CP1". Otherwise we use
1065 // "Algorithm CP2", which is optimized for when the period of the needle
1066 // is large.
1067 if &needle[..crit_pos] == &needle[period..period + crit_pos] {
1068 // short period case -- the period is exact
1069 // compute a separate critical factorization for the reversed needle
1070 // x = u' v' where |v'| < period(x).
1071 //
1072 // This is sped up by the period being known already.
1073 // Note that a case like x = "acba" may be factored exactly forwards
1074 // (crit_pos = 1, period = 3) while being factored with approximate
1075 // period in reverse (crit_pos = 2, period = 2). We use the given
1076 // reverse factorization but keep the exact period.
1077 let crit_pos_back = needle.len()
1078 - cmp::max(
1079 TwoWaySearcher::reverse_maximal_suffix(needle, period, false),
1080 TwoWaySearcher::reverse_maximal_suffix(needle, period, true),
1081 );
1082
1083 TwoWaySearcher {
1084 crit_pos,
1085 crit_pos_back,
1086 period,
1087 byteset: Self::byteset_create(&needle[..period]),
1088
1089 position: 0,
1090 end,
1091 memory: 0,
1092 memory_back: needle.len(),
1093 }
1094 } else {
1095 // long period case -- we have an approximation to the actual period,
1096 // and don't use memorization.
1097 //
1098 // Approximate the period by lower bound max(|u|, |v|) + 1.
1099 // The critical factorization is efficient to use for both forward and
1100 // reverse search.
1101
1102 TwoWaySearcher {
1103 crit_pos,
1104 crit_pos_back: crit_pos,
1105 period: cmp::max(crit_pos, needle.len() - crit_pos) + 1,
1106 byteset: Self::byteset_create(needle),
1107
1108 position: 0,
1109 end,
1110 memory: usize::MAX, // Dummy value to signify that the period is long
1111 memory_back: usize::MAX,
1112 }
1113 }
1114 }
1115
1116 #[inline]
1117 fn byteset_create(bytes: &[u8]) -> u64 {
1118 bytes.iter().fold(0, |a, &b| (1 << (b & 0x3f)) | a)
1119 }
1120
1121 #[inline]
1122 fn byteset_contains(&self, byte: u8) -> bool {
1123 (self.byteset >> ((byte & 0x3f) as usize)) & 1 != 0
1124 }
1125
1126 // One of the main ideas of Two-Way is that we factorize the needle into
1127 // two halves, (u, v), and begin trying to find v in the haystack by scanning
1128 // left to right. If v matches, we try to match u by scanning right to left.
1129 // How far we can jump when we encounter a mismatch is all based on the fact
1130 // that (u, v) is a critical factorization for the needle.
1131 #[inline]
1132 fn next<S>(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> S::Output
1133 where
1134 S: TwoWayStrategy,
1135 {
1136 // `next()` uses `self.position` as its cursor
1137 let old_pos = self.position;
1138 let needle_last = needle.len() - 1;
1139 'search: loop {
1140 // Check that we have room to search in
1141 // position + needle_last can not overflow if we assume slices
1142 // are bounded by isize's range.
1143 let tail_byte = match haystack.get(self.position + needle_last) {
1144 Some(&b) => b,
1145 None => {
1146 self.position = haystack.len();
1147 return S::rejecting(old_pos, self.position);
1148 }
1149 };
1150
1151 if S::use_early_reject() && old_pos != self.position {
1152 return S::rejecting(old_pos, self.position);
1153 }
1154
1155 // Quickly skip by large portions unrelated to our substring
1156 if !self.byteset_contains(tail_byte) {
1157 self.position += needle.len();
1158 if !long_period {
1159 self.memory = 0;
1160 }
1161 continue 'search;
1162 }
1163
1164 // See if the right part of the needle matches
1165 let start = if long_period {
1166 self.crit_pos
1167 } else {
1168 cmp::max(self.crit_pos, self.memory)
1169 };
1170 for i in start..needle.len() {
1171 if needle[i] != haystack[self.position + i] {
1172 self.position += i - self.crit_pos + 1;
1173 if !long_period {
1174 self.memory = 0;
1175 }
1176 continue 'search;
1177 }
1178 }
1179
1180 // See if the left part of the needle matches
1181 let start = if long_period { 0 } else { self.memory };
1182 for i in (start..self.crit_pos).rev() {
1183 if needle[i] != haystack[self.position + i] {
1184 self.position += self.period;
1185 if !long_period {
1186 self.memory = needle.len() - self.period;
1187 }
1188 continue 'search;
1189 }
1190 }
1191
1192 // We have found a match!
1193 let match_pos = self.position;
1194
1195 // Note: add self.period instead of needle.len() to have overlapping matches
1196 self.position += needle.len();
1197 if !long_period {
1198 self.memory = 0; // set to needle.len() - self.period for overlapping matches
1199 }
1200
1201 return S::matching(match_pos, match_pos + needle.len());
1202 }
1203 }
1204
1205 // Follows the ideas in `next()`.
1206 //
1207 // The definitions are symmetrical, with period(x) = period(reverse(x))
1208 // and local_period(u, v) = local_period(reverse(v), reverse(u)), so if (u, v)
1209 // is a critical factorization, so is (reverse(v), reverse(u)).
1210 //
1211 // For the reverse case we have computed a critical factorization x = u' v'
1212 // (field `crit_pos_back`). We need |u| < period(x) for the forward case and
1213 // thus |v'| < period(x) for the reverse.
1214 //
1215 // To search in reverse through the haystack, we search forward through
1216 // a reversed haystack with a reversed needle, matching first u' and then v'.
1217 #[inline]
1218 fn next_back<S>(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> S::Output
1219 where
1220 S: TwoWayStrategy,
1221 {
1222 // `next_back()` uses `self.end` as its cursor -- so that `next()` and `next_back()`
1223 // are independent.
1224 let old_end = self.end;
1225 'search: loop {
1226 // Check that we have room to search in
1227 // end - needle.len() will wrap around when there is no more room,
1228 // but due to slice length limits it can never wrap all the way back
1229 // into the length of haystack.
1230 let front_byte = match haystack.get(self.end.wrapping_sub(needle.len())) {
1231 Some(&b) => b,
1232 None => {
1233 self.end = 0;
1234 return S::rejecting(0, old_end);
1235 }
1236 };
1237
1238 if S::use_early_reject() && old_end != self.end {
1239 return S::rejecting(self.end, old_end);
1240 }
1241
1242 // Quickly skip by large portions unrelated to our substring
1243 if !self.byteset_contains(front_byte) {
1244 self.end -= needle.len();
1245 if !long_period {
1246 self.memory_back = needle.len();
1247 }
1248 continue 'search;
1249 }
1250
1251 // See if the left part of the needle matches
1252 let crit = if long_period {
1253 self.crit_pos_back
1254 } else {
1255 cmp::min(self.crit_pos_back, self.memory_back)
1256 };
1257 for i in (0..crit).rev() {
1258 if needle[i] != haystack[self.end - needle.len() + i] {
1259 self.end -= self.crit_pos_back - i;
1260 if !long_period {
1261 self.memory_back = needle.len();
1262 }
1263 continue 'search;
1264 }
1265 }
1266
1267 // See if the right part of the needle matches
1268 let needle_end = if long_period {
1269 needle.len()
1270 } else {
1271 self.memory_back
1272 };
1273 for i in self.crit_pos_back..needle_end {
1274 if needle[i] != haystack[self.end - needle.len() + i] {
1275 self.end -= self.period;
1276 if !long_period {
1277 self.memory_back = self.period;
1278 }
1279 continue 'search;
1280 }
1281 }
1282
1283 // We have found a match!
1284 let match_pos = self.end - needle.len();
1285 // Note: sub self.period instead of needle.len() to have overlapping matches
1286 self.end -= needle.len();
1287 if !long_period {
1288 self.memory_back = needle.len();
1289 }
1290
1291 return S::matching(match_pos, match_pos + needle.len());
1292 }
1293 }
1294
1295 // Compute the maximal suffix of `arr`.
1296 //
1297 // The maximal suffix is a possible critical factorization (u, v) of `arr`.
1298 //
1299 // Returns (`i`, `p`) where `i` is the starting index of v and `p` is the
1300 // period of v.
1301 //
1302 // `order_greater` determines if lexical order is `<` or `>`. Both
1303 // orders must be computed -- the ordering with the largest `i` gives
1304 // a critical factorization.
1305 //
1306 // For long period cases, the resulting period is not exact (it is too short).
1307 #[inline]
1308 fn maximal_suffix(arr: &[u8], order_greater: bool) -> (usize, usize) {
1309 let mut left = 0; // Corresponds to i in the paper
1310 let mut right = 1; // Corresponds to j in the paper
1311 let mut offset = 0; // Corresponds to k in the paper, but starting at 0
1312 // to match 0-based indexing.
1313 let mut period = 1; // Corresponds to p in the paper
1314
1315 while let Some(&a) = arr.get(right + offset) {
1316 // `left` will be inbounds when `right` is.
1317 let b = arr[left + offset];
1318 if (a < b && !order_greater) || (a > b && order_greater) {
1319 // Suffix is smaller, period is entire prefix so far.
1320 right += offset + 1;
1321 offset = 0;
1322 period = right - left;
1323 } else if a == b {
1324 // Advance through repetition of the current period.
1325 if offset + 1 == period {
1326 right += offset + 1;
1327 offset = 0;
1328 } else {
1329 offset += 1;
1330 }
1331 } else {
1332 // Suffix is larger, start over from current location.
1333 left = right;
1334 right += 1;
1335 offset = 0;
1336 period = 1;
1337 }
1338 }
1339 (left, period)
1340 }
1341
1342 // Compute the maximal suffix of the reverse of `arr`.
1343 //
1344 // The maximal suffix is a possible critical factorization (u', v') of `arr`.
1345 //
1346 // Returns `i` where `i` is the starting index of v', from the back;
1347 // returns immediately when a period of `known_period` is reached.
1348 //
1349 // `order_greater` determines if lexical order is `<` or `>`. Both
1350 // orders must be computed -- the ordering with the largest `i` gives
1351 // a critical factorization.
1352 //
1353 // For long period cases, the resulting period is not exact (it is too short).
1354 fn reverse_maximal_suffix(arr: &[u8], known_period: usize, order_greater: bool) -> usize {
1355 let mut left = 0; // Corresponds to i in the paper
1356 let mut right = 1; // Corresponds to j in the paper
1357 let mut offset = 0; // Corresponds to k in the paper, but starting at 0
1358 // to match 0-based indexing.
1359 let mut period = 1; // Corresponds to p in the paper
1360 let n = arr.len();
1361
1362 while right + offset < n {
1363 let a = arr[n - (1 + right + offset)];
1364 let b = arr[n - (1 + left + offset)];
1365 if (a < b && !order_greater) || (a > b && order_greater) {
1366 // Suffix is smaller, period is entire prefix so far.
1367 right += offset + 1;
1368 offset = 0;
1369 period = right - left;
1370 } else if a == b {
1371 // Advance through repetition of the current period.
1372 if offset + 1 == period {
1373 right += offset + 1;
1374 offset = 0;
1375 } else {
1376 offset += 1;
1377 }
1378 } else {
1379 // Suffix is larger, start over from current location.
1380 left = right;
1381 right += 1;
1382 offset = 0;
1383 period = 1;
1384 }
1385 if period == known_period {
1386 break;
1387 }
1388 }
1389 debug_assert!(period <= known_period);
1390 left
1391 }
1392}
1393
1394// TwoWayStrategy allows the algorithm to either skip non-matches as quickly
1395// as possible, or to work in a mode where it emits Rejects relatively quickly.
1396trait TwoWayStrategy {
1397 type Output;
1398 fn use_early_reject() -> bool;
1399 fn rejecting(a: usize, b: usize) -> Self::Output;
1400 fn matching(a: usize, b: usize) -> Self::Output;
1401}
1402
1403/// Skip to match intervals as quickly as possible
1404enum MatchOnly {}
1405
1406impl TwoWayStrategy for MatchOnly {
1407 type Output = Option<(usize, usize)>;
1408
1409 #[inline]
1410 fn use_early_reject() -> bool {
1411 false
1412 }
1413 #[inline]
1414 fn rejecting(_a: usize, _b: usize) -> Self::Output {
1415 None
1416 }
1417 #[inline]
1418 fn matching(a: usize, b: usize) -> Self::Output {
1419 Some((a, b))
1420 }
1421}
1422
1423/// Emit Rejects regularly
1424enum RejectAndMatch {}
1425
1426impl TwoWayStrategy for RejectAndMatch {
1427 type Output = SearchStep;
1428
1429 #[inline]
1430 fn use_early_reject() -> bool {
1431 true
1432 }
1433 #[inline]
1434 fn rejecting(a: usize, b: usize) -> Self::Output {
1435 SearchStep::Reject(a, b)
1436 }
1437 #[inline]
1438 fn matching(a: usize, b: usize) -> Self::Output {
1439 SearchStep::Match(a, b)
1440 }
1441}