pattern-3 0.5.0

Needle API (née Pattern API 3.0), generalization of `std::str::pattern`
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
//! Needle traits.

use haystack::{Haystack, Hay, Span};

use std::ops::Range;

/// A searcher, for searching a [`Needle`] from a [`Hay`].
///
/// This trait provides methods for searching for non-overlapping matches of a
/// needle starting from the front (left) of a hay.
///
/// # Safety
///
/// This trait is marked unsafe because the range returned by its methods are
/// required to lie on valid codeword boundaries in the haystack. This enables
/// users of this trait to slice the haystack without additional runtime checks.
///
/// # Examples
///
/// Implement a searcher and consumer which matches `b"Aaaa"` from a byte string.
///
/// ```rust
/// extern crate pattern_3;
/// use pattern_3::*;
/// use std::ops::Range;
///
/// // The searcher for searching `b"Aaaa"`, using naive search.
/// // We are going to use this as a needle too.
/// struct Aaaa;
///
/// unsafe impl Searcher<[u8]> for Aaaa {
///     // search for an `b"Aaaa"` in the middle of the string, returns its range.
///     fn search(&mut self, span: Span<&[u8]>) -> Option<Range<usize>> {
///         let (hay, range) = span.into_parts();
///
///         let start = range.start;
///         for (i, window) in hay[range].windows(4).enumerate() {
///             if *window == b"Aaaa"[..] {
///                 // remember to include the range offset
///                 return Some((start + i)..(start + i + 4));
///             }
///         }
///
///         None
///     }
/// }
///
/// unsafe impl Consumer<[u8]> for Aaaa {
///     // checks if an `b"Aaaa" is at the beginning of the string, returns the end index.
///     fn consume(&mut self, span: Span<&[u8]>) -> Option<usize> {
///         let (hay, range) = span.into_parts();
///         let end = range.start.checked_add(4)?;
///         if end <= range.end && hay[range.start..end] == b"Aaaa"[..] {
///             Some(end)
///         } else {
///             None
///         }
///     }
/// }
///
/// impl<H: Haystack<Target = [u8]>> pattern_3::Needle<H> for Aaaa {
///     type Searcher = Self;
///     type Consumer = Self;
///     fn into_searcher(self) -> Self { self }
///     fn into_consumer(self) -> Self { self }
/// }
///
/// // test with some standard algorithms.
/// let haystack = &b"Aaaaa!!!Aaa!!!Aaaaaaaaa!!!"[..];
/// assert_eq!(
///     ext::split(haystack, Aaaa).collect::<Vec<_>>(),
///     vec![
///         &b""[..],
///         &b"a!!!Aaa!!!"[..],
///         &b"aaaaa!!!"[..],
///     ]
/// );
/// assert_eq!(
///     ext::match_ranges(haystack, Aaaa).collect::<Vec<_>>(),
///     vec![
///         (0..4, &b"Aaaa"[..]),
///         (14..18, &b"Aaaa"[..]),
///     ]
/// );
/// assert_eq!(
///     ext::trim_start(haystack, Aaaa),
///     &b"a!!!Aaa!!!Aaaaaaaaa!!!"[..]
/// );
/// ```
pub unsafe trait Searcher<A: Hay + ?Sized> {
    /// Searches for the first range which the needle can be found in the span.
    ///
    /// This method is used to support the following standard algorithms:
    ///
    /// * [`matches`](::ext::matches)
    /// * [`contains`](::ext::contains)
    /// * [`match_indices`](::ext::match_indices)
    /// * [`find`](::ext::find)
    /// * [`match_ranges`](::ext::match_ranges)
    /// * [`find_range`](::ext::find_range)
    /// * [`split`](::ext::split)
    /// * [`split_terminator`](::ext::split_terminator)
    /// * [`splitn`](::ext::splitn)
    /// * [`replace_with`](::ext::replace_with)
    /// * [`replacen_with`](::ext::replacen_with)
    ///
    /// The hay and the restricted range for searching can be recovered by
    /// calling `span`[`.into_parts()`](Span::into_parts). The range returned
    /// by this method
    /// should be relative to the hay and must be contained within the
    /// restricted range from the span.
    ///
    /// If the needle is not found, this method should return `None`.
    ///
    /// The reason this method takes a `Span<&A>` instead of just `&A` is
    /// because some needles need context information provided by
    /// the position of the current slice and the content around the slice.
    /// Regex components like the start-/end-of-text anchors `^`/`$`
    /// and word boundary `\b` are primary examples.
    ///
    /// # Examples
    ///
    /// Search for the locations of a substring inside a string, using the
    /// searcher primitive.
    ///
    /// ```
    /// extern crate pattern_3;
    /// use pattern_3::{Searcher, Needle, Span};
    ///
    /// let mut searcher = Needle::<&str>::into_searcher("::");
    /// let span = Span::from("lion::tiger::leopard");
    /// //                     ^   ^      ^        ^
    /// // string indices:     0   4     11       20
    ///
    /// // found the first "::".
    /// assert_eq!(searcher.search(span.clone()), Some(4..6));
    ///
    /// // slice the span to skip the first match.
    /// let span = unsafe { span.slice_unchecked(6..20) };
    ///
    /// // found the second "::".
    /// assert_eq!(searcher.search(span.clone()), Some(11..13));
    ///
    /// // should find nothing now.
    /// let span = unsafe { span.slice_unchecked(13..20) };
    /// assert_eq!(searcher.search(span.clone()), None);
    /// ```
    fn search(&mut self, span: Span<&A>) -> Option<Range<A::Index>>;
}

/// A consumer, for searching a [`Needle`] from a [`Hay`] anchored at the
/// beginnning.
///
/// This trait provides methods for matching a needle anchored at the beginning
/// of a hay.
///
/// See documentation of [`Searcher`] for an example.
///
/// # Safety
///
/// This trait is marked unsafe because the range returned by its methods are
/// required to lie on valid codeword boundaries in the haystack. This enables
/// users of this trait to slice the haystack without additional runtime checks.
pub unsafe trait Consumer<A: Hay + ?Sized> {
    /// Checks if the needle can be found at the beginning of the span.
    ///
    /// This method is used to implement the standard algorithm
    /// [`starts_with()`](::ext::starts_with) as well as providing the default
    /// implementation for [`.trim_start()`](Consumer::trim_start).
    ///
    /// The hay and the restricted range for searching can be recovered by
    /// calling `span`[`.into_parts()`](Span::into_parts). If a needle can be
    /// found starting at `range.start`, this method should return the end index
    /// of the needle relative to the hay.
    ///
    /// If the needle cannot be found at the beginning of the span, this method
    /// should return `None`.
    ///
    /// # Examples
    ///
    /// Consumes ASCII characters from the beginning.
    ///
    /// ```
    /// extern crate pattern_3;
    /// use pattern_3::{Consumer, Needle, Span};
    ///
    /// let mut consumer = Needle::<&str>::into_consumer(|c: char| c.is_ascii());
    /// let span = Span::from("Hi😋!!");
    ///
    /// // consumes the first ASCII character
    /// assert_eq!(consumer.consume(span.clone()), Some(1));
    ///
    /// // slice the span to skip the first match.
    /// let span = unsafe { span.slice_unchecked(1..8) };
    ///
    /// // matched the second ASCII character
    /// assert_eq!(consumer.consume(span.clone()), Some(2));
    ///
    /// // should match nothing now.
    /// let span = unsafe { span.slice_unchecked(2..8) };
    /// assert_eq!(consumer.consume(span.clone()), None);
    /// ```
    fn consume(&mut self, span: Span<&A>) -> Option<A::Index>;

    /// Repeatedly removes prefixes of the hay which matches the needle.
    ///
    /// This method is used to implement the standard algorithm
    /// [`trim_start()`](::ext::trim_start).
    ///
    /// Returns the start index of the slice after all prefixes are removed.
    ///
    /// A fast generic implementation in terms of
    /// [`.consume()`](Consumer::consume) is provided by default. Nevertheless,
    /// many needles allow a higher-performance specialization.
    ///
    /// # Examples
    ///
    /// ```rust
    /// extern crate pattern_3;
    /// use pattern_3::{Consumer, Needle, Span};
    ///
    /// let mut consumer = Needle::<&str>::into_consumer('x');
    /// assert_eq!(consumer.trim_start("xxxyy"), 3);
    ///
    /// let mut consumer = Needle::<&str>::into_consumer('x');
    /// assert_eq!(consumer.trim_start("yyxxx"), 0);
    /// ```
    #[inline]
    fn trim_start(&mut self, hay: &A) -> A::Index {
        let mut offset = hay.start_index();
        let mut span = Span::from(hay);
        while let Some(pos) = self.consume(span.clone()) {
            offset = pos;
            let (hay, range) = span.into_parts();
            if pos == range.start {
                break;
            }
            span = unsafe { Span::from_parts(hay, pos..range.end) };
        }
        offset
    }
}

/// A searcher which can be searched from the end.
///
/// This trait provides methods for searching for non-overlapping matches of a
/// needle starting from the back (right) of a hay.
///
/// # Safety
///
/// This trait is marked unsafe because the range returned by its methods are
/// required to lie on valid codeword boundaries in the haystack. This enables
/// users of this trait to slice the haystack without additional runtime checks.
pub unsafe trait ReverseSearcher<A: Hay + ?Sized>: Searcher<A> {
    /// Searches for the last range which the needle can be found in the span.
    ///
    /// This method is used to support the following standard algorithms:
    ///
    /// * [`rmatches`](::ext::rmatches)
    /// * [`rmatch_indices`](::ext::rmatch_indices)
    /// * [`rfind`](::ext::find)
    /// * [`rmatch_ranges`](::ext::rmatch_ranges)
    /// * [`rfind_range`](::ext::rfind_range)
    /// * [`rsplit`](::ext::rsplit)
    /// * [`rsplit_terminator`](::ext::rsplit_terminator)
    /// * [`rsplitn`](::ext::rsplitn)
    ///
    /// The hay and the restricted range for searching can be recovered by
    /// calling `span`[`.into_parts()`](Span::into_parts). The returned range
    /// should be relative to the hay and must be contained within the
    /// restricted range from the span.
    ///
    /// If the needle is not found, this method should return `None`.
    ///
    /// # Examples
    ///
    /// Search for the locations of a substring inside a string, using the
    /// searcher primitive.
    ///
    /// ```
    /// extern crate pattern_3;
    /// use pattern_3::{ReverseSearcher, Needle, Span};
    ///
    /// let mut searcher = Needle::<&str>::into_searcher("::");
    /// let span = Span::from("lion::tiger::leopard");
    /// //                     ^   ^      ^
    /// // string indices:     0   4     11
    ///
    /// // found the last "::".
    /// assert_eq!(searcher.rsearch(span.clone()), Some(11..13));
    ///
    /// // slice the span to skip the last match.
    /// let span = unsafe { span.slice_unchecked(0..11) };
    ///
    /// // found the second to last "::".
    /// assert_eq!(searcher.rsearch(span.clone()), Some(4..6));
    ///
    /// // should found nothing now.
    /// let span = unsafe { span.slice_unchecked(0..4) };
    /// assert_eq!(searcher.rsearch(span.clone()), None);
    /// ```
    fn rsearch(&mut self, span: Span<&A>) -> Option<Range<A::Index>>;
}

/// A consumer which can be searched from the end.
///
/// This trait provides methods for matching a needle anchored at the end of a
/// hay.
///
/// # Safety
///
/// This trait is marked unsafe because the range returned by its methods are
/// required to lie on valid codeword boundaries in the haystack. This enables
/// users of this trait to slice the haystack without additional runtime checks.
pub unsafe trait ReverseConsumer<A: Hay + ?Sized>: Consumer<A> {
    /// Checks if the needle can be found at the end of the span.
    ///
    /// This method is used to implement the standard algorithm
    /// [`ends_with()`](::ext::ends_with) as well as providing the default
    /// implementation for [`.trim_end()`](ReverseConsumer::trim_end).
    ///
    /// The hay and the restricted range for searching can be recovered by
    /// calling `span`[`.into_parts()`](Span::into_parts). If a needle can be
    /// found ending at `range.end`, this method should return the start index
    /// of the needle relative to the hay.
    ///
    /// If the needle cannot be found at the end of the span, this method
    /// should return `None`.
    ///
    /// # Examples
    ///
    /// Consumes ASCII characters from the end.
    ///
    /// ```
    /// extern crate pattern_3;
    /// use pattern_3::{ReverseConsumer, Needle, Span};
    ///
    /// let mut consumer = Needle::<&str>::into_consumer(|c: char| c.is_ascii());
    /// let span = Span::from("Hi😋!!");
    ///
    /// // consumes the last ASCII character
    /// assert_eq!(consumer.rconsume(span.clone()), Some(7));
    ///
    /// // slice the span to skip the first match.
    /// let span = unsafe { span.slice_unchecked(0..7) };
    ///
    /// // matched the second to last ASCII character
    /// assert_eq!(consumer.rconsume(span.clone()), Some(6));
    ///
    /// // should match nothing now.
    /// let span = unsafe { span.slice_unchecked(0..6) };
    /// assert_eq!(consumer.rconsume(span.clone()), None);
    /// ```
    fn rconsume(&mut self, hay: Span<&A>) -> Option<A::Index>;

    /// Repeatedly removes suffixes of the hay which matches the needle.
    ///
    /// This method is used to implement the standard algorithm
    /// [`trim_end()`](::ext::trim_end).
    ///
    /// A fast generic implementation in terms of
    /// [`.rconsume()`](ReverseConsumer::rconsume) is provided by default.
    /// Nevertheless, many needles allow a higher-performance specialization.
    ///
    /// # Examples
    ///
    /// ```rust
    /// extern crate pattern_3;
    /// use pattern_3::{ReverseConsumer, Needle, Span};
    ///
    /// let mut consumer = Needle::<&str>::into_consumer('x');
    /// assert_eq!(consumer.trim_end("yyxxx"), 2);
    ///
    /// let mut consumer = Needle::<&str>::into_consumer('x');
    /// assert_eq!(consumer.trim_end("xxxyy"), 5);
    /// ```
    #[inline]
    fn trim_end(&mut self, hay: &A) -> A::Index {
        let mut offset = hay.end_index();
        let mut span = Span::from(hay);
        while let Some(pos) = self.rconsume(span.clone()) {
            offset = pos;
            let (hay, range) = span.into_parts();
            if pos == range.end {
                break;
            }
            span = unsafe { Span::from_parts(hay, range.start..pos) };
        }
        offset
    }
}

/// A searcher which can be searched from both end with consistent results.
///
/// Implementing this marker trait enables the following standard algorithms to
/// return [`DoubleEndedIterator`](std::iter::DoubleEndedIterator)s:
///
/// * [`matches`](::ext::matches) / [`rmatches`](::ext::rmatches)
/// * [`match_indices`](::ext::match_indices) / [`rmatch_indices`](::ext::rmatch_indices)
/// * [`match_ranges`](::ext::match_ranges) / [`rmatch_ranges`](::ext::rmatch_ranges)
/// * [`split`](::ext::split) / [`rsplit`](::ext::rsplit)
/// * [`split_terminator`](::ext::split_terminator) / [`rsplit_terminator`](::ext::rsplit_terminator)
/// * [`splitn`](::ext::splitn) / [`rsplitn`](::ext::rsplitn)
///
/// # Examples
///
/// The searcher of a character implements `DoubleEndedSearcher`, while that of
/// a string does not.
///
/// `match_indices` and `rmatch_indices` are reverse of each other only for a
/// `DoubleEndedSearcher`.
///
/// ```rust
/// extern crate pattern_3;
/// use pattern_3::ext::{match_indices, rmatch_indices};
///
/// // `match_indices` and `rmatch_indices` are exact reverse of each other for a `char` needle.
/// let forward = match_indices("xxxxx", 'x').collect::<Vec<_>>();
/// let mut rev_backward = rmatch_indices("xxxxx", 'x').collect::<Vec<_>>();
/// rev_backward.reverse();
///
/// assert_eq!(forward, vec![(0, "x"), (1, "x"), (2, "x"), (3, "x"), (4, "x")]);
/// assert_eq!(rev_backward, vec![(0, "x"), (1, "x"), (2, "x"), (3, "x"), (4, "x")]);
/// assert_eq!(forward, rev_backward);
///
/// // this property does not exist on a `&str` needle in general.
/// let forward = match_indices("xxxxx", "xx").collect::<Vec<_>>();
/// let mut rev_backward = rmatch_indices("xxxxx", "xx").collect::<Vec<_>>();
/// rev_backward.reverse();
///
/// assert_eq!(forward, vec![(0, "xx"), (2, "xx")]);
/// assert_eq!(rev_backward, vec![(1, "xx"), (3, "xx")]);
/// assert_ne!(forward, rev_backward);
/// ```
pub unsafe trait DoubleEndedSearcher<A: Hay + ?Sized>: ReverseSearcher<A> {}

/// A consumer which can be searched from both end with consistent results.
///
/// It is used to support the following standard algorithm:
///
/// * [`trim`](::ext::trim)
///
/// The `trim` function is implemented by calling
/// [`trim_start`](::ext::trim_start) and [`trim_end`](::ext::trim_end) together.
/// This trait encodes the fact that we can call these two functions in any order.
///
/// # Examples
///
/// The consumer of a character implements `DoubleEndedConsumer`, while that of
/// a string does not. `trim` is implemented only for a `DoubleEndedConsumer`.
///
/// ```rust
/// extern crate pattern_3;
/// use pattern_3::ext::{trim_start, trim_end, trim};
///
/// // for a `char`, we get the same trim result no matter which function is called first.
/// let trim_start_first = trim_end(trim_start("xyxyx", 'x'), 'x');
/// let trim_end_first = trim_start(trim_end("xyxyx", 'x'), 'x');
/// let trim_together = trim("xyxyx", 'x');
/// assert_eq!(trim_start_first, "yxy");
/// assert_eq!(trim_end_first, "yxy");
/// assert_eq!(trim_together, "yxy");
///
/// // this property does not exist for a `&str` in general.
/// let trim_start_first = trim_end(trim_start("xyxyx", "xyx"), "xyx");
/// let trim_end_first = trim_start(trim_end("xyxyx", "xyx"), "xyx");
/// // let trim_together = trim("xyxyx", 'x'); // cannot be defined
/// assert_eq!(trim_start_first, "yx");
/// assert_eq!(trim_end_first, "xy");
/// // assert_eq!(trim_together, /*????*/); // cannot be defined
/// ```
pub unsafe trait DoubleEndedConsumer<A: Hay + ?Sized>: ReverseConsumer<A> {}

/// A needle, a type which can be converted into a searcher.
///
/// When using search algorithms like [`split()`](::ext::split), users will
/// search with a `Needle` e.g. a `&str`. A needle is usually stateless,
/// however for efficient searching, we often need some preprocessing and
/// maintain a mutable state. The preprocessed structure is called the
/// [`Searcher`] of this needle.
///
/// The relationship between `Searcher` and `Needle` is similar to `Iterator`
/// and `IntoIterator`.
pub trait Needle<H: Haystack>: Sized
where H::Target: Hay // FIXME: RFC 2089 or 2289
{
    /// The searcher associated with this needle.
    type Searcher: Searcher<H::Target>;

    /// The consumer associated with this needle.
    type Consumer: Consumer<H::Target>;

    /// Produces a searcher for this needle.
    fn into_searcher(self) -> Self::Searcher;

    /// Produces a consumer for this needle.
    ///
    /// Usually a consumer and a searcher can be the same type.
    /// Some needles may require different types
    /// when the two need different optimization strategies. String searching
    /// is an example of this: we use the Two-Way Algorithm when searching for
    /// substrings, which needs to preprocess the needle. However this is
    /// irrelevant for consuming, which only needs to check for string equality
    /// once. Therefore the Consumer for a string would be a distinct type
    /// using naive search.
    fn into_consumer(self) -> Self::Consumer;
}

/// Searcher of an empty needle.
///
/// This searcher will find all empty subslices between any codewords in a
/// haystack.
#[derive(Clone, Debug, Default)]
pub struct EmptySearcher {
    consumed_start: bool,
    consumed_end: bool,
}

unsafe impl<A: Hay + ?Sized> Searcher<A> for EmptySearcher {
    #[inline]
    fn search(&mut self, span: Span<&A>) -> Option<Range<A::Index>> {
        let (hay, range) = span.into_parts();
        let start = if !self.consumed_start {
            self.consumed_start = true;
            range.start
        } else if range.start == range.end {
            return None;
        } else {
            unsafe { hay.next_index(range.start) }
        };
        Some(start..start)
    }
}

unsafe impl<A: Hay + ?Sized> Consumer<A> for EmptySearcher {
    #[inline]
    fn consume(&mut self, span: Span<&A>) -> Option<A::Index> {
        let (_, range) = span.into_parts();
        Some(range.start)
    }

    #[inline]
    fn trim_start(&mut self, hay: &A) -> A::Index {
        hay.start_index()
    }
}

unsafe impl<A: Hay + ?Sized> ReverseSearcher<A> for EmptySearcher {
    #[inline]
    fn rsearch(&mut self, span: Span<&A>) -> Option<Range<A::Index>> {
        let (hay, range) = span.into_parts();
        let end = if !self.consumed_end {
            self.consumed_end = true;
            range.end
        } else if range.start == range.end {
            return None;
        } else {
            unsafe { hay.prev_index(range.end) }
        };
        Some(end..end)
    }
}

unsafe impl<A: Hay + ?Sized> ReverseConsumer<A> for EmptySearcher {
    #[inline]
    fn rconsume(&mut self, span: Span<&A>) -> Option<A::Index> {
        let (_, range) = span.into_parts();
        Some(range.end)
    }

    #[inline]
    fn trim_end(&mut self, hay: &A) -> A::Index {
        hay.end_index()
    }
}

unsafe impl<A: Hay + ?Sized> DoubleEndedSearcher<A> for EmptySearcher {}
unsafe impl<A: Hay + ?Sized> DoubleEndedConsumer<A> for EmptySearcher {}