Skip to main content

shrimple_parser/pattern/
mod.rs

1//! Abstractions for working with patterns.
2
3mod char_predicates;
4mod into_pattern;
5
6pub use {
7    char_predicates::*,
8    into_pattern::*,
9};
10
11use {
12    crate::{tuple::Tuple, Input},
13    core::ops::Not,
14};
15
16#[cfg(test)]
17use {
18    crate::{
19        parser::{one, until_ex, Parser},
20        error::ParsingError,
21    },
22    core::convert::Infallible,
23};
24
25/// This trait represents an object that can be matched onto a string.
26/// This includes functions, characters, [arrays of] characters, strings, but also custom patterns
27/// like [`NotEscaped`]
28///
29/// See built-in patterns and parser adapters for patterns in the [`pattern`](self) module
30///
31/// Hint: on the success path, the 1st element of the return tuple is the rest of the input (with
32/// or without the matched pattern at the start)
33pub trait Pattern {
34    /// The return values are (rest of the input, matched fragment at the beginning).
35    ///
36    /// # Errors
37    /// In the case of no match, the original `input` is returned as the [`Err`] variant.
38    ///
39    /// Used by [`crate::parser::one`].
40    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I>;
41
42    /// The return values are (rest of the input, contiguous matched fragments from the beginning).
43    ///
44    /// 0 is also a valid number of matches.
45    ///
46    /// Used by [`crate::parser::many`]
47    #[expect(
48        clippy::unwrap_used,
49        reason = "this will only panic if the pattern does"
50    )]
51    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
52        let mut rest = Some(input.clone());
53        let rest_ptr = loop {
54            match self.immediate_match(rest.take().unwrap()) {
55                Ok((x, _)) => rest = Some(x),
56                Err(x) => break x.as_ptr(),
57            }
58        };
59        let input_ptr = input.as_ptr();
60        input.split_at(rest_ptr as usize - input_ptr as usize).rev()
61    }
62
63    /// Like [`Pattern::immediate_matches`], but also counts the number of matches.
64    ///
65    /// Used by the [`Pattern`] impl of [`NotEscaped`]
66    #[expect(
67        clippy::unwrap_used,
68        reason = "this will only panic if the pattern does"
69    )]
70    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
71        let mut rest = Some(input.clone());
72        let mut n = 0;
73        let rest_ptr = loop {
74            match self.immediate_match(rest.take().unwrap()) {
75                Ok((x, _)) => {
76                    rest = Some(x);
77                    n += 1;
78                }
79                Err(x) => break x.as_ptr(),
80            }
81        };
82        let input_ptr = input.as_ptr();
83        input
84            .split_at(rest_ptr as usize - input_ptr as usize)
85            .rev()
86            .map_second(|s| (s, n))
87    }
88
89    /// Like [`Pattern::immediate_match`], but matches at the end of `input`.
90    /// The return values are (the input before the match, the match)
91    ///
92    /// # Errors
93    /// In the case of no match, the original `input` is returned as the [`Err`] variant.
94    ///
95    /// Used by the [`Pattern`] impl of [`NotEscaped`]
96    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I>;
97
98    /// Like [`Pattern::immediate_matches_counted`], but matches at the end of `input`,
99    /// and doesn't return the matched fragment of the input.
100    ///
101    /// Used by the [`Pattern`] impl of [`NotEscaped`]
102    #[expect(
103        clippy::unwrap_used,
104        reason = "this will only panic if the pattern does"
105    )]
106    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
107        let mut rest = Some(input);
108        let mut n = 0;
109        loop {
110            match self.trailing_match(rest.take().unwrap()) {
111                Ok((before, _)) => {
112                    rest = Some(before);
113                    n += 1;
114                }
115                Err(rest) => break (rest, n),
116            }
117        }
118    }
119
120    /// The return values are (the match + rest of the input, (string before the match, the match)).
121    ///
122    /// # Errors
123    /// Returns the provided `input` unchanged in the [`Err`] variant if there's no match.
124    ///
125    /// Used by [`crate::parser::until`].
126    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I>;
127
128    /// Like [`Pattern::first_match`], but the match is excluded from the rest of the input.
129    ///
130    /// # Errors
131    /// Returns the provided `input` unchanged in the [`Err`] variant if there's no match.
132    ///
133    /// Used by [`crate::parser::until_ex`].
134    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I>;
135
136    /// Get the pattern by reference to avoid moving it, which will happen in generic code
137    ///
138    /// Do not override this method.
139    fn by_ref(&self) -> Ref<'_, Self> {
140        Ref(self)
141    }
142
143    /// Combine `self` and another pattern into a pattern that matches both of them in a sequence,
144    /// with `self` before `other`
145    ///
146    /// Do not override this method.
147    fn and<Other: Pattern>(self, other: Other) -> Chain<Self, Other>
148    where
149        Self: Sized,
150    {
151        Chain(self, other)
152    }
153
154    /// Create a pattern that'll match `self` only if it's not escaped (immediately preceded)
155    /// by the provided pattern.
156    ///
157    /// Do not override this method.
158    fn not_escaped_by<Prefix: Pattern>(self, prefix: Prefix) -> NotEscaped<Prefix, Self>
159    where
160        Self: Sized,
161    {
162        NotEscaped(prefix, self)
163    }
164
165    /// Create a pattern that'll match `self` only if it's not enclosed (preceded & superceded) by
166    /// the provided pattern.
167    ///
168    /// Do not override this method.
169    fn not_enclosed_by<Enclosure: Pattern>(self, enc: Enclosure) -> NotEnclosed<Enclosure, Self>
170    where
171        Self: Sized,
172    {
173        NotEnclosed(enc, self)
174    }
175
176    /// Creates a greedy pattern that matches `self` as many times as possible. See [`Many`]
177    fn many(self) -> Many<Self> 
178    where
179        Self: Sized,
180    {
181        Many(self)
182    }
183
184    /// Creates a pattern that matches `self` 0 or 1 times. See [`Maybe`]
185    fn maybe(self) -> Maybe<Self>
186    where
187        Self: Sized,
188    {
189        Maybe(self)
190    }
191}
192
193impl<F: Fn(char) -> bool> Pattern for F {
194    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
195        match input.chars().next().filter(|c| self(*c)) {
196            Some(c) => Ok(input.split_at(c.len_utf8()).rev()),
197            None => Err(input),
198        }
199    }
200
201    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
202        let mid = input.find(|c| !self(c)).unwrap_or(input.len());
203        input.split_at(mid).rev()
204    }
205
206    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
207        let mut char_index = 0;
208        let byte_index = input
209            .char_indices()
210            .inspect(|_| char_index += 1)
211            .find_map(|(bi, c)| self(c).not().then_some(bi))
212            .inspect(|_| char_index -= 1)
213            .unwrap_or(input.len());
214        input
215            .split_at(byte_index)
216            .rev()
217            .map_second(|s| (s, char_index))
218    }
219
220    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
221        match input.strip_suffix(self).map(str::len) {
222            Some(len) => Ok(input.split_at(len)),
223            None => Err(input),
224        }
225    }
226
227    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
228        match input.char_indices().find(|(_, c)| self(*c)) {
229            Some((at, ch)) => {
230                let (before, after) = input.split_at(at);
231                let r#match = after.clone().before(ch.len_utf8());
232                Ok((after, (before, r#match)))
233            }
234            None => Err(input),
235        }
236    }
237
238    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
239        match input.char_indices().find(|(_, c)| self(*c)) {
240            Some((at, ch)) => {
241                let (before, after) = input.split_at(at);
242                let (r#match, after) = after.split_at(ch.len_utf8());
243                Ok((after, (before, r#match)))
244            }
245            None => Err(input),
246        }
247    }
248}
249
250/// This is a specialised, optimised impl for matching any `char` in the array. For a more general
251/// pattern combinator, use a tuple.
252impl<const N: usize> Pattern for [char; N] {
253    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
254        match input.strip_prefix(self) {
255            Some(rest) => {
256                let matched_pat_len = input.len() - rest.len();
257                Ok(input.split_at(matched_pat_len).rev())
258            }
259            None => Err(input),
260        }
261    }
262
263    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
264        match input.strip_suffix(self) {
265            Some(rest) => {
266                let rest_len = rest.len();
267                Ok(input.split_at(rest_len))
268            }
269            None => Err(input),
270        }
271    }
272
273    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
274        match input.find(self) {
275            Some(at) => {
276                let (prev, match_and_rest) = input.split_at(at);
277                let matched_pat_len = match_and_rest.chars().next().map_or(0, char::len_utf8);
278                let r#match = match_and_rest.clone().before(matched_pat_len);
279                Ok((match_and_rest, (prev, r#match)))
280            }
281            None => Err(input),
282        }
283    }
284
285    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
286        match input.find(self) {
287            Some(at) => {
288                let (prev, match_and_rest) = input.split_at(at);
289                let matched_pat_len = match_and_rest.chars().next().map_or(0, char::len_utf8);
290                let (r#match, rest) = match_and_rest.split_at(matched_pat_len);
291                Ok((rest, (prev, r#match)))
292            }
293            None => Err(input),
294        }
295    }
296}
297
298impl Pattern for &str {
299    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
300        if input.starts_with(*self) {
301            Ok(input.split_at(self.len()).rev())
302        } else {
303            Err(input)
304        }
305    }
306
307    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
308        let rest_len = input.trim_start_matches(self).len();
309        let input_len = input.len();
310        input.split_at(input_len - rest_len).rev()
311    }
312
313    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
314        self.immediate_matches(input)
315            .map_second(|s| (s.len().checked_div(self.len()).unwrap_or(0), s).rev())
316    }
317
318    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
319        if input.ends_with(self) {
320            let mid = input.len() - self.len();
321            Ok(input.split_at(mid))
322        } else {
323            Err(input)
324        }
325    }
326
327    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
328        let trimmed_len = input.trim_end_matches(self).len();
329        let input_len = input.len();
330        (
331            input.before(trimmed_len),
332            (input_len - trimmed_len) / self.len(),
333        )
334    }
335
336    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
337        match input.find(*self) {
338            Some(at) => {
339                let (before, after) = input.split_at(at);
340                let r#match = after.clone().before(self.len());
341                Ok((after, (before, r#match)))
342            }
343            None => Err(input),
344        }
345    }
346
347    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
348        match input.find(*self) {
349            Some(at) => {
350                let (before, after) = input.split_at(at);
351                let (r#match, after) = after.split_at(self.len());
352                Ok((after, (before, r#match)))
353            }
354            None => Err(input),
355        }
356    }
357}
358
359impl Pattern for char {
360    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
361        if input.starts_with(*self) {
362            Ok(input.split_at(self.len_utf8()).rev())
363        } else {
364            Err(input)
365        }
366    }
367
368    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
369        let rest_len = input.trim_start_matches(*self).len();
370        let input_len = input.len();
371        input.split_at(input_len - rest_len).rev()
372    }
373
374    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
375        self.immediate_matches(input)
376            .map_second(|s| (s.len() / self.len_utf8(), s).rev())
377    }
378
379    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
380        if input.ends_with(*self) {
381            let mid = input.len() - self.len_utf8();
382            Ok(input.split_at(mid))
383        } else {
384            Err(input)
385        }
386    }
387
388    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
389        let trimmed_len = input.trim_end_matches(*self).len();
390        let input_len = input.len();
391        (
392            input.before(trimmed_len),
393            (input_len - trimmed_len) / self.len_utf8(),
394        )
395    }
396
397    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
398        match input.find(*self) {
399            Some(at) => {
400                let (before, after) = input.split_at(at);
401                let r#match = after.clone().before(self.len_utf8());
402                Ok((after, (before, r#match)))
403            }
404            None => Err(input),
405        }
406    }
407
408    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
409        match input.find(*self) {
410            Some(at) => {
411                let (before, after) = input.split_at(at);
412                let (r#match, after) = after.split_at(self.len_utf8());
413                Ok((after, (before, r#match)))
414            }
415            None => Err(input),
416        }
417    }
418}
419
420#[cfg(feature = "either")]
421macro_rules! fwd_method_impl {
422    ($(fn $name:ident -> $ret:ty;)+) => {
423        $(
424            fn $name<I: Input>(&self, input: I) -> $ret {
425                match self {
426                    either::Either::Left(l) => l.$name(input),
427                    either::Either::Right(r) => r.$name(input),
428                }
429            }
430        )+
431    };
432}
433
434#[cfg(feature = "either")]
435impl<L: Pattern, R: Pattern> Pattern for either::Either<L, R> {
436    fwd_method_impl! {
437        fn immediate_match -> Result<(I, I), I>;
438        fn immediate_matches -> (I, I);
439        fn immediate_matches_counted -> (I, (I, usize));
440        fn trailing_match -> Result<(I, I), I>;
441        fn trailing_matches_counted -> (I, usize);
442        fn first_match -> Result<(I, (I, I)), I>;
443        fn first_match_ex -> Result<(I, (I, I)), I>;
444    }
445}
446
447/// Pattern that's the reference to another pattern, used in generic code to reuse the pattern.
448#[repr(transparent)]
449pub struct Ref<'this, T: ?Sized + Pattern>(&'this T);
450
451impl<T: ?Sized + Pattern> Clone for Ref<'_, T> {
452    fn clone(&self) -> Self {
453        *self
454    }
455}
456
457impl<T: ?Sized + Pattern> Copy for Ref<'_, T> {}
458
459impl<T: ?Sized + Pattern> Pattern for Ref<'_, T> {
460    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
461        T::immediate_match(self.0, input)
462    }
463
464    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
465        T::immediate_matches(self.0, input)
466    }
467
468    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
469        T::immediate_matches_counted(self.0, input)
470    }
471
472    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
473        T::trailing_match(self.0, input)
474    }
475
476    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
477        T::trailing_matches_counted(self.0, input)
478    }
479
480    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
481        T::first_match(self.0, input)
482    }
483
484    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
485        T::first_match_ex(self.0, input)
486    }
487}
488
489/// Pattern that matches pattern `Inner` not escaped by `Prefix`.
490/// "escaped" here means that the pattern `Inner` is preceded by a `Prefix` that's not preceded by
491/// itself.
492///
493/// For example, for a pattern `NotEscaped('\', '0')`, the strings "0", "\\0" & "\\\\\\0" will have
494/// a match, but the strings "\0", "\\ \0" & "\\\\\\\0" won't.
495#[derive(Clone, Copy)]
496pub struct NotEscaped<Prefix: Pattern, Inner: Pattern>(pub Prefix, pub Inner);
497
498impl<Prefix: Pattern, Inner: Pattern> Pattern for NotEscaped<Prefix, Inner> {
499    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
500        self.1.immediate_match(input)
501    }
502
503    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
504        let (rest, r#match) = self.1.trailing_match(input.clone())?;
505        let (rest, n_prefixes) = self.0.trailing_matches_counted(rest);
506        (n_prefixes % 2 == 0)
507            .then_some((rest, r#match))
508            .ok_or(input)
509    }
510
511    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
512        let (rest, n) = self.1.trailing_matches_counted(input);
513        if n == 0 {
514            return (rest, 0);
515        }
516        let no_1st_prefix = match self.0.trailing_match(rest.clone()) {
517            Ok((x, _)) => x,
518            Err(rest) => return (rest, n),
519        };
520        let (_, n_prefixes_minus_one) = self.0.trailing_matches_counted(no_1st_prefix.clone());
521        if n_prefixes_minus_one % 2 != 0 {
522            (rest, n)
523        } else {
524            (no_1st_prefix, n - 1)
525        }
526    }
527
528    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
529        let mut rest = input.clone();
530        while !rest.is_empty() {
531            let (before, r#match);
532            (rest, (before, r#match)) = self.1.first_match(rest)?;
533            let before = match self.0.trailing_match(before) {
534                Ok((x, _)) => x,
535                Err(before) => return Ok((rest, (before, r#match))),
536            };
537            let (before, n_prefixes_minus_one) = self.0.trailing_matches_counted(before);
538            if n_prefixes_minus_one % 2 != 0 {
539                return Ok((rest, (before, r#match)));
540            }
541        }
542        Err(input)
543    }
544
545    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
546        let mut rest = input.clone();
547        loop {
548            let (before, r#match);
549            (rest, (before, r#match)) = self.1.first_match_ex(rest)?;
550            let Ok((before, _)) = self.0.trailing_match(before) else {
551                let index = r#match.as_ptr() as usize - input.as_ptr() as usize;
552                let before = input.before(index);
553                return Ok((rest, (before, r#match)));
554            };
555            let (_, n_prefixes_minus_one) = self.0.trailing_matches_counted(before);
556            if n_prefixes_minus_one % 2 != 0 {
557                let index = r#match.as_ptr() as usize - input.as_ptr() as usize;
558                let before = input.before(index);
559                return Ok((rest, (before, r#match)));
560            }
561        }
562    }
563}
564
565/// Pattern that matches pattern `Inner` not surrounded by `Enclosure`.
566pub struct NotEnclosed<Enclosure: Pattern, Inner: Pattern>(pub Enclosure, pub Inner);
567
568impl<Enclosure: Pattern, Inner: Pattern> Pattern for NotEnclosed<Enclosure, Inner> {
569    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
570        self.1.immediate_match(input)
571    }
572
573    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
574        self.1.immediate_matches(input)
575    }
576
577    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
578        self.1.trailing_match(input)
579    }
580
581    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
582        let mut enclosed = false;
583        let mut rest = &*input;
584        loop {
585            let (after_enc, (before_enc, enc)) =
586                self.0.first_match_ex(rest).unwrap_or(("", (rest, "")));
587            let (after_inner, (before_inner, inner)) =
588                self.1.first_match_ex(rest).unwrap_or(("", (rest, "")));
589
590            if [enc, inner] == ["", ""] {
591                break Err(input);
592            }
593
594            if before_enc.len() < before_inner.len() {
595                rest = after_enc;
596                enclosed = !enclosed;
597            } else if enclosed {
598                rest = after_inner;
599            } else {
600                let match_len = inner.len();
601                let before_len = input.len() - after_inner.len() - match_len;
602                let (before, rest_and_match) = input.split_at(before_len);
603                let r#match = rest_and_match.clone().before(match_len);
604                break Ok((rest_and_match, (before, r#match)));
605            }
606        }
607    }
608
609    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
610        let mut enclosed = false;
611        let mut rest = &*input;
612        loop {
613            let (after_enc, (before_enc, enc)) =
614                self.0.first_match_ex(rest).unwrap_or(("", (rest, "")));
615            let (after_inner, (before_inner, inner)) =
616                self.1.first_match_ex(rest).unwrap_or(("", (rest, "")));
617
618            if [enc, inner] == ["", ""] {
619                break Err(input);
620            }
621
622            if before_enc.len() < before_inner.len() {
623                rest = after_enc;
624                enclosed = !enclosed;
625            } else if enclosed {
626                rest = after_inner;
627            } else {
628                let match_len = inner.len();
629                let before_len = input.len() - after_inner.len() - match_len;
630                let (before, rest_and_match) = input.split_at(before_len);
631                let (r#match, rest) = rest_and_match.split_at(match_len);
632                break Ok((rest, (before, r#match)));
633            }
634        }
635    }
636}
637
638/// A pattern that matches any 1 character.
639#[derive(Clone, Copy)]
640pub struct Any;
641
642impl Pattern for Any {
643    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
644        match input.chars().next() {
645            Some(ch) => Ok(input.split_at(ch.len_utf8()).rev()),
646            None => Err(input),
647        }
648    }
649
650    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
651        let input_len = input.len();
652        input.split_at(input_len).rev()
653    }
654
655    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
656        let input_len = input.len();
657        let input_n_chars = input.chars().count();
658        input.split_at(input_len).rev().map_second(|matched| (matched, input_n_chars))
659    }
660
661    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
662        match input.chars().next_back() {
663            Some(ch) => Ok(input.split_at(ch.len_utf8())),
664            None => Err(input),
665        }
666    }
667
668    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
669        let input_n_chars = input.chars().count();
670        (input.start(), input_n_chars)
671    }
672
673    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
674        let first_char_len = input.chars().next().map_or(0, char::len_utf8);
675        let before = input.clone().start();
676        let first_char_span = input.clone().before(first_char_len);
677        Ok((input, (before, first_char_span)))
678    }
679
680    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
681        let first_char_len = input.chars().next().map_or(0, char::len_utf8);
682        let before = input.clone().start();
683        let (first_char_span, after) = input.split_at(first_char_len);
684        Ok((after, (before, first_char_span)))
685    }
686}
687
688/// A tuple pattern matches either of the 2 patterns in a short-circuiting & commutative manner,
689/// with `self` tried first. Tuples of more than 2 elements can be turned into a pattern via
690/// the [`IntoPattern`] trait.
691///
692/// # Performance
693/// If you want to match either of N chars, use an array of them as a pattern instead, as this
694/// struct has a general impl that may miss optimisations applicable to the case of `[char; N]`
695/// being the pattern.
696impl<P1: Pattern, P2: Pattern> Pattern for (P1, P2) {
697    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
698        self.0
699            .immediate_match(input)
700            .or_else(|input| self.1.immediate_match(input))
701    }
702
703    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
704        self.0
705            .trailing_match(input)
706            .or_else(|input| self.1.trailing_match(input))
707    }
708
709    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
710        let (before1, match1) = self.0.first_match(&*input).map_or((&*input, ""), |x| x.1);
711        let (before2, match2) = self.1.first_match(&*input).map_or((&*input, ""), |x| x.1);
712
713        if [match1, match2] == ["", ""] {
714            return Err(input);
715        }
716
717        let [before_len, match_len] = if before1.len() < before2.len() {
718            [before1.len(), match1.len()]
719        } else {
720            [before2.len(), match2.len()]
721        };
722
723        let (before, match_rest) = input.split_at(before_len);
724        let r#match = match_rest.clone().before(match_len);
725        Ok((match_rest, (before, r#match)))
726    }
727
728    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
729        let (before1, match1) = self.0.first_match(&*input).map_or((&*input, ""), |x| x.1);
730        let (before2, match2) = self.1.first_match(&*input).map_or((&*input, ""), |x| x.1);
731
732        if [match1, match2] == ["", ""] {
733            return Err(input);
734        }
735
736        let [before_len, match_len] = if before1.len() < before2.len() {
737            [before1.len(), match1.len()]
738        } else {
739            [before2.len(), match2.len()]
740        };
741
742        let (before, match_rest) = input.split_at(before_len);
743        let (r#match, rest) = match_rest.split_at(match_len);
744        Ok((rest, (before, r#match)))
745    }
746}
747
748/// A pattern that matches `P1` immediately followed by `P2`.
749///
750/// A match is only produced when **both** patterns match consecutively at
751/// the same position: `P1` at the current position and `P2` right after it.
752/// The combined match spans the entirety of both sub-matches.
753///
754/// # Note
755/// For `first_match` / `first_match_ex`, every occurrence of `P1` in the
756/// input is tried in left-to-right order; the first one where `P2`
757/// immediately follows is returned.  Occurrences of `P1` that are *not*
758/// followed by `P2` are skipped.
759///
760/// More conveniently created via [`Pattern::and`].
761///
762/// # Example
763/// ```rust
764/// # fn main() {
765/// use {
766///     shrimple_parser::{
767///         Pattern,
768///         parser::{one, until_ex},
769///         pattern::{ascii_digit, alphabetic, Chain},
770///     },
771///     core::convert::Infallible,
772/// };
773///
774/// // Matches a digit immediately followed by an alphabetic character.
775/// assert_eq!(
776///     one::<_, Infallible>(ascii_digit.and(alphabetic))("3x rest"),
777///     Ok((" rest", "3x")),
778/// );
779///
780/// // Returns an error when the pattern is not at the start.
781/// assert!(
782///     one::<_, Infallible>(ascii_digit.and(alphabetic))("x3 rest")
783///         .is_err()
784/// );
785///
786/// // Finds the first '$' that is immediately followed by '{'.
787/// assert_eq!(
788///     until_ex::<_, Infallible>('$'.and('{'))("foo${bar}"),
789///     Ok(("bar}", "foo")),
790/// );
791/// # }
792/// ```
793#[derive(Debug, Clone, Copy)]
794pub struct Chain<P1: Pattern, P2: Pattern>(pub P1, pub P2);
795
796impl<P1: Pattern, P2: Pattern> Pattern for Chain<P1, P2> {
797    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
798        // Try P1 at the start; keep the original `input` for the error path.
799        let rest_after_p1 = match self.0.immediate_match(input.clone()) {
800            Ok((rest, _)) => rest,
801            Err(_) => return Err(input),
802        };
803        // Try P2 immediately after P1.
804        match self.1.immediate_match(rest_after_p1) {
805            Ok((rest_after_p2, _)) => {
806                // The combined match spans from the start of `input` to the
807                // start of `rest_after_p2`.
808                let match_len = input.len() - rest_after_p2.len();
809                let (matched, rest) = input.split_at(match_len);
810                Ok((rest, matched))
811            }
812            Err(_) => Err(input),
813        }
814    }
815
816    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
817        // First strip P2 from the end.
818        let before_p2 = match self.1.trailing_match(input.clone()) {
819            Ok((before, _)) => before,
820            Err(_) => return Err(input),
821        };
822        // Then strip P1 from the end of what remains.
823        match self.0.trailing_match(before_p2) {
824            Ok((before_p1, _)) => {
825                // `before_p1.len()` is the byte offset where the chain match
826                // starts inside `input`.
827                Ok(input.split_at(before_p1.len()))
828            }
829            Err(_) => Err(input),
830        }
831    }
832
833    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
834        let mut rest = input.clone();
835        loop {
836            // Find the next P1, advancing `rest` past each failed candidate.
837            let (after_p1, (_, p1_match)) = match self.0.first_match_ex(rest) {
838                Ok(result) => result,
839                Err(_) => return Err(input),
840            };
841            // Check whether P2 immediately follows this P1.
842            match self.1.immediate_match(after_p1.clone()) {
843                Ok((after_p2, _)) => {
844                    // Reconstruct `before` and the chain match relative to the
845                    // original `input` using length arithmetic so that the
846                    // returned slices stay within the same allocation.
847                    let p1_start = input.len() - after_p1.len() - p1_match.len();
848                    let chain_len = p1_match.len() + after_p1.len() - after_p2.len();
849                    let (before, chain_and_rest) = input.split_at(p1_start);
850                    let chain = chain_and_rest.clone().before(chain_len);
851                    return Ok((chain_and_rest, (before, chain)));
852                }
853                // P2 did not follow this P1; advance past P1 and keep looking.
854                Err(_) => rest = after_p1,
855            }
856        }
857    }
858
859    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
860        let mut rest = input.clone();
861        loop {
862            // Find the next P1, advancing `rest` past each failed candidate.
863            let (after_p1, (_, p1_match)) = match self.0.first_match_ex(rest) {
864                Ok(result) => result,
865                Err(_) => return Err(input),
866            };
867            // Check whether P2 immediately follows this P1.
868            match self.1.immediate_match(after_p1.clone()) {
869                Ok((after_p2, _)) => {
870                    let p1_start = input.len() - after_p1.len() - p1_match.len();
871                    let chain_len = p1_match.len() + after_p1.len() - after_p2.len();
872                    let (before, chain_start) = input.split_at(p1_start);
873                    let chain = chain_start.before(chain_len);
874                    return Ok((after_p2, (before, chain)));
875                }
876                // P2 did not follow this P1; advance past P1 and keep looking.
877                Err(_) => rest = after_p1,
878            }
879        }
880    }
881}
882
883/// A pattern that matches any number of contiguous occurrences of the inner pattern `T`,
884/// including 0.
885///
886/// Because 0 occurrences is always a valid match, [`Many`] never fails: [`Pattern::immediate_match`]
887/// and [`Pattern::trailing_match`] always return [`Ok`], and [`Pattern::first_match`] /
888/// [`Pattern::first_match_ex`] always match at the very start of the input (with a possibly
889/// empty match).
890///
891/// More conveniently created via [`Pattern::many`]
892///
893/// # Example
894/// ```rust
895/// # fn main() {
896/// use {
897///     shrimple_parser::{
898///         Pattern,
899///         parser::one,
900///         pattern::{ascii_digit, Many},
901///     },
902///     core::convert::Infallible,
903/// };
904///
905/// // Matches 0 or more ASCII digits from the start of the input.
906/// assert_eq!(
907///     one::<_, Infallible>(ascii_digit.many())("123abc"),
908///     Ok(("abc", "123")),
909/// );
910///
911/// // 0 matches is still a match, with an empty matched fragment.
912/// assert_eq!(
913///     one::<_, Infallible>(ascii_digit.many())("abc"),
914///     Ok(("abc", "")),
915/// );
916/// # }
917/// ```
918#[derive(Debug, Clone, Copy)]
919pub struct Many<T: Pattern>(pub T);
920
921impl<T: Pattern> Pattern for Many<T> {
922    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
923        Ok(self.0.immediate_matches(input))
924    }
925
926    /// Repeating "0 or more `T`" is the same as "0 or more `T`", so this delegates directly to
927    /// `T`'s implementation rather than looping on [`Pattern::immediate_match`], which would
928    /// never terminate since [`Many::immediate_match`] never fails.
929    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
930        self.0.immediate_matches(input)
931    }
932
933    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
934        self.0.immediate_matches_counted(input)
935    }
936
937    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
938        let (rest, _) = self.0.trailing_matches_counted(input.clone());
939        let rest_len = rest.len();
940        Ok(input.split_at(rest_len))
941    }
942
943    /// See [`Many::immediate_matches`] for why this delegates straight to `T` instead of
944    /// looping on [`Pattern::trailing_match`].
945    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
946        self.0.trailing_matches_counted(input)
947    }
948
949    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
950        let (after_first, (before, first)) = self.0.first_match_ex(input.clone())?;
951        let (_, more) = self.0.immediate_matches(after_first);
952
953        let before_len = before.len();
954        let match_len = first.len() + more.len();
955
956        let (before, match_and_rest) = input.split_at(before_len);
957        let matched = match_and_rest.clone().before(match_len);
958        Ok((match_and_rest, (before, matched)))
959    }
960
961    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
962        let (after_first, (before, first)) = self.0.first_match_ex(input.clone())?;
963        let (_, more) = self.0.immediate_matches(after_first);
964
965        let before_len = before.len();
966        let match_len = first.len() + more.len();
967
968        let (before, match_and_rest) = input.split_at(before_len);
969        let (matched, rest) = match_and_rest.split_at(match_len);
970        Ok((rest, (before, matched)))
971    }
972}
973
974/// A pattern that matches 0 or 1 occurrences of the inner pattern `T`, making it optional.
975///
976/// Because both 0 and 1 occurrences are valid matches, [`Maybe`] never fails:
977/// [`Pattern::immediate_match`] and [`Pattern::trailing_match`] always return [`Ok`], falling
978/// back to an empty match at the relevant end of the input if `T` doesn't match there. Likewise,
979/// [`Pattern::first_match`] / [`Pattern::first_match_ex`] always match at the very start of the
980/// input.
981///
982/// More conveniently created via [`Pattern::maybe`]
983///
984/// # Example
985/// ```rust
986/// # fn main() {
987/// use {
988///     shrimple_parser::{
989///         Pattern,
990///         parser::one,
991///         pattern::Maybe,
992///     },
993///     core::convert::Infallible,
994/// };
995///
996/// // Matches an optional '-' sign at the start of the input.
997/// assert_eq!(
998///     one::<_, Infallible>('-'.maybe())("-123"),
999///     Ok(("123", "-")),
1000/// );
1001///
1002/// // No '-' present - still matches, with an empty matched fragment.
1003/// assert_eq!(
1004///     one::<_, Infallible>('-'.maybe())("123"),
1005///     Ok(("123", "")),
1006/// );
1007/// # }
1008/// ```
1009#[derive(Debug, Clone, Copy)]
1010pub struct Maybe<T: Pattern>(pub T);
1011
1012impl<T: Pattern> Pattern for Maybe<T> {
1013    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
1014        self.0
1015            .immediate_match(input)
1016            .or_else(|input| Ok(input.split_at(0).rev()))
1017    }
1018
1019    /// Repeating "0 or 1 `T`" is the same as "0 or more `T`", so this delegates directly to
1020    /// `T`'s implementation rather than looping on [`Pattern::immediate_match`], which would
1021    /// never terminate since [`Maybe::immediate_match`] never fails.
1022    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
1023        self.0.immediate_matches(input)
1024    }
1025
1026    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
1027        self.0.immediate_matches_counted(input)
1028    }
1029
1030    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
1031        self.0.trailing_match(input).or_else(|input| {
1032            let len = input.len();
1033            Ok(input.split_at(len))
1034        })
1035    }
1036
1037    /// See [`Maybe::immediate_matches`] for why this delegates straight to `T` instead of
1038    /// looping on [`Pattern::trailing_match`].
1039    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
1040        self.0.trailing_matches_counted(input)
1041    }
1042
1043    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
1044        let (_, matched) = self.immediate_match(input.clone())?;
1045        let before = input.clone().before(0);
1046        Ok((input, (before, matched)))
1047    }
1048
1049    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
1050        let (rest, matched) = self.immediate_match(input.clone())?;
1051        let before = input.before(0);
1052        Ok((rest, (before, matched)))
1053    }
1054}
1055
1056#[test]
1057fn char_pat() {
1058    assert_eq!(
1059        until_ex::<_, Infallible>('"')
1060            .parse(r#"this is what they call a \"test\", right?" - he said"#),
1061        Ok((
1062            r#"test\", right?" - he said"#,
1063            r"this is what they call a \"
1064        )),
1065    );
1066}
1067
1068#[test]
1069fn not_escaped_pat() {
1070    assert_eq!(
1071        until_ex::<_, Infallible>(NotEscaped('\\', '"'))
1072            .parse(r#"this is what they call a \"test\", right?" - he said"#),
1073        Ok((" - he said", r#"this is what they call a \"test\", right?"#)),
1074    );
1075}
1076
1077#[test]
1078fn str_pat() {
1079    assert_eq!(one::<_, Infallible>("abc")("abcdef"), Ok(("def", "abc")));
1080}
1081
1082#[test]
1083fn array_pat() {
1084    assert_eq!(
1085        until_ex::<_, Infallible>([';', '\''])("abc;def'xyz"),
1086        Ok(("def'xyz", "abc"))
1087    );
1088}
1089
1090#[test]
1091fn union_pat() {
1092    let src = "abc\\def'xyz;";
1093    assert_eq!(
1094        until_ex::<_, Infallible>((';', '\''))(src),
1095        until_ex([';', '\''])(src)
1096    );
1097}
1098
1099#[test]
1100fn chain_pattern_immediate_match_success() {
1101    assert_eq!(
1102        one::<_, Infallible>('a'.and('b'))("abcde"),
1103        Ok(("cde", "ab")),
1104    );
1105}
1106
1107#[test]
1108fn chain_pattern_immediate_match_p1_fails() {
1109    assert_eq!(
1110        one::<_, Infallible>('a'.and('b'))("xbc"),
1111        Err(ParsingError::new_recoverable("xbc")),
1112    );
1113}
1114
1115#[test]
1116fn chain_pattern_immediate_match_p2_fails() {
1117    assert_eq!(
1118        one::<_, Infallible>('a'.and('b'))("axc"),
1119        Err(ParsingError::new_recoverable("axc")),
1120    );
1121}
1122
1123#[test]
1124fn chain_pattern_immediate_match_predicate_patterns() {
1125    assert_eq!(
1126        one::<_, Infallible>(ascii_digit.and(alphabetic))("3x rest"),
1127        Ok((" rest", "3x")),
1128    );
1129}
1130
1131#[test]
1132fn chain_pattern_first_match_ex_found_immediately() {
1133    assert_eq!(
1134        one::<_, Infallible>('$'.and('{'))("${bar}"),
1135        Ok(("bar}", "${")),
1136    );
1137}
1138
1139#[test]
1140fn chain_pattern_first_match_ex_skips_p1_without_p2() {
1141    assert_eq!(
1142        until_ex::<_, Infallible>('a'.and('b'))("xaabyz"),
1143        Ok(("yz", "xa")),
1144    );
1145}
1146
1147#[test]
1148fn chain_pattern_first_match_ex_string_patterns() {
1149    assert_eq!(
1150        until_ex::<_, Infallible>('$'.and('{'))("foo${bar}"),
1151        Ok(("bar}", "foo")),
1152    );
1153}
1154
1155#[test]
1156fn chain_pattern_first_match_ex_no_match() {
1157    // No 'a' is ever immediately followed by 'b'.
1158    assert!(until_ex::<_, Infallible>('a'.and('b'))("xaxcyz").is_err());
1159}
1160
1161#[test]
1162fn chain_pattern_first_match_not_first_p1_match() {
1163    assert_eq!(
1164        until_ex::<_, Infallible>('a'.and("--"))("aaaa--aaa"),
1165        Ok(("aaa", "aaa")),
1166    )
1167}
1168
1169#[test]
1170fn many_matches_zero_or_more() {
1171    assert_eq!(
1172        one::<_, Infallible>(ascii_digit.many())("123abc"),
1173        Ok(("abc", "123")),
1174    );
1175    assert_eq!(
1176        one::<_, Infallible>(ascii_digit.many())("abc"),
1177        Ok(("abc", "")),
1178    );
1179}
1180
1181#[test]
1182fn many_chains_with_other_patterns() {
1183    // "1" or more digits followed by an alphabetic character.
1184    assert_eq!(
1185        one::<_, Infallible>(ascii_digit.and(ascii_digit.many()))("123abc"),
1186        Ok(("abc", "123")),
1187    );
1188}
1189
1190#[test]
1191fn many_used_in_until_ex() {
1192    assert_eq!(
1193        until_ex::<_, Infallible>(ascii_digit.many().and(';'))("abc123;rest"),
1194        Ok(("rest", "abc")),
1195    );
1196}
1197
1198#[test]
1199fn maybe_matches_zero_or_one() {
1200    assert_eq!(
1201        one::<_, Infallible>('-'.maybe())("-123"),
1202        Ok(("123", "-")),
1203    );
1204    assert_eq!(
1205        one::<_, Infallible>('-'.maybe())("123"),
1206        Ok(("123", "")),
1207    );
1208}
1209
1210#[test]
1211fn maybe_chains_with_other_patterns() {
1212    assert_eq!(
1213        one::<_, Infallible>('-'.maybe().and(ascii_digit).and(ascii_digit.many()))("-123abc"),
1214        Ok(("abc", "-123")),
1215    );
1216    assert_eq!(
1217        one::<_, Infallible>('-'.many().and(ascii_digit).and(ascii_digit.many()))("123abc"),
1218        Ok(("abc", "123")),
1219    );
1220}
1221
1222#[test]
1223fn maybe_pattern_immediate_match_p1_fails() {
1224    assert_eq!(
1225        one::<_, Infallible>('-'.maybe().and('1'))("1xyz"),
1226        Ok(("xyz", "1")),
1227    );
1228    assert_eq!(
1229        one::<_, Infallible>('-'.maybe().and('1'))("2xyz"),
1230        Err(ParsingError::new_recoverable("2xyz")),
1231    );
1232}