shrimple_parser/
pattern.rs

1//! Abstractions for working with patterns.
2
3use {
4    crate::{
5        tuple::{first, map_second, Tuple},
6        Input, Parser, ParsingError,
7    },
8    core::ops::Not,
9};
10
11#[cfg(test)]
12use core::convert::Infallible;
13
14/// This trait represents an object that can be matched onto a string.
15/// This includes functions, characters, [arrays of] characters, strings, but also custom patterns
16/// like [`NotEscaped`]
17///
18/// See built-in patterns and parser adapters for patterns in the [`pattern`](self) module
19///
20/// Hint: on the success path, the 1st element of the return tuple is the rest of the input (with
21/// or without the matched pattern at the start)
22pub trait Pattern {
23    /// The return values are (rest of the input, matched fragment at the beginning).
24    ///
25    /// # Errors
26    /// In the case of no match, the original `input` is returned as the [`Err`] variant.
27    ///
28    /// Used by [`parse`].
29    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I>;
30
31    /// The return values are (rest of the input, contiguous matched fragments from the beginning).
32    ///
33    /// 0 is also a valid number of matches.
34    ///
35    /// Used by [`parse_while`]
36    #[expect(
37        clippy::unwrap_used,
38        reason = "this will only panic if the pattern does"
39    )]
40    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
41        let mut rest = Some(input.clone());
42        let rest_ptr = loop {
43            match self.immediate_match(rest.take().unwrap()) {
44                Ok((x, _)) => rest = Some(x),
45                Err(x) => break x.as_ptr(),
46            }
47        };
48        let input_ptr = input.as_ptr();
49        input.split_at(rest_ptr as usize - input_ptr as usize).rev()
50    }
51
52    /// Like [`Pattern::immediate_matches`], but also counts the number of matches.
53    ///
54    /// Used by the [`Pattern`] impl of [`NotEscaped`]
55    #[expect(
56        clippy::unwrap_used,
57        reason = "this will only panic if the pattern does"
58    )]
59    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
60        let mut rest = Some(input.clone());
61        let mut n = 0;
62        let rest_ptr = loop {
63            match self.immediate_match(rest.take().unwrap()) {
64                Ok((x, _)) => {
65                    rest = Some(x);
66                    n += 1;
67                }
68                Err(x) => break x.as_ptr(),
69            }
70        };
71        let input_ptr = input.as_ptr();
72        input
73            .split_at(rest_ptr as usize - input_ptr as usize)
74            .rev()
75            .map_second(|s| (s, n))
76    }
77
78    /// Like [`Pattern::immediate_match`], but matches at the end of `input`.
79    /// The return values are (the input before the match, the match)
80    ///
81    /// # Errors
82    /// In the case of no match, the original `input` is returned as the [`Err`] variant.
83    ///
84    /// Used by the [`Pattern`] impl of [`NotEscaped`]
85    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I>;
86
87    /// Like [`Pattern::immediate_matches_counted`], but matches at the end of `input`,
88    /// and doesn't return the matched fragment of the input.
89    ///
90    /// Used by the [`Pattern`] impl of [`NotEscaped`]
91    #[expect(
92        clippy::unwrap_used,
93        reason = "this will only panic if the pattern does"
94    )]
95    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
96        let mut rest = Some(input);
97        let mut n = 0;
98        loop {
99            match self.trailing_match(rest.take().unwrap()) {
100                Ok((before, _)) => {
101                    rest = Some(before);
102                    n += 1;
103                }
104                Err(rest) => break (rest, n),
105            }
106        }
107    }
108
109    /// The return values are (the match + rest of the input, (string before the match, the match)).
110    ///
111    /// # Errors
112    /// Returns the provided `input` unchanged in the [`Err`] variant if there's no match.
113    ///
114    /// Used by [`parse_until`].
115    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I>;
116
117    /// Like [`Pattern::first_match`], but the match is excluded from the rest of the input.
118    ///
119    /// # Errors
120    /// Returns the provided `input` unchanged in the [`Err`] variant if there's no match.
121    ///
122    /// Used by [`parse_until_ex`].
123    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I>;
124
125    /// Get the pattern by reference to avoid moving it, which will happen in generic code
126    ///
127    /// Do not override this method.
128    fn by_ref(&self) -> Ref<'_, Self> {
129        Ref(self)
130    }
131
132    /// Combine `self` and another pattern into a pattern that matches either of them in a
133    /// short-circuiting manner, with `self` tried first.
134    ///
135    /// Do not override this method.
136    fn or<Other: Pattern>(self, other: Other) -> Union<Self, Other>
137    where
138        Self: Sized,
139    {
140        Union(self, other)
141    }
142
143    /// Create a pattern that'll match `self` only if it's not escaped (immediately preceded)
144    /// by the provided pattern.
145    fn not_escaped_by<Prefix: Pattern>(self, prefix: Prefix) -> NotEscaped<Prefix, Self>
146    where
147        Self: Sized,
148    {
149        NotEscaped(prefix, self)
150    }
151
152    /// Create a pattern that'll match `self` only if it's not enclosed (preceded & superceded) by
153    /// the provided pattern.
154    fn not_enclosed_by<Enclosure: Pattern>(self, enc: Enclosure) -> NotEnclosed<Enclosure, Self>
155    where
156        Self: Sized,
157    {
158        NotEnclosed(enc, self)
159    }
160}
161
162impl<F: Fn(char) -> bool> Pattern for F {
163    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
164        match input.chars().next().filter(|c| self(*c)) {
165            Some(c) => Ok(input.split_at(c.len_utf8()).rev()),
166            None => Err(input),
167        }
168    }
169
170    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
171        let mid = input.find(|c| !self(c)).unwrap_or(input.len());
172        input.split_at(mid).rev()
173    }
174
175    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
176        let mut char_index = 0;
177        let byte_index = input
178            .char_indices()
179            .inspect(|_| char_index += 1)
180            .find_map(|(bi, c)| self(c).not().then_some(bi))
181            .inspect(|_| char_index -= 1)
182            .unwrap_or(input.len());
183        input
184            .split_at(byte_index)
185            .rev()
186            .map_second(|s| (s, char_index))
187    }
188
189    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
190        match input.strip_suffix(self).map(str::len) {
191            Some(len) => Ok(input.split_at(len)),
192            None => Err(input),
193        }
194    }
195
196    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
197        match input.char_indices().find(|(_, c)| self(*c)) {
198            Some((at, ch)) => {
199                let (before, after) = input.split_at(at);
200                let r#match = after.clone().before(ch.len_utf8());
201                Ok((after, (before, r#match)))
202            }
203            None => Err(input),
204        }
205    }
206
207    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
208        match input.char_indices().find(|(_, c)| self(*c)) {
209            Some((at, ch)) => {
210                let (before, after) = input.split_at(at);
211                let (r#match, after) = after.split_at(ch.len_utf8());
212                Ok((after, (before, r#match)))
213            }
214            None => Err(input),
215        }
216    }
217}
218
219/// This is a specialised, optimised impl for matching any `char` in the array. For a more general
220/// pattern combinator, use the [`Union`] pattern by calling the [`Pattern::or`] method
221impl<const N: usize> Pattern for [char; N] {
222    // TODO: specialise for `[char; N]`
223    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
224        match input.strip_prefix(self) {
225            Some(rest) => {
226                let matched_pat_len = input.len() - rest.len();
227                Ok(input.split_at(matched_pat_len).rev())
228            }
229            None => Err(input),
230        }
231    }
232
233    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
234        match input.strip_suffix(self) {
235            Some(rest) => {
236                let rest_len = rest.len();
237                Ok(input.split_at(rest_len))
238            }
239            None => Err(input),
240        }
241    }
242
243    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
244        match input.find(self) {
245            Some(at) => {
246                let (prev, match_and_rest) = input.split_at(at);
247                let matched_pat_len = match_and_rest.chars().next().map_or(0, char::len_utf8);
248                let r#match = match_and_rest.clone().before(matched_pat_len);
249                Ok((match_and_rest, (prev, r#match)))
250            }
251            None => Err(input),
252        }
253    }
254
255    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
256        match input.find(self) {
257            Some(at) => {
258                let (prev, match_and_rest) = input.split_at(at);
259                let matched_pat_len = match_and_rest.chars().next().map_or(0, char::len_utf8);
260                let (r#match, rest) = match_and_rest.split_at(matched_pat_len);
261                Ok((rest, (prev, r#match)))
262            }
263            None => Err(input),
264        }
265    }
266}
267
268impl Pattern for &str {
269    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
270        if input.starts_with(*self) {
271            Ok(input.split_at(self.len()).rev())
272        } else {
273            Err(input)
274        }
275    }
276
277    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
278        let rest_len = input.trim_start_matches(self).len();
279        let input_len = input.len();
280        input.split_at(input_len - rest_len).rev()
281    }
282
283    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
284        self.immediate_matches(input)
285            .map_second(|s| (s.len().checked_div(self.len()).unwrap_or(0), s).rev())
286    }
287
288    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
289        if input.ends_with(self) {
290            let mid = input.len() - self.len();
291            Ok(input.split_at(mid))
292        } else {
293            Err(input)
294        }
295    }
296
297    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
298        let trimmed_len = input.trim_end_matches(self).len();
299        let input_len = input.len();
300        (
301            input.before(trimmed_len),
302            (input_len - trimmed_len) / self.len(),
303        )
304    }
305
306    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
307        match input.find(*self) {
308            Some(at) => {
309                let (before, after) = input.split_at(at);
310                let r#match = after.clone().before(self.len());
311                Ok((after, (before, r#match)))
312            }
313            None => Err(input),
314        }
315    }
316
317    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
318        match input.find(*self) {
319            Some(at) => {
320                let (before, after) = input.split_at(at);
321                let (r#match, after) = after.split_at(self.len());
322                Ok((after, (before, r#match)))
323            }
324            None => Err(input),
325        }
326    }
327}
328
329impl Pattern for char {
330    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
331        if input.starts_with(*self) {
332            Ok(input.split_at(self.len_utf8()).rev())
333        } else {
334            Err(input)
335        }
336    }
337
338    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
339        let rest_len = input.trim_start_matches(*self).len();
340        let input_len = input.len();
341        input.split_at(input_len - rest_len).rev()
342    }
343
344    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
345        self.immediate_matches(input)
346            .map_second(|s| (s.len() / self.len_utf8(), s).rev())
347    }
348
349    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
350        if input.ends_with(*self) {
351            let mid = input.len() - self.len_utf8();
352            Ok(input.split_at(mid))
353        } else {
354            Err(input)
355        }
356    }
357
358    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
359        let trimmed_len = input.trim_end_matches(*self).len();
360        let input_len = input.len();
361        (
362            input.before(trimmed_len),
363            (input_len - trimmed_len) / self.len_utf8(),
364        )
365    }
366
367    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
368        match input.find(*self) {
369            Some(at) => {
370                let (before, after) = input.split_at(at);
371                let r#match = after.clone().before(self.len_utf8());
372                Ok((after, (before, r#match)))
373            }
374            None => Err(input),
375        }
376    }
377
378    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
379        match input.find(*self) {
380            Some(at) => {
381                let (before, after) = input.split_at(at);
382                let (r#match, after) = after.split_at(self.len_utf8());
383                Ok((after, (before, r#match)))
384            }
385            None => Err(input),
386        }
387    }
388}
389
390#[cfg(feature = "either")]
391macro_rules! fwd_method_impl {
392    ($(fn $name:ident -> $ret:ty;)+) => {
393        $(
394            fn $name<I: Input>(&self, input: I) -> $ret {
395                match self {
396                    either::Either::Left(l) => l.$name(input),
397                    either::Either::Right(r) => r.$name(input),
398                }
399            }
400        )+
401    };
402}
403
404#[cfg(feature = "either")]
405impl<L: Pattern, R: Pattern> Pattern for either::Either<L, R> {
406    fwd_method_impl! {
407        fn immediate_match -> Result<(I, I), I>;
408        fn immediate_matches -> (I, I);
409        fn immediate_matches_counted -> (I, (I, usize));
410        fn trailing_match -> Result<(I, I), I>;
411        fn trailing_matches_counted -> (I, usize);
412        fn first_match -> Result<(I, (I, I)), I>;
413        fn first_match_ex -> Result<(I, (I, I)), I>;
414    }
415}
416
417/// Pattern that's the reference to another pattern, used in generic code to reuse the pattern.
418#[repr(transparent)]
419pub struct Ref<'this, T: ?Sized + Pattern>(&'this T);
420
421impl<T: ?Sized + Pattern> Clone for Ref<'_, T> {
422    fn clone(&self) -> Self {
423        *self
424    }
425}
426
427impl<T: ?Sized + Pattern> Copy for Ref<'_, T> {}
428
429impl<T: ?Sized + Pattern> Pattern for Ref<'_, T> {
430    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
431        T::immediate_match(self.0, input)
432    }
433
434    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
435        T::immediate_matches(self.0, input)
436    }
437
438    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
439        T::immediate_matches_counted(self.0, input)
440    }
441
442    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
443        T::trailing_match(self.0, input)
444    }
445
446    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
447        T::trailing_matches_counted(self.0, input)
448    }
449
450    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
451        T::first_match(self.0, input)
452    }
453
454    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
455        T::first_match_ex(self.0, input)
456    }
457}
458
459/// Pattern that matches pattern `Inner` not escaped by `Prefix`.
460/// "escaped" here means that the pattern `Inner` is preceded by a `Prefix` that's not preceded by
461/// itself.
462///
463/// For example, for a pattern `NotEscaped('\', '0')`, the strings "0", "\\0" & "\\\\\\0" will have
464/// a match, but the strings "\0", "\\ \0" & "\\\\\\\0" won't.
465#[derive(Clone, Copy)]
466pub struct NotEscaped<Prefix: Pattern, Inner: Pattern>(pub Prefix, pub Inner);
467
468impl<Prefix: Pattern, Inner: Pattern> Pattern for NotEscaped<Prefix, Inner> {
469    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
470        self.1.immediate_match(input)
471    }
472
473    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
474        let (rest, r#match) = self.1.trailing_match(input.clone())?;
475        let (rest, n_prefixes) = self.0.trailing_matches_counted(rest);
476        (n_prefixes % 2 == 0)
477            .then_some((rest, r#match))
478            .ok_or(input)
479    }
480
481    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
482        let (rest, n) = self.1.trailing_matches_counted(input);
483        if n == 0 {
484            return (rest, 0);
485        }
486        let no_1st_prefix = match self.0.trailing_match(rest.clone()) {
487            Ok((x, _)) => x,
488            Err(rest) => return (rest, n),
489        };
490        let (_, n_prefixes_minus_one) = self.0.trailing_matches_counted(no_1st_prefix.clone());
491        if n_prefixes_minus_one % 2 != 0 {
492            (rest, n)
493        } else {
494            (no_1st_prefix, n - 1)
495        }
496    }
497
498    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
499        let mut rest = input.clone();
500        while !rest.is_empty() {
501            let (before, r#match);
502            (rest, (before, r#match)) = self.1.first_match(rest)?;
503            let before = match self.0.trailing_match(before) {
504                Ok((x, _)) => x,
505                Err(before) => return Ok((rest, (before, r#match))),
506            };
507            let (before, n_prefixes_minus_one) = self.0.trailing_matches_counted(before);
508            if n_prefixes_minus_one % 2 != 0 {
509                return Ok((rest, (before, r#match)));
510            }
511        }
512        Err(input)
513    }
514
515    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
516        let mut rest = input.clone();
517        loop {
518            let (before, r#match);
519            (rest, (before, r#match)) = self.1.first_match_ex(rest)?;
520            let Ok((before, _)) = self.0.trailing_match(before) else {
521                let index = r#match.as_ptr() as usize - input.as_ptr() as usize;
522                let before = input.before(index);
523                return Ok((rest, (before, r#match)));
524            };
525            let (_, n_prefixes_minus_one) = self.0.trailing_matches_counted(before);
526            if n_prefixes_minus_one % 2 != 0 {
527                let index = r#match.as_ptr() as usize - input.as_ptr() as usize;
528                let before = input.before(index);
529                return Ok((rest, (before, r#match)));
530            }
531        }
532    }
533}
534
535/// Pattern that matches pattern `Inner` not surrounded by `Enclosure`.
536pub struct NotEnclosed<Enclosure: Pattern, Inner: Pattern>(pub Enclosure, pub Inner);
537
538impl<Enclosure: Pattern, Inner: Pattern> Pattern for NotEnclosed<Enclosure, Inner> {
539    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
540        self.1.immediate_match(input)
541    }
542
543    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
544        self.1.immediate_matches(input)
545    }
546
547    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
548        self.1.trailing_match(input)
549    }
550
551    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
552        let mut enclosed = false;
553        let mut rest = &*input;
554        loop {
555            let (after_enc, (before_enc, enc)) =
556                self.0.first_match_ex(rest).unwrap_or(("", (rest, "")));
557            let (after_inner, (before_inner, inner)) =
558                self.1.first_match_ex(rest).unwrap_or(("", (rest, "")));
559
560            if [enc, inner] == ["", ""] {
561                break Err(input);
562            }
563
564            if before_enc.len() < before_inner.len() {
565                rest = after_enc;
566                enclosed = !enclosed;
567            } else if enclosed {
568                rest = after_inner;
569            } else {
570                let match_len = inner.len();
571                let before_len = input.len() - after_inner.len() - match_len;
572                let (before, rest_and_match) = input.split_at(before_len);
573                let r#match = rest_and_match.clone().before(match_len);
574                break Ok((rest_and_match, (before, r#match)));
575            }
576        }
577    }
578
579    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
580        let mut enclosed = false;
581        let mut rest = &*input;
582        loop {
583            let (after_enc, (before_enc, enc)) =
584                self.0.first_match_ex(rest).unwrap_or(("", (rest, "")));
585            let (after_inner, (before_inner, inner)) =
586                self.1.first_match_ex(rest).unwrap_or(("", (rest, "")));
587
588            if [enc, inner] == ["", ""] {
589                break Err(input);
590            }
591
592            if before_enc.len() < before_inner.len() {
593                rest = after_enc;
594                enclosed = !enclosed;
595            } else if enclosed {
596                rest = after_inner;
597            } else {
598                let match_len = inner.len();
599                let before_len = input.len() - after_inner.len() - match_len;
600                let (before, rest_and_match) = input.split_at(before_len);
601                let (r#match, rest) = rest_and_match.split_at(match_len);
602                break Ok((rest, (before, r#match)));
603            }
604        }
605    }
606}
607
608/// A pattern that matches anything.
609#[derive(Clone, Copy)]
610pub struct AnyChar;
611
612impl Pattern for AnyChar {
613    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
614        match input.chars().next() {
615            Some(ch) => Ok(input.split_at(ch.len_utf8()).rev()),
616            None => Err(input),
617        }
618    }
619
620    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
621        match input.chars().next_back() {
622            Some(ch) => Ok(input.split_at(ch.len_utf8())),
623            None => Err(input),
624        }
625    }
626
627    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
628        Ok((input.clone(), (I::default(), input)))
629    }
630
631    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
632        Ok((I::default(), (I::default(), input)))
633    }
634}
635
636/// A pattern that matches either of the 2 patterns in a short-circuiting manner,
637/// with `self` tried first. May be created by [`Pattern::or`] for convenience.
638///
639/// # Note
640/// If you want to match either of N chars, use an array of them as a pattern instead, as this
641/// struct has a general impl that may miss optimisations applicable to the case of `[char; N]`
642/// being the pattern. However, unlike the array pattern, the combination of patterns using this
643/// struct is not commutative, since the second pattern is only tried if the former has not been
644/// found in the input.
645#[derive(Debug, Clone, Copy)]
646pub struct Union<P1: Pattern, P2: Pattern>(pub P1, pub P2);
647
648impl<P1: Pattern, P2: Pattern> Pattern for Union<P1, P2> {
649    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
650        self.0
651            .immediate_match(input)
652            .or_else(|input| self.1.immediate_match(input))
653    }
654
655    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
656        self.0
657            .trailing_match(input)
658            .or_else(|input| self.1.trailing_match(input))
659    }
660
661    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
662        let (before1, match1) = self.0.first_match(&*input).map_or((&*input, ""), |x| x.1);
663        let (before2, match2) = self.1.first_match(&*input).map_or((&*input, ""), |x| x.1);
664
665        if [match1, match2] == ["", ""] {
666            return Err(input);
667        }
668
669        let [before_len, match_len] = if before1.len() < before2.len() {
670            [before1.len(), match1.len()]
671        } else {
672            [before2.len(), match2.len()]
673        };
674
675        let (before, match_rest) = input.split_at(before_len);
676        let r#match = match_rest.clone().before(match_len);
677        Ok((match_rest, (before, r#match)))
678    }
679
680    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
681        let (before1, match1) = self.0.first_match(&*input).map_or((&*input, ""), |x| x.1);
682        let (before2, match2) = self.1.first_match(&*input).map_or((&*input, ""), |x| x.1);
683
684        if [match1, match2] == ["", ""] {
685            return Err(input);
686        }
687
688        let [before_len, match_len] = if before1.len() < before2.len() {
689            [before1.len(), match1.len()]
690        } else {
691            [before2.len(), match2.len()]
692        };
693
694        let (before, match_rest) = input.split_at(before_len);
695        let (r#match, rest) = match_rest.split_at(match_len);
696        Ok((rest, (before, r#match)))
697    }
698}
699
700/// Parses 1 instance of pattern `pat`.
701///
702/// # Errors
703/// The returned parser returns a recoverable error if the pattern didn't match at the beginning of
704/// the input.
705pub fn parse<In: Input, Reason>(pat: impl Pattern) -> impl Parser<In, In, Reason> {
706    move |input| {
707        pat.immediate_match(input)
708            .map_err(ParsingError::new_recoverable)
709    }
710}
711
712/// Parses contiguous instances of pattern `pat`.
713///
714/// The returned parser never returns an error, if no matches are found at the start of the input,
715/// the returned string is empty (but also points to the start of the input)
716///
717/// See also [`parse_until`], [`parse_until_ex`].
718pub fn parse_while<In: Input, Reason>(pat: impl Pattern) -> impl Parser<In, In, Reason> {
719    move |input| Ok(pat.immediate_matches(input))
720}
721
722/// Parses a span of the input until a match of pattern `pat` is met.
723///
724/// The returned rest of the input will still have the match.
725///
726/// The returned parser never returns an error, if `pred` returns `false` for all the characters
727/// in the input, then the output is the entire input, and the rest of the input is an empty string.
728///
729/// See also [`parse_while`], [`parse_until_ex`].
730pub fn parse_until<In: Input, Reason>(pat: impl Pattern) -> impl Parser<In, In, Reason> {
731    move |input| {
732        Ok({
733            pat.first_match(input)
734                .map_or_else(|input| (In::default(), input), map_second(first))
735        })
736    }
737}
738
739/// Like [`parse_until`], but also removes the match of `pat` from the rest of the input.
740///
741/// # Errors
742/// Unlike [`parse_until`], this parser returns a recoverable error if `pred` returned `false` for
743/// all the characters in the input.
744pub fn parse_until_ex<In: Input, Reason>(pat: impl Pattern) -> impl Parser<In, In, Reason> {
745    move |input| {
746        pat.first_match_ex(input)
747            .map(map_second(first))
748            .map_err(ParsingError::new_recoverable)
749    }
750}
751
752/// Parse a balanced group of `open` & `close` patterns.
753///
754/// The start & end of the group are <u>included</u> in the output.
755/// See [`parse_group_ex`] for a parser that excludes them.
756///
757/// # Errors
758/// - If no initial `open` was found, a recoverable error is returned.
759/// - If the end was reached before a matching `close` pattern, a fatal error is returned.
760///
761/// An example use of this is parsing balanced parentheses:
762/// ```rust
763/// # fn main() {
764/// use shrimple_parser::{pattern::parse_group, ParsingError};
765/// let src = "(foo ()) bar";
766/// assert_eq!(parse_group('(', ')')(src), Ok((" bar", "(foo ())")));
767///
768/// let src = "(oops";
769/// assert_eq!(parse_group('(', ')')(src), Err(ParsingError::new("oops", ())));
770/// # }
771/// ```
772pub fn parse_group<In: Input>(open: impl Pattern, close: impl Pattern) -> impl Parser<In, In, ()> {
773    move |input| {
774        let Ok((mut rest, _)) = open.immediate_match(&*input) else {
775            return Err(ParsingError::new_recoverable(input));
776        };
777        let mut nesting = 1;
778        while nesting > 0 {
779            let (after_open, (before_open, open)) =
780                open.first_match_ex(rest).unwrap_or(("", (rest, "")));
781            let (after_close, (before_close, close)) =
782                close.first_match_ex(rest).unwrap_or(("", (rest, "")));
783
784            if [open, close] == ["", ""] {
785                // neither `open` nor `close` matched, and nesting > 0
786                let rest_start = input.len() - rest.len();
787                return Err(ParsingError::new(input.after(rest_start), ()));
788            }
789
790            if before_open.len() < before_close.len() {
791                rest = after_open;
792                nesting += 1;
793            } else {
794                rest = after_close;
795                nesting -= 1;
796            }
797        }
798
799        let res_len = input.len() - rest.len();
800        Ok(input.split_at(res_len).rev())
801    }
802}
803
804/// Parse a balanced group of `open` & `close` patterns.
805///
806/// The start & end of the group are <u>excluded</u> in the output.
807/// See [`parse_group`] for a parser that includes them.
808///
809/// # Errors
810/// - If no initial `open` was found, a recoverable error is returned.
811/// - If the end was reached before a matching `close` pattern, a fatal error is returned.
812///
813/// An example use of this is parsing balanced parentheses:
814/// ```rust
815/// # fn main() {
816/// use shrimple_parser::{pattern::parse_group_ex, ParsingError};
817/// let src = "(foo ()) bar";
818/// assert_eq!(parse_group_ex('(', ')')(src), Ok((" bar", "foo ()")));
819///
820/// let src = "(oops";
821/// assert_eq!(parse_group_ex('(', ')')(src), Err(ParsingError::new("oops", ())));
822/// # }
823/// ```
824pub fn parse_group_ex<In: Input>(
825    open: impl Pattern,
826    close: impl Pattern,
827) -> impl Parser<In, In, ()> {
828    move |input| {
829        let input = match open.immediate_match(input) {
830            Ok((rest, _)) => rest,
831            Err(input) => return Err(ParsingError::new_recoverable(input)),
832        };
833        let mut rest = &*input;
834        let mut nesting = 1;
835        let mut close_len = 0;
836        while nesting > 0 {
837            let (after_open, (before_open, open)) =
838                open.first_match_ex(rest).unwrap_or(("", (rest, "")));
839            let (after_close, (before_close, close)) =
840                close.first_match_ex(rest).unwrap_or(("", (rest, "")));
841
842            if [open, close] == ["", ""] {
843                // neither `open` nor `close` matched, and nesting > 0
844                let rest_start = input.len() - rest.len();
845                return Err(ParsingError::new(input.after(rest_start), ()));
846            }
847
848            if before_open.len() < before_close.len() {
849                rest = after_open;
850                nesting += 1;
851            } else {
852                rest = after_close;
853                close_len = close.len();
854                nesting -= 1;
855            }
856        }
857
858        let res_len = input.len() - rest.len() - close_len;
859        Ok(input
860            .split_at(res_len)
861            .map_second(|rest| rest.after(close_len))
862            .rev())
863    }
864}
865
866#[test]
867fn char_pat() {
868    assert_eq!(
869        parse_until_ex::<_, Infallible>('"')
870            .parse(r#"this is what they call a \"test\", right?" - he said"#),
871        Ok((
872            r#"test\", right?" - he said"#,
873            r"this is what they call a \"
874        )),
875    );
876}
877
878#[test]
879fn not_escaped_pat() {
880    assert_eq!(
881        parse_until_ex::<_, Infallible>(NotEscaped('\\', '"'))
882            .parse(r#"this is what they call a \"test\", right?" - he said"#),
883        Ok((" - he said", r#"this is what they call a \"test\", right?"#)),
884    );
885}
886
887#[test]
888fn str_pat() {
889    assert_eq!(parse::<_, Infallible>("abc")("abcdef"), Ok(("def", "abc")));
890}
891
892#[test]
893fn array_pat() {
894    assert_eq!(
895        parse_until_ex::<_, Infallible>([';', '\''])("abc;def'xyz"),
896        Ok(("def'xyz", "abc"))
897    );
898}
899
900#[test]
901fn union_pat() {
902    let src = "abc;def'xyz";
903    assert_eq!(
904        parse_until_ex::<_, Infallible>(';'.or('\''))(src),
905        parse_until_ex([';', '\''])(src)
906    );
907}