shrimple_parser/
pattern.rs

1//! Abstractions for working with patterns.
2
3use {
4    crate::{
5        tuple::{first, map_second, Tuple},
6        Input, Parser, ParsingError,
7    },
8    core::ops::Not,
9};
10
11#[cfg(test)]
12use core::convert::Infallible;
13
14/// This trait represents an object that can be matched onto a string.
15/// This includes functions, characters, [arrays of] characters, strings, but also custom patterns
16/// like [`NotEscaped`]
17///
18/// See built-in patterns and parser adapters for patterns in the [`pattern`](self) module
19///
20/// Hint: on the success path, the 1st element of the return tuple is the rest of the input (with
21/// or without the matched pattern at the start)
22pub trait Pattern {
23    /// The return values are (rest of the input, matched fragment at the beginning).
24    ///
25    /// # Errors
26    /// In the case of no match, the original `input` is returned as the [`Err`] variant.
27    ///
28    /// Used by [`parse`].
29    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I>;
30
31    /// The return values are (rest of the input, contiguous matched fragments from the beginning).
32    ///
33    /// 0 is also a valid number of matches.
34    ///
35    /// Used by [`parse_while`]
36    #[expect(
37        clippy::unwrap_used,
38        reason = "this will only panic if the pattern does"
39    )]
40    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
41        let mut rest = Some(input.clone());
42        let rest_ptr = loop {
43            match self.immediate_match(rest.take().unwrap()) {
44                Ok((x, _)) => rest = Some(x),
45                Err(x) => break x.as_ptr(),
46            }
47        };
48        let input_ptr = input.as_ptr();
49        input.split_at(rest_ptr as usize - input_ptr as usize).rev()
50    }
51
52    /// Like [`Pattern::immediate_matches`], but also counts the number of matches.
53    ///
54    /// Used by the [`Pattern`] impl of [`NotEscaped`]
55    #[expect(
56        clippy::unwrap_used,
57        reason = "this will only panic if the pattern does"
58    )]
59    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
60        let mut rest = Some(input.clone());
61        let mut n = 0;
62        let rest_ptr = loop {
63            match self.immediate_match(rest.take().unwrap()) {
64                Ok((x, _)) => {
65                    rest = Some(x);
66                    n += 1;
67                }
68                Err(x) => break x.as_ptr(),
69            }
70        };
71        let input_ptr = input.as_ptr();
72        input
73            .split_at(rest_ptr as usize - input_ptr as usize)
74            .rev()
75            .map_second(|s| (s, n))
76    }
77
78    /// Like [`Pattern::immediate_match`], but matches at the end of `input`.
79    /// The return values are (the input before the match, the match)
80    ///
81    /// # Errors
82    /// In the case of no match, the original `input` is returned as the [`Err`] variant.
83    ///
84    /// Used by the [`Pattern`] impl of [`NotEscaped`]
85    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I>;
86
87    /// Like [`Pattern::immediate_matches_counted`], but matches at the end of `input`,
88    /// and doesn't return the matched fragment of the input.
89    ///
90    /// Used by the [`Pattern`] impl of [`NotEscaped`]
91    #[expect(
92        clippy::unwrap_used,
93        reason = "this will only panic if the pattern does"
94    )]
95    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
96        let mut rest = Some(input);
97        let mut n = 0;
98        loop {
99            match self.trailing_match(rest.take().unwrap()) {
100                Ok((before, _)) => {
101                    rest = Some(before);
102                    n += 1;
103                }
104                Err(rest) => break (rest, n),
105            }
106        }
107    }
108
109    /// The return values are (the match + rest of the input, (string before the match, the match)).
110    ///
111    /// # Errors
112    /// Returns the provided `input` unchanged in the [`Err`] variant if there's no match.
113    ///
114    /// Used by [`parse_until`].
115    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I>;
116
117    /// Like [`Pattern::first_match`], but the match is excluded from the rest of the input.
118    ///
119    /// # Errors
120    /// Returns the provided `input` unchanged in the [`Err`] variant if there's no match.
121    ///
122    /// Used by [`parse_until_ex`].
123    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I>;
124
125    /// Get the pattern by reference to avoid moving it, which will happen in generic code
126    ///
127    /// Do not override this method.
128    fn by_ref(&self) -> Ref<'_, Self> {
129        Ref(self)
130    }
131
132    /// Combine `self` and another pattern into a pattern that matches either of them in a
133    /// short-circuiting manner, with `self` tried first.
134    ///
135    /// Do not override this method.
136    fn or<Other: Pattern>(self, other: Other) -> Union<Self, Other>
137    where
138        Self: Sized,
139    {
140        Union(self, other)
141    }
142
143    /// Create a pattern that'll match `self` only if it's not escaped (immediately preceded)
144    /// by the provided pattern.
145    fn not_escaped_by<Prefix: Pattern>(self, prefix: Prefix) -> NotEscaped<Prefix, Self>
146    where
147        Self: Sized,
148    {
149        NotEscaped(prefix, self)
150    }
151
152    /// Create a pattern that'll match `self` only if it's not enclosed (preceded & superceded) by
153    /// the provided pattern.
154    fn not_enclosed_by<Enclosure: Pattern>(self, enc: Enclosure) -> NotEnclosed<Enclosure, Self>
155    where
156        Self: Sized,
157    {
158        NotEnclosed(enc, self)
159    }
160}
161
162impl<F: Fn(char) -> bool> Pattern for F {
163    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
164        match input.chars().next().filter(|c| self(*c)) {
165            Some(c) => Ok(input.split_at(c.len_utf8()).rev()),
166            None => Err(input),
167        }
168    }
169
170    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
171        let mid = input.find(|c| !self(c)).unwrap_or(input.len());
172        input.split_at(mid).rev()
173    }
174
175    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
176        let mut char_index = 0;
177        let byte_index = input
178            .char_indices()
179            .inspect(|_| char_index += 1)
180            .find_map(|(bi, c)| self(c).not().then_some(bi))
181            .inspect(|_| char_index -= 1)
182            .unwrap_or(input.len());
183        input
184            .split_at(byte_index)
185            .rev()
186            .map_second(|s| (s, char_index))
187    }
188
189    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
190        match input.strip_suffix(self).map(str::len) {
191            Some(len) => Ok(input.split_at(len)),
192            None => Err(input),
193        }
194    }
195
196    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
197        match input.char_indices().find(|(_, c)| self(*c)) {
198            Some((at, ch)) => {
199                let (before, after) = input.split_at(at);
200                let r#match = after.clone().before(ch.len_utf8());
201                Ok((after, (before, r#match)))
202            }
203            None => Err(input),
204        }
205    }
206
207    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
208        match input.char_indices().find(|(_, c)| self(*c)) {
209            Some((at, ch)) => {
210                let (before, after) = input.split_at(at);
211                let (r#match, after) = after.split_at(ch.len_utf8());
212                Ok((after, (before, r#match)))
213            }
214            None => Err(input),
215        }
216    }
217}
218
219/// This is a specialised, optimised impl for matching any `char` in the array. For a more general
220/// pattern combinator, use the [`Union`] pattern by calling the [`Pattern::or`] method
221impl<const N: usize> Pattern for [char; N] {
222    // TODO: specialise for `[char; N]`
223    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
224        match input.strip_prefix(self) {
225            Some(rest) => {
226                let matched_pat_len = input.len() - rest.len();
227                Ok(input.split_at(matched_pat_len).rev())
228            }
229            None => Err(input),
230        }
231    }
232
233    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
234        match input.strip_suffix(self) {
235            Some(rest) => {
236                let rest_len = rest.len();
237                Ok(input.split_at(rest_len))
238            }
239            None => Err(input),
240        }
241    }
242
243    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
244        match input.find(self) {
245            Some(at) => {
246                let (prev, match_and_rest) = input.split_at(at);
247                let matched_pat_len = match_and_rest.chars().next().map_or(0, char::len_utf8);
248                let r#match = match_and_rest.clone().before(matched_pat_len);
249                Ok((match_and_rest, (prev, r#match)))
250            }
251            None => Err(input),
252        }
253    }
254
255    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
256        match input.find(self) {
257            Some(at) => {
258                let (prev, match_and_rest) = input.split_at(at);
259                let matched_pat_len = match_and_rest.chars().next().map_or(0, char::len_utf8);
260                let (r#match, rest) = match_and_rest.split_at(matched_pat_len);
261                Ok((rest, (prev, r#match)))
262            }
263            None => Err(input),
264        }
265    }
266}
267
268impl Pattern for &str {
269    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
270        if input.starts_with(*self) {
271            Ok(input.split_at(self.len()).rev())
272        } else {
273            Err(input)
274        }
275    }
276
277    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
278        let rest_len = input.trim_start_matches(self).len();
279        let input_len = input.len();
280        input.split_at(input_len - rest_len).rev()
281    }
282
283    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
284        self.immediate_matches(input)
285            .map_second(|s| (s.len().checked_div(self.len()).unwrap_or(0), s).rev())
286    }
287
288    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
289        if input.ends_with(self) {
290            let mid = input.len() - self.len();
291            Ok(input.split_at(mid))
292        } else {
293            Err(input)
294        }
295    }
296
297    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
298        let trimmed_len = input.trim_end_matches(self).len();
299        let input_len = input.len();
300        (
301            input.before(trimmed_len),
302            (input_len - trimmed_len) / self.len(),
303        )
304    }
305
306    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
307        match input.find(*self) {
308            Some(at) => {
309                let (before, after) = input.split_at(at);
310                let r#match = after.clone().before(self.len());
311                Ok((after, (before, r#match)))
312            }
313            None => Err(input),
314        }
315    }
316
317    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
318        match input.find(*self) {
319            Some(at) => {
320                let (before, after) = input.split_at(at);
321                let (r#match, after) = after.split_at(self.len());
322                Ok((after, (before, r#match)))
323            }
324            None => Err(input),
325        }
326    }
327}
328
329impl Pattern for char {
330    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
331        if input.starts_with(*self) {
332            Ok(input.split_at(self.len_utf8()).rev())
333        } else {
334            Err(input)
335        }
336    }
337
338    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
339        let rest_len = input.trim_start_matches(*self).len();
340        let input_len = input.len();
341        input.split_at(input_len - rest_len).rev()
342    }
343
344    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
345        self.immediate_matches(input)
346            .map_second(|s| (s.len() / self.len_utf8(), s).rev())
347    }
348
349    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
350        if input.ends_with(*self) {
351            let mid = input.len() - self.len_utf8();
352            Ok(input.split_at(mid))
353        } else {
354            Err(input)
355        }
356    }
357
358    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
359        let trimmed_len = input.trim_end_matches(*self).len();
360        let input_len = input.len();
361        (
362            input.before(trimmed_len),
363            (input_len - trimmed_len) / self.len_utf8(),
364        )
365    }
366
367    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
368        match input.find(*self) {
369            Some(at) => {
370                let (before, after) = input.split_at(at);
371                let r#match = after.clone().before(self.len_utf8());
372                Ok((after, (before, r#match)))
373            }
374            None => Err(input),
375        }
376    }
377
378    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
379        match input.find(*self) {
380            Some(at) => {
381                let (before, after) = input.split_at(at);
382                let (r#match, after) = after.split_at(self.len_utf8());
383                Ok((after, (before, r#match)))
384            }
385            None => Err(input),
386        }
387    }
388}
389
// Generates `Pattern` methods for `either::Either` that forward the call to whichever variant is
// present. Every generated method is generic over `I: Input` and takes `&self` and `input: I`;
// only the name and the return type vary.
//
// Gated by the same feature as its sole user, the `Pattern` impl for `either::Either`, so that
// the macro is not reported as unused when the feature is disabled.
#[cfg(feature = "either")]
macro_rules! fwd_method_impl {
    ($(fn $name:ident -> $ret:ty;)+) => {
        $(
            fn $name<I: Input>(&self, input: I) -> $ret {
                match self {
                    either::Either::Left(l) => l.$name(input),
                    either::Either::Right(r) => r.$name(input),
                }
            }
        )+
    };
}
402
/// With the `either` feature enabled, [`either::Either`] is a pattern that behaves like
/// whichever of its 2 variants it holds.
#[cfg(feature = "either")]
impl<L: Pattern, R: Pattern> Pattern for either::Either<L, R> {
    // All trait methods are forwarded, including the ones that have a default body, so the
    // wrapped pattern's own implementation of each of them is the one that gets called.
    fwd_method_impl! {
        fn immediate_match -> Result<(I, I), I>;
        fn immediate_matches -> (I, I);
        fn immediate_matches_counted -> (I, (I, usize));
        fn trailing_match -> Result<(I, I), I>;
        fn trailing_matches_counted -> (I, usize);
        fn first_match -> Result<(I, (I, I)), I>;
        fn first_match_ex -> Result<(I, (I, I)), I>;
    }
}
415
/// Pattern that's the reference to another pattern, used in generic code to reuse the pattern.
///
/// Created by [`Pattern::by_ref`]; all matching is delegated to the referenced pattern.
#[repr(transparent)]
pub struct Ref<'this, T: ?Sized + Pattern>(&'this T);
419
// Implemented by hand rather than derived so that `T` itself is not required to be `Clone`:
// only the reference is copied.
impl<T: ?Sized + Pattern> Clone for Ref<'_, T> {
    fn clone(&self) -> Self {
        *self
    }
}
425
// `Ref` only holds a shared reference, so it is `Copy` regardless of `T`.
impl<T: ?Sized + Pattern> Copy for Ref<'_, T> {}
427
428impl<T: ?Sized + Pattern> Pattern for Ref<'_, T> {
429    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
430        T::immediate_match(self.0, input)
431    }
432
433    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
434        T::immediate_matches(self.0, input)
435    }
436
437    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
438        T::immediate_matches_counted(self.0, input)
439    }
440
441    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
442        T::trailing_match(self.0, input)
443    }
444
445    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
446        T::trailing_matches_counted(self.0, input)
447    }
448
449    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
450        T::first_match(self.0, input)
451    }
452
453    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
454        T::first_match_ex(self.0, input)
455    }
456}
457
/// Pattern that matches pattern `Inner` not escaped by `Prefix`.
/// "escaped" here means that the pattern `Inner` is preceded by a `Prefix` that's not preceded by
/// itself, i.e. `Inner` is preceded by an odd number of contiguous `Prefix`es.
///
/// The 1st field is the `Prefix`, the 2nd one is the `Inner` pattern.
///
/// For example, for a pattern `NotEscaped('\', '0')`, the strings "0", "\\0" & "\\\\\\0" will have
/// a match, but the strings "\0", "\\ \0" & "\\\\\\\0" won't (everything in this example is
/// spelled out literally, without Rust's own escaping).
#[derive(Clone, Copy)]
pub struct NotEscaped<Prefix: Pattern, Inner: Pattern>(pub Prefix, pub Inner);
466
467impl<Prefix: Pattern, Inner: Pattern> Pattern for NotEscaped<Prefix, Inner> {
468    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
469        self.1.immediate_match(input)
470    }
471
472    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
473        let (rest, r#match) = self.1.trailing_match(input.clone())?;
474        let (rest, n_prefixes) = self.0.trailing_matches_counted(rest);
475        (n_prefixes % 2 == 0)
476            .then_some((rest, r#match))
477            .ok_or(input)
478    }
479
480    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
481        let (rest, n) = self.1.trailing_matches_counted(input);
482        if n == 0 {
483            return (rest, 0);
484        }
485        let no_1st_prefix = match self.0.trailing_match(rest.clone()) {
486            Ok((x, _)) => x,
487            Err(rest) => return (rest, n),
488        };
489        let (_, n_prefixes_minus_one) = self.0.trailing_matches_counted(no_1st_prefix.clone());
490        if n_prefixes_minus_one % 2 != 0 {
491            (rest, n)
492        } else {
493            (no_1st_prefix, n - 1)
494        }
495    }
496
497    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
498        let mut rest = input.clone();
499        while !rest.is_empty() {
500            let (before, r#match);
501            (rest, (before, r#match)) = self.1.first_match(rest)?;
502            let before = match self.0.trailing_match(before) {
503                Ok((x, _)) => x,
504                Err(before) => return Ok((rest, (before, r#match))),
505            };
506            let (before, n_prefixes_minus_one) = self.0.trailing_matches_counted(before);
507            if n_prefixes_minus_one % 2 != 0 {
508                return Ok((rest, (before, r#match)));
509            }
510        }
511        Err(input)
512    }
513
514    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
515        let mut rest = input.clone();
516        loop {
517            let (before, r#match);
518            (rest, (before, r#match)) = self.1.first_match_ex(rest)?;
519            let Ok((before, _)) = self.0.trailing_match(before) else {
520                let index = r#match.as_ptr() as usize - input.as_ptr() as usize;
521                let before = input.before(index);
522                return Ok((rest, (before, r#match)));
523            };
524            let (_, n_prefixes_minus_one) = self.0.trailing_matches_counted(before);
525            if n_prefixes_minus_one % 2 != 0 {
526                let index = r#match.as_ptr() as usize - input.as_ptr() as usize;
527                let before = input.before(index);
528                return Ok((rest, (before, r#match)));
529            }
530        }
531    }
532}
533
534/// Pattern that matches pattern `Inner` not surrounded by `Enclosure`.
535pub struct NotEnclosed<Enclosure: Pattern, Inner: Pattern>(pub Enclosure, pub Inner);
536
537impl<Enclosure: Pattern, Inner: Pattern> Pattern for NotEnclosed<Enclosure, Inner> {
538    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
539        self.1.immediate_match(input)
540    }
541
542    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
543        self.1.immediate_matches(input)
544    }
545
546    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
547        self.1.trailing_match(input)
548    }
549
550    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
551        let mut enclosed = false;
552        let mut rest = &*input;
553        loop {
554            let (after_enc, (before_enc, enc)) =
555                self.0.first_match_ex(rest).unwrap_or(("", (rest, "")));
556            let (after_inner, (before_inner, inner)) =
557                self.1.first_match_ex(rest).unwrap_or(("", (rest, "")));
558
559            if [enc, inner] == ["", ""] {
560                break Err(input);
561            }
562
563            if before_enc.len() < before_inner.len() {
564                rest = after_enc;
565                enclosed = !enclosed;
566            } else if enclosed {
567                rest = after_inner;
568            } else {
569                let match_len = inner.len();
570                let before_len = input.len() - after_inner.len() - match_len;
571                let (before, rest_and_match) = input.split_at(before_len);
572                let r#match = rest_and_match.clone().before(match_len);
573                break Ok((rest_and_match, (before, r#match)));
574            }
575        }
576    }
577
578    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
579        let mut enclosed = false;
580        let mut rest = &*input;
581        loop {
582            let (after_enc, (before_enc, enc)) =
583                self.0.first_match_ex(rest).unwrap_or(("", (rest, "")));
584            let (after_inner, (before_inner, inner)) =
585                self.1.first_match_ex(rest).unwrap_or(("", (rest, "")));
586
587            if [enc, inner] == ["", ""] {
588                break Err(input);
589            }
590
591            if before_enc.len() < before_inner.len() {
592                rest = after_enc;
593                enclosed = !enclosed;
594            } else if enclosed {
595                rest = after_inner;
596            } else {
597                let match_len = inner.len();
598                let before_len = input.len() - after_inner.len() - match_len;
599                let (before, rest_and_match) = input.split_at(before_len);
600                let (r#match, rest) = rest_and_match.split_at(match_len);
601                break Ok((rest, (before, r#match)));
602            }
603        }
604    }
605}
606
/// A pattern that matches anything.
///
/// At the start & at the end of the input, it matches exactly 1 character, whatever it is, and
/// only fails if the input is empty.
#[derive(Clone, Copy)]
pub struct AnyChar;
610
611impl Pattern for AnyChar {
612    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
613        match input.chars().next() {
614            Some(ch) => Ok(input.split_at(ch.len_utf8()).rev()),
615            None => Err(input),
616        }
617    }
618
619    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
620        match input.chars().next_back() {
621            Some(ch) => Ok(input.split_at(ch.len_utf8())),
622            None => Err(input),
623        }
624    }
625
626    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
627        Ok((input.clone(), (I::default(), input)))
628    }
629
630    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
631        Ok((I::default(), (I::default(), input)))
632    }
633}
634
/// A pattern that matches either of the 2 patterns in a short-circuiting manner,
/// with the 1st pattern tried first. May be created by [`Pattern::or`] for convenience.
///
/// # Note
/// If you want to match either of N chars, use an array of them as a pattern instead, as this
/// struct has a general impl that may miss optimisations applicable to the case of `[char; N]`
/// being the pattern. However, unlike the array pattern, the combination of patterns using this
/// struct is not commutative, since the second pattern is only tried if the former has not been
/// found in the input.
#[derive(Debug, Clone, Copy)]
pub struct Union<P1: Pattern, P2: Pattern>(pub P1, pub P2);
646
647impl<P1: Pattern, P2: Pattern> Pattern for Union<P1, P2> {
648    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
649        self.0
650            .immediate_match(input)
651            .or_else(|input| self.1.immediate_match(input))
652    }
653
654    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
655        self.0
656            .trailing_match(input)
657            .or_else(|input| self.1.trailing_match(input))
658    }
659
660    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
661        self.0
662            .first_match(input)
663            .or_else(|input| self.1.first_match(input))
664    }
665
666    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
667        self.0
668            .first_match_ex(input)
669            .or_else(|input| self.1.first_match_ex(input))
670    }
671}
672
673/// Parses 1 instance of pattern `pat`.
674///
675/// # Errors
676/// The returned parser returns a recoverable error if the pattern didn't match at the beginning of
677/// the input.
678pub fn parse<In: Input, Reason>(pat: impl Pattern) -> impl Parser<In, In, Reason> {
679    move |input| {
680        pat.immediate_match(input)
681            .map_err(ParsingError::new_recoverable)
682    }
683}
684
685/// Parses contiguous instances of pattern `pat`.
686///
687/// The returned parser never returns an error, if no matches are found at the start of the input,
688/// the returned string is empty (but also points to the start of the input)
689///
690/// See also [`parse_until`], [`parse_until_ex`].
691pub fn parse_while<In: Input, Reason>(pat: impl Pattern) -> impl Parser<In, In, Reason> {
692    move |input| Ok(pat.immediate_matches(input))
693}
694
695/// Parses a span of the input until a match of pattern `pat` is met.
696///
697/// The returned rest of the input will still have the match.
698///
699/// The returned parser never returns an error, if `pred` returns `false` for all the characters
700/// in the input, then the output is the entire input, and the rest of the input is an empty string.
701///
702/// See also [`parse_while`], [`parse_until_ex`].
703pub fn parse_until<In: Input, Reason>(pat: impl Pattern) -> impl Parser<In, In, Reason> {
704    move |input| {
705        Ok({
706            pat.first_match(input)
707                .map_or_else(|input| (In::default(), input), map_second(first))
708        })
709    }
710}
711
712/// Like [`parse_until`], but also removes the match of `pat` from the rest of the input.
713///
714/// # Errors
715/// Unlike [`parse_until`], this parser returns a recoverable error if `pred` returned `false` for
716/// all the characters in the input.
717pub fn parse_until_ex<In: Input, Reason>(pat: impl Pattern) -> impl Parser<In, In, Reason> {
718    move |input| {
719        pat.first_match_ex(input)
720            .map(map_second(first))
721            .map_err(ParsingError::new_recoverable)
722    }
723}
724
725/// Parse a balanced group of `open` & `close` patterns.
726///
727/// The start & end of the group are <u>included</u> in the output.
728/// See [`parse_group_ex`] for a parser that excludes them.
729///
730/// # Errors
731/// - If no initial `open` was found, a recoverable error is returned.
732/// - If the end was reached before a matching `close` pattern, a fatal error is returned.
733///
734/// An example use of this is parsing balanced parentheses:
735/// ```rust
736/// # fn main() {
737/// use shrimple_parser::{pattern::parse_group, ParsingError};
738/// let src = "(foo ()) bar";
739/// assert_eq!(parse_group('(', ')')(src), Ok((" bar", "(foo ())")));
740///
741/// let src = "(oops";
742/// assert_eq!(parse_group('(', ')')(src), Err(ParsingError::new("oops", ())));
743/// # }
744/// ```
745pub fn parse_group<In: Input>(open: impl Pattern, close: impl Pattern) -> impl Parser<In, In, ()> {
746    move |input| {
747        let Ok((mut rest, _)) = open.immediate_match(&*input) else {
748            return Err(ParsingError::new_recoverable(input));
749        };
750        let mut nesting = 1;
751        while nesting > 0 {
752            let (after_open, (before_open, open)) =
753                open.first_match_ex(rest).unwrap_or(("", (rest, "")));
754            let (after_close, (before_close, close)) =
755                close.first_match_ex(rest).unwrap_or(("", (rest, "")));
756
757            if [open, close] == ["", ""] {
758                // neither `open` nor `close` matched, and nesting > 0
759                let rest_start = input.len() - rest.len();
760                return Err(ParsingError::new(input.after(rest_start), ()));
761            }
762
763            if before_open.len() < before_close.len() {
764                rest = after_open;
765                nesting += 1;
766            } else {
767                rest = after_close;
768                nesting -= 1;
769            }
770        }
771
772        let res_len = input.len() - rest.len();
773        Ok(input.split_at(res_len).rev())
774    }
775}
776
777/// Parse a balanced group of `open` & `close` patterns.
778///
779/// The start & end of the group are <u>excluded</u> in the output.
780/// See [`parse_group`] for a parser that includes them.
781///
782/// # Errors
783/// - If no initial `open` was found, a recoverable error is returned.
784/// - If the end was reached before a matching `close` pattern, a fatal error is returned.
785///
786/// An example use of this is parsing balanced parentheses:
787/// ```rust
788/// # fn main() {
789/// use shrimple_parser::{pattern::parse_group_ex, ParsingError};
790/// let src = "(foo ()) bar";
791/// assert_eq!(parse_group_ex('(', ')')(src), Ok((" bar", "foo ()")));
792///
793/// let src = "(oops";
794/// assert_eq!(parse_group_ex('(', ')')(src), Err(ParsingError::new("oops", ())));
795/// # }
796/// ```
797pub fn parse_group_ex<In: Input>(
798    open: impl Pattern,
799    close: impl Pattern,
800) -> impl Parser<In, In, ()> {
801    move |input| {
802        let input = match open.immediate_match(input) {
803            Ok((rest, _)) => rest,
804            Err(input) => return Err(ParsingError::new_recoverable(input)),
805        };
806        let mut rest = &*input;
807        let mut nesting = 1;
808        let mut close_len = 0;
809        while nesting > 0 {
810            let (after_open, (before_open, open)) =
811                open.first_match_ex(rest).unwrap_or(("", (rest, "")));
812            let (after_close, (before_close, close)) =
813                close.first_match_ex(rest).unwrap_or(("", (rest, "")));
814
815            if [open, close] == ["", ""] {
816                // neither `open` nor `close` matched, and nesting > 0
817                let rest_start = input.len() - rest.len();
818                return Err(ParsingError::new(input.after(rest_start), ()));
819            }
820
821            if before_open.len() < before_close.len() {
822                rest = after_open;
823                nesting += 1;
824            } else {
825                rest = after_close;
826                close_len = close.len();
827                nesting -= 1;
828            }
829        }
830
831        let res_len = input.len() - rest.len() - close_len;
832        Ok(input
833            .split_at(res_len)
834            .map_second(|rest| rest.after(close_len))
835            .rev())
836    }
837}
838
// A plain `char` pattern stops at the 1st quote, even though it's preceded by a backslash.
#[test]
fn char_pat() {
    let src = r#"this is what they call a \"test\", right?" - he said"#;
    let expected = Ok((
        r#"test\", right?" - he said"#,
        r"this is what they call a \",
    ));
    assert_eq!(parse_until_ex::<_, Infallible>('"').parse(src), expected);
}
850
// With `NotEscaped`, the quotes preceded by a backslash are skipped over.
#[test]
fn not_escaped_pat() {
    let src = r#"this is what they call a \"test\", right?" - he said"#;
    let expected = Ok((" - he said", r#"this is what they call a \"test\", right?"#));
    assert_eq!(
        parse_until_ex::<_, Infallible>(NotEscaped('\\', '"')).parse(src),
        expected,
    );
}
859
// A string pattern is matched as a whole at the start of the input.
#[test]
fn str_pat() {
    let parsed = parse::<_, Infallible>("abc")("abcdef");
    assert_eq!(parsed, Ok(("def", "abc")));
}
864
// An array of chars matches whichever of its elements occurs first in the input.
#[test]
fn array_pat() {
    let parsed = parse_until_ex::<_, Infallible>([';', '\''])("abc;def'xyz");
    assert_eq!(parsed, Ok(("def'xyz", "abc")));
}
872
// On this input, a union of 2 chars gives the same result as an array of the same 2 chars.
#[test]
fn union_pat() {
    let src = "abc;def'xyz";
    let via_union = parse_until_ex::<_, Infallible>(';'.or('\''))(src);
    assert_eq!(via_union, parse_until_ex([';', '\''])(src));
}
880}