shrimple_parser/
pattern.rs

1//! Abstractions for working with patterns.
2
3use {
4    crate::{
5        tuple::{first, map_second, Tuple},
6        Input, Parser, ParsingError,
7    },
8    core::ops::Not,
9};
10
11#[cfg(test)]
12use core::convert::Infallible;
13
14/// This trait represents an object that can be matched onto a string.
15/// This includes functions, characters, [arrays of] characters, strings, but also custom patterns
16/// like [`NotEscaped`]
17///
18/// See built-in patterns and parser adapters for patterns in the [`pattern`](self) module
19///
20/// Hint: on the success path, the 1st element of the return tuple is the rest of the input (with
21/// or without the matched pattern at the start)
22pub trait Pattern {
23    /// The return values are (rest of the input, matched fragment at the beginning).
24    ///
25    /// # Errors
26    /// In the case of no match, the original `input` is returned as the [`Err`] variant.
27    ///
28    /// Used by [`parse`].
29    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I>;
30
31    /// The return values are (rest of the input, contiguous matched fragments from the beginning).
32    ///
33    /// 0 is also a valid number of matches.
34    ///
35    /// Used by [`parse_while`]
36    #[expect(
37        clippy::unwrap_used,
38        reason = "this will only panic if the pattern does"
39    )]
40    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
41        let mut rest = Some(input.clone());
42        let rest_ptr = loop {
43            match self.immediate_match(rest.take().unwrap()) {
44                Ok((x, _)) => rest = Some(x),
45                Err(x) => break x.as_ptr(),
46            }
47        };
48        let input_ptr = input.as_ptr();
49        input.split_at(rest_ptr as usize - input_ptr as usize).rev()
50    }
51
52    /// Like [`Pattern::immediate_matches`], but also counts the number of matches.
53    ///
54    /// Used by the [`Pattern`] impl of [`NotEscaped`]
55    #[expect(
56        clippy::unwrap_used,
57        reason = "this will only panic if the pattern does"
58    )]
59    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
60        let mut rest = Some(input.clone());
61        let mut n = 0;
62        let rest_ptr = loop {
63            match self.immediate_match(rest.take().unwrap()) {
64                Ok((x, _)) => {
65                    rest = Some(x);
66                    n += 1;
67                }
68                Err(x) => break x.as_ptr(),
69            }
70        };
71        let input_ptr = input.as_ptr();
72        input
73            .split_at(rest_ptr as usize - input_ptr as usize)
74            .rev()
75            .map_second(|s| (s, n))
76    }
77
78    /// Like [`Pattern::immediate_match`], but matches at the end of `input`.
79    /// The return values are (the input before the match, the match)
80    ///
81    /// # Errors
82    /// In the case of no match, the original `input` is returned as the [`Err`] variant.
83    ///
84    /// Used by the [`Pattern`] impl of [`NotEscaped`]
85    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I>;
86
87    /// Like [`Pattern::immediate_matches_counted`], but matches at the end of `input`,
88    /// and doesn't return the matched fragment of the input.
89    ///
90    /// Used by the [`Pattern`] impl of [`NotEscaped`]
91    #[expect(
92        clippy::unwrap_used,
93        reason = "this will only panic if the pattern does"
94    )]
95    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
96        let mut rest = Some(input);
97        let mut n = 0;
98        loop {
99            match self.trailing_match(rest.take().unwrap()) {
100                Ok((before, _)) => {
101                    rest = Some(before);
102                    n += 1;
103                }
104                Err(rest) => break (rest, n),
105            }
106        }
107    }
108
109    /// The return values are (the match + rest of the input, (string before the match, the match)).
110    ///
111    /// # Errors
112    /// Returns the provided `input` unchanged in the [`Err`] variant if there's no match.
113    ///
114    /// Used by [`parse_until`].
115    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I>;
116
117    /// Like [`Pattern::first_match`], but the match is excluded from the rest of the input.
118    ///
119    /// # Errors
120    /// Returns the provided `input` unchanged in the [`Err`] variant if there's no match.
121    ///
122    /// Used by [`parse_until_ex`].
123    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I>;
124
125    /// Get the pattern by reference to avoid moving it, which will happen in generic code
126    ///
127    /// Do not override this method.
128    fn by_ref(&self) -> Ref<'_, Self> {
129        Ref(self)
130    }
131
132    /// Combine `self` and another pattern into a pattern that matches either of them in a
133    /// short-circuiting manner, with `self` tried first.
134    ///
135    /// Do not override this method.
136    fn or<Other: Pattern>(self, other: Other) -> Union<Self, Other>
137    where
138        Self: Sized,
139    {
140        Union(self, other)
141    }
142
143    /// Create a pattern that'll match `self` only if it's not escaped (immediately preceded)
144    /// by the provided pattern.
145    fn not_escaped_by<Prefix: Pattern>(self, prefix: Prefix) -> NotEscaped<Prefix, Self>
146    where
147        Self: Sized,
148    {
149        NotEscaped(prefix, self)
150    }
151
152    /// Create a pattern that'll match `self` only if it's not enclosed (preceded & superceded) by
153    /// the provided pattern.
154    fn not_enclosed_by<Enclosure: Pattern>(self, enc: Enclosure) -> NotEnclosed<Enclosure, Self>
155    where
156        Self: Sized,
157    {
158        NotEnclosed(enc, self)
159    }
160}
161
162impl<F: Fn(char) -> bool> Pattern for F {
163    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
164        match input.chars().next().filter(|c| self(*c)) {
165            Some(c) => Ok(input.split_at(c.len_utf8()).rev()),
166            None => Err(input),
167        }
168    }
169
170    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
171        let mid = input.find(|c| !self(c)).unwrap_or(input.len());
172        input.split_at(mid).rev()
173    }
174
175    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
176        let mut char_index = 0;
177        let byte_index = input
178            .char_indices()
179            .inspect(|_| char_index += 1)
180            .find_map(|(bi, c)| self(c).not().then_some(bi))
181            .inspect(|_| char_index -= 1)
182            .unwrap_or(input.len());
183        input
184            .split_at(byte_index)
185            .rev()
186            .map_second(|s| (s, char_index))
187    }
188
189    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
190        match input.strip_suffix(self).map(str::len) {
191            Some(len) => Ok(input.split_at(len)),
192            None => Err(input),
193        }
194    }
195
196    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
197        match input.char_indices().find(|(_, c)| self(*c)) {
198            Some((at, ch)) => {
199                let (before, after) = input.split_at(at);
200                let r#match = after.clone().before(ch.len_utf8());
201                Ok((after, (before, r#match)))
202            }
203            None => Err(input),
204        }
205    }
206
207    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
208        match input.char_indices().find(|(_, c)| self(*c)) {
209            Some((at, ch)) => {
210                let (before, after) = input.split_at(at);
211                let (r#match, after) = after.split_at(ch.len_utf8());
212                Ok((after, (before, r#match)))
213            }
214            None => Err(input),
215        }
216    }
217}
218
219/// This is a specialised, optimised impl for matching any `char` in the array. For a more general
220/// pattern combinator, use the [`Union`] pattern by calling the [`Pattern::or`] method
221impl<const N: usize> Pattern for [char; N] {
222    // TODO: specialise for `[char; N]`
223    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
224        match input.strip_prefix(self) {
225            Some(rest) => {
226                let matched_pat_len = input.len() - rest.len();
227                Ok(input.split_at(matched_pat_len).rev())
228            }
229            None => Err(input),
230        }
231    }
232
233    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
234        match input.strip_suffix(self) {
235            Some(rest) => {
236                let rest_len = rest.len();
237                Ok(input.split_at(rest_len))
238            }
239            None => Err(input),
240        }
241    }
242
243    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
244        match input.find(self) {
245            Some(at) => {
246                let (prev, match_and_rest) = input.split_at(at);
247                let matched_pat_len = match_and_rest.chars().next().map_or(0, char::len_utf8);
248                let r#match = match_and_rest.clone().before(matched_pat_len);
249                Ok((match_and_rest, (prev, r#match)))
250            }
251            None => Err(input),
252        }
253    }
254
255    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
256        match input.find(self) {
257            Some(at) => {
258                let (prev, match_and_rest) = input.split_at(at);
259                let matched_pat_len = match_and_rest.chars().next().map_or(0, char::len_utf8);
260                let (r#match, rest) = match_and_rest.split_at(matched_pat_len);
261                Ok((rest, (prev, r#match)))
262            }
263            None => Err(input),
264        }
265    }
266}
267
268impl Pattern for &str {
269    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
270        if input.starts_with(*self) {
271            Ok(input.split_at(self.len()).rev())
272        } else {
273            Err(input)
274        }
275    }
276
277    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
278        let rest_len = input.trim_start_matches(self).len();
279        let input_len = input.len();
280        input.split_at(input_len - rest_len).rev()
281    }
282
283    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
284        self.immediate_matches(input)
285            .map_second(|s| (s.len().checked_div(self.len()).unwrap_or(0), s).rev())
286    }
287
288    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
289        if input.ends_with(self) {
290            let mid = input.len() - self.len();
291            Ok(input.split_at(mid))
292        } else {
293            Err(input)
294        }
295    }
296
297    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
298        let trimmed_len = input.trim_end_matches(self).len();
299        let input_len = input.len();
300        (
301            input.before(trimmed_len),
302            (input_len - trimmed_len) / self.len(),
303        )
304    }
305
306    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
307        match input.find(*self) {
308            Some(at) => {
309                let (before, after) = input.split_at(at);
310                let r#match = after.clone().before(self.len());
311                Ok((after, (before, r#match)))
312            }
313            None => Err(input),
314        }
315    }
316
317    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
318        match input.find(*self) {
319            Some(at) => {
320                let (before, after) = input.split_at(at);
321                let (r#match, after) = after.split_at(self.len());
322                Ok((after, (before, r#match)))
323            }
324            None => Err(input),
325        }
326    }
327}
328
329impl Pattern for char {
330    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
331        if input.starts_with(*self) {
332            Ok(input.split_at(self.len_utf8()).rev())
333        } else {
334            Err(input)
335        }
336    }
337
338    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
339        let rest_len = input.trim_start_matches(*self).len();
340        let input_len = input.len();
341        input.split_at(input_len - rest_len).rev()
342    }
343
344    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
345        self.immediate_matches(input)
346            .map_second(|s| (s.len() / self.len_utf8(), s).rev())
347    }
348
349    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
350        if input.ends_with(*self) {
351            let mid = input.len() - self.len_utf8();
352            Ok(input.split_at(mid))
353        } else {
354            Err(input)
355        }
356    }
357
358    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
359        let trimmed_len = input.trim_end_matches(*self).len();
360        let input_len = input.len();
361        (
362            input.before(trimmed_len),
363            (input_len - trimmed_len) / self.len_utf8(),
364        )
365    }
366
367    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
368        match input.find(*self) {
369            Some(at) => {
370                let (before, after) = input.split_at(at);
371                let r#match = after.clone().before(self.len_utf8());
372                Ok((after, (before, r#match)))
373            }
374            None => Err(input),
375        }
376    }
377
378    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
379        match input.find(*self) {
380            Some(at) => {
381                let (before, after) = input.split_at(at);
382                let (r#match, after) = after.split_at(self.len_utf8());
383                Ok((after, (before, r#match)))
384            }
385            None => Err(input),
386        }
387    }
388}
389
390/// Pattern that's the reference to another pattern, used in generic code to reuse the pattern.
391#[repr(transparent)]
392pub struct Ref<'this, T: ?Sized + Pattern>(&'this T);
393
394impl<T: ?Sized + Pattern> Clone for Ref<'_, T> {
395    fn clone(&self) -> Self {
396        *self
397    }
398}
399
400impl<T: ?Sized + Pattern> Copy for Ref<'_, T> {}
401
402impl<T: ?Sized + Pattern> Pattern for Ref<'_, T> {
403    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
404        T::immediate_match(self.0, input)
405    }
406
407    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
408        T::immediate_matches(self.0, input)
409    }
410
411    fn immediate_matches_counted<I: Input>(&self, input: I) -> (I, (I, usize)) {
412        T::immediate_matches_counted(self.0, input)
413    }
414
415    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
416        T::trailing_match(self.0, input)
417    }
418
419    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
420        T::trailing_matches_counted(self.0, input)
421    }
422
423    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
424        T::first_match(self.0, input)
425    }
426
427    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
428        T::first_match_ex(self.0, input)
429    }
430}
431
432/// Pattern that matches pattern `Inner` not escaped by `Prefix`.
433/// "escaped" here means that the pattern `Inner` is preceded by a `Prefix` that's not preceded by
434/// itself.
435///
436/// For example, for a pattern `NotEscaped('\', '0')`, the strings "0", "\\0" & "\\\\\\0" will have
437/// a match, but the strings "\0", "\\ \0" & "\\\\\\\0" won't.
438#[derive(Clone, Copy)]
439pub struct NotEscaped<Prefix: Pattern, Inner: Pattern>(pub Prefix, pub Inner);
440
441impl<Prefix: Pattern, Inner: Pattern> Pattern for NotEscaped<Prefix, Inner> {
442    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
443        self.1.immediate_match(input)
444    }
445
446    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
447        let (rest, r#match) = self.1.trailing_match(input.clone())?;
448        let (rest, n_prefixes) = self.0.trailing_matches_counted(rest);
449        (n_prefixes % 2 == 0)
450            .then_some((rest, r#match))
451            .ok_or(input)
452    }
453
454    fn trailing_matches_counted<I: Input>(&self, input: I) -> (I, usize) {
455        let (rest, n) = self.1.trailing_matches_counted(input);
456        if n == 0 {
457            return (rest, 0);
458        }
459        let no_1st_prefix = match self.0.trailing_match(rest.clone()) {
460            Ok((x, _)) => x,
461            Err(rest) => return (rest, n),
462        };
463        let (_, n_prefixes_minus_one) = self.0.trailing_matches_counted(no_1st_prefix.clone());
464        if n_prefixes_minus_one % 2 != 0 {
465            (rest, n)
466        } else {
467            (no_1st_prefix, n - 1)
468        }
469    }
470
471    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
472        let mut rest = input.clone();
473        while !rest.is_empty() {
474            let (before, r#match);
475            (rest, (before, r#match)) = self.1.first_match(rest)?;
476            let before = match self.0.trailing_match(before) {
477                Ok((x, _)) => x,
478                Err(before) => return Ok((rest, (before, r#match))),
479            };
480            let (before, n_prefixes_minus_one) = self.0.trailing_matches_counted(before);
481            if n_prefixes_minus_one % 2 != 0 {
482                return Ok((rest, (before, r#match)));
483            }
484        }
485        Err(input)
486    }
487
488    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
489        let mut rest = input.clone();
490        loop {
491            let (before, r#match);
492            (rest, (before, r#match)) = self.1.first_match_ex(rest)?;
493            let Ok((before, _)) = self.0.trailing_match(before) else {
494                let index = r#match.as_ptr() as usize - input.as_ptr() as usize;
495                let before = input.before(index);
496                return Ok((rest, (before, r#match)));
497            };
498            let (_, n_prefixes_minus_one) = self.0.trailing_matches_counted(before);
499            if n_prefixes_minus_one % 2 != 0 {
500                let index = r#match.as_ptr() as usize - input.as_ptr() as usize;
501                let before = input.before(index);
502                return Ok((rest, (before, r#match)));
503            }
504        }
505    }
506}
507
508/// Pattern that matches pattern `Inner` not surrounded by `Enclosure`.
509pub struct NotEnclosed<Enclosure: Pattern, Inner: Pattern>(pub Enclosure, pub Inner);
510
511impl<Enclosure: Pattern, Inner: Pattern> Pattern for NotEnclosed<Enclosure, Inner> {
512    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
513        self.1.immediate_match(input)
514    }
515
516    fn immediate_matches<I: Input>(&self, input: I) -> (I, I) {
517        self.1.immediate_matches(input)
518    }
519
520    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
521        self.1.trailing_match(input)
522    }
523
524    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
525        let mut enclosed = false;
526        let mut rest = &*input;
527        loop {
528            let (after_enc, (before_enc, enc)) =
529                self.0.first_match_ex(rest).unwrap_or(("", (rest, "")));
530            let (after_inner, (before_inner, inner)) =
531                self.1.first_match_ex(rest).unwrap_or(("", (rest, "")));
532
533            if [enc, inner] == ["", ""] {
534                break Err(input);
535            }
536
537            if before_enc.len() < before_inner.len() {
538                rest = after_enc;
539                enclosed = !enclosed;
540            } else if enclosed {
541                rest = after_inner;
542            } else {
543                let match_len = inner.len();
544                let before_len = input.len() - after_inner.len() - match_len;
545                let (before, rest_and_match) = input.split_at(before_len);
546                let r#match = rest_and_match.clone().before(match_len);
547                break Ok((rest_and_match, (before, r#match)));
548            }
549        }
550    }
551
552    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
553        let mut enclosed = false;
554        let mut rest = &*input;
555        loop {
556            let (after_enc, (before_enc, enc)) =
557                self.0.first_match_ex(rest).unwrap_or(("", (rest, "")));
558            let (after_inner, (before_inner, inner)) =
559                self.1.first_match_ex(rest).unwrap_or(("", (rest, "")));
560
561            if [enc, inner] == ["", ""] {
562                break Err(input);
563            }
564
565            if before_enc.len() < before_inner.len() {
566                rest = after_enc;
567                enclosed = !enclosed;
568            } else if enclosed {
569                rest = after_inner;
570            } else {
571                let match_len = inner.len();
572                let before_len = input.len() - after_inner.len() - match_len;
573                let (before, rest_and_match) = input.split_at(before_len);
574                let (r#match, rest) = rest_and_match.split_at(match_len);
575                break Ok((rest, (before, r#match)));
576            }
577        }
578    }
579}
580
581/// A pattern that matches anything.
582#[derive(Clone, Copy)]
583pub struct AnyChar;
584
585impl Pattern for AnyChar {
586    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
587        match input.chars().next() {
588            Some(ch) => Ok(input.split_at(ch.len_utf8()).rev()),
589            None => Err(input),
590        }
591    }
592
593    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
594        match input.chars().next_back() {
595            Some(ch) => Ok(input.split_at(ch.len_utf8())),
596            None => Err(input),
597        }
598    }
599
600    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
601        Ok((input.clone(), (I::default(), input)))
602    }
603
604    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
605        Ok((I::default(), (I::default(), input)))
606    }
607}
608
609/// A pattern that matches either of the 2 patterns in a short-circuiting manner,
610/// with `self` tried first. May be created by [`Pattern::or`] for convenience.
611///
612/// # Note
613/// If you want to match either of N chars, use an array of them as a pattern instead, as this
614/// struct has a general impl that may miss optimisations applicable to the case of `[char; N]`
615/// being the pattern. However, unlike the array pattern, the combination of patterns using this
616/// struct is not commutative, since the second pattern is only tried if the former has not been
617/// found in the input.
618#[derive(Debug, Clone, Copy)]
619pub struct Union<P1: Pattern, P2: Pattern>(pub P1, pub P2);
620
621impl<P1: Pattern, P2: Pattern> Pattern for Union<P1, P2> {
622    fn immediate_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
623        self.0
624            .immediate_match(input)
625            .or_else(|input| self.1.immediate_match(input))
626    }
627
628    fn trailing_match<I: Input>(&self, input: I) -> Result<(I, I), I> {
629        self.0
630            .trailing_match(input)
631            .or_else(|input| self.1.trailing_match(input))
632    }
633
634    fn first_match<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
635        self.0
636            .first_match(input)
637            .or_else(|input| self.1.first_match(input))
638    }
639
640    fn first_match_ex<I: Input>(&self, input: I) -> Result<(I, (I, I)), I> {
641        self.0
642            .first_match_ex(input)
643            .or_else(|input| self.1.first_match_ex(input))
644    }
645}
646
647/// Parses 1 instance of pattern `pat`.
648///
649/// # Errors
650/// The returned parser returns a recoverable error if the pattern didn't match at the beginning of
651/// the input.
652pub fn parse<In: Input, Reason>(pat: impl Pattern) -> impl Parser<In, In, Reason> {
653    move |input| {
654        pat.immediate_match(input)
655            .map_err(ParsingError::new_recoverable)
656    }
657}
658
659/// Parses contiguous instances of pattern `pat`.
660///
661/// The returned parser never returns an error, if no matches are found at the start of the input,
662/// the returned string is empty (but also points to the start of the input)
663///
664/// See also [`parse_until`], [`parse_until_ex`].
665pub fn parse_while<In: Input, Reason>(pat: impl Pattern) -> impl Parser<In, In, Reason> {
666    move |input| Ok(pat.immediate_matches(input))
667}
668
669/// Parses a span of the input until a match of pattern `pat` is met.
670///
671/// The returned rest of the input will still have the match.
672///
673/// The returned parser never returns an error, if `pred` returns `false` for all the characters
674/// in the input, then the output is the entire input, and the rest of the input is an empty string.
675///
676/// See also [`parse_while`], [`parse_until_ex`].
677pub fn parse_until<In: Input, Reason>(pat: impl Pattern) -> impl Parser<In, In, Reason> {
678    move |input| {
679        Ok({
680            pat.first_match(input)
681                .map_or_else(|input| (In::default(), input), map_second(first))
682        })
683    }
684}
685
686/// Like [`parse_until`], but also removes the match of `pat` from the rest of the input.
687///
688/// # Errors
689/// Unlike [`parse_until`], this parser returns a recoverable error if `pred` returned `false` for
690/// all the characters in the input.
691pub fn parse_until_ex<In: Input, Reason>(pat: impl Pattern) -> impl Parser<In, In, Reason> {
692    move |input| {
693        pat.first_match_ex(input)
694            .map(map_second(first))
695            .map_err(ParsingError::new_recoverable)
696    }
697}
698
699/// Parse a balanced group of `open` & `close` patterns.
700///
701/// The start & end of the group are <u>included</u> in the output.
702/// See [`parse_group_ex`] for a parser that excludes them.
703///
704/// # Errors
705/// - If no initial `open` was found, a recoverable error is returned.
706/// - If the end was reached before a matching `close` pattern, a fatal error is returned.
707///
708/// An example use of this is parsing balanced parentheses:
709/// ```rust
710/// # fn main() {
711/// use shrimple_parser::{pattern::parse_group, ParsingError};
712/// let src = "(foo ()) bar";
713/// assert_eq!(parse_group('(', ')')(src), Ok((" bar", "(foo ())")));
714///
715/// let src = "(oops";
716/// assert_eq!(parse_group('(', ')')(src), Err(ParsingError::new("oops", ())));
717/// # }
718/// ```
719pub fn parse_group<In: Input>(open: impl Pattern, close: impl Pattern) -> impl Parser<In, In, ()> {
720    move |input| {
721        let Ok((mut rest, _)) = open.immediate_match(&*input) else {
722            return Err(ParsingError::new_recoverable(input));
723        };
724        let mut nesting = 1;
725        while nesting > 0 {
726            let (after_open, (before_open, open)) =
727                open.first_match_ex(rest).unwrap_or(("", (rest, "")));
728            let (after_close, (before_close, close)) =
729                close.first_match_ex(rest).unwrap_or(("", (rest, "")));
730
731            if [open, close] == ["", ""] {
732                // neither `open` nor `close` matched, and nesting > 0
733                let rest_start = input.len() - rest.len();
734                return Err(ParsingError::new(input.after(rest_start), ()));
735            }
736
737            if before_open.len() < before_close.len() {
738                rest = after_open;
739                nesting += 1;
740            } else {
741                rest = after_close;
742                nesting -= 1;
743            }
744        }
745
746        let res_len = input.len() - rest.len();
747        Ok(input.split_at(res_len).rev())
748    }
749}
750
751/// Parse a balanced group of `open` & `close` patterns.
752///
753/// The start & end of the group are <u>excluded</u> in the output.
754/// See [`parse_group`] for a parser that includes them.
755///
756/// # Errors
757/// - If no initial `open` was found, a recoverable error is returned.
758/// - If the end was reached before a matching `close` pattern, a fatal error is returned.
759///
760/// An example use of this is parsing balanced parentheses:
761/// ```rust
762/// # fn main() {
763/// use shrimple_parser::{pattern::parse_group_ex, ParsingError};
764/// let src = "(foo ()) bar";
765/// assert_eq!(parse_group_ex('(', ')')(src), Ok((" bar", "foo ()")));
766///
767/// let src = "(oops";
768/// assert_eq!(parse_group_ex('(', ')')(src), Err(ParsingError::new("oops", ())));
769/// # }
770/// ```
771pub fn parse_group_ex<In: Input>(
772    open: impl Pattern,
773    close: impl Pattern,
774) -> impl Parser<In, In, ()> {
775    move |input| {
776        let input = match open.immediate_match(input) {
777            Ok((rest, _)) => rest,
778            Err(input) => return Err(ParsingError::new_recoverable(input)),
779        };
780        let mut rest = &*input;
781        let mut nesting = 1;
782        let mut close_len = 0;
783        while nesting > 0 {
784            let (after_open, (before_open, open)) =
785                open.first_match_ex(rest).unwrap_or(("", (rest, "")));
786            let (after_close, (before_close, close)) =
787                close.first_match_ex(rest).unwrap_or(("", (rest, "")));
788
789            if [open, close] == ["", ""] {
790                // neither `open` nor `close` matched, and nesting > 0
791                let rest_start = input.len() - rest.len();
792                return Err(ParsingError::new(input.after(rest_start), ()));
793            }
794
795            if before_open.len() < before_close.len() {
796                rest = after_open;
797                nesting += 1;
798            } else {
799                rest = after_close;
800                close_len = close.len();
801                nesting -= 1;
802            }
803        }
804
805        let res_len = input.len() - rest.len() - close_len;
806        Ok(input
807            .split_at(res_len)
808            .map_second(|rest| rest.after(close_len))
809            .rev())
810    }
811}
812
813#[test]
814fn char_pat() {
815    assert_eq!(
816        parse_until_ex::<_, Infallible>('"')
817            .parse(r#"this is what they call a \"test\", right?" - he said"#),
818        Ok((
819            r#"test\", right?" - he said"#,
820            r"this is what they call a \"
821        )),
822    );
823}
824
825#[test]
826fn not_escaped_pat() {
827    assert_eq!(
828        parse_until_ex::<_, Infallible>(NotEscaped('\\', '"'))
829            .parse(r#"this is what they call a \"test\", right?" - he said"#),
830        Ok((" - he said", r#"this is what they call a \"test\", right?"#)),
831    );
832}
833
834#[test]
835fn str_pat() {
836    assert_eq!(parse::<_, Infallible>("abc")("abcdef"), Ok(("def", "abc")));
837}
838
839#[test]
840fn array_pat() {
841    assert_eq!(
842        parse_until_ex::<_, Infallible>([';', '\''])("abc;def'xyz"),
843        Ok(("def'xyz", "abc"))
844    );
845}
846
847#[test]
848fn union_pat() {
849    let src = "abc;def'xyz";
850    assert_eq!(
851        parse_until_ex::<_, Infallible>(';'.or('\''))(src),
852        parse_until_ex([';', '\''])(src)
853    );
854}