parser_compose/
parser.rs

1use std::fmt;
2use std::ops::{ControlFlow, RangeFrom, RangeInclusive, RangeToInclusive};
3
4// These types were extracted as their signatures were getting too long for clippy
5
6/// A [`Fold`] init function that uses a `Vec` as an accumulator
7pub type AccumulateInit<Out> = fn() -> Vec<Out>;
8/// A [`Fold`] operation function that uses a `Vec` as an accumulator
9pub type AccumulateOperation<Out> = fn(Vec<Out>, Out) -> Vec<Out>;
10/// A [`Fold`] init function that does not use an accumulator.
11pub type RepeatsInit = fn() -> ();
12/// A [`Fold`] operation function that does nothing with what is given to it
13pub type RepeatsOperation<Out> = fn((), Out) -> ();
14
15/// A trait for parsers
16///
17/// A `Parser` accepts input of type `In` and tries to match it. If it succeeds, it returns a
18/// value of type `Some<(Out, In)>`. The first element of the tuple is the parser's output, and the
19/// second is whatever is left of the input after parsing. If the parser fails, it returns `None`
20///
21///
22/// Given a _thing_ that implements `Parser`, calling one of the associated methods returns a new
23/// `Parser` that augments its functionality.
24pub trait Parser<In> {
25    // Note: I had tried both of these definitions:
26    //
27    // (a) `Parser<In, Out>` and
28    // (b) `Parser` with `In` and `Out` associated types.
29    //
30    // (a) was problematic when implementing the trait for `Map` or parsers with a concrete return
31    // type. You end up with an unconstrained `Out` type parameter. I got around this by using
32    // `PhantomData`...which i didn't really want to keep around.
33    //
34    // (b) meant I could not implement the trait for all `F` where `F:
35    // Fn(In) -> Out` because the `In` type would be unconstrained.
36    //
37    // The Fix to this is to make `In` generic and `Out` associated. There seems to be some theory
38    // to why this is the right way to go, but I can't quite articulate it. So i'll leave some
39    // relevant links:
40    //
41    // My question in the forums: https://users.rust-lang.org/t/unconstrained-type-parameter-when-using-generic-trait/100698
42    // Discussion around why Fn output should be an associated type: https://github.com/rust-lang/rust/issues/20871
43
44    /// The type this parser outputs if successfull
45    type Out;
46
47    /// Recognizes a value from the input and returns the result
48    ///
49    /// Reports an error if the input could not be matched.
50    fn try_parse(&self, input: In) -> Option<(Self::Out, In)>;
51
52    /// Returns a parser that applies the function `f` to its output
53    ///
54    /// ```
55    /// use parser_compose::Parser;
56    ///
57    /// let msg = "a";
58    ///
59    /// let (value, _) = "a".map(|s| s.to_ascii_uppercase()).try_parse(msg).unwrap();
60    ///
61    /// assert_eq!(value, "A");
62    /// ```
63    fn map<F, Mapped>(self, f: F) -> Map<Self, F>
64    where
65        Self: Sized,
66        F: Fn(Self::Out) -> Mapped,
67    {
68        Map { parser: self, f }
69    }
70
71    /// Returns a parser that calls the `next` parser if it fails to match its input.
72    ///
73    /// ```
74    /// use  parser_compose::Parser;
75    ///
76    /// let msg = "a";
77    ///
78    /// let (value, _) = "1".or("a").try_parse(msg).unwrap();
79    ///
80    /// assert_eq!(value, "a");
81    /// ```
82    fn or<P>(self, next: P) -> Or<Self, P>
83    where
84        Self: Sized,
85    {
86        Or {
87            parser1: self,
88            parser2: next,
89        }
90    }
91
92    /// Returns a parser that only succeeds if `pred` returns `true` when given the parser's output
93    /// ```
94    /// use parser_compose::{first_utf8_scalar,Parser};
95    ///
96    /// let msg = "boo";
97    ///
98    /// let (value, _) = first_utf8_scalar.when(|s| s == "b").try_parse(msg).unwrap();
99    ///
100    /// assert_eq!(value, "b");
101    /// ```
102    fn when<F>(self, predicate: F) -> Predicate<Self, F>
103    where
104        Self: Sized,
105        F: Fn(Self::Out) -> bool,
106    {
107        Predicate {
108            parser: self,
109            predicate,
110        }
111    }
112
113    /// Returns a parser that succeeds if it is able to match its input `count` times.
114    ///
115    /// The funcion `op` is executed for each successful repetition. Its return value is used as an
116    /// argument for its next invocation.
117    ///
118    /// The function `init` determines what the argument to `op` will be the first time it is
119    /// called.
120    ///
121    /// `.fold()` is useful whenever you want invoke a parser multiple times and do something with
122    /// the result of each invocation.
123    ///
124    /// Here is a contrived example:
125    /// ```
126    /// use parser_compose::{Parser, utf8_scalar};
127    /// use std::str::FromStr;
128    ///
129    /// fn digit(input: &str) -> Option<(u8, &str)> {
130    ///     utf8_scalar(0x30..=0x39)
131    ///     .and_then(u8::from_str)
132    ///     .try_parse(input)
133    /// }
134    ///
135    /// // We want to sum the digits in this string
136    /// let input = "8.8.2.4";
137    ///
138    /// let sum_parser = (digit, ".".optional()).fold(4, |accum, curr| accum + curr.0, || 0u8);
139    ///
140    /// let (sum, rest) = sum_parser.try_parse(input).unwrap();
141    ///
142    /// assert_eq!(sum , 22);
143    /// assert!(rest.is_empty());
144    /// ```
145    fn fold<R, Op, Init, IV>(self, count: R, op: Op, init: Init) -> Fold<Self, Op, Init>
146    where
147        R: RepetitionArgument,
148        Init: Fn() -> IV,
149        Op: Fn(IV, Self::Out) -> IV,
150        Self: Sized,
151    {
152        Fold {
153            parser: self,
154            at_least: count.at_least(),
155            at_most: count.at_most(),
156            op,
157            init,
158        }
159    }
160
161    /// Returns a parser that succeeds if it is able to match its input `count` times, discarding
162    /// any output along the way
163    ///
164    /// ```
165    /// use parser_compose::{Parser, utf8_scalar};
166    ///
167    /// let valid_number = "123-456-7899";
168    /// let invalid_number = "123-3454-34";
169    ///
170    /// let digit = utf8_scalar(0x30..=0x39);
171    /// let validator = (digit.repeats(3), "-", digit.repeats(3), "-", digit.repeats(4));
172    ///
173    /// let (value, rest) = validator.try_parse(valid_number).unwrap();
174    /// assert_eq!(value, ((), "-", (), "-", ()));
175    /// assert_eq!(rest, "");
176    ///
177    /// let res = validator.try_parse(invalid_number.into());
178    /// assert!(res.is_none());
179    ///
180    /// ```
181    fn repeats<R>(self, count: R) -> Fold<Self, RepeatsOperation<Self::Out>, RepeatsInit>
182    where
183        Self: Sized,
184        R: RepetitionArgument,
185    {
186        Fold {
187            parser: self,
188            at_least: count.at_least(),
189            at_most: count.at_most(),
190            op: |_, _| (),
191            init: || (),
192        }
193    }
194
195    /// Returns a parser that succeeds if it is able to match its input `count` times, accumulating
196    /// output into a `Vec` along the way.
197    ///
198    /// ```
199    /// use parser_compose::Parser;
200    ///
201    /// let msg = "AAAA";
202    /// let (value, rest) = "A".accumulate(2..=3).try_parse(msg).unwrap();
203    ///
204    /// assert_eq!(value, vec!["A", "A", "A"]);
205    /// assert_eq!(rest, "A");
206    /// ```
207    fn accumulate<R>(
208        self,
209        count: R,
210    ) -> Fold<Self, AccumulateOperation<Self::Out>, AccumulateInit<Self::Out>>
211    where
212        R: RepetitionArgument,
213        Self: Sized,
214    {
215        Fold {
216            parser: self,
217            at_least: count.at_least(),
218            at_most: count.at_most(),
219            init: Vec::<Self::Out>::new,
220            op: |mut accum: Vec<Self::Out>, res: Self::Out| {
221                accum.push(res);
222                accum
223            },
224        }
225    }
226
227    /// Returns a parser that outputs the slice of the input that was recognized.
228    ///
229    /// Works very well with the `.repeats()` combinator as an alternative to `.accumulate()` if you
230    /// want to avoid allocating a `Vec`.
231    ///
232    /// Here is the same example from `.accumulate()`, this time using `.input()`:
233    ///
234    /// ```
235    /// use parser_compose::Parser;
236    ///
237    /// let msg = "AAAA";
238    /// let (value, rest) = "A".repeats(2..=3).input().try_parse(msg).unwrap();
239    ///
240    /// assert_eq!(value, "AAA");
241    /// assert_eq!(rest, "A");
242    /// ```
243    fn input(self) -> Input<Self>
244    where
245        Self: Sized,
246    {
247        Input { inner: self }
248    }
249
250    /// Returns a parser always succeeds but wraps the output in an [`Option`]. If the original
251    /// parser would have failed, the parser outputs a `Some(None)`.
252    ///
253    /// ```
254    /// use parser_compose::Parser;
255    ///
256    /// let msg = "a";
257    ///
258    /// let ((b, a), _) = ("b".optional(), "a").try_parse(msg).unwrap();
259    ///
260    /// assert_eq!(b, None);
261    /// assert_eq!(a, "a");
262    /// ```
263    fn optional(self) -> Optional<Self>
264    where
265        Self: Sized,
266    {
267        Optional { inner: self }
268    }
269
270    /// Returns a parser that never consumes any input regardless of its outcome. It can be used to
271    /// look ahead.
272    ///
273    /// ```
274    /// use parser_compose::Parser;
275    ///
276    /// // Recognize the sequence "a" followed by "b", but only if it is followed by a "c"
277    /// let a_then_b = ("a", "b", "c".peek());
278    ///
279    /// let (value, rest) = a_then_b.try_parse("abc".into()).unwrap();
280    /// // The peeked output is still returned, but is not consumed
281    /// assert_eq!(value, ("a", "b", "c"));
282    /// assert_eq!(rest, "c");
283    ///
284    /// let result = a_then_b.try_parse("abb");
285    /// assert!(result.is_none());
286    /// ```
287    fn peek(self) -> Peek<Self>
288    where
289        Self: Sized,
290    {
291        Peek { inner: self }
292    }
293
294    /// Returns a parser that succeeds if it was not able to recognize its input and fails if it
295    /// was able to. It never consumes any input
296    ///
297    /// ```
298    /// use parser_compose::Parser;
299    ///
300    /// // This parser matches "foo", but only if it is not followed by  "bar"
301    /// let parser = ("foo", "bar".not());
302    ///
303    /// let msg = "foobar";
304    ///
305    /// let result = parser.try_parse(msg);
306    ///
307    /// assert!(result.is_none());
308    ///
309    /// let (value, rest) = parser.try_parse("foobaz").unwrap();
310    ///
311    /// assert_eq!(value, ("foo", ()));
312    /// assert_eq!(rest, "baz");
313    /// ```
314    fn not(self) -> Not<Self>
315    where
316        Self: Sized,
317    {
318        Not { inner: self }
319    }
320
321    /// Returns a parser that applies a falible function `f` to its output. The parser will return
322    /// `None` if `f` fails.
323    ///
324    /// ```
325    /// use parser_compose::{Parser};
326    ///
327    /// let msg = [98].as_slice();
328    ///
329    /// let (value, _) = [98].and_then(|b| {
330    ///     // converting to utf8 can fail
331    ///     std::str::from_utf8(b)
332    /// }).try_parse(msg).unwrap();
333    ///
334    /// assert_eq!("b", value);
335    /// ```
336    fn and_then<F, U, E>(self, f: F) -> AndThen<Self, F>
337    where
338        Self: Sized,
339        F: Fn(Self::Out) -> Result<U, E>,
340    {
341        AndThen { inner: self, f }
342    }
343}
344
345/// ## `Parser` implementation for functions
346///
347/// The [`Parser`](crate::Parser) trait is automatically implemented for any function with
348/// the following signature:
349///
350/// `Fn(In) -> Option<(Out, In)>`
351///
352/// See the trait documentation for more info about the type parameters.
353impl<In, Out, F> Parser<In> for F
354where
355    F: Fn(In) -> Option<(Out, In)>,
356{
357    type Out = Out;
358
359    fn try_parse(&self, input: In) -> Option<(Self::Out, In)> {
360        (self)(input)
361    }
362}
363
364/// ## Parser implementation for slices
365///
366/// The [`Parser`](crate::Parser) trait is implemented for all slices, which means all `&[T]` will have the
367/// `try_parse()` method. Calling it will try to do a prefix match of the input with the slice used
368/// as the pattern.
369///
370/// ```
371/// use parser_compose::Parser;
372///
373/// let msg = &['H', 'E', 'L', 'L', 'O'][..];
374///
375/// let (res, rest) = ['H', 'E'].as_slice().try_parse(msg).unwrap();
376///
377///
378/// assert_eq!(res, &['H', 'E'][..]);
379/// assert_eq!(rest, &['L', 'L', 'O'][..]);
380/// ```
381impl<'p, 'i, T> Parser<&'i [T]> for &'p [T]
382where
383    T: PartialEq + fmt::Debug,
384{
385    type Out = &'p [T];
386    fn try_parse(&self, input: &'i [T]) -> Option<(Self::Out, &'i [T])> {
387        if input.starts_with(self) {
388            Some((*self, &input[self.len()..]))
389        } else {
390            None
391        }
392    }
393}
394
395/// ## Parser implementation for string slices
396///
397/// The [`Parser`](crate::Parser) trait is implemented for string slices, which means all `&str`s
398/// will have the `try_parse()` method. Calling it will try to do a prefix match of the input with
399/// the `&str` used as the pattern.
400///
401/// ```
402/// use parser_compose::Parser;
403///
404/// let msg = "HELLO";
405///
406/// let (value, rest) = "HE".try_parse(msg).unwrap();
407///
408/// assert_eq!(value, "HE");
409/// assert_eq!(rest, "LLO");
410/// ```
411impl<'input, 'pat> Parser<&'input str> for &'pat str {
412    type Out = &'pat str;
413
414    fn try_parse(&self, input: &'input str) -> Option<(Self::Out, &'input str)> {
415        if let Some(rest) = input.strip_prefix(self) {
416            return Some((self, rest));
417        }
418        None
419    }
420}
421
422/// A parser that recognizes the first unicode scalar value at the start of a string slice.
423///
424/// A unicode scalar value is not always what you might consider a
425/// "character". This function will output the first thing that rust considers a [`char`](char).
426///
427/// ```
428/// use parser_compose::{Parser, first_utf8_scalar};
429///
430/// let msg = "👻Boo";
431///
432/// let (value, rest) = first_utf8_scalar(msg).unwrap();
433///
434/// assert_eq!(value, "👻");
435/// ```
436pub fn first_utf8_scalar(input: &str) -> Option<(&str, &str)> {
437    let mut iter = input.char_indices();
438    let Some((start_idx, _)) = iter.next() else {
439        return None;
440    };
441
442    // If there was just one char in the &str, this `next()` call would return `None`.
443    let (end, _) = iter.next().unwrap_or((input.len(), ' '));
444    Some((&input[start_idx..end], &input[end..]))
445}
446
447/// Returns a parser that recognizes the first unicode scalar value in a string slice if its value
448/// is in the specified range
449///
450/// The range must be bounded on both ends. Only inclusive ranges are allowed.
451///
452/// ```
453/// use parser_compose::{Parser, utf8_scalar};
454///
455/// let msg = "a1";
456///
457/// let alphabetic = utf8_scalar(97..=122);
458/// let (value, rest) = alphabetic.try_parse(msg).unwrap();
459///
460/// assert_eq!(value, "a");
461/// assert_eq!(rest, "1");
462///
463/// let result = alphabetic.try_parse(rest);
464/// assert!(result.is_none());
465///
466/// ```
467pub fn utf8_scalar(range: RangeInclusive<u32>) -> Utf8Scalar {
468    Utf8Scalar {
469        start: *range.start(),
470        end: *range.end(),
471    }
472}
473
474/// Returns a parser that recognizes the first byte in a slice if its value is in the specified
475/// range.
476///
477/// The range can be specified in two ways:
478/// - An inclusive range (e.g. `0..=30`)
479/// - A single u8 value (e.g. `8`)
480///
481/// ```
482/// use parser_compose::{Parser, byte};
483///
484/// let msg = b"1a";
485///
486/// let digit = byte(0x30..=0x39);
487/// let (value, rest) = digit.try_parse(msg).unwrap();
488///
489/// assert_eq!(value, [0x31]);
490/// assert_eq!(rest, b"a");
491///
492/// let result = digit.try_parse(rest);
493/// assert!(result.is_none());
494/// ```
495pub fn byte(range: impl ByteRange) -> Byte {
496    Byte {
497        start: range.start(),
498        end: range.end(),
499    }
500}
501
502/// A parser that recognizes the first item in a slice.
503///
504/// ```
505/// use parser_compose::{Parser, first_slice_item};
506///
507/// let msg = &[254, 1, 2][..];
508///
509/// let (value, _) = first_slice_item(msg).unwrap();
510///
511/// assert_eq!(*value, [254]);
512/// ```
513pub fn first_slice_item<T>(input: &[T]) -> Option<(&[T], &[T])> {
514    if input.is_empty() {
515        None
516    } else {
517        Some((&input[0..1], &input[1..]))
518    }
519}
520
521/// See [`utf8_scalar`](crate::parser::utf8_scalar)
522#[derive(Copy, Clone)]
523pub struct Utf8Scalar {
524    start: u32,
525    end: u32,
526}
527
528impl<'i> Parser<&'i str> for Utf8Scalar {
529    type Out = &'i str;
530    fn try_parse(&self, input: &'i str) -> Option<(Self::Out, &'i str)> {
531        first_utf8_scalar
532            .when(|s| {
533                let char = s
534                    .chars()
535                    .next()
536                    .expect("first_utf8_scalar should only yield one char")
537                    as u32;
538                char >= self.start && char <= self.end
539            })
540            .try_parse(input)
541    }
542}
543
544/// See [`byte`](crate::parser::byte)
545#[derive(Copy, Clone)]
546pub struct Byte {
547    start: u8,
548    end: u8,
549}
550
551impl<'i> Parser<&'i [u8]> for Byte {
552    type Out = &'i [u8];
553
554    fn try_parse(&self, input: &'i [u8]) -> Option<(Self::Out, &'i [u8])> {
555        first_slice_item
556            .when(|s| s[0] >= self.start && s[0] <= self.end)
557            .try_parse(input)
558    }
559}
560
561/// Trait used to specify a range for the value of a byte
562pub trait ByteRange {
563    /// The inclusive lower bound
564    fn start(&self) -> u8;
565    /// The inclusive uppler bound
566    fn end(&self) -> u8;
567}
568
569impl ByteRange for RangeInclusive<u8> {
570    fn start(&self) -> u8 {
571        *self.start()
572    }
573
574    fn end(&self) -> u8 {
575        *self.end()
576    }
577}
578
579impl ByteRange for u8 {
580    fn start(&self) -> u8 {
581        *self
582    }
583    fn end(&self) -> u8 {
584        *self
585    }
586}
587
588/// Trait used to specify how many times a parser should run.
589///
590/// The following types implement this trait:
591///
592/// - [`usize`] (e.g. `8`):  Run the parser 8 times.
593/// - [`RangeFrom`] (e.g. `4..`) Run the parser at least 4 times (maybe more).
594/// - [`RangeInclusive`] (e.g. `3..=7`) Run the parser at least 3 times,  but no more than 7 times.
595/// - [`RangeToInclusive`] (e.g. `..=4`) Run the parser at most 4 times (maybe less, even 0).
596pub trait RepetitionArgument: Clone {
597    /// The minimum amount of times the _thing_ should be repeated
598    fn at_least(&self) -> usize;
599    /// The maximum aount of times the _thing_ should be repeated. If it is unbounded, this will
600    /// return `None`
601    fn at_most(&self) -> Option<usize>;
602}
603
604impl RepetitionArgument for RangeFrom<usize> {
605    fn at_least(&self) -> usize {
606        self.start
607    }
608    fn at_most(&self) -> Option<usize> {
609        None
610    }
611}
612
613impl RepetitionArgument for RangeInclusive<usize> {
614    fn at_least(&self) -> usize {
615        *self.start()
616    }
617
618    fn at_most(&self) -> Option<usize> {
619        Some(*self.end())
620    }
621}
622
623impl RepetitionArgument for RangeToInclusive<usize> {
624    fn at_least(&self) -> usize {
625        0
626    }
627    fn at_most(&self) -> Option<usize> {
628        Some(self.end)
629    }
630}
631
632impl RepetitionArgument for usize {
633    fn at_least(&self) -> usize {
634        *self
635    }
636
637    fn at_most(&self) -> Option<usize> {
638        Some(*self)
639    }
640}
641
642///  See [`fold()`](crate::Parser::fold)
643#[derive(Clone, Copy)]
644pub struct Fold<P, Operation, Init> {
645    at_least: usize,
646    at_most: Option<usize>,
647    parser: P,
648    op: Operation,
649    init: Init,
650}
651
652impl<In, Out, P, Operation, Init, Accum> Parser<In> for Fold<P, Operation, Init>
653where
654    P: Parser<In, Out = Out>,
655    Init: Fn() -> Accum,
656    Operation: Fn(Accum, Out) -> Accum,
657    In: Clone,
658{
659    type Out = Accum;
660
661    fn try_parse(&self, input: In) -> Option<(Self::Out, In)> {
662        let lower_bound = self.at_least;
663        let upper_bound = self.at_most;
664
665        // Do not proceed any further if the lower bound is greater than the upper bound
666        // I consider this a programmer error and deem it non-recoverable.
667        if let Some(u) = upper_bound {
668            assert!(
669                u >= lower_bound,
670                "upper bound should be greater than or equal to the lower bound"
671            );
672        };
673
674        // Given our current state (e.g. upper and lower bounds, how many loops we have dong so far
675        // etc..), determines if we should continue looping or if we should break out.
676        let check = |counter: usize, rest: In, accumulator: Accum, latest_attempt: Option<In>| {
677            // What we do depends on the latest parsing result
678            match latest_attempt {
679                // If the last call to `try_parse` was successful...
680                Some(n) => {
681                    if let Some(u) = upper_bound {
682                        // Break out of the loop if we have satisfied the lower bound and the current
683                        // count is equal to the upper bound
684                        if counter >= lower_bound && counter == u {
685                            return ControlFlow::Break(Some((accumulator, n)));
686                        }
687                    }
688                    ControlFlow::Continue(accumulator)
689                }
690                // If the latest attempt was a failure...
691                None => {
692                    if counter < lower_bound {
693                        // and we have not yet hit the lower bound we need to break out
694                        return ControlFlow::Break(None);
695                    }
696
697                    // otherwise, we consider it a success and break out with whatever we have
698                    // accumulated so far.
699                    ControlFlow::Break(Some((accumulator, rest)))
700                }
701            }
702        };
703
704        // Keeps track of how many successful parser executions we have had so far
705        let mut counter = 0;
706
707        // The input returned from the last successful parser execution
708        let mut rest = input;
709
710        // The result of the last parser execution, successful or not.
711        let mut latest_attempt = Some(rest.clone());
712
713        // The `init` call gives us the initial value of whatever it is we are accumulating into.
714        let mut accumulator = (self.init)();
715
716        loop {
717            // Note that `accumulator` is opaque, we know nothing about it and make no requirements on
718            // it implementing Clone/Copy.
719            // So when we move it to `check`, we need to make sure we get it back.
720            accumulator = match check(counter, rest.clone(), accumulator, latest_attempt) {
721                ControlFlow::Break(outcome) => return outcome,
722                ControlFlow::Continue(a) => a,
723            };
724
725            latest_attempt = match self.parser.try_parse(rest.clone()) {
726                Some((o, r)) => {
727                    counter += 1;
728                    rest = r;
729                    accumulator = (self.op)(accumulator, o);
730                    Some(rest.clone())
731                }
732                None => None,
733            };
734        }
735    }
736}
737
738/// See [`when()`](crate::Parser::when)
739#[derive(Clone, Copy)]
740pub struct Predicate<P, F> {
741    parser: P,
742    predicate: F,
743}
744
745impl<In, Out, P, F> Parser<In> for Predicate<P, F>
746where
747    P: Parser<In, Out = Out>,
748    F: Fn(Out) -> bool,
749    Out: Clone,
750{
751    type Out = Out;
752    fn try_parse(&self, input: In) -> Option<(Self::Out, In)> {
753        match self.parser.try_parse(input) {
754            Some((value, rest)) => match (self.predicate)(value.clone()) {
755                true => Some((value, rest)),
756                false => None,
757            },
758            None => None,
759        }
760    }
761}
762
763/// See [`optional()`](crate::Parser::optional)
764#[derive(Clone, Copy)]
765pub struct Optional<P> {
766    inner: P,
767}
768
769impl<In, Out, P> Parser<In> for Optional<P>
770where
771    In: Clone,
772    P: Parser<In, Out = Out>,
773{
774    type Out = Option<Out>;
775    fn try_parse(&self, input: In) -> Option<(Self::Out, In)> {
776        match self.inner.try_parse(input.clone()) {
777            Some((value, rest)) => Some((Some(value), rest)),
778            None => Some((None, input)),
779        }
780    }
781}
782
783/// See [`peek()`](crate::Parser::peek)
784#[derive(Clone, Copy)]
785pub struct Peek<P> {
786    inner: P,
787}
788
789impl<In, Out, P> Parser<In> for Peek<P>
790where
791    In: Clone,
792    P: Parser<In, Out = Out>,
793{
794    type Out = Out;
795    fn try_parse(&self, input: In) -> Option<(Self::Out, In)> {
796        self.inner.try_parse(input.clone()).map(|(p, _)| (p, input))
797    }
798}
799
800/// See [`not()`](crate::Parser::not)
801#[derive(Clone, Copy)]
802pub struct Not<P> {
803    inner: P,
804}
805
806impl<In, Out, P> Parser<In> for Not<P>
807where
808    In: Clone,
809    P: Parser<In, Out = Out>,
810{
811    type Out = ();
812    fn try_parse(&self, input: In) -> Option<(Self::Out, In)> {
813        match self.inner.try_parse(input.clone()) {
814            Some(_) => None,
815            None => Some(((), input)),
816        }
817    }
818}
819
820/// See [`and_then()`](crate::Parser::and_then)
821#[derive(Clone, Copy)]
822pub struct AndThen<P, F> {
823    inner: P,
824    f: F,
825}
826
827impl<In, Out, E, P, F, U> Parser<In> for AndThen<P, F>
828where
829    P: Parser<In, Out = Out>,
830    F: Fn(P::Out) -> Result<U, E>,
831{
832    type Out = U;
833
834    fn try_parse(&self, input: In) -> Option<(Self::Out, In)> {
835        match self.inner.try_parse(input) {
836            Some((value, rest)) => match (self.f)(value) {
837                Ok(m) => Some((m, rest)),
838                Err(_) => None,
839            },
840            None => None,
841        }
842    }
843}
844
845macro_rules! impl_tuple {
846    ($($parser:ident : $parser_type:ident : $out:ident : $out_type:ident),+) => {
847        /// A tuple of parsers is treated as a parser that tries its inner parsers in turn, feeding
848        /// the leftover input from the first as the input to the other and so on
849        ///
850        /// Calling the `.try_parse()` on the tuple returns a new tuple containing the extracted values.
851        ///
852        /// This is implemented for tuples up to 12 items long
853        impl<In $(, $parser_type, $out_type)+> Parser<In> for ($($parser_type,)+)
854        where $($parser_type: $crate::Parser<In, Out=$out_type>,)+
855        {
856            type Out = ($($out_type,)+);
857
858            #[allow(unused_assignments)]
859            fn try_parse(&self, ctx: In) -> Option<(($($out_type,)+), In)> {
860                let rest = ctx;
861                let ( $( $parser, )+) = self;
862
863                $(
864                    let ($out, rest) = match $parser.try_parse(rest) {
865                        Some(v) => v,
866                        None => return None,
867                    };
868                ) *
869
870                Some((($($out, )+) , rest))
871            }
872        }
873    }
874}
875
876// Ah, good 'ole macros.
877//
878// The purpose of the `impl_tuple` macro is to generate the following impl for a tuple whose length
879// is the number of arguments to the macro.
880// `impl Parser<...> for (T1, ) where T1: Parser<..> { ... }`
881//
882// So this call: `impl_tuple!(p0:P0:o0:O0, p1:P1:o1:O1);` generates the Parser impl for tuples of
883// length 2.
884// This call to `impl_tuple!(p0:P0:o0:O0, p1:P1:o1:O1, p2:P2:o2:O2);`  generates the impl for
885// tuples of length 3, and so on.
886//
887// Each argument to the macro is a colon delimited keyword that will be used as is in the
888// implementation to refer to the type/name of the parser or its output at that tuple location
889impl_tuple!(p0:P0:o0:O0);
890impl_tuple!(p0:P0:o0:O0, p1:P1:o1:O1);
891impl_tuple!(p0:P0:o0:O0, p1:P1:o1:O1, p2:P2:o2:O2);
892impl_tuple!(p0:P0:o0:O0, p1:P1:o1:O1, p2:P2:o2:O2, p3:P3:o3:O3);
893impl_tuple!(p0:P0:o0:O0, p1:P1:o1:O1, p2:P2:o2:O2, p3:P3:o3:O3, p4:P4:o4:O4);
894impl_tuple!(p0:P0:o0:O0, p1:P1:o1:O1, p2:P2:o2:O2, p3:P3:o3:O3, p4:P4:o4:O4, p5:P5:o5:O5);
895impl_tuple!(p0:P0:o0:O0, p1:P1:o1:O1, p2:P2:o2:O2, p3:P3:o3:O3, p4:P4:o4:O4, p5:P5:o5:O5, p6:P6:o6:O6);
896impl_tuple!(p0:P0:o0:O0, p1:P1:o1:O1, p2:P2:o2:O2, p3:P3:o3:O3, p4:P4:o4:O4, p5:P5:o5:O5, p6:P6:o6:O6, p7:P7:o7:O7);
897impl_tuple!(p0:P0:o0:O0, p1:P1:o1:O1, p2:P2:o2:O2, p3:P3:o3:O3, p4:P4:o4:O4, p5:P5:o5:O5, p6:P6:o6:O6, p7:P7:o7:O7, p8:P8:o8:O8);
898impl_tuple!(p0:P0:o0:O0, p1:P1:o1:O1, p2:P2:o2:O2, p3:P3:o3:O3, p4:P4:o4:O4, p5:P5:o5:O5, p6:P6:o6:O6, p7:P7:o7:O7, p8:P8:o8:O8, p9:P9:o9:O9);
899impl_tuple!(p0:P0:o0:O0, p1:P1:o1:O1, p2:P2:o2:O2, p3:P3:o3:O3, p4:P4:o4:O4, p5:P5:o5:O5, p6:P6:o6:O6, p7:P7:o7:O7, p8:P8:o8:O8, p9:P9:o9:O9, p10:P10:o10:O10);
900impl_tuple!(p0:P0:o0:O0, p1:P1:o1:O1, p2:P2:o2:O2, p3:P3:o3:O3, p4:P4:o4:O4, p5:P5:o5:O5, p6:P6:o6:O6, p7:P7:o7:O7, p8:P8:o8:O8, p9:P9:o9:O9, p10:P10:o10:O10, p11:P11:o11:O11);
901
902/// See [`or()`](crate::Parser::or)
903#[derive(Clone, Copy)]
904pub struct Or<P1, P2> {
905    parser1: P1,
906    parser2: P2,
907}
908
909impl<In, Out, P1, P2> Parser<In> for Or<P1, P2>
910where
911    P1: Parser<In, Out = Out>,
912    P2: Parser<In, Out = Out>,
913    In: Clone,
914{
915    type Out = Out;
916    fn try_parse(&self, input: In) -> Option<(Self::Out, In)> {
917        match self.parser1.try_parse(input.clone()) {
918            Some((value, rest)) => Some((value, rest)),
919            None => self.parser2.try_parse(input),
920        }
921    }
922}
923
924/// See [`map()`](crate::Parser::map).
925#[derive(Clone, Copy)]
926pub struct Map<P, F> {
927    parser: P,
928    f: F,
929}
930
931impl<In, Out, P, F, M> Parser<In> for Map<P, F>
932where
933    P: Parser<In, Out = Out>,
934    F: Fn(P::Out) -> M,
935{
936    type Out = M;
937    fn try_parse(&self, input: In) -> Option<(Self::Out, In)> {
938        self.parser
939            .try_parse(input)
940            .map(|(v, rest)| ((self.f)(v), rest))
941    }
942}
943
944#[derive(Clone, Copy)]
945pub struct Input<P> {
946    inner: P,
947}
948
949impl<'a, P, Out> Parser<&'a str> for Input<P>
950where
951    P: Parser<&'a str, Out = Out>,
952{
953    type Out = &'a str;
954
955    fn try_parse(&self, input: &'a str) -> Option<(Self::Out, &'a str)> {
956        match self.inner.try_parse(input) {
957            Some((_, rest)) => {
958                let consumed = input.strip_suffix(rest).unwrap();
959                Some((consumed, rest))
960            }
961            None => None,
962        }
963    }
964}
965
966impl<'a, P, T, Out> Parser<&'a [T]> for Input<P>
967where
968    P: Parser<&'a [T], Out = Out>,
969    T: PartialEq,
970{
971    type Out = &'a [T];
972
973    fn try_parse(&self, input: &'a [T]) -> Option<(Self::Out, &'a [T])> {
974        match self.inner.try_parse(input) {
975            Some((_, rest)) => {
976                let consumed = input.strip_suffix(rest).unwrap();
977                Some((consumed, rest))
978            }
979            None => None,
980        }
981    }
982}
983
984#[cfg(test)]
985mod test {
986    use crate::{first_slice_item, Parser};
987
988    #[derive(PartialEq, Eq, Debug)]
989    enum Token {
990        A,
991        B,
992        C,
993        D,
994        E,
995    }
996
997    #[test]
998    fn combinators_can_be_used_with_arbitrary_structs() {
999        let msg = vec![Token::A, Token::B, Token::C, Token::D, Token::E];
1000
1001        let (value, rest) = first_slice_item.repeats(2).input().try_parse(&msg).unwrap();
1002
1003        assert_eq!(value, &[Token::A, Token::B]);
1004        assert_eq!(rest, &[Token::C, Token::D, Token::E]);
1005    }
1006}
1007#[cfg(test)]
1008mod test_map_combinator {
1009    use crate::Parser;
1010
1011    #[test]
1012    fn test_map() {
1013        let msg = "ABCABC";
1014        let (value, rest) = ("A", "B", "C").map(|s| s.0).try_parse(msg).unwrap();
1015
1016        assert_eq!(value, "A");
1017        assert_eq!(rest, "ABC");
1018    }
1019}
1020
1021#[cfg(test)]
1022mod test_or_combinator {
1023    use crate::Parser;
1024
1025    #[test]
1026    fn it_works() {
1027        let msg = "GET";
1028
1029        let result = "POST".or("PUT").try_parse(msg.into());
1030        assert!(result.is_none());
1031
1032        let (value, _) = "GET".or("POST").try_parse(msg.into()).unwrap();
1033        assert_eq!(value, "GET");
1034
1035        // The first match is reported
1036        let (value, _) = "G".or("GE").or("GET").try_parse(msg.into()).unwrap();
1037        assert_eq!(value, "G");
1038    }
1039}
1040
1041#[cfg(test)]
1042mod test_when_combinator {
1043    use crate::Parser;
1044
1045    #[test]
1046    fn it_works() {
1047        let msg = "GET";
1048        let pass = false;
1049
1050        let result = "GET".when(|_| pass).try_parse(msg.into());
1051        assert!(result.is_none());
1052
1053        let pass = true;
1054        let (value, _) = "GET".when(|_| pass).try_parse(msg.into()).unwrap();
1055        assert_eq!(value, "GET");
1056    }
1057}
1058
1059#[cfg(test)]
1060mod test_tuple_combinator {
1061    use crate::{utf8_scalar, Parser};
1062
1063    #[test]
1064    fn it_works() {
1065        let msg = "GET https://example.org HTTP/1.1";
1066
1067        let method_parser = "GET".or("POST").or("PUT");
1068
1069        let scheme_parser = "http://".or("https://");
1070
1071        let authority_parser = "example.org";
1072
1073        let version_parser = "HTTP/1.0".or("HTTP/1.1");
1074
1075        let whitespace = utf8_scalar(0x20..=0x20);
1076
1077        let (method, _, scheme, authority, _, version) = (
1078            method_parser,
1079            whitespace,
1080            scheme_parser,
1081            authority_parser,
1082            whitespace,
1083            version_parser,
1084        )
1085            .try_parse(msg.into())
1086            .unwrap()
1087            .0;
1088
1089        assert_eq!(method, "GET");
1090        assert_eq!(scheme, "https://");
1091        assert_eq!(authority, "example.org");
1092        assert_eq!(version, "HTTP/1.1");
1093    }
1094}
1095
1096#[cfg(test)]
1097mod test_repeated_combinator {
1098    use crate::Parser;
1099
1100    #[test]
1101    fn test_single_value_count() {
1102        let msg = "AAA";
1103
1104        let (value, rest) = "A".accumulate(2).try_parse(msg.into()).unwrap();
1105        assert_eq!(value, vec!["A", "A"]);
1106        assert_eq!(rest, "A");
1107
1108        let result = "A".accumulate(4).try_parse(msg.into());
1109        assert!(result.is_none());
1110
1111        let (value, rest) = "A".accumulate(0).try_parse(msg.into()).unwrap();
1112        assert!(value.is_empty());
1113        assert_eq!(rest, "AAA");
1114    }
1115
1116    #[test]
1117    fn test_bounded_both_sides() {
1118        let msg = "AAAABB";
1119
1120        let (value, rest) = "A".accumulate(0..=0).try_parse(msg.into()).unwrap();
1121        assert!(value.is_empty());
1122        assert_eq!(rest, msg);
1123
1124        let (value, rest) = "A".accumulate(1..=1).try_parse(msg.into()).unwrap();
1125        assert_eq!(value, vec!["A"]);
1126        assert_eq!(rest, "AAABB");
1127
1128        let (value, rest) = "A".accumulate(1..=3).try_parse(msg.into()).unwrap();
1129        assert_eq!(value, vec!["A", "A", "A"]);
1130        assert_eq!(rest, "ABB");
1131
1132        let (value, rest) = "A".accumulate(1..=10).try_parse(msg.into()).unwrap();
1133        assert_eq!(value, vec!["A", "A", "A", "A"]);
1134        assert_eq!(rest, "BB");
1135    }
1136
1137    #[test]
1138    #[should_panic]
1139    fn test_bounded_both_sides_panics_if_lower_is_greater_than_upper() {
1140        let msg = "AAAABB";
1141        let _ = "A".accumulate(1..=0).try_parse(msg.into());
1142    }
1143
1144    #[test]
1145    fn test_lower_bound() {
1146        let msg = "AAAB";
1147
1148        let result = "A".accumulate(4..).try_parse(msg.into());
1149        assert!(result.is_none());
1150
1151        let (value, rest) = "A".accumulate(1..).try_parse(msg.into()).unwrap();
1152        assert_eq!(value, vec!["A", "A", "A"]);
1153        assert_eq!(rest, "B");
1154    }
1155
1156    #[test]
1157    fn test_upper_bound() {
1158        let msg = "BB";
1159
1160        let (value, rest) = "A".accumulate(..=3).try_parse(msg.into()).unwrap();
1161        assert!(value.is_empty());
1162        assert_eq!(rest, "BB");
1163
1164        let msg = "AAB";
1165        let (value, rest) = "A".accumulate(..=0).try_parse(msg.into()).unwrap();
1166        assert!(value.is_empty());
1167        assert_eq!(rest, "AAB");
1168
1169        let (value, rest) = "A".accumulate(..=1).try_parse(msg.into()).unwrap();
1170        assert_eq!(value, vec!["A"]);
1171        assert_eq!(rest, "AB");
1172
1173        let (value, rest) = "A".accumulate(..=10).try_parse(msg.into()).unwrap();
1174        assert_eq!(value, vec!["A", "A"]);
1175        assert_eq!(rest, "B");
1176    }
1177
1178    #[test]
1179    fn always_succeeds_with_zero_lower_bound() {
1180        let msg = "GG";
1181
1182        let (value, rest) = "A".accumulate(0..).try_parse(msg.into()).unwrap();
1183        assert_eq!(value, vec![] as Vec<&str>);
1184        assert_eq!(rest, "GG");
1185    }
1186}
1187
1188#[cfg(test)]
1189mod test_fold_combinator {
1190    use crate::Parser;
1191
1192    #[test]
1193    fn operation_is_not_run_if_parser_fails() {
1194        let msg = "EE";
1195
1196        let (value, rest) = "A"
1197            .fold(0.., |a, _| a + 1, || 0u8)
1198            .try_parse(msg.into())
1199            .unwrap();
1200
1201        assert_eq!(value, 0);
1202        assert_eq!(rest, "EE");
1203    }
1204
1205    #[test]
1206    fn operation_is_run_on_each_repetition() {
1207        let msg = "AAAA";
1208
1209        let (value, rest) = "A"
1210            .fold(2..=3, |a, _| a + 1, || 0u8)
1211            .try_parse(msg.into())
1212            .unwrap();
1213        assert_eq!(3, value);
1214        assert_eq!(rest, "A");
1215    }
1216}
1217
1218#[cfg(test)]
1219mod test_peeked_combinator {
1220    use crate::Parser;
1221
1222    #[test]
1223    fn test_peeked_does_not_consume_input_on_success() {
1224        let method = "GET".or("POST");
1225        let host = "example.com".or("example.org");
1226
1227        let msg = "GET/example.org";
1228
1229        let ((method, peeked), rest) = (method, ("/", host).peek()).try_parse(msg.into()).unwrap();
1230
1231        assert_eq!(method, "GET");
1232        assert_eq!(peeked, ("/", "example.org"));
1233        assert_eq!(rest, "/example.org");
1234    }
1235
1236    #[test]
1237    fn test_not() {
1238        // This parser matches a single "a", but only if it is not part of an arbitrary long
1239        // sequence of "a"'s followed by a "b".
1240        // I got this from the wikipedia page on parsing expression grammars
1241        let tricky = (("a".accumulate(1..), "b").not(), "a");
1242
1243        let fail = "aaaba";
1244        let pass = "aaaa";
1245
1246        let result = tricky.try_parse(fail.into());
1247        assert!(result.is_none());
1248
1249        let (value, rest) = tricky.try_parse(pass.into()).unwrap();
1250
1251        assert_eq!(value, ((), "a"));
1252        assert_eq!(rest, "aaa");
1253    }
1254}
1255
1256#[cfg(test)]
1257mod test_input_combinator {
1258    use crate::{utf8_scalar, Parser};
1259    use std::str::FromStr;
1260
1261    #[test]
1262    fn test_input() {
1263        let digit = utf8_scalar(0x30..=0x39).and_then(u8::from_str);
1264        let msg = "1234.4";
1265        let decimal = (digit.repeats(1..), ".", digit.repeats(1..)).input();
1266
1267        let (value, rest) = decimal.try_parse(msg.into()).unwrap();
1268        assert_eq!(value, "1234.4");
1269        assert_eq!(rest, "");
1270    }
1271
1272    #[test]
1273    fn test_input_with_peek() {
1274        let msg = "ABC";
1275        let (value, rest) = "ABC".peek().input().try_parse(msg.into()).unwrap();
1276
1277        assert_eq!(value, "");
1278        assert_eq!(rest, "ABC");
1279    }
1280}
1281
1282#[cfg(test)]
1283mod test_parsers {
1284    use crate::{first_utf8_scalar, utf8_scalar, Parser};
1285    #[test]
1286    fn empty_str() {
1287        let msg = "H";
1288        let (value, rest) = "".try_parse(msg.into()).unwrap();
1289        assert_eq!(value, "");
1290        assert_eq!(rest, msg);
1291    }
1292
1293    #[test]
1294    fn test_first_utf8_scalar() {
1295        let msg = "🏠";
1296        let (value, rest) = first_utf8_scalar.try_parse(msg.into()).unwrap();
1297
1298        assert_eq!(value, "🏠");
1299        assert_eq!(rest, "");
1300    }
1301
1302    #[test]
1303    fn test_utf8_scalar() {
1304        let msg = "abc";
1305        let (value, rest) = utf8_scalar(97..=97).try_parse(msg.into()).unwrap();
1306        assert_eq!(value, "a");
1307        let result = utf8_scalar(97..=97).try_parse(rest);
1308        assert!(result.is_none());
1309
1310        let msg = "🤣Hello";
1311
1312        let (value, _) = utf8_scalar('\u{1f923}' as u32..='\u{1f923}' as u32)
1313            .try_parse(msg.into())
1314            .unwrap();
1315        assert_eq!(value, "🤣");
1316    }
1317}
1318
1319#[doc = include_str!("../README.md")]
1320#[cfg(doctest)]
1321pub struct ReadmeDocTests {}