rcombinators/
combinators.rs

1use crate::parser::{ParseError, ParseResult, Parser};
2use crate::state::ParseState;
3
4/// Transform applies a function (which may fail) to the result of a parser. Transform only
5/// succeeds if the applied function succeeds, too.
6pub struct Transform<R, R2, P: Parser<Result = R>, F: Fn(R) -> ParseResult<R2>> {
7    f: F,
8    p: P,
9}
10
11impl<R, R2, P: Parser<Result = R>, F: Fn(R) -> ParseResult<R2>> Transform<R, R2, P, F> {
12    /// Create a new Transform parser using f.
13    pub fn new(p: P, f: F) -> Transform<R, R2, P, F> {
14        Transform { f: f, p: p }
15    }
16}
17
18impl<R, R2, P: Parser<Result = R>, F: Fn(R) -> ParseResult<R2>> Parser for Transform<R, R2, P, F> {
19    type Result = R2;
20    fn parse(
21        &mut self,
22        st: &mut ParseState<impl Iterator<Item = char>>,
23    ) -> ParseResult<Self::Result> {
24        match self.p.parse(st) {
25            Ok(o) => (self.f)(o),
26            Err(e) => Err(e),
27        }
28    }
29}
30
/// Alternative holds a tuple of parsers that all produce the same result type and yields the
/// result of the first one that succeeds. `T` has to be a tuple of parsers for `Alternative` to
/// implement the `Parser` trait.
pub struct Alternative<T>(T);
32
33impl<T> Alternative<T> {
34    pub fn new(tuple: T) -> Alternative<T> {
35        Alternative(tuple)
36    }
37}
38
/// Macro implementing `Parser` for `Alternative` over a tuple of parsers that share one result
/// type. Each `Name/index` pair gives a generic parameter name and its tuple position. Not for
/// public use.
macro_rules! alt_impl {
    ( ( $($ptype:ident/$ix:tt),* ) ) => {
        impl<R, $($ptype : Parser<Result=R>, )*> Parser for Alternative<($($ptype,)*)> {
            type Result = R;
            fn parse(&mut self, st: &mut ParseState<impl Iterator<Item = char>>) -> ParseResult<Self::Result> {
                $(
                    // Every alternative starts from the same position: take a hold, and reset
                    // to it if this alternative does not match.
                    let hold = st.hold();
                    if let Ok(o) = (self.0).$ix.parse(st) {
                        st.release(hold);
                        return Ok(o);
                    }
                    st.reset(hold);
                )*
                // The individual errors are dropped; only a generic failure is reported.
                Err(ParseError::Fail("no alternative matched", st.index()))
            }
        }
    }
}
57
58alt_impl!((P0 / 0, P1 / 1));
59alt_impl!((P0 / 0, P1 / 1, P2 / 2));
60alt_impl!((P0 / 0, P1 / 1, P2 / 2, P3 / 3));
61alt_impl!((P0 / 0, P1 / 1, P2 / 2, P3 / 3, P4 / 4));
62alt_impl!((P0 / 0, P1 / 1, P2 / 2, P3 / 3, P4 / 4, P5 / 5));
63alt_impl!((P0 / 0, P1 / 1, P2 / 2, P3 / 3, P4 / 4, P5 / 5, P6 / 6));
64alt_impl!((
65    P0 / 0,
66    P1 / 1,
67    P2 / 2,
68    P3 / 3,
69    P4 / 4,
70    P5 / 5,
71    P6 / 6,
72    P7 / 7
73));
74alt_impl!((
75    P0 / 0,
76    P1 / 1,
77    P2 / 2,
78    P3 / 3,
79    P4 / 4,
80    P5 / 5,
81    P6 / 6,
82    P7 / 7,
83    P8 / 8
84));
85alt_impl!((
86    P0 / 0,
87    P1 / 1,
88    P2 / 2,
89    P3 / 3,
90    P4 / 4,
91    P5 / 5,
92    P6 / 6,
93    P7 / 7,
94    P8 / 8,
95    P9 / 9
96));
97
/// Sequence runs a tuple of parsers one after another and succeeds only if every one of them
/// succeeds; the result is the tuple of the individual results. `T` must be a tuple of parsers
/// for `Sequence` to implement the `Parser` trait.
///
/// The result type of each individual parser has to implement `Default`.
pub struct Sequence<T>(T);
103
104impl<T> Sequence<T> {
105    pub fn new(tuple: T) -> Sequence<T> {
106        Sequence(tuple)
107    }
108}
109
/// Macro for implementing sequence parsers for arbitrary tuples. Not for public use.
///
/// Each `Name/index` pair gives a generic parameter name and its tuple position. The `Default`
/// requirement on the individual result types is expressed as a `where` clause, because
/// `impl Trait` is not accepted inside bounds of an impl header.
macro_rules! seq_impl {
    ( ( $($ptype:ident/$ix:tt),+ ) ) => {
        impl<$($ptype: Parser,)*> Parser for Sequence<($($ptype,)*)>
        where
            $($ptype::Result: Default,)*
        {
            type Result = ($($ptype::Result,)*);
            fn parse(&mut self, st: &mut ParseState<impl Iterator<Item = char>>) -> ParseResult<Self::Result> {
                let hold = st.hold();
                // Start from default values and overwrite one tuple slot per parser.
                let mut result: Self::Result = Default::default();
                $(
                    match (self.0).$ix.parse(st) {
                        Ok(r) => result.$ix = r,
                        Err(e) => {
                            // All or nothing: reset to the hold taken before the first parser.
                            st.reset(hold);
                            return Err(e);
                        }
                    }
                )*
                st.release(hold);
                Ok(result)
            }
        }
    }
}
132
133seq_impl!((P0 / 0, P1 / 1));
134seq_impl!((P0 / 0, P1 / 1, P2 / 2));
135seq_impl!((P0 / 0, P1 / 1, P2 / 2, P3 / 3));
136seq_impl!((P0 / 0, P1 / 1, P2 / 2, P3 / 3, P4 / 4));
137seq_impl!((P0 / 0, P1 / 1, P2 / 2, P3 / 3, P4 / 4, P5 / 5));
138seq_impl!((P0 / 0, P1 / 1, P2 / 2, P3 / 3, P4 / 4, P5 / 5, P6 / 6));
139seq_impl!((
140    P0 / 0,
141    P1 / 1,
142    P2 / 2,
143    P3 / 3,
144    P4 / 4,
145    P5 / 5,
146    P6 / 6,
147    P7 / 7
148));
149seq_impl!((
150    P0 / 0,
151    P1 / 1,
152    P2 / 2,
153    P3 / 3,
154    P4 / 4,
155    P5 / 5,
156    P6 / 6,
157    P7 / 7,
158    P8 / 8
159));
160seq_impl!((
161    P0 / 0,
162    P1 / 1,
163    P2 / 2,
164    P3 / 3,
165    P4 / 4,
166    P5 / 5,
167    P6 / 6,
168    P7 / 7,
169    P8 / 8,
170    P9 / 9
171));
172
/// PartialSequence concatenates parsers and tries to parse as far as possible.
///
/// The result is a tuple with one `Option` per parser: `Some(..)` for every parser that ran
/// successfully, and `None` for the first parser that failed and all parsers after it (those
/// are not run). The failing parser's error is discarded and `parse` itself returns `Ok`.
///
/// The individual result types do not need to implement `Default`; the result tuple consists
/// only of `Option`s.
pub struct PartialSequence<T>(T);

impl<T> PartialSequence<T> {
    /// Builds a `PartialSequence` from a tuple of parsers; they are applied in tuple order.
    pub fn new(tuple: T) -> PartialSequence<T> {
        PartialSequence(tuple)
    }
}

/// Macro for implementing partial sequence parsers for arbitrary tuples. Not for public use.
///
/// Each `Name/index` pair gives a generic parameter name and its tuple position.
macro_rules! pseq_impl {
    ( ( $($ptype:ident/$ix:tt),+ ) ) => {
        impl<$($ptype: Parser,)*> Parser for PartialSequence<($($ptype,)*)> {
            type Result = ($(Option<$ptype::Result>,)*);
            fn parse(&mut self, st: &mut ParseState<impl Iterator<Item = char>>) -> ParseResult<Self::Result> {
                let hold = st.hold();
                // A tuple of `Option`s defaults to all `None`.
                let mut result: Self::Result = Default::default();
                $(
                    match (self.0).$ix.parse(st) {
                        Ok(r) => result.$ix = Some(r),
                        Err(_) => {
                            // Keep what was parsed so far. The state is not reset here, so the
                            // failing parser is expected to have restored it itself.
                            st.release(hold);
                            return Ok(result);
                        }
                    }
                )*
                st.release(hold);
                Ok(result)
            }
        }
    }
}
206
207pseq_impl!((P0 / 0, P1 / 1));
208pseq_impl!((P0 / 0, P1 / 1, P2 / 2));
209pseq_impl!((P0 / 0, P1 / 1, P2 / 2, P3 / 3));
210pseq_impl!((P0 / 0, P1 / 1, P2 / 2, P3 / 3, P4 / 4));
211pseq_impl!((P0 / 0, P1 / 1, P2 / 2, P3 / 3, P4 / 4, P5 / 5));
212pseq_impl!((P0 / 0, P1 / 1, P2 / 2, P3 / 3, P4 / 4, P5 / 5, P6 / 6));
213pseq_impl!((
214    P0 / 0,
215    P1 / 1,
216    P2 / 2,
217    P3 / 3,
218    P4 / 4,
219    P5 / 5,
220    P6 / 6,
221    P7 / 7
222));
223pseq_impl!((
224    P0 / 0,
225    P1 / 1,
226    P2 / 2,
227    P3 / 3,
228    P4 / 4,
229    P5 / 5,
230    P6 / 6,
231    P7 / 7,
232    P8 / 8
233));
234pseq_impl!((
235    P0 / 0,
236    P1 / 1,
237    P2 / 2,
238    P3 / 3,
239    P4 / 4,
240    P5 / 5,
241    P6 / 6,
242    P7 / 7,
243    P8 / 8,
244    P9 / 9
245));
246
/// RepeatSpec describes how many times a `Repeat` parser applies its inner parser.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RepeatSpec {
    /// Any is equivalent to Min(0).
    Any,
    /// At least the given number of repetitions, with no upper limit.
    Min(usize),
    /// At most the given number of repetitions, with no lower limit.
    Max(usize),
    /// At least the first and at most the second number of repetitions.
    Between(usize, usize),
}
254
255pub struct Repeat<P: Parser> {
256    inner: P,
257    repeat: RepeatSpec,
258}
259
260impl<P: Parser> Repeat<P> {
261    pub fn new(p: P, r: RepeatSpec) -> Repeat<P> {
262        Repeat {
263            inner: p,
264            repeat: r,
265        }
266    }
267}
268
269impl<R, P: Parser<Result = R>> Parser for Repeat<P> {
270    type Result = Vec<R>;
271    fn parse(
272        &mut self,
273        st: &mut ParseState<impl Iterator<Item = char>>,
274    ) -> ParseResult<Self::Result> {
275        let (min, max) = match self.repeat {
276            RepeatSpec::Any => (0, std::usize::MAX),
277            RepeatSpec::Min(min) => (min as usize, std::usize::MAX),
278            RepeatSpec::Max(max) => (0, max as usize),
279            RepeatSpec::Between(min, max) => (min as usize, max as usize),
280        };
281        let mut v: Self::Result = Vec::new();
282        let hold = st.hold();
283        for i in 0.. {
284            match self.inner.parse(st) {
285                Ok(r) => v.push(r),
286                Err(e) => {
287                    if i >= min {
288                        st.release(hold);
289                        return Ok(v);
290                    } else {
291                        st.reset(hold);
292                        return Err(e);
293                    }
294                }
295            }
296            if i >= max - 1 {
297                st.release(hold);
298                return Ok(v);
299            }
300        }
301        unreachable!()
302    }
303}
304
305/// Maybe is a combinator returning Option<T> for a parser returning T, meaning it does not stop
306/// parsing if an optional input was not encountered. It is very similar to a `Repeat` parser with
307/// `RepeatSpec::Max(1)`.
308pub struct Maybe<Inner: Parser> {
309    inner: Inner,
310}
311
312impl<Inner: Parser> Maybe<Inner> {
313    pub fn new(p: Inner) -> Maybe<Inner> {
314        Maybe { inner: p }
315    }
316}
317
318impl<R, P: Parser<Result = R>> Parser for Maybe<P> {
319    type Result = Option<R>;
320    fn parse(
321        &mut self,
322        st: &mut ParseState<impl Iterator<Item = char>>,
323    ) -> ParseResult<Self::Result> {
324        match self.inner.parse(st) {
325            Ok(r) => Ok(Some(r)),
326            Err(_) => Ok(None),
327        }
328    }
329}
330
331/// Ignore ignores the result of an inner parser, effectively hiding the result. Useful if consumed
332/// input should not be processed further, and simplifies types in combined parsers.
333pub struct Ignore<Inner: Parser> {
334    inner: Inner,
335}
336
337impl<Inner: Parser> Ignore<Inner> {
338    pub fn new(p: Inner) -> Ignore<Inner> {
339        Ignore { inner: p }
340    }
341}
342
343impl<R, P: Parser<Result = R>> Parser for Ignore<P> {
344    type Result = ();
345    fn parse(
346        &mut self,
347        st: &mut ParseState<impl Iterator<Item = char>>,
348    ) -> ParseResult<Self::Result> {
349        match self.inner.parse(st) {
350            Ok(_) => Ok(()),
351            Err(e) => Err(e),
352        }
353    }
354}
355
356/// Applies one parser, discards the result, and returns the second parser's results if the first
357/// one succeeded. To skip the input consumed by several parsers, use a `Sequence` combinators as
358/// `A`.
359pub struct Then<A: Parser, B: Parser> {
360    a: A,
361    b: B,
362}
363
364impl<A: Parser, B: Parser> Then<A, B> {
365    pub fn new(first: A, second: B) -> Then<A, B> {
366        Then {
367            a: first,
368            b: second,
369        }
370    }
371}
372
373impl<A: Parser, B: Parser> Parser for Then<A, B> {
374    type Result = B::Result;
375    fn parse(
376        &mut self,
377        st: &mut ParseState<impl Iterator<Item = char>>,
378    ) -> ParseResult<Self::Result> {
379        match self.a.parse(st) {
380            Ok(_) => (),
381            Err(e) => return Err(e),
382        }
383        self.b.parse(st)
384    }
385}
386
/// Lazy postpones the construction of a parser until it is first asked to parse something. This
/// helps when an `Alternative` or a `Sequence` contains a parser that is expensive to build and
/// that would often be dropped again without ever having parsed anything. For example:
///
/// ```ignore
/// // Let's say dict is really expensive! the first ones not as much
/// let mut p = Alternative::new((number(), string(), atom(), dict()));
/// ```
///
/// Wrapping the `dict` constructor in a `Lazy` means the parser is only built if the
/// `Alternative` actually gets as far as needing it:
///
/// ```ignore
/// let mut p = Alternative::new((number(), string(), atom(), Lazy::new(dict)));
/// ```
///
/// Creating a `Lazy` only stores the constructor function, which is cheap in comparison. The
/// parser returned by the function is cached, so the function is called at most once.
pub struct Lazy<P, F>(F, Option<P>)
where
    F: FnMut() -> P;
407
408impl<R, P: Parser<Result = R>, F: FnMut() -> P> Lazy<P, F> {
409    /// Create a new instance of `Lazy`:
410    ///
411    /// ```ignore
412    /// let l = Lazy::new(|| some_expensive_function());
413    /// ```
414    pub fn new(f: F) -> Lazy<P, F> {
415        Lazy(f, None)
416    }
417}
418
419impl<R, P: Parser<Result = R>, F: FnMut() -> P> Parser for Lazy<P, F> {
420    type Result = R;
421    fn parse(
422        &mut self,
423        st: &mut ParseState<impl Iterator<Item = char>>,
424    ) -> ParseResult<Self::Result> {
425        if self.1.is_none() {
426            self.1 = Some((self.0)());
427        }
428        self.1.as_mut().unwrap().parse(st)
429    }
430}
431
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::Parser;
    use crate::primitives::*;

    /// A two-element `Sequence` returns both results as a tuple.
    #[test]
    fn test_pair() {
        let mut ps = ParseState::new("123 aba");
        let mut p = Sequence::new((Int64::new(), StringParser::new(" aba".to_string())));
        let parsed = p.parse(&mut ps);
        assert_eq!(Ok((123, " aba".to_string())), parsed);
    }

    /// `Sequence` works for the largest supported tuple (ten parsers).
    #[test]
    fn test_long_seq() {
        let s = || StringParser::new("a");
        let a = || "a".to_string();
        let mut ps = ParseState::new("aaaaaaaaaa");
        let mut p = Sequence::new((s(), s(), s(), s(), s(), s(), s(), s(), s(), s()));
        let expected = (a(), a(), a(), a(), a(), a(), a(), a(), a(), a());
        assert_eq!(Ok(expected), p.parse(&mut ps));
    }

    /// Chained `then` calls return only the last parser's result.
    #[test]
    fn test_then() {
        let mut ps = ParseState::new("abcdef 123");
        let mut p = StringParser::new("abc")
            .then(StringParser::new("def"))
            .then(whitespace())
            .then(Int32::new());
        let parsed = p.parse(&mut ps);
        assert_eq!(Ok(123), parsed);
    }

    /// `Alternative` picks whichever parser matches at the current position.
    #[test]
    fn test_alternative() {
        let mut p = Alternative::new((
            StringParser::new("ab"),
            StringParser::new("de"),
            StringParser::new(" "),
            Transform::new(Int64::new(), |i| Ok(i.to_string())),
        ));
        let mut ps = ParseState::new("de 34");
        for expected in &["de", " ", "34"] {
            assert_eq!(Ok(expected.to_string()), p.parse(&mut ps));
        }
    }

    /// `Repeat` honors every kind of `RepeatSpec`.
    #[test]
    fn test_repeat() {
        let mut ps = ParseState::new("aaa aaa aaaa aaaa");

        let any = Repeat::new(StringParser::new("a"), RepeatSpec::Any)
            .parse(&mut ps)
            .unwrap();
        assert_eq!(3, any.len());
        assert!(StringParser::new(" ").parse(&mut ps).is_ok());

        let at_least = Repeat::new(StringParser::new("a"), RepeatSpec::Min(2))
            .parse(&mut ps)
            .unwrap();
        assert_eq!(3, at_least.len());
        assert!(StringParser::new(" ").parse(&mut ps).is_ok());

        // Four "a" are available, but only three may be taken.
        let at_most = Repeat::new(StringParser::new("a"), RepeatSpec::Max(3))
            .parse(&mut ps)
            .unwrap();
        assert_eq!(3, at_most.len());
        assert!(StringParser::new("a ").parse(&mut ps).is_ok());

        let between = Repeat::new(StringParser::new("a"), RepeatSpec::Between(1, 3))
            .parse(&mut ps)
            .unwrap();
        assert_eq!(3, between.len());
        assert!(StringParser::new("a").parse(&mut ps).is_ok());
    }

    /// `PartialSequence` reports `None` from the first failing parser onwards.
    #[test]
    fn test_partial_sequence() {
        let mut ps = ParseState::new("acde");
        let mut p =
            PartialSequence::new((StringParser::new("a"), StringParser::new("c"), Int64::new()));
        let parsed = p.parse(&mut ps);
        assert_eq!(
            Ok((Some("a".to_string()), Some("c".to_string()), None)),
            parsed
        );

        let mut ps = ParseState::new("12 -12 nothing else");
        let mut p = PartialSequence::new((
            Sequence::new((Int64::new(), StringParser::new(" "), Int64::new())),
            StringParser::new("x"),
        ));
        let parsed = p.parse(&mut ps);
        assert_eq!(Ok((Some((12, " ".to_string(), -12)), None)), parsed);
    }

    /// The constructor inside `Lazy` is not called if an earlier alternative matches.
    // The expression after `panic!` is never reached; it only fixes the closure's return type.
    #[allow(unreachable_code)]
    #[test]
    fn test_lazy() {
        let mut ps = ParseState::new("123");
        let mut p = Alternative::new((
            Uint8::new(),
            Lazy::new(|| {
                panic!("lazy should not run this function!");
                Uint8::new()
            }),
        ));
        let parsed = p.parse(&mut ps);
        assert_eq!(Ok(123), parsed);
    }

    /// The `Lazy` parser is built and used when the earlier alternative fails.
    #[test]
    fn test_lazy2() {
        let mut ps = ParseState::new("123");
        let mut p = Alternative::new((
            string_none_of("01234", RepeatSpec::Min(1)),
            Lazy::new(|| Uint8::new().apply(|i| Ok(i.to_string()))),
        ));
        let parsed = p.parse(&mut ps);
        assert_eq!(Ok("123".to_string()), parsed);
    }

    /// The constructor inside `Lazy` is called only once even if the parser is used twice.
    #[test]
    fn test_lazy3() {
        let mut i = 0;
        let mut ps = ParseState::new("123 124");
        let lzy = || {
            assert_eq!(0, i);
            i += 1;
            string_of("0123456789", RepeatSpec::Min(1))
        };
        let mut p = Alternative::new((string_of("a", RepeatSpec::Min(1)), Lazy::new(lzy)));

        let first = p.parse(&mut ps);
        assert_eq!(Ok("123".to_string()), first);
        assert!(whitespace().parse(&mut ps).is_ok());
        let second = p.parse(&mut ps);
        assert_eq!(Ok("124".to_string()), second);
    }
}