combine_language/
lib.rs

1//! # Example
2//!
3//! ```
4//! # extern crate combine;
5//! # extern crate combine_language;
6//! # use combine::{satisfy, EasyParser, Parser};
7//! # use combine::parser::char::{alpha_num, letter, string};
8//! # use combine_language::{Identifier, LanguageEnv, LanguageDef};
9//! # fn main() {
10//! let env = LanguageEnv::new(LanguageDef {
11//!     ident: Identifier {
12//!         start: letter(),
13//!         rest: alpha_num(),
14//!         reserved: ["if", "then", "else", "let", "in", "type"].iter()
15//!                                                              .map(|x| (*x).into())
16//!                                                              .collect(),
17//!     },
18//!     op: Identifier {
19//!         start: satisfy(|c| "+-*/".chars().any(|x| x == c)),
20//!         rest: satisfy(|c| "+-*/".chars().any(|x| x == c)),
21//!         reserved: ["+", "-", "*", "/"].iter().map(|x| (*x).into()).collect()
22//!     },
23//!     comment_start: string("/*").map(|_| ()),
24//!     comment_end: string("*/").map(|_| ()),
25//!     comment_line: string("//").map(|_| ()),
26//! });
27//! let id = env.identifier();//An identifier parser
28//! let integer = env.integer();//An integer parser
29//! let result = (id, integer).easy_parse("this /* Skips comments */ 42");
30//! assert_eq!(result, Ok(((String::from("this"), 42), "")));
31//! # }
32//! ```
33
34#[macro_use]
35extern crate combine;
36
37use std::borrow::Cow;
38use std::cell::RefCell;
39use std::marker::PhantomData;
40use std::str;
41
42use combine::{
43    any, attempt, between, error,
44    error::{Commit, ParseResult::*, StdParseResult, StreamError, Tracked},
45    from_str, many, not_followed_by, one_of, optional, parser,
46    parser::{
47        char::{self, char, digit, space},
48        combinator::{no_partial, recognize, NotFollowedBy, Try},
49        error::Expected,
50        function::{env_parser, EnvParser},
51        range::take,
52        sequence::{Between, Skip, With},
53        token::{tokens_cmp, value, Token, TokensCmp, Value},
54    },
55    satisfy, skip_many, skip_many1,
56    stream::{RangeStream, ResetStream, Stream, StreamOnce},
57    token, unexpected, ErrorOffset, ParseError, ParseResult, Parser,
58};
59
60type Str<I> = Expected<
61    With<TokensCmp<fn(char, char) -> bool, str::Chars<'static>, I>, Value<I, &'static str>>,
62    &'static str,
63>;
64fn string<'a, Input>(s: &'static str) -> Str<Input>
65where
66    Input: Stream<Token = char>,
67    Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
68{
69    tokens_cmp(s.chars(), (|l, r| l == r) as fn(_, _) -> _)
70        .with(value(s))
71        .expected(s)
72}
73
// Generates `Parser` trait methods for a wrapper type by delegating each one
// to the parser stored in the named field, e.g. `forward_parser!(Input, parser)`.
//
// The `$crate::...` paths used below resolve through this file's own
// `use combine::{...}` imports (`error`, `ErrorOffset`, `StreamOnce`).
macro_rules! forward_parser {
    // Recursive case: emit the first listed method, then recurse on the
    // remaining method names with the same field tokens.
    ($input: ty, $method: ident $( $methods: ident)*, $($field: tt)*) => {
        forward_parser!($input, $method $($field)+);
        forward_parser!($input, $($methods)*, $($field)+);
    };
    // NOTE(review): `ParseMode` is not among the imports visible in this file, so
    // this arm would presumably fail to compile if it were ever selected. The
    // default method list further down does not include `parse_mode`.
    ($input: ty, parse_mode $($field: tt)+) => {
        #[inline]
        fn parse_mode_impl<M>(
            &mut self,
            mode: M,
            input: &mut $input,
            state: &mut Self::PartialState,
        ) -> ParseResult<Self::Output, <$input as $crate::StreamOnce>::Error>
        where
            M: ParseMode,
        {
            self.$($field)+.parse_mode(mode, input, state).map(|(a, _)| a)
        }
    };
    // Forwards `parse_lazy` to the field.
    ($input: ty, parse_lazy $($field: tt)+) => {
        fn parse_lazy(
            &mut self,
            input: &mut $input,
        ) -> ParseResult<Self::Output, <$input as $crate::StreamOnce>::Error> {
            self.$($field)+.parse_lazy(input)
        }
    };
    // Forwards `parse_first` to the field.
    ($input: ty, parse_first $($field: tt)+) => {
        fn parse_first(
            &mut self,
            input: &mut $input,
            state: &mut Self::PartialState,
        ) -> ParseResult<Self::Output, <$input as $crate::StreamOnce>::Error> {
            self.$($field)+.parse_first(input, state)
        }
    };
    // Forwards `parse_partial` to the field.
    ($input: ty, parse_partial $($field: tt)+) => {
        fn parse_partial(
            &mut self,
            input: &mut $input,
            state: &mut Self::PartialState,
        ) -> ParseResult<Self::Output, <$input as $crate::StreamOnce>::Error> {
            self.$($field)+.parse_partial(input, state)
        }
    };
    // Forwards `add_error` to the field.
    ($input: ty, add_error $($field: tt)+) => {

        fn add_error(&mut self, error: &mut $crate::error::Tracked<<$input as $crate::StreamOnce>::Error>) {
            self.$($field)+.add_error(error)
        }
    };
    // Forwards `add_committed_expected_error` to the field.
    ($input: ty, add_committed_expected_error $($field: tt)+) => {
        fn add_committed_expected_error(&mut self, error: &mut $crate::error::Tracked<<$input as $crate::StreamOnce>::Error>) {
            self.$($field)+.add_committed_expected_error(error)
        }
    };
    // Forwards `parser_count` to the field.
    ($input: ty, parser_count $($field: tt)+) => {
        fn parser_count(&self) -> $crate::ErrorOffset {
            self.$($field)+.parser_count()
        }
    };
    // Entry point used by the wrapper types in this file: a single field token
    // expands to the default list of forwarded methods.
    ($input: ty, $field: tt) => {
        forward_parser!($input, parse_lazy parse_first parse_partial add_error add_committed_expected_error parser_count, $field);
    };
    // Ends the recursion: once the method list is empty, the leftover
    // `, field` tokens match here and expand to nothing.
    ($input: ty, $($field: tt)+) => {
    };
}
141
/// Parser type returned by the non-lexing (`_`-suffixed) methods of `LanguageEnv`:
/// an `EnvParser` bound to a borrowed environment, wrapped in `Expected` with a
/// static label.
pub type LanguageParser<'a, 'b, I, T> =
    Expected<EnvParser<&'b LanguageEnv<'a, I>, I, T>, &'static str>;
/// A `LanguageParser` wrapped in `Lex`.
pub type LexLanguageParser<'a, 'b, I, T> = Lex<'a, 'b, LanguageParser<'a, 'b, I, T>, I>;
145
146/// A lexing parser for a language
147pub struct Lex<'a, 'b, P, Input>
148where
149    Input: Stream<Token = char>,
150{
151    parser: Skip<P, WhiteSpace<'a, 'b, Input>>,
152}
153
154impl<'a, 'b, P, Input> Parser<Input> for Lex<'a, 'b, P, Input>
155where
156    Input: Stream,
157    P: Parser<Input>,
158    Input: Stream<Token = char> + 'b,
159    <Input as StreamOnce>::Error:
160        ParseError<char, <Input as StreamOnce>::Range, <Input as StreamOnce>::Position>,
161{
162    type Output = P::Output;
163    type PartialState = <Skip<P, WhiteSpace<'a, 'b, Input>> as Parser<Input>>::PartialState;
164
165    forward_parser!(Input, parser);
166}
167
168/// A whitespace parser for a language
169#[derive(Clone)]
170pub struct WhiteSpace<'a, 'b, I>
171where
172    I: Stream<Token = char>,
173    I::Error: ParseError<I::Token, I::Range, I::Position>,
174{
175    env: &'b LanguageEnv<'a, I>,
176}
177
178impl<'a, 'b, Input> Parser<Input> for WhiteSpace<'a, 'b, Input>
179where
180    Input: Stream<Token = char>,
181    Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
182{
183    type Output = ();
184    type PartialState = ();
185
186    fn parse_lazy(&mut self, input: &mut Input) -> ParseResult<(), Input::Error> {
187        let mut comment_start = self.env.comment_start.borrow_mut();
188        let mut comment_end = self.env.comment_end.borrow_mut();
189        let mut comment_line = self.env.comment_line.borrow_mut();
190        parse_comment(
191            &mut **comment_start,
192            &mut **comment_end,
193            &mut **comment_line,
194            input,
195        )
196        .into()
197    }
198}
199
200fn parse_comment<I, P>(
201    mut comment_start: P,
202    mut comment_end: P,
203    comment_line: P,
204    input: &mut I,
205) -> StdParseResult<(), I>
206where
207    I: Stream<Token = char>,
208    P: Parser<I, Output = ()>,
209    I::Error: ParseError<I::Token, I::Range, I::Position>,
210{
211    let linecomment: &mut (dyn Parser<I, Output = (), PartialState = _>) =
212        &mut attempt(comment_line)
213            .and(skip_many(satisfy(|c| c != '\n')))
214            .map(|_| ());
215    let blockcomment = parser(|input| {
216        let (_, mut consumed) = attempt(&mut comment_start)
217            .parse_lazy(input)
218            .into_result()?;
219        loop {
220            match consumed.combine(|_| attempt(&mut comment_end).parse_lazy(input).into_result()) {
221                Ok((_, consumed)) => return Ok(((), consumed)),
222                Err(_) => match consumed.combine(|_| any().parse_stream(input).into_result()) {
223                    Ok((_, rest)) => consumed = rest,
224                    Err(err) => return Err(err),
225                },
226            }
227        }
228    });
229    let whitespace = skip_many1(space()).or(linecomment).or(blockcomment);
230    skip_many(whitespace).parse_stream(input).into_result()
231}
232
233/// Parses a reserved word
234pub struct Reserved<'a, 'b, Input>
235where
236    Input: Stream<Token = char>,
237    Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
238{
239    parser: Lex<
240        'a,
241        'b,
242        Try<Skip<Str<Input>, NotFollowedBy<LanguageParser<'a, 'b, Input, char>>>>,
243        Input,
244    >,
245}
246
247impl<'a, 'b, Input> Parser<Input> for Reserved<'a, 'b, Input>
248where
249    Input: Stream<Token = char> + 'b,
250    Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
251{
252    type Output = &'static str;
253    type PartialState = <Lex<
254        'a,
255        'b,
256        Try<Skip<Str<Input>, NotFollowedBy<LanguageParser<'a, 'b, Input, char>>>>,
257        Input,
258    > as Parser<Input>>::PartialState;
259
260    forward_parser!(Input, parser);
261}
262
263/// Parses `P` between two delimiter characters
264pub struct BetweenChar<'a, 'b, P, Input>
265where
266    P: Parser<Input>,
267    Input: Stream<Token = char>,
268    <Input as StreamOnce>::Error:
269        ParseError<char, <Input as StreamOnce>::Range, <Input as StreamOnce>::Position>,
270{
271    parser: Between<Input, Lex<'a, 'b, Token<Input>, Input>, Lex<'a, 'b, Token<Input>, Input>, P>,
272}
273
274impl<'a, 'b, Input, P> Parser<Input> for BetweenChar<'a, 'b, P, Input>
275where
276    Input: Stream<Token = char> + 'b,
277    P: Parser<Input>,
278    Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
279{
280    type Output = P::Output;
281    type PartialState = <Between<
282        Input,
283        Lex<'a, 'b, Token<Input>, Input>,
284        Lex<'a, 'b, Token<Input>, Input>,
285        P,
286    > as Parser<Input>>::PartialState;
287
288    forward_parser!(Input, parser);
289}
290
/// Describes how an identifier (or operator) is parsed
pub struct Identifier<PS, P> {
    /// Parses a valid starting character for an identifier
    pub start: PS,
    /// Parses the rest of the characters in a valid identifier
    pub rest: P,
    /// A number of reserved words which cannot be identifiers
    pub reserved: Vec<Cow<'static, str>>,
}
300
/// A struct type which contains the necessary definitions to construct a language parser
///
/// The type parameters are the parser types for, in order: identifier start (`IS`),
/// identifier rest (`I`), operator start (`OS`), operator rest (`O`), line comment
/// marker (`CL`), block comment start (`CS`) and block comment end (`CE`).
pub struct LanguageDef<IS, I, OS, O, CL, CS, CE> {
    /// How to parse an identifier
    pub ident: Identifier<IS, I>,
    /// How to parse an operator
    pub op: Identifier<OS, O>,
    /// Describes the start of a line comment
    pub comment_line: CL,
    /// Describes the start of a block comment
    pub comment_start: CS,
    /// Describes the end of a block comment
    pub comment_end: CE,
}
314
/// A boxed `(start, rest)` pair of character parsers, used for both
/// identifiers and operators.
type IdentParser<'a, I> = (
    Box<dyn Parser<I, Output = char, PartialState = ()> + 'a>,
    Box<dyn Parser<I, Output = char, PartialState = ()> + 'a>,
);

/// A type containing parsers for a specific language.
/// For some parsers there are two versions: the one whose name ends with `_` is a variant
/// which does not skip whitespace and comments after parsing the token itself.
pub struct LanguageEnv<'a, I> {
    /// `(start, rest)` parsers for identifiers
    ident: RefCell<IdentParser<'a, I>>,
    /// Words that `identifier` rejects
    reserved: Vec<Cow<'static, str>>,
    /// `(start, rest)` parsers for operators
    op: RefCell<IdentParser<'a, I>>,
    /// Operators that `op` rejects
    op_reserved: Vec<Cow<'static, str>>,
    /// Parses the marker that starts a line comment
    comment_line: RefCell<Box<dyn Parser<I, Output = (), PartialState = ()> + 'a>>,
    /// Parses the marker that starts a block comment
    comment_start: RefCell<Box<dyn Parser<I, Output = (), PartialState = ()> + 'a>>,
    /// Parses the marker that ends a block comment
    comment_end: RefCell<Box<dyn Parser<I, Output = (), PartialState = ()> + 'a>>,
    /// A buffer for storing characters when parsing numbers
    buffer: RefCell<String>,
    /// Zero-sized marker naming the input type `I`
    _marker: PhantomData<fn(I) -> I>,
}
335
336impl<'a, I> LanguageEnv<'a, I>
337where
338    I: Stream<Token = char>,
339    I::Error: ParseError<I::Token, I::Range, I::Position>,
340{
341    /// Constructs a new parser from a language defintion
342    pub fn new<A, B, C, D, E, F, G>(def: LanguageDef<A, B, C, D, E, F, G>) -> LanguageEnv<'a, I>
343    where
344        A: Parser<I, Output = char> + 'a,
345        B: Parser<I, Output = char> + 'a,
346        C: Parser<I, Output = char> + 'a,
347        D: Parser<I, Output = char> + 'a,
348        E: Parser<I, Output = ()> + 'a,
349        F: Parser<I, Output = ()> + 'a,
350        G: Parser<I, Output = ()> + 'a,
351    {
352        let LanguageDef {
353            ident:
354                Identifier {
355                    start: ident_start,
356                    rest: ident_rest,
357                    reserved: ident_reserved,
358                },
359            op:
360                Identifier {
361                    start: op_start,
362                    rest: op_rest,
363                    reserved: op_reserved,
364                },
365            comment_line,
366            comment_start,
367            comment_end,
368        } = def;
369        LanguageEnv {
370            ident: RefCell::new((
371                Box::new(no_partial(ident_start)),
372                Box::new(no_partial(ident_rest)),
373            )),
374            reserved: ident_reserved,
375            op: RefCell::new((
376                Box::new(no_partial(op_start)),
377                Box::new(no_partial(op_rest)),
378            )),
379            op_reserved: op_reserved,
380            comment_line: RefCell::new(Box::new(no_partial(comment_line))),
381            comment_start: RefCell::new(Box::new(no_partial(comment_start))),
382            comment_end: RefCell::new(Box::new(no_partial(comment_end))),
383            buffer: RefCell::new(String::new()),
384            _marker: PhantomData,
385        }
386    }
387
388    fn parser<'b, T>(
389        &'b self,
390        parser: fn(&LanguageEnv<'a, I>, &mut I) -> StdParseResult<T, I>,
391        expected: &'static str,
392    ) -> LanguageParser<'a, 'b, I, T> {
393        env_parser(self, parser).expected(expected)
394    }
395
396    /// Creates a lexing parser from `p`
397    pub fn lex<'b, P>(&'b self, p: P) -> Lex<'a, 'b, P, I>
398    where
399        P: Parser<I> + 'b,
400    {
401        Lex {
402            parser: p.skip(self.white_space()),
403        }
404    }
405
406    /// Skips spaces and comments
407    pub fn white_space<'b>(&'b self) -> WhiteSpace<'a, 'b, I> {
408        WhiteSpace { env: self }
409    }
410
411    /// Parses a symbol, lexing the stream if it is successful
412    pub fn symbol<'b>(&'b self, name: &'static str) -> Lex<'a, 'b, Str<I>, I> {
413        self.lex(string(name))
414    }
415
416    /// Parses an identifier, failing if it parses something that is a reserved identifier
417    pub fn identifier<'b>(&'b self) -> LexLanguageParser<'a, 'b, I, String> {
418        self.lex(self.identifier_())
419    }
420
421    pub fn identifier_<'b>(&'b self) -> LanguageParser<'a, 'b, I, String> {
422        self.parser(LanguageEnv::<I>::parse_ident, "identifier")
423    }
424
425    fn parse_ident(&self, input: &mut I) -> StdParseResult<String, I> {
426        let mut ident = self.ident.borrow_mut();
427        let (first, _) = ident.0.parse_lazy(input).into_result()?;
428        let mut buffer = String::new();
429        buffer.push(first);
430        let (s, consumed) = {
431            let mut iter = (&mut *ident.1).iter(input);
432            buffer.extend(iter.by_ref());
433            // We definitely consumed the char `first` so make sure that the input is consumed
434            Commit::Commit(()).combine(|_| iter.into_result(buffer))?
435        };
436        match self.reserved.iter().find(|r| **r == s) {
437            Some(ref _reserved) => Err(consumed.map(|_| {
438                I::Error::from_error(
439                    input.position(),
440                    StreamError::expected_static_message("identifier"),
441                )
442                .into()
443            })),
444            None => Ok((s, consumed)),
445        }
446    }
447
448    /// Parses an identifier, failing if it parses something that is a reserved identifier
449    pub fn range_identifier<'b>(&'b self) -> LexLanguageParser<'a, 'b, I, &'a str>
450    where
451        I: RangeStream<Range = &'a str>,
452    {
453        self.lex(self.range_identifier_())
454    }
455
456    pub fn range_identifier_<'b>(&'b self) -> LanguageParser<'a, 'b, I, &'a str>
457    where
458        I: RangeStream<Range = &'a str>,
459    {
460        self.parser(LanguageEnv::<I>::parse_range_ident, "identifier")
461    }
462
463    fn parse_range_ident(&self, input: &mut I) -> StdParseResult<&'a str, I>
464    where
465        I: RangeStream<Range = &'a str>,
466    {
467        let mut ident = self.ident.borrow_mut();
468        let checkpoint = input.checkpoint();
469        let (first, _) = ident.0.parse_lazy(input).into_result()?;
470        let len = {
471            let mut iter = (&mut *ident.1).iter(input);
472            iter.by_ref()
473                .fold(first.len_utf8(), |acc, c| c.len_utf8() + acc)
474        };
475        input
476            .reset(checkpoint)
477            .map_err(|err| Commit::Commit(err.into()))?;
478        let (s, consumed) = take(len).parse_lazy(input).into_result()?;
479        match self.reserved.iter().find(|r| **r == s) {
480            Some(ref _reserved) => Err(consumed.map(|_| {
481                I::Error::from_error(
482                    input.position(),
483                    StreamError::expected_static_message("identifier"),
484                )
485                .into()
486            })),
487            None => Ok((s, consumed)),
488        }
489    }
490
491    /// Parses the reserved identifier `name`
492    pub fn reserved<'b>(&'b self, name: &'static str) -> Reserved<'a, 'b, I>
493    where
494        I::Range: 'b,
495    {
496        let ident_letter = self.parser(LanguageEnv::<I>::ident_letter, "identifier letter");
497        Reserved {
498            parser: self.lex(attempt(string(name).skip(not_followed_by(ident_letter)))),
499        }
500    }
501
502    fn ident_letter(&self, input: &mut I) -> StdParseResult<char, I> {
503        self.ident.borrow_mut().1.parse_lazy(input).into()
504    }
505
506    /// Parses an operator, failing if it parses something that is a reserved operator
507    pub fn op<'b>(&'b self) -> LexLanguageParser<'a, 'b, I, String> {
508        self.lex(self.op_())
509    }
510
511    pub fn op_<'b>(&'b self) -> LanguageParser<'a, 'b, I, String> {
512        self.parser(LanguageEnv::<I>::parse_op, "operator")
513    }
514
515    fn parse_op(&self, input: &mut I) -> StdParseResult<String, I> {
516        let mut op = self.op.borrow_mut();
517        let (first, _) = op.0.parse_lazy(input).into_result()?;
518        let mut buffer = String::new();
519        buffer.push(first);
520        let (s, consumed) = {
521            let mut iter = (&mut *op.1).iter(input);
522            buffer.extend(iter.by_ref());
523            // We definitely consumed the char `first` so make sure that the input is consumed
524            Commit::Commit(()).combine(|_| iter.into_result(buffer))?
525        };
526        match self.op_reserved.iter().find(|r| **r == s) {
527            Some(ref _reserved) => Err(consumed.map(|_| {
528                I::Error::from_error(
529                    input.position(),
530                    StreamError::expected_static_message("operator"),
531                )
532                .into()
533            })),
534            None => Ok((s, consumed)),
535        }
536    }
537
538    /// Parses an identifier, failing if it parses something that is a reserved identifier
539    pub fn range_op<'b>(&'b self) -> LexLanguageParser<'a, 'b, I, &'a str>
540    where
541        I: RangeStream<Range = &'a str>,
542    {
543        self.lex(self.range_op_())
544    }
545
546    pub fn range_op_<'b>(&'b self) -> LanguageParser<'a, 'b, I, &'a str>
547    where
548        I: RangeStream<Range = &'a str>,
549    {
550        self.parser(LanguageEnv::<I>::parse_range_op, "operator")
551    }
552
553    fn parse_range_op(&self, input: &mut I) -> StdParseResult<&'a str, I>
554    where
555        I: RangeStream<Range = &'a str>,
556    {
557        let mut op = self.op.borrow_mut();
558        let checkpoint = input.checkpoint();
559        let (first, _) = op.0.parse_lazy(input).into_result()?;
560        let len = {
561            let mut iter = (&mut *op.1).iter(input);
562            iter.by_ref()
563                .fold(first.len_utf8(), |acc, c| c.len_utf8() + acc)
564        };
565        input
566            .reset(checkpoint)
567            .map_err(|err| Commit::Commit(err.into()))?;
568        let (s, consumed) = take(len).parse_lazy(input).into_result()?;
569        match self.op_reserved.iter().find(|r| **r == s) {
570            Some(ref _reserved) => Err(consumed.map(|_| {
571                I::Error::from_error(
572                    input.position(),
573                    StreamError::expected_static_message("identifier"),
574                )
575                .into()
576            })),
577            None => Ok((s, consumed)),
578        }
579    }
580
581    /// Parses the reserved operator `name`
582    pub fn reserved_op<'b>(&'b self, name: &'static str) -> Lex<'a, 'b, Reserved<'a, 'b, I>, I>
583    where
584        I::Range: 'b,
585    {
586        self.lex(self.reserved_op_(name))
587    }
588
589    pub fn reserved_op_<'b>(&'b self, name: &'static str) -> Reserved<'a, 'b, I>
590    where
591        I::Range: 'b,
592    {
593        let op_letter = self.parser(LanguageEnv::<I>::op_letter, "operator letter");
594        Reserved {
595            parser: self.lex(attempt(string(name).skip(not_followed_by(op_letter)))),
596        }
597    }
598
599    fn op_letter(&self, input: &mut I) -> StdParseResult<char, I> {
600        self.op.borrow_mut().1.parse_lazy(input).into()
601    }
602
603    /// Parses a character literal taking escape sequences into account
604    pub fn char_literal<'b>(&'b self) -> LexLanguageParser<'a, 'b, I, char> {
605        self.lex(self.char_literal_())
606    }
607
608    pub fn char_literal_<'b>(&'b self) -> LanguageParser<'a, 'b, I, char> {
609        self.parser(LanguageEnv::<I>::char_literal_parser, "character")
610    }
611
612    fn char_literal_parser(&self, input: &mut I) -> StdParseResult<char, I> {
613        between(string("\'"), string("\'"), parser(LanguageEnv::<I>::char))
614            .expected("character")
615            .parse_lazy(input)
616            .into()
617    }
618
619    fn char(input: &mut I) -> StdParseResult<char, I> {
620        let (c, consumed) = any().parse_lazy(input).into_result()?;
621        let mut back_slash_char =
622            satisfy(|c| "'\\/bfnrt".chars().find(|x| *x == c).is_some()).map(escape_char);
623        match c {
624            '\\' => consumed.combine(|_| back_slash_char.parse_stream(input).into_result()),
625            '\'' => unexpected("'")
626                .parse_stream(input)
627                .into_result()
628                .map(|_| unreachable!()),
629            _ => Ok((c, consumed)),
630        }
631    }
632
633    /// Parses a string literal taking character escapes into account
634    pub fn string_literal<'b>(&'b self) -> LexLanguageParser<'a, 'b, I, String> {
635        self.lex(self.string_literal_())
636    }
637
638    pub fn string_literal_<'b>(&'b self) -> LanguageParser<'a, 'b, I, String> {
639        self.parser(LanguageEnv::<I>::string_literal_parser, "string")
640    }
641
642    fn string_literal_parser(&self, input: &mut I) -> StdParseResult<String, I> {
643        between(
644            string("\""),
645            string("\""),
646            many(parser(LanguageEnv::<I>::string_char)),
647        )
648        .parse_lazy(input)
649        .into()
650    }
651
652    fn string_char(input: &mut I) -> StdParseResult<char, I> {
653        let (c, consumed) = any().parse_lazy(input).into_result()?;
654        let mut back_slash_char =
655            satisfy(|c| "\"\\/bfnrt".chars().find(|x| *x == c).is_some()).map(escape_char);
656        match c {
657            '\\' => consumed.combine(|_| back_slash_char.parse_stream(input).into_result()),
658            '"' => unexpected("\"")
659                .parse_stream(input)
660                .into_result()
661                .map(|_| unreachable!()),
662            _ => Ok((c, consumed)),
663        }
664    }
665
666    /// Parses `p` inside angle brackets
667    /// `< p >`
668    pub fn angles<'b, P>(&'b self, parser: P) -> BetweenChar<'a, 'b, P, I>
669    where
670        P: Parser<I>,
671        I::Range: 'b,
672    {
673        self.between('<', '>', parser)
674    }
675
676    /// Parses `p` inside braces
677    /// `{ p }`
678    pub fn braces<'b, P>(&'b self, parser: P) -> BetweenChar<'a, 'b, P, I>
679    where
680        P: Parser<I>,
681        I::Range: 'b,
682    {
683        self.between('{', '}', parser)
684    }
685
686    /// Parses `p` inside brackets
687    /// `[ p ]`
688    pub fn brackets<'b, P>(&'b self, parser: P) -> BetweenChar<'a, 'b, P, I>
689    where
690        P: Parser<I>,
691        I::Range: 'b,
692    {
693        self.between('[', ']', parser)
694    }
695
696    /// Parses `p` inside parentheses
697    /// `( p )`
698    pub fn parens<'b, P>(&'b self, parser: P) -> BetweenChar<'a, 'b, P, I>
699    where
700        P: Parser<I>,
701        I::Range: 'b,
702    {
703        self.between('(', ')', parser)
704    }
705
706    fn between<'b, P>(&'b self, start: char, end: char, parser: P) -> BetweenChar<'a, 'b, P, I>
707    where
708        P: Parser<I>,
709        I::Range: 'b,
710    {
711        BetweenChar {
712            parser: between(self.lex(char(start)), self.lex(char(end)), parser),
713        }
714    }
715
716    /// Parses an integer
717    pub fn integer<'b>(&'b self) -> LexLanguageParser<'a, 'b, I, i64> {
718        self.lex(self.integer_())
719    }
720
721    pub fn integer_<'b>(&'b self) -> LanguageParser<'a, 'b, I, i64> {
722        self.parser(LanguageEnv::integer_parser, "integer")
723    }
724
725    fn integer_parser(&self, input: &mut I) -> StdParseResult<i64, I> {
726        let mut buffer = self.buffer.borrow_mut();
727        buffer.clear();
728        let ((), consumed) = LanguageEnv::push_digits(&mut buffer, input)?;
729        match buffer.parse() {
730            Ok(i) => Ok((i, consumed)),
731            Err(_) => Err(consumed.map(|()| I::Error::empty(input.position()).into())),
732        }
733    }
734
735    fn push_digits(buffer: &mut String, input: &mut I) -> StdParseResult<(), I> {
736        let mut iter = digit().iter(input);
737        buffer.extend(&mut iter);
738        iter.into_result(())
739    }
740
741    /// Parses a floating point number
742    pub fn float<'b>(&'b self) -> LexLanguageParser<'a, 'b, I, f64> {
743        self.lex(self.float_())
744    }
745
746    pub fn float_<'b>(&'b self) -> LanguageParser<'a, 'b, I, f64> {
747        self.parser(
748            |_, input| float().parse_stream(input).into_result(),
749            "float",
750        )
751    }
752}
753
/// Parses a floating point number into an `f64`.
///
/// The accepted shape, as spelled out by the combinators below, is:
///
/// * an optional leading `-`,
/// * then either `.` followed by one or more digits, or an integer part
///   (a lone `0` not followed by a digit, or a digit `1`-`9` followed by any
///   digits) optionally followed by `.` and zero or more digits,
/// * then an optional exponent: `e` or `E`, an optional `+`/`-`, and one or
///   more digits.
///
/// A fraction is not required, so plain integers such as `12` are accepted too.
pub fn float<I>() -> impl Parser<I, Output = f64>
where
    I: Stream<Token = char>,
    I::Error: ParseError<I::Token, I::Range, I::Position>,
{
    // `recognize` gathers the characters consumed by the tuple parser into a
    // `String`, which `from_str` then converts. The `.map(|_| '0')` calls only
    // give both sides of each `or` the same output type.
    from_str(recognize::<String, _, _>((
        // Optional sign
        optional(token('-')),
        // `.digits`, or an integer part with an optional fraction
        (token('.').and(skip_many1(digit())).map(|_| '0')).or((
            token('0').skip(not_followed_by(digit())).or((
                one_of("123456789".chars()),
                skip_many(digit()),
            )
                .map(|_| '0')),
            optional((token('.'), skip_many(digit()))),
        )
            .map(|_| '0')),
        // Optional exponent
        optional((
            (one_of("eE".chars()), optional(one_of("+-".chars()))),
            skip_many1(digit()),
        )),
    )))
    .expected("float")
}
777
/// Translates the character that follows a backslash into the character it denotes.
///
/// `b`, `f`, `n`, `r` and `t` map to the matching control characters; every
/// other character (including `'`, `"`, `\` and `/`) is returned unchanged.
fn escape_char(c: char) -> char {
    match c {
        'b' => '\u{0008}',
        'f' => '\u{000c}',
        'n' => '\n',
        'r' => '\r',
        't' => '\t',
        // Quotes, backslash and slash stand for themselves. The callers in this
        // file only pass characters from their escape sets, so anything else
        // should never reach this point.
        literal => literal,
    }
}
792
/// Enumeration on fixities for the expression parser
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
pub enum Fixity {
    /// Operators of equal precedence group to the left: `a - b - c` is `(a - b) - c`
    Left,
    /// Operators of equal precedence group to the right: `a ^ b ^ c` is `a ^ (b ^ c)`
    Right,
}
799
/// Struct for encompassing the associativity of an operator
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
pub struct Assoc {
    /// Operator fixity
    pub fixity: Fixity,
    /// Operator precedence; an operator with a higher value binds tighter
    pub precedence: i32,
}
808
/// Expression parser which handles binary operators
#[derive(Clone, Debug)]
pub struct Expression<O, P, F> {
    // Parses a single operand
    term: P,
    // Parses an operator together with its `Assoc`
    op: O,
    // Combines `(left, operator, right)` into a new expression
    f: F,
}
816
// Macro which breaks on empty consumed instead of returning.
//
// Must be used inside a loop in a function returning a `StdParseResult`:
// * `PeekOk` / `CommitOk` evaluate to `(value, Commit::Peek(()))` / `(value, Commit::Commit(()))`,
// * `PeekErr` breaks out of the enclosing loop, discarding the error,
// * `CommitErr` returns the error from the enclosing function.
macro_rules! tryb {
    ($e: expr) => {
        match $e {
            PeekOk(x) => (x, Commit::Peek(())),
            CommitOk(x) => (x, Commit::Commit(())),
            PeekErr(_) => break,
            CommitErr(err) => return Err(Commit::Commit(err.into())),
        }
    };
}
828
829impl<O, P, F> Expression<O, P, F> {
830    fn parse_expr<Input, T>(
831        &mut self,
832        min_precedence: i32,
833        mut l: P::Output,
834        mut consumed: Commit<()>,
835        input: &mut Input,
836    ) -> StdParseResult<P::Output, Input>
837    where
838        Input: Stream,
839        O: Parser<Input, Output = (T, Assoc)>,
840        P: Parser<Input>,
841        F: Fn(P::Output, T, P::Output) -> P::Output,
842    {
843        loop {
844            let checkpoint = input.checkpoint();
845            let ((op, op_assoc), rest) = tryb!(self.op.parse_lazy(input));
846
847            if op_assoc.precedence < min_precedence {
848                input
849                    .reset(checkpoint)
850                    .map_err(|err| Commit::Commit(err.into()))?;
851                return Ok((l, consumed));
852            }
853
854            let (mut r, rest) = rest.combine(|_| self.term.parse_stream(input).into_result())?;
855            consumed = rest;
856
857            loop {
858                let checkpoint = input.checkpoint();
859                let ((_, assoc), _) = tryb!(self.op.parse_lazy(input));
860                input
861                    .reset(checkpoint)
862                    .map_err(|err| Commit::Commit(err.into()))?;
863
864                let proceed = assoc.precedence > op_assoc.precedence
865                    || assoc.fixity == Fixity::Right && assoc.precedence == op_assoc.precedence;
866                if !proceed {
867                    break;
868                }
869
870                let (new_r, rest) = self.parse_expr(assoc.precedence, r, consumed, input)?;
871                r = new_r;
872                consumed = rest;
873            }
874            l = (self.f)(l, op, r);
875        }
876        Ok((l, consumed)).into()
877    }
878}
879
880impl<O, P, F, T, Input> Parser<Input> for Expression<O, P, F>
881where
882    Input: Stream,
883    O: Parser<Input, Output = (T, Assoc)>,
884    P: Parser<Input>,
885    F: Fn(P::Output, T, P::Output) -> P::Output,
886{
887    type Output = P::Output;
888    type PartialState = ();
889
890    fn parse_lazy(&mut self, input: &mut Input) -> ParseResult<Self::Output, Input::Error> {
891        let (l, consumed) = ctry!(self.term.parse_lazy(input));
892        self.parse_expr(0, l, consumed, input).into()
893    }
894    fn add_error(&mut self, errors: &mut Tracked<<Input as StreamOnce>::Error>) {
895        self.term.add_error(errors);
896    }
897}
898
899/// Constructs an expression parser out of a term parser, an operator parser and a function which
900/// combines a binary expression to new expressions.
901///
902/// ```
903/// # extern crate combine;
904/// # extern crate combine_language;
905/// # use combine::{many, EasyParser, Parser};
906/// # use combine::parser::char::{letter, spaces, string};
907/// # use combine_language::{expression_parser, Assoc, Fixity};
908/// use self::Expr::*;
909/// #[derive(PartialEq, Debug)]
910/// enum Expr {
911///      Id(String),
912///      Op(Box<Expr>, &'static str, Box<Expr>)
913/// }
914/// fn op(l: Expr, o: &'static str, r: Expr) -> Expr {
915///     Op(Box::new(l), o, Box::new(r))
916/// }
917/// fn id(s: &str) -> Expr {
918///     Id(String::from(s))
919/// }
920/// # fn main() {
921/// let op_parser = string("+").or(string("*"))
922///     .map(|op| {
923///         let prec = match op {
924///             "+" => 6,
925///             "*" => 7,
926///             _ => unreachable!()
927///         };
928///         (op, Assoc { precedence: prec, fixity: Fixity::Left })
929///     })
930///     .skip(spaces());
931/// let term = many(letter())
932///     .map(Id)
933///     .skip(spaces());
934/// let mut parser = expression_parser(term, op_parser, op);
935/// let result = parser.easy_parse("a + b * c + d");
936/// assert_eq!(result, Ok((op(op(id("a"), "+", op(id("b"), "*", id("c"))), "+", id("d")), "")));
937/// # }
938/// ```
939pub fn expression_parser<O, P, F, T, Input>(term: P, op: O, f: F) -> Expression<O, P, F>
940where
941    Input: Stream,
942    O: Parser<Input, Output = (T, Assoc)>,
943    P: Parser<Input>,
944    F: Fn(P::Output, T, P::Output) -> P::Output,
945{
946    Expression { term, op, f }
947}
948
949#[cfg(test)]
950mod tests {
951    use super::*;
952    use combine::easy::Error;
953    use combine::parser::char::{alpha_num, letter, string};
954    use combine::parser::combinator::opaque;
955    use combine::*;
956
957    fn env<I>() -> LanguageEnv<'static, I>
958    where
959        I: Stream<Token = char> + 'static,
960        I::Error: ParseError<I::Token, I::Range, I::Position>,
961    {
962        LanguageEnv::new(LanguageDef {
963            ident: Identifier {
964                start: letter(),
965                rest: alpha_num(),
966                reserved: ["if", "then", "else", "let", "in", "type"]
967                    .iter()
968                    .map(|x| (*x).into())
969                    .collect(),
970            },
971            op: Identifier {
972                start: satisfy(|c| "+-*/".chars().find(|x| *x == c).is_some()),
973                rest: satisfy(|c| "+-*/".chars().find(|x| *x == c).is_some()),
974                reserved: ["+", "-", "*", "/"].iter().map(|x| (*x).into()).collect(),
975            },
976            comment_start: string("/*").map(|_| ()),
977            comment_end: string("*/").map(|_| ()),
978            comment_line: string("//").map(|_| ()),
979        })
980    }
981
982    #[test]
983    fn string_literal() {
984        let result = env().string_literal().easy_parse(r#""abc\n\r213" "#);
985        assert_eq!(result, Ok(("abc\n\r213".to_string(), "")));
986    }
987
988    #[test]
989    fn char_literal() {
990        let e = env();
991        let mut parser = e.char_literal();
992        assert_eq!(parser.easy_parse("'a'"), Ok(('a', "")));
993        assert_eq!(parser.easy_parse(r#"'\n'"#), Ok(('\n', "")));
994        assert_eq!(parser.easy_parse(r#"'\\'"#), Ok(('\\', "")));
995        assert!(parser.easy_parse(r#"'\1'"#).is_err());
996        assert_eq!(parser.easy_parse(r#"'"'"#), Ok(('"', "")));
997        assert!(parser.easy_parse(r#"'\"'"#).is_err());
998    }
999
1000    #[test]
1001    fn integer_literal() {
1002        let result = env().integer().easy_parse("213  ");
1003        assert_eq!(result, Ok((213, "")));
1004    }
1005
1006    #[test]
1007    fn float_literal() {
1008        let result = env().float().easy_parse("123.456  ");
1009        assert_eq!(result, Ok((123.456, "")));
1010
1011        let result = env().float().easy_parse("123.456e10  ");
1012        assert_eq!(result, Ok((123.456e10, "")));
1013
1014        let result = env().float().easy_parse("123.456E-10  ");
1015        assert_eq!(result, Ok((123.456E-10, "")));
1016
1017        let result = env().float().easy_parse("123e1 ");
1018        assert_eq!(result, Ok((123e1, "")));
1019
1020        let result = env().float().easy_parse("0.1  ");
1021        assert_eq!(result, Ok((0.1, "")));
1022
1023        let result = env().float().easy_parse(".1  ");
1024        assert_eq!(result, Ok((0.1, "")));
1025
1026        let result = env().float().easy_parse("1.  ");
1027        assert_eq!(result, Ok((1.0, "")));
1028
1029        let result = env().float().easy_parse("1e+0  ");
1030        assert_eq!(result, Ok((1.0, "")));
1031
1032        let result = env().float().easy_parse("  ");
1033        assert!(result.is_err());
1034
1035        let result = env().float().easy_parse(". ");
1036        assert!(result.is_err());
1037
1038        let result = env().float().easy_parse("000.1  ");
1039        assert!(result.is_err());
1040    }
1041
1042    #[test]
1043    fn identifier() {
1044        let e = env();
1045        let result = e.identifier().easy_parse("a12bc");
1046        assert_eq!(result, Ok(("a12bc".to_string(), "")));
1047        assert!(e.identifier().easy_parse("1bcv").is_err());
1048        assert!(e.identifier().easy_parse("if").is_err());
1049        assert_eq!(e.reserved("if").easy_parse("if"), Ok(("if", "")));
1050        assert!(e.reserved("if").easy_parse("ifx").is_err());
1051    }
1052
1053    #[test]
1054    fn operator() {
1055        let e = env();
1056        let result = e.op().easy_parse("++  ");
1057        assert_eq!(result, Ok(("++".to_string(), "")));
1058        assert!(e.identifier().easy_parse("+").is_err());
1059        assert_eq!(e.reserved_op("-").easy_parse("-       "), Ok(("-", "")));
1060        assert!(e.reserved_op("-").easy_parse("--       ").is_err());
1061    }
1062
1063    use self::Expr::*;
1064    #[derive(PartialEq, Debug)]
1065    enum Expr {
1066        Int(i64),
1067        Op(Box<Expr>, &'static str, Box<Expr>),
1068    }
1069
1070    fn op(l: Expr, op: &'static str, r: Expr) -> Expr {
1071        Expr::Op(Box::new(l), op, Box::new(r))
1072    }
1073
1074    fn test_expr1() -> (&'static str, Expr) {
1075        let mul_2_3 = op(Int(2), "*", Int(3));
1076        let div_4_5 = op(Int(4), "/", Int(5));
1077        (
1078            "1 + 2 * 3 - 4 / 5",
1079            op(op(Int(1), "+", mul_2_3), "-", div_4_5),
1080        )
1081    }
1082    fn test_expr2() -> (&'static str, Expr) {
1083        let mul_2_3_4 = op(op(Int(2), "*", Int(3)), "/", Int(4));
1084        let add_1_mul = op(Int(1), "+", mul_2_3_4);
1085        (
1086            "1 + 2 * 3 / 4 - 5 + 6",
1087            op(op(add_1_mul, "-", Int(5)), "+", Int(6)),
1088        )
1089    }
1090
1091    parser! {
1092    fn op_parser[I]()(I) -> (&'static str, Assoc)
1093        where [I: Stream<Token = char>,]
1094    {
1095        opaque(|f| {
1096            let mut ops = ["*", "/", "+", "-", "^", "&&", "||", "!!"]
1097                .iter()
1098                .cloned()
1099                .map(string)
1100                .collect::<Vec<_>>();
1101            f(&mut choice(&mut ops[..])
1102                .map(|s| {
1103                    let prec = match s {
1104                        "||" => 2,
1105                        "&&" => 3,
1106                        "+" | "-" => 6,
1107                        "*" | "/" => 7,
1108                        "^" => 8,
1109                        "!!" => 9,
1110                        _ => panic!("Impossible"),
1111                    };
1112                    let fixity = match s {
1113                        "+" | "-" | "*" | "/" => Fixity::Left,
1114                        "^" | "&&" | "||" => Fixity::Right,
1115                        _ => panic!("Impossible"),
1116                    };
1117                    (
1118                        s,
1119                        Assoc {
1120                            fixity: fixity,
1121                            precedence: prec,
1122                        },
1123                    )
1124                }))
1125        })
1126    }
1127    }
1128
1129    #[test]
1130    fn expression() {
1131        let e = env();
1132        let mut expr = expression_parser(e.integer().map(Expr::Int), e.lex(op_parser()), op);
1133        let (s1, e1) = test_expr1();
1134        let result = expr.easy_parse(s1);
1135        assert_eq!(result, Ok((e1, "")));
1136        let (s2, e2) = test_expr2();
1137        let result = expr.easy_parse(s2);
1138        assert_eq!(result, Ok((e2, "")));
1139    }
1140    #[test]
1141    fn right_assoc_expression() {
1142        let e = env();
1143        let mut expr = expression_parser(e.integer().map(Expr::Int), e.lex(op_parser()), op);
1144        let result = expr.easy_parse("1 + 2 * 3 ^ 4 / 5");
1145        let power_3_4 = op(Int(3), "^", Int(4));
1146        let mul_2_3_5 = op(op(Int(2), "*", power_3_4), "/", Int(5));
1147        let add_1_mul = op(Int(1), "+", mul_2_3_5);
1148        assert_eq!(result, Ok((add_1_mul, "")));
1149        let result = expr.easy_parse("1 ^ 2 && 3 ^ 4");
1150        let e_1_2 = op(Int(1), "^", Int(2));
1151        let e_3_4 = op(Int(3), "^", Int(4));
1152        assert_eq!(result, Ok((op(e_1_2, "&&", e_3_4), "")));
1153    }
1154    #[test]
1155    fn expression_error() {
1156        let e = env();
1157        let mut expr = expression_parser(e.integer().map(Expr::Int), e.lex(op_parser()), op);
1158        let errors = expr.easy_parse("+ 1").map_err(|err| err.errors);
1159        assert_eq!(
1160            errors,
1161            Err(vec![
1162                Error::Unexpected('+'.into()),
1163                Error::Expected("integer".into()),
1164            ])
1165        );
1166    }
1167
1168    #[test]
1169    fn range_identifier() {
1170        let e = env();
1171        let mut id = e.range_identifier();
1172        assert_eq!(id.easy_parse("t"), Ok(("t", "")));
1173        assert_eq!(id.easy_parse("test123 123"), Ok(("test123", "123")));
1174        assert_eq!(
1175            id.easy_parse("123").map_err(|err| err.errors),
1176            Err(vec![
1177                Error::Unexpected('1'.into()),
1178                Error::Expected("identifier".into()),
1179            ])
1180        );
1181    }
1182
1183    #[test]
1184    fn range_operator() {
1185        let e = env();
1186        let mut id = e.range_op();
1187        assert_eq!(id.easy_parse("+-+ 123"), Ok(("+-+", "123")));
1188        assert_eq!(
1189            id.easy_parse("abc").map_err(|err| err.errors),
1190            Err(vec![
1191                Error::Unexpected('a'.into()),
1192                Error::Expected("operator".into()),
1193            ])
1194        );
1195    }
1196}