parser_combinators/
lib.rs

1//! This crate contains parser combinators, roughly based on the Haskell library [parsec](http://hackage.haskell.org/package/parsec).
2//!
//! A parser in this library can be described as a function which takes some input and, if it
//! is successful, returns a value together with the remaining input.
5//! A parser combinator is a function which takes one or more parsers and returns a new parser.
6//! For instance the `many` parser can be used to convert a parser for single digits into one that
7//! parses multiple digits.
8//!
9//!# Overview
10//!
11//! This library is currently split into three modules.
12//!
13//! * `primitives` contains the `Parser` trait as well as various structs dealing with input
14//! streams and errors.
15//!
//! * `combinator` contains the aforementioned parser combinators and thus contains the main
//! building blocks for creating any sort of more complex parsers. It consists of free functions as
//! well as the `ParserExt` trait which provides a few functions which are more naturally used
//! through method calls.
20//!
21//! * `char` is the last module. It provides parsers specifically working with streams of
22//! characters. As a few examples it has parsers for accepting digits, letters or whitespace.
23//!
24//!
25//!# Examples
26//!
27//!```
28//! extern crate parser_combinators;
29//! use parser_combinators::{spaces, many1, sep_by, digit, char, Parser, ParserExt, ParseError};
30//! 
31//! fn main() {
32//!     let input = "1234, 45,78";
33//!     let spaces = spaces();
34//!     let integer = spaces.clone()//Parse spaces first and use the with method to only keep the result of the next parser
35//!         .with(many1(digit()).map(|string: String| string.parse::<i32>().unwrap()));//parse a string of digits into an i32
36//!     //Parse integers separated by commas, skipping whitespace
37//!     let mut integer_list = sep_by(integer, spaces.skip(char(',')));
38//! 
39//!     //Call parse with the input to execute the parser
40//!     let result: Result<(Vec<i32>, &str), ParseError<char>> = integer_list.parse(input);
41//!     match result {
42//!         Ok((value, _remaining_input)) => println!("{:?}", value),
43//!         Err(err) => println!("{}", err)
44//!     }
45//! }
46//!```
47//!
//! If we need a parser that is mutually recursive we can define a free function which internally
//! can in turn be used as a parser (note that we need to explicitly cast the function; this should
//! not be necessary once the changes in rustc that make orphan checking less restrictive are implemented).
51//!
//! `expr` is written fully general here, which may not be necessary in a specific implementation.
//! The `Stream` trait is predefined to work with array slices, string slices and iterators,
//! meaning that in this case it could be defined as
//! `fn expr(input: State<&str>) -> ParseResult<Expr, &str>`
56//!
57//!```
58//! extern crate parser_combinators;
59//! use parser_combinators::{between, char, letter, spaces, many1, parser, sep_by, Parser, ParserExt,
60//! ParseResult};
61//! use parser_combinators::primitives::{State, Stream};
62//!
63//! #[derive(Debug, PartialEq)]
64//! enum Expr {
65//!     Id(String),
66//!     Array(Vec<Expr>),
67//!     Pair(Box<Expr>, Box<Expr>)
68//! }
69//!
70//! fn expr<I>(input: State<I>) -> ParseResult<Expr, I>
71//!     where I: Stream<Item=char> {
72//!     let word = many1(letter());
73//!     //Creates a parser which parses a char and skips any trailing whitespace
74//!     let lex_char = |c| char(c).skip(spaces());
75//!     let comma_list = sep_by(parser(expr::<I>), lex_char(','));
76//!     let array = between(lex_char('['), lex_char(']'), comma_list);
77//!     //We can use tuples to run several parsers in sequence
78//!     //The resulting type is a tuple containing each parsers output
79//!     let pair = (lex_char('('), parser(expr::<I>), lex_char(','), parser(expr::<I>), lex_char(')'))
80//!         .map(|t| Expr::Pair(Box::new(t.1), Box::new(t.3)));
81//!     word.map(Expr::Id)
82//!         .or(array.map(Expr::Array))
83//!         .or(pair)
84//!         .skip(spaces())
85//!         .parse_state(input)
86//! }
87//! 
88//! fn main() {
89//!     let result = parser(expr)
90//!         .parse("[[], (hello, world), [rust]]");
91//!     let expr = Expr::Array(vec![
92//!           Expr::Array(Vec::new())
93//!         , Expr::Pair(Box::new(Expr::Id("hello".to_string())),
94//!                      Box::new(Expr::Id("world".to_string())))
95//!         , Expr::Array(vec![Expr::Id("rust".to_string())])
96//!     ]);
97//!     assert_eq!(result, Ok((expr, "")));
98//! }
99//!```
100
101#[doc(inline)]
102pub use primitives::{Parser, ParseError, State, from_iter};
103#[doc(inline)]
104pub use char::{
105    char,
106    digit,
107    space,
108    spaces,
109    newline,
110    crlf,
111    tab,
112    upper,
113    lower,
114    letter,
115    alpha_num,
116    hex_digit,
117    oct_digit,
118    string,
119
120    ParseResult//use char::ParseResult for compatibility
121};
122#[doc(inline)]
123pub use combinator::{
124    any,
125    between,
126    chainl1,
127    choice,
128    many,
129    many1,
130    optional,
131    parser,
132    satisfy,
133    sep_by,
134    skip_many,
135    skip_many1,
136    token,
137    try,
138    value,
139    unexpected,
140    not_followed_by,
141
142    ParserExt
143};
144
///Defines a local function from an argument list, a return type and a body expression, and
///evaluates to that function as a plain function pointer.
///
///Arguments are written as `pattern, type` pairs separated by commas. The result is coerced
///through `fn (_) -> _`, so only functions taking exactly one argument can be produced.
macro_rules! static_fn {
    (($($param: pat, $param_ty: ty),*) -> $output: ty { $code: expr }) => {
        {
            fn generated($($param: $param_ty),*) -> $output {
                $code
            }
            //Coerce the fn item into a fn pointer, letting inference fill in the signature
            let pointer: fn (_) -> _ = generated;
            pointer
        }
    }
}
152
///Module containing the primitive types which are used to create and compose more advanced parsers
154pub mod primitives;
155///Module containing all specific parsers
156pub mod combinator;
///Module containing parsers specialized on character streams
158pub mod char;
159
160#[cfg(test)]
161mod tests {
162    use super::*;
163    use super::primitives::{SourcePosition, Stream, Error, Consumed};
164    
165
166    fn integer<'a, I>(input: State<I>) -> ParseResult<i64, I>
167        where I: Stream<Item=char> {
168        let (s, input) = try!(many1::<String, _>(digit())
169            .expected("integer")
170            .parse_state(input));
171        let mut n = 0;
172        for c in s.chars() {
173            n = n * 10 + (c as i64 - '0' as i64);
174        }
175        Ok((n, input))
176    }
177
178    #[test]
179    fn test_integer() {
180        let result = parser(integer).parse("123");
181        assert_eq!(result, Ok((123i64, "")));
182    }
183    #[test]
184    fn list() {
185        let mut p = sep_by(parser(integer), char(','));
186        let result = p.parse("123,4,56");
187        assert_eq!(result, Ok((vec![123i64, 4, 56], "")));
188    }
189    #[test]
190    fn iterator() {
191        let result = parser(integer).parse(from_iter("123".chars()))
192            .map(|(i, input)| (i, input.uncons().err().map(|_| ())));
193        assert_eq!(result, Ok((123i64, Some(()))));
194    }
195    #[test]
196    fn field() {
197        let word = || many(alpha_num());
198        let spaces = spaces();
199        let c_decl = (word(), spaces.clone(), char(':'), spaces, word())
200            .map(|t| (t.0, t.4))
201            .parse("x: int");
202        assert_eq!(c_decl, Ok((("x".to_string(), "int".to_string()), "")));
203    }
204    #[test]
205    fn source_position() {
206        let source =
207r"
208123
209";
210        let result = (spaces(), parser(integer), spaces())
211            .map(|t| t.1)
212            .parse_state(State::new(source));
213        let state = Consumed::Consumed(State {
214            position: SourcePosition { line: 3, column: 1 },
215            input: ""
216        });
217        assert_eq!(result, Ok((123i64, state)));
218    }
219
    ///Expression tree built by the `expr` and `term` parsers used in these tests.
    #[derive(Debug, PartialEq)]
    enum Expr {
        ///An identifier made up of letters
        Id(String),
        ///An integer literal
        Int(i64),
        ///A bracketed, comma separated list of expressions
        Array(Vec<Expr>),
        ///Two expressions joined by `+`
        Plus(Box<Expr>, Box<Expr>),
        ///Two expressions joined by `*`
        Times(Box<Expr>, Box<Expr>),
    }
228
229    #[allow(unconditional_recursion)]
230    fn expr(input: State<&str>) -> ParseResult<Expr, &str> {
231        let word = many1(letter())
232            .expected("identifier");
233        let integer = parser(integer);
234        let array = between(char('['), char(']'), sep_by(parser(expr), char(',')))
235            .expected("[");
236        let paren_expr = between(char('('), char(')'), parser(term))
237            .expected("(");
238        let spaces = spaces();
239        spaces.clone().with(
240                word.map(Expr::Id)
241                .or(integer.map(Expr::Int))
242                .or(array.map(Expr::Array))
243                .or(paren_expr)
244            ).skip(spaces)
245            .parse_state(input)
246    }
247
248    #[test]
249    fn expression() {
250        let result = sep_by(parser(expr), char(','))
251            .parse("int, 100, [[], 123]");
252        let exprs = vec![
253              Expr::Id("int".to_string())
254            , Expr::Int(100)
255            , Expr::Array(vec![Expr::Array(vec![]), Expr::Int(123)])
256        ];
257        assert_eq!(result, Ok((exprs, "")));
258    }
259
260    #[test]
261    fn expression_error() {
262        let input =
263r"
264,123
265";
266        let result = parser(expr)
267            .parse(input);
268        let err = ParseError {
269            position: SourcePosition { line: 2, column: 1 },
270                errors: vec![
271                    Error::Unexpected(','),
272                    Error::Expected("integer".into()),
273                    Error::Expected("identifier".into()),
274                    Error::Expected("[".into()),
275                    Error::Expected("(".into()),
276                ]
277        };
278        assert_eq!(result, Err(err));
279    }
280
281    #[test]
282    fn expression_error_message() {
283        let input =
284r"
285,123
286";
287        let result = parser(expr)
288            .parse(input);
289        let m = format!("{}", result.unwrap_err());
290let expected =
291r"Parse error at line: 2, column: 1
292Unexpected token ','
293Expected 'integer', 'identifier', '[' or '('
294";
295        assert_eq!(m, expected);
296    }
297
298    fn term(input: State<&str>) -> ParseResult<Expr, &str> {
299        fn times(l: Expr, r: Expr) -> Expr { Expr::Times(Box::new(l), Box::new(r)) }
300        fn plus(l: Expr, r: Expr) -> Expr { Expr::Plus(Box::new(l), Box::new(r)) }
301        let mul = char('*')
302            .map(|_| times);
303        let add = char('+')
304            .map(|_| plus);
305        let factor = chainl1(parser(expr), mul);
306        chainl1(factor, add)
307            .parse_state(input)
308    }
309
310    #[test]
311    fn operators() {
312        let input =
313r"
3141 * 2 + 3 * test
315";
316        let (result, _) = parser(term)
317            .parse(input)
318            .unwrap();
319
320        let e1 = Expr::Times(Box::new(Expr::Int(1)), Box::new(Expr::Int(2)));
321        let e2 = Expr::Times(Box::new(Expr::Int(3)), Box::new(Expr::Id("test".to_string())));
322        assert_eq!(result, Expr::Plus(Box::new(e1), Box::new(e2)));
323    }
324
325
326    fn follow(input: State<&str>) -> ParseResult<(), &str> {
327        match input.clone().uncons() {
328            Ok((c, _)) => {
329                if c.is_alphanumeric() {
330                    Err(Consumed::Empty(ParseError::new(input.position, Error::Unexpected(c))))
331                }
332                else {
333                    Ok(((), Consumed::Empty(input)))
334                }
335            }
336            Err(_) => Ok(((), Consumed::Empty(input)))
337        }
338    }
339    #[test]
340    fn error_position() {
341        let mut p = string("let").skip(parser(follow)).map(|x| x.to_string())
342            .or(many1(digit()));
343        match p.parse("le123") {
344            Ok(_) => assert!(false),
345            Err(err) => assert_eq!(err.position, SourcePosition { line: 1, column: 1 })
346        }
347        match p.parse("let1") {
348            Ok(_) => assert!(false),
349            Err(err) => assert_eq!(err.position, SourcePosition { line: 1, column: 4 })
350        }
351    }
352
353    #[test]
354    fn sep_by_error_consume() {
355        let mut p = sep_by::<Vec<_>, _, _>(string("abc"), char(','));
356        let err = p.parse("ab,abc")
357            .map(|x| format!("{:?}", x))
358            .unwrap_err();
359        assert_eq!(err.position, SourcePosition { line: 1, column: 1});
360    }
361
362    #[test]
363    fn optional_error_consume() {
364        let mut p = optional(string("abc"));
365        let err = p.parse("ab")
366            .map(|x| format!("{:?}", x))
367            .unwrap_err();
368        assert_eq!(err.position, SourcePosition { line: 1, column: 1});
369    }
370    #[test]
371    fn chainl1_error_consume() {
372        fn first<T, U>(t: T, _: U) -> T { t }
373        let mut p = chainl1(string("abc"), char(',').map(|_| first));
374        assert!(p.parse("abc,ab").is_err());
375    }
376
377    #[test]
378    fn inner_error_consume() {
379        let mut p = many::<Vec<_>, _>(between(char('['), char(']'), digit()));
380        let result = p.parse("[1][2][]");
381        assert!(result.is_err(), format!("{:?}", result));
382        let error = result
383            .map(|x| format!("{:?}", x))
384            .unwrap_err();
385        assert_eq!(error.position, SourcePosition { line: 1, column: 8 });
386    }
387
388    #[test]
389    fn infinite_recursion_in_box_parser() {
390        let _: Result<(Vec<_>, _), _> = (many(Box::new(digit())))
391            .parse("1");
392    }
393
394    #[test]
395    fn unsized_parser() {
396        let mut parser: Box<Parser<Input=&str, Output=char>> = Box::new(digit());
397        let borrow_parser = &mut *parser;
398        assert_eq!(borrow_parser.parse("1"), Ok(('1', "")));
399    }
400
401    #[test]
402    fn choice_strings() {
403        let mut fruits = [
404            try(string("Apple")),
405            try(string("Banana")),
406            try(string("Cherry")),
407            try(string("Date")),
408            try(string("Fig")),
409            try(string("Grape")),
410        ];
411        let mut parser = choice(&mut fruits);
412        assert_eq!(parser.parse("Apple"), Ok(("Apple", "")));
413        assert_eq!(parser.parse("Banana"), Ok(("Banana", "")));
414        assert_eq!(parser.parse("Cherry"), Ok(("Cherry", "")));
415        assert_eq!(parser.parse("DateABC"), Ok(("Date", "ABC")));
416        assert_eq!(parser.parse("Fig123"), Ok(("Fig", "123")));
417        assert_eq!(parser.parse("GrapeApple"), Ok(("Grape", "Apple")));
418    }
419
420    #[test]
421    fn std_error() {
422        use std::fmt;
423        use std::error::Error as StdError;
424        #[derive(Debug)]
425        struct Error;
426        impl fmt::Display for Error {
427            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
428                write!(f, "error")
429            }
430        }
431        impl StdError for Error {
432            fn description(&self) -> &str { "error" }
433        }
434        let result: Result<((), _), _> = string("abc")
435            .and_then(|_| Err(Error))
436            .parse("abc");
437        assert!(result.is_err());
438        //Test that ParseError can be coerced to a StdError
439        let _ = result.map_err(|err| { let err: Box<StdError> = Box::new(err); err });
440    }
441}