parser-combinators 0.6.0

Parser combinators based on the Haskell library parsec
Documentation
//! This crate contains parser combinators, roughly based on the Haskell library [parsec](http://hackage.haskell.org/package/parsec).
//!
//! A parser in this library can be described as a function which takes some input and if it
//! is succesful, returns a value together with the remaining input.
//! A parser combinator is a function which takes one or more parsers and returns a new parser.
//! For instance the `many` parser can be used to convert a parser for single digits into one that
//! parses multiple digits.
//!
//!# Overview
//!
//! This library is currently split into three modules.
//!
//! * `primitives` contains the `Parser` trait as well as various structs dealing with input
//! streams and errors.
//!
//! * `combinator` contains the before mentioned parser combinators and thus contains the main
//! building blocks for creating any sort of more complex parsers. It consists of free functions as
//! well as a the `ParserExt` trait which provides a few functions which are more naturally used
//! through method calls.
//!
//! * `char` is the last module. It provides parsers specifically working with streams of
//! characters. As a few examples it has parsers for accepting digits, letters or whitespace.
//!
//!
//!# Examples
//!
//!```
//! extern crate parser_combinators;
//! use parser_combinators::{spaces, many1, sep_by, digit, char, Parser, ParserExt, ParseError};
//! 
//! fn main() {
//!     let input = "1234, 45,78";
//!     let spaces = spaces();
//!     let integer = spaces.clone()//Parse spaces first and use the with method to only keep the result of the next parser
//!         .with(many1(digit()).map(|string: String| string.parse::<i32>().unwrap()));//parse a string of digits into an i32
//!     //Parse integers separated by commas, skipping whitespace
//!     let mut integer_list = sep_by(integer, spaces.skip(char(',')));
//! 
//!     //Call parse with the input to execute the parser
//!     let result: Result<(Vec<i32>, &str), ParseError<char>> = integer_list.parse(input);
//!     match result {
//!         Ok((value, _remaining_input)) => println!("{:?}", value),
//!         Err(err) => println!("{}", err)
//!     }
//! }
//!```
//!
//! If we need a parser that is mutually recursive we can define a free function which internally 
//! can in turn be used as a parser (Note that we need to explicitly cast the function, this should
//! not be necessary once changes in rustc to make orphan checking less restrictive gets implemented)
//!
//! `expr` is written fully general here which may not be necessary in a specific implementation
//! The `Stream` trait is predefined to work with array slices, string slices and iterators
//! meaning that in this case it could be defined as
//! `fn expr(input: State<&str>) -> ParseResult<Expr, &str>`
//!
//!```
//! extern crate parser_combinators;
//! use parser_combinators::{between, char, letter, spaces, many1, parser, sep_by, Parser, ParserExt,
//! ParseResult};
//! use parser_combinators::primitives::{State, Stream};
//!
//! #[derive(Debug, PartialEq)]
//! enum Expr {
//!     Id(String),
//!     Array(Vec<Expr>),
//!     Pair(Box<Expr>, Box<Expr>)
//! }
//!
//! fn expr<I>(input: State<I>) -> ParseResult<Expr, I>
//!     where I: Stream<Item=char> {
//!     let word = many1(letter());
//!     //Creates a parser which parses a char and skips any trailing whitespace
//!     let lex_char = |c| char(c).skip(spaces());
//!     let comma_list = sep_by(parser(expr::<I>), lex_char(','));
//!     let array = between(lex_char('['), lex_char(']'), comma_list);
//!     //We can use tuples to run several parsers in sequence
//!     //The resulting type is a tuple containing each parsers output
//!     let pair = (lex_char('('), parser(expr::<I>), lex_char(','), parser(expr::<I>), lex_char(')'))
//!         .map(|t| Expr::Pair(Box::new(t.1), Box::new(t.3)));
//!     word.map(Expr::Id)
//!         .or(array.map(Expr::Array))
//!         .or(pair)
//!         .skip(spaces())
//!         .parse_state(input)
//! }
//! 
//! fn main() {
//!     let result = parser(expr)
//!         .parse("[[], (hello, world), [rust]]");
//!     let expr = Expr::Array(vec![
//!           Expr::Array(Vec::new())
//!         , Expr::Pair(Box::new(Expr::Id("hello".to_string())),
//!                      Box::new(Expr::Id("world".to_string())))
//!         , Expr::Array(vec![Expr::Id("rust".to_string())])
//!     ]);
//!     assert_eq!(result, Ok((expr, "")));
//! }
//!```

#[doc(inline)]
pub use primitives::{Parser, ParseError, from_iter};
#[doc(inline)]
pub use char::{
    char,
    digit,
    space,
    spaces,
    newline,
    crlf,
    tab,
    upper,
    lower,
    letter,
    alpha_num,
    hex_digit,
    oct_digit,
    string,

    ParseResult//use char::ParseResult for compatibility
};
#[doc(inline)]
pub use combinator::{
    any,
    between,
    chainl1,
    choice,
    many,
    many1,
    optional,
    parser,
    satisfy,
    sep_by,
    skip_many,
    skip_many1,
    token,
    try,
    value,
    unexpected,
    not_followed_by,

    ParserExt
};

macro_rules! static_fn {
    (($($arg: pat, $arg_ty: ty),*) -> $ret: ty { $body: expr }) => { {
        fn temp($($arg: $arg_ty),*) -> $ret { $body }
        let temp: fn (_) -> _ = temp;
        temp
    } }
}

///Module containing the primitive types which is used to create and compose more advanced parsers
pub mod primitives;
///Module containing all specific parsers
pub mod combinator;
///Module containg parsers specialized on character streams
pub mod char;

#[cfg(test)]
mod tests {
    use super::*;
    use super::primitives::{SourcePosition, State, Stream, Error, Consumed};
    

    fn integer<'a, I>(input: State<I>) -> ParseResult<i64, I>
        where I: Stream<Item=char> {
        let (s, input) = try!(many1::<String, _>(digit())
            .expected("integer")
            .parse_state(input));
        let mut n = 0;
        for c in s.chars() {
            n = n * 10 + (c as i64 - '0' as i64);
        }
        Ok((n, input))
    }

    #[test]
    fn test_integer() {
        let result = parser(integer).parse("123");
        assert_eq!(result, Ok((123i64, "")));
    }
    #[test]
    fn list() {
        let mut p = sep_by(parser(integer), char(','));
        let result = p.parse("123,4,56");
        assert_eq!(result, Ok((vec![123i64, 4, 56], "")));
    }
    #[test]
    fn iterator() {
        let result = parser(integer).parse(from_iter("123".chars()))
            .map(|(i, input)| (i, input.uncons().err()));
        assert_eq!(result, Ok((123i64, Some(()))));
    }
    #[test]
    fn field() {
        let word = || many(alpha_num());
        let spaces = spaces();
        let c_decl = (word(), spaces.clone(), char(':'), spaces, word())
            .map(|t| (t.0, t.4))
            .parse("x: int");
        assert_eq!(c_decl, Ok((("x".to_string(), "int".to_string()), "")));
    }
    #[test]
    fn source_position() {
        let source =
r"
123
";
        let result = (spaces(), parser(integer), spaces())
            .map(|t| t.1)
            .parse_state(State::new(source));
        let state = Consumed::Consumed(State {
            position: SourcePosition { line: 3, column: 1 },
            input: ""
        });
        assert_eq!(result, Ok((123i64, state)));
    }

    #[derive(Debug, PartialEq)]
    enum Expr {
        Id(String),
        Int(i64),
        Array(Vec<Expr>),
        Plus(Box<Expr>, Box<Expr>),
        Times(Box<Expr>, Box<Expr>),
    }

    #[allow(unconditional_recursion)]
    fn expr(input: State<&str>) -> ParseResult<Expr, &str> {
        let word = many1(letter())
            .expected("identifier");
        let integer = parser(integer);
        let array = between(char('['), char(']'), sep_by(parser(expr), char(',')))
            .expected("[");
        let paren_expr = between(char('('), char(')'), parser(term))
            .expected("(");
        let spaces = spaces();
        spaces.clone().with(
                word.map(Expr::Id)
                .or(integer.map(Expr::Int))
                .or(array.map(Expr::Array))
                .or(paren_expr)
            ).skip(spaces)
            .parse_state(input)
    }

    #[test]
    fn expression() {
        let result = sep_by(parser(expr), char(','))
            .parse("int, 100, [[], 123]");
        let exprs = vec![
              Expr::Id("int".to_string())
            , Expr::Int(100)
            , Expr::Array(vec![Expr::Array(vec![]), Expr::Int(123)])
        ];
        assert_eq!(result, Ok((exprs, "")));
    }

    #[test]
    fn expression_error() {
        let input =
r"
,123
";
        let result = parser(expr)
            .parse(input);
        let err = ParseError {
            position: SourcePosition { line: 2, column: 1 },
                errors: vec![
                    Error::Unexpected(','),
                    Error::Expected("integer".into()),
                    Error::Expected("identifier".into()),
                    Error::Expected("[".into()),
                    Error::Expected("(".into()),
                ]
        };
        assert_eq!(result, Err(err));
    }

    #[test]
    fn expression_error_message() {
        let input =
r"
,123
";
        let result = parser(expr)
            .parse(input);
        let m = format!("{}", result.unwrap_err());
let expected =
r"Parse error at line: 2, column: 1
Unexpected token ','
Expected 'integer', 'identifier', '[' or '('
";
        assert_eq!(m, expected);
    }

    fn term(input: State<&str>) -> ParseResult<Expr, &str> {
        fn times(l: Expr, r: Expr) -> Expr { Expr::Times(Box::new(l), Box::new(r)) }
        fn plus(l: Expr, r: Expr) -> Expr { Expr::Plus(Box::new(l), Box::new(r)) }
        let mul = char('*')
            .map(|_| times);
        let add = char('+')
            .map(|_| plus);
        let factor = chainl1(parser(expr), mul);
        chainl1(factor, add)
            .parse_state(input)
    }

    #[test]
    fn operators() {
        let input =
r"
1 * 2 + 3 * test
";
        let (result, _) = parser(term)
            .parse(input)
            .unwrap();

        let e1 = Expr::Times(Box::new(Expr::Int(1)), Box::new(Expr::Int(2)));
        let e2 = Expr::Times(Box::new(Expr::Int(3)), Box::new(Expr::Id("test".to_string())));
        assert_eq!(result, Expr::Plus(Box::new(e1), Box::new(e2)));
    }


    fn follow(input: State<&str>) -> ParseResult<(), &str> {
        match input.clone().uncons() {
            Ok((c, _)) => {
                if c.is_alphanumeric() {
                    Err(Consumed::Empty(ParseError::new(input.position, Error::Unexpected(c))))
                }
                else {
                    Ok(((), Consumed::Empty(input)))
                }
            }
            Err(_) => Ok(((), Consumed::Empty(input)))
        }
    }
    #[test]
    fn error_position() {
        let mut p = string("let").skip(parser(follow)).map(|x| x.to_string())
            .or(many1(digit()));
        match p.parse("le123") {
            Ok(_) => assert!(false),
            Err(err) => assert_eq!(err.position, SourcePosition { line: 1, column: 1 })
        }
    }

    #[test]
    fn sep_by_error_consume() {
        let mut p = sep_by::<Vec<_>, _, _>(string("abc"), char(','));
        let err = p.parse("ab,abc")
            .map(|x| format!("{:?}", x))
            .unwrap_err();
        assert_eq!(err.position, SourcePosition { line: 1, column: 1});
    }

    #[test]
    fn optional_error_consume() {
        let mut p = optional(string("abc"));
        let err = p.parse("ab")
            .map(|x| format!("{:?}", x))
            .unwrap_err();
        assert_eq!(err.position, SourcePosition { line: 1, column: 1});
    }
    #[test]
    fn chainl1_error_consume() {
        fn first<T, U>(t: T, _: U) -> T { t }
        let mut p = chainl1(string("abc"), char(',').map(|_| first));
        assert!(p.parse("abc,ab").is_err());
    }

    #[test]
    fn inner_error_consume() {
        let mut p = many::<Vec<_>, _>(between(char('['), char(']'), digit()));
        let result = p.parse("[1][2][]");
        assert!(result.is_err(), format!("{:?}", result));
        let error = result
            .map(|x| format!("{:?}", x))
            .unwrap_err();
        assert_eq!(error.position, SourcePosition { line: 1, column: 8 });
    }

    #[test]
    fn infinite_recursion_in_box_parser() {
        let _: Result<(Vec<_>, _), _> = (many(Box::new(digit())))
            .parse("1");
    }

    #[test]
    fn unsized_parser() {
        let mut parser: Box<Parser<Input=&str, Output=char>> = Box::new(digit());
        let borrow_parser = &mut *parser;
        assert_eq!(borrow_parser.parse("1"), Ok(('1', "")));
    }

    #[test]
    fn choice_strings() {
        let mut fruits = [
            try(string("Apple")),
            try(string("Banana")),
            try(string("Cherry")),
            try(string("Date")),
            try(string("Fig")),
            try(string("Grape")),
        ];
        let mut parser = choice(&mut fruits);
        assert_eq!(parser.parse("Apple"), Ok(("Apple", "")));
        assert_eq!(parser.parse("Banana"), Ok(("Banana", "")));
        assert_eq!(parser.parse("Cherry"), Ok(("Cherry", "")));
        assert_eq!(parser.parse("DateABC"), Ok(("Date", "ABC")));
        assert_eq!(parser.parse("Fig123"), Ok(("Fig", "123")));
        assert_eq!(parser.parse("GrapeApple"), Ok(("Grape", "Apple")));
    }

    #[test]
    fn std_error() {
        use std::fmt;
        use std::error::Error as StdError;
        #[derive(Debug)]
        struct Error;
        impl fmt::Display for Error {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                write!(f, "error")
            }
        }
        impl StdError for Error {
            fn description(&self) -> &str { "error" }
        }
        let result: Result<((), _), _> = string("abc")
            .and_then(|_| Err(Error))
            .parse("abc");
        assert!(result.is_err());
        //Test that ParseError can be coerced to a StdError
        let _ = result.map_err(|err| { let err: Box<StdError> = Box::new(err); err });
    }
}