// pest 2.0.2
//
// The Elegant Parser
// Documentation
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.

#[macro_use]
extern crate pest;

use std::collections::HashMap;

use pest::{state, ParseResult, Parser, ParserState, Span};
use pest::error::Error;
use pest::iterators::{Pair, Pairs};

/// Names of the grammar rules understood by `JsonParser`.
///
/// Variants are deliberately lower-case so they read like grammar rule names
/// (hence `non_camel_case_types`). The declaration order matters because
/// `Ord`/`PartialOrd` are derived from it.
#[allow(dead_code, non_camel_case_types)]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
enum Rule {
    /// Entry rule; the parser forwards it straight to `value`.
    json,
    /// `{ ... }` with zero or more comma-separated `pair`s.
    object,
    /// A `string` key, a `:` and a `value`.
    pair,
    /// `[ ... ]` with zero or more comma-separated `value`s.
    array,
    /// Wrapper around exactly one of string, number, object, array, bool, null.
    value,
    /// A double-quoted string, quotes included.
    string,
    /// A backslash escape inside a string (emits no token of its own).
    escape,
    /// The `u` + hex digits part of an escape (emits no token of its own).
    unicode,
    /// A single hexadecimal digit (emits no token of its own).
    hex,
    /// A number with optional sign, fraction and exponent.
    number,
    /// The integer part of a number (emits no token of its own).
    int,
    /// The exponent part of a number (emits no token of its own).
    exp,
    /// `true` or `false`.
    bool,
    /// The literal `null`.
    null,
}

/// Stateless marker type that carries the hand-written JSON grammar through
/// its `Parser<Rule>` implementation.
struct JsonParser;

/// Hand-written JSON grammar built directly on `ParserState` combinators.
///
/// Each grammar rule is a nested function taking and returning the boxed
/// parser state. Rules wrapped in `state.rule(..)` (`object`, `pair`, `array`,
/// `value`, `string`, `number`, `bool`, `null`) emit a token pair; the others
/// (`json`, `escape`, `unicode`, `hex`, `int`, `exp`, `skip`) only consume
/// input.
///
/// NOTE(review): the character ranges below are written `'0'..'9'` and assume
/// that pest's `match_range` treats the end bound as inclusive — confirm
/// against the pest version in use.
impl Parser<Rule> for JsonParser {
    /// Parses `input` starting from `rule` and returns the produced pairs, or
    /// the parse error reported by pest.
    fn parse(rule: Rule, input: &str) -> Result<Pairs<Rule>, Error<Rule>> {
        /// Entry rule: a JSON document is a single `value`.
        fn json(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
            value(state)
        }

        /// `{ pair (, pair)* }` or an empty `{ }`, with optional whitespace
        /// between the pieces.
        fn object(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
            state.rule(Rule::object, |s| {
                // First alternative: at least one pair.
                s.sequence(|s| {
                    s.match_string("{")
                        .and_then(|s| skip(s))
                        .and_then(|s| pair(s))
                        .and_then(|s| skip(s))
                        .and_then(|s| {
                            s.repeat(|s| {
                                s.sequence(|s| {
                                    s.match_string(",")
                                        .and_then(|s| skip(s))
                                        .and_then(|s| pair(s))
                                        .and_then(|s| skip(s))
                                })
                            })
                        })
                        .and_then(|s| s.match_string("}"))
                }).or_else(|s| {
                    // Second alternative: an empty object.
                    s.sequence(|s| {
                        s.match_string("{")
                            .and_then(|s| skip(s))
                            .and_then(|s| s.match_string("}"))
                    })
                })
            })
        }

        /// `string : value`, with optional whitespace around the colon.
        fn pair(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
            state.rule(Rule::pair, |s| {
                s.sequence(|s| {
                    string(s)
                        .and_then(|s| skip(s))
                        .and_then(|s| s.match_string(":"))
                        .and_then(|s| skip(s))
                        .and_then(|s| value(s))
                })
            })
        }

        /// `[ value (, value)* ]` or an empty `[ ]`, with optional whitespace
        /// between the pieces.
        fn array(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
            state.rule(Rule::array, |s| {
                // First alternative: at least one value.
                s.sequence(|s| {
                    s.match_string("[")
                        .and_then(|s| skip(s))
                        .and_then(|s| value(s))
                        .and_then(|s| skip(s))
                        .and_then(|s| {
                            s.repeat(|s| {
                                s.sequence(|s| {
                                    s.match_string(",")
                                        .and_then(|s| skip(s))
                                        .and_then(|s| value(s))
                                        .and_then(|s| skip(s))
                                })
                            })
                        })
                        .and_then(|s| s.match_string("]"))
                }).or_else(|s| {
                    // Second alternative: an empty array.
                    s.sequence(|s| {
                        s.match_string("[")
                            .and_then(|s| skip(s))
                            .and_then(|s| s.match_string("]"))
                    })
                })
            })
        }

        /// Ordered choice between every concrete kind of JSON value.
        fn value(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
            state.rule(Rule::value, |s| {
                string(s)
                    .or_else(|s| number(s))
                    .or_else(|s| object(s))
                    .or_else(|s| array(s))
                    .or_else(|s| bool(s))
                    .or_else(|s| null(s))
            })
        }

        /// A double-quoted string: any run of escapes or characters other than
        /// `"` and `\`, delimited by quotes.
        fn string(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
            state.rule(Rule::string, |s| {
                s.match_string("\"")
                    .and_then(|s| {
                        s.repeat(|s| {
                            escape(s).or_else(|s| {
                                s.sequence(|s| {
                                    // Negative lookahead: the next character must be
                                    // neither a quote nor a backslash before we skip it.
                                    s.lookahead(false, |s| {
                                        s.match_string("\"").or_else(|s| s.match_string("\\"))
                                    }).and_then(|s| s.skip(1))
                                })
                            })
                        })
                    })
                    .and_then(|s| s.match_string("\""))
            })
        }

        /// A backslash followed by one of `" \ / b f n r t` or a unicode escape.
        fn escape(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
            state.sequence(|s| {
                s.match_string("\\").and_then(|s| {
                    s.match_string("\"")
                        .or_else(|s| s.match_string("\\"))
                        .or_else(|s| s.match_string("/"))
                        .or_else(|s| s.match_string("b"))
                        .or_else(|s| s.match_string("f"))
                        .or_else(|s| s.match_string("n"))
                        .or_else(|s| s.match_string("r"))
                        .or_else(|s| s.match_string("t"))
                        .or_else(|s| unicode(s))
                })
            })
        }

        /// `u` followed by exactly four hexadecimal digits, as JSON requires
        /// for a `\uXXXX` escape.
        fn unicode(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
            state.sequence(|s| {
                s.match_string("u")
                    .and_then(|s| hex(s))
                    .and_then(|s| hex(s))
                    .and_then(|s| hex(s))
                    .and_then(|s| hex(s))
            })
        }

        /// A single hexadecimal digit in either case.
        fn hex(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
            state
                .match_range('0'..'9')
                .or_else(|s| s.match_range('a'..'f'))
                .or_else(|s| s.match_range('A'..'F'))
        }

        /// Optional `-`, an integer part, then optionally either a fraction
        /// (itself optionally followed by an exponent) or a bare exponent.
        fn number(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
            state.rule(Rule::number, |s| {
                s.sequence(|s| {
                    s.optional(|s| s.match_string("-"))
                        .and_then(|s| int(s))
                        .and_then(|s| {
                            s.optional(|s| {
                                // The fraction gets its own sequence so that a "."
                                // without a following digit is rolled back before
                                // the bare-exponent alternative is tried; otherwise
                                // input such as "1.e5" would be accepted.
                                s.sequence(|s| {
                                    s.match_string(".")
                                        .and_then(|s| s.match_range('0'..'9'))
                                        .and_then(|s| s.repeat(|s| s.match_range('0'..'9')))
                                        .and_then(|s| s.optional(|s| exp(s)))
                                }).or_else(|s| exp(s))
                            })
                        })
                })
            })
        }

        /// Either a lone `0` or a non-zero digit followed by any digits.
        fn int(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
            state.match_string("0").or_else(|s| {
                s.sequence(|s| {
                    s.match_range('1'..'9')
                        .and_then(|s| s.repeat(|s| s.match_range('0'..'9')))
                })
            })
        }

        /// `E` or `e`, an optional sign, then an `int`.
        fn exp(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
            state.sequence(|s| {
                s.match_string("E")
                    .or_else(|s| s.match_string("e"))
                    .and_then(|s| {
                        s.optional(|s| s.match_string("+").or_else(|s| s.match_string("-")))
                    })
                    .and_then(|s| int(s))
            })
        }

        /// The literal `true` or `false`.
        fn bool(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
            state.rule(Rule::bool, |s| {
                s.match_string("true").or_else(|s| s.match_string("false"))
            })
        }

        /// The literal `null`.
        fn null(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
            state.rule(Rule::null, |s| s.match_string("null"))
        }

        /// Consumes any run of spaces, tabs, carriage returns and newlines.
        fn skip(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
            state.repeat(|s| {
                s.match_string(" ")
                    .or_else(|s| s.match_string("\t"))
                    .or_else(|s| s.match_string("\r"))
                    .or_else(|s| s.match_string("\n"))
            })
        }

        // Dispatch the requested rule to its implementing function.
        state(input, |state| match rule {
            Rule::json => json(state),
            Rule::object => object(state),
            Rule::pair => pair(state),
            Rule::array => array(state),
            Rule::value => value(state),
            Rule::string => string(state),
            Rule::escape => escape(state),
            Rule::unicode => unicode(state),
            Rule::hex => hex(state),
            Rule::number => number(state),
            Rule::int => int(state),
            Rule::exp => exp(state),
            Rule::bool => bool(state),
            Rule::null => null(state)
        })
    }
}

/// Minimal JSON syntax tree built by `consume` from the parser's pairs.
///
/// Text is never copied: strings and object keys are kept as `Span`s that
/// borrow from the parsed input for the lifetime `'i`.
#[derive(Debug, PartialEq)]
enum Json<'i> {
    /// The `null` literal.
    Null,
    /// `true` or `false`.
    Bool(bool),
    /// A number, parsed from its source text into an `f64`.
    Number(f64),
    /// The span of a `string` token (the token covers the surrounding quotes).
    String(Span<'i>),
    /// The elements of an array, in source order.
    Array(Vec<Json<'i>>),
    /// Object members keyed by the span of each key's `string` token.
    Object(HashMap<Span<'i>, Json<'i>>),
}

/// Turns a `value` pair into the matching `Json` node, recursing into arrays
/// and objects.
///
/// Panics (via `unwrap`/`unreachable!`) if the pair does not have the shape
/// the grammar produces for a `value`.
fn consume(pair: Pair<Rule>) -> Json {
    // A `value` pair wraps exactly one concrete token; look at that one.
    let inner = pair.into_inner().next().unwrap();

    match inner.as_rule() {
        Rule::object => {
            // Each child is a `pair` holding a key string followed by a value.
            let members = inner.into_inner().map(|member| {
                let mut parts = member.into_inner();

                let name = parts.next().unwrap().as_span();
                let content = consume(parts.next().unwrap());

                (name, content)
            });

            Json::Object(members.collect())
        }
        Rule::array => Json::Array(inner.into_inner().map(consume).collect()),
        Rule::string => Json::String(inner.as_span()),
        Rule::number => Json::Number(inner.as_str().parse().unwrap()),
        Rule::bool => match inner.as_str() {
            "true" => Json::Bool(true),
            "false" => Json::Bool(false),
            _ => unreachable!()
        },
        Rule::null => Json::Null,
        _ => unreachable!()
    }
}

/// The `null` rule matches the literal `null` and yields one token spanning
/// the whole four-byte input.
#[test]
fn null() {
    parses_to! {
        parser: JsonParser,
        input: "null",
        rule: Rule::null,
        tokens: [
            null(0, 4)
        ]
    };
}

/// The `bool` rule matches `false` and yields one token spanning the whole
/// five-byte input.
#[test]
fn bool() {
    parses_to! {
        parser: JsonParser,
        input: "false",
        rule: Rule::bool,
        tokens: [
            bool(0, 5)
        ]
    };
}

/// A lone `0` is a complete number token.
#[test]
fn number_zero() {
    parses_to! {
        parser: JsonParser,
        input: "0",
        rule: Rule::number,
        tokens: [
            number(0, 1)
        ]
    };
}

/// A number with a fractional part is covered by a single `number` token.
#[test]
fn float() {
    parses_to! {
        parser: JsonParser,
        input: "100.001",
        rule: Rule::number,
        tokens: [
            number(0, 7)
        ]
    };
}

/// A fraction followed by a signed upper-case exponent is covered by a single
/// `number` token.
#[test]
fn float_with_exp() {
    parses_to! {
        parser: JsonParser,
        input: "100.001E+100",
        rule: Rule::number,
        tokens: [
            number(0, 12)
        ]
    };
}

/// The optional leading minus sign is part of the `number` token.
#[test]
fn number_minus_zero() {
    parses_to! {
        parser: JsonParser,
        input: "-0",
        rule: Rule::number,
        tokens: [
            number(0, 2)
        ]
    };
}

/// A string containing a `\u0000` escape and an escaped quote is one `string`
/// token; the span includes both delimiting quotes and no inner tokens are
/// emitted for the escapes.
#[test]
fn string_with_escapes() {
    parses_to! {
        parser: JsonParser,
        input: "\"asd\\u0000\\\"\"",
        rule: Rule::string,
        tokens: [
            string(0, 13)
        ]
    };
}

/// An empty array may contain whitespace between the brackets and has no
/// child tokens.
#[test]
fn array_empty() {
    parses_to! {
        parser: JsonParser,
        input: "[ ]",
        rule: Rule::array,
        tokens: [
            array(0, 3)
        ]
    };
}

/// An array mixing a number, a bool, a null, a string and a nested array
/// produces one `value` child per element, each wrapping its concrete token.
/// The expected spans show that separators and whitespace belong to no child.
#[test]
fn array() {
    parses_to! {
        parser: JsonParser,
        input: "[0.0e1, false, null, \"a\", [0]]",
        rule: Rule::array,
        tokens: [
            array(0, 30, [
                value(1,  6, [number(1, 6)]),
                value(8, 13, [bool(8, 13)]),
                value(15, 19, [null(15, 19)]),
                value(21, 24, [string(21, 24)]),
                value(26, 29, [
                    array(26, 29, [
                        value(27, 28, [number(27, 28)])
                    ])
                ])
            ])
        ]
    };
}

/// An object with two members produces one `pair` child per member, each
/// holding the key's `string` token followed by a `value`. The second member
/// nests an array that itself contains an empty object and a number.
#[test]
fn object() {
    parses_to! {
        parser: JsonParser,
        input: "{\"a\" : 3, \"b\" : [{}, 3]}",
        rule: Rule::object,
        tokens: [
            object(0, 24, [
                pair(1, 8, [
                    string(1, 4),
                    value(7, 8, [number(7, 8)])
                ]),
                pair(10, 23, [
                    string(10, 13),
                    value(16, 23, [
                        array(16, 23, [
                            value(17, 19, [object(17, 19)]),
                            value(21, 22, [number(21, 22)])
                        ])
                    ])
                ])
            ])
        ]
    };
}

/// Parses a small document through the `json` entry rule, converts it with
/// `consume`, and checks the resulting tree.
///
/// The test fails loudly if the root is not an object, rather than passing
/// without having asserted anything.
#[test]
fn ast() {
    let input = "{\"a\": [null, true, 3.4]}";

    let root = JsonParser::parse(Rule::json, input)
        .unwrap()
        .next()
        .unwrap();

    match consume(root) {
        Json::Object(members) => {
            // The input has exactly one member, so its value is unambiguous
            // despite the map's unspecified iteration order.
            assert_eq!(members.len(), 1);

            let only = members.values().next().unwrap();

            assert_eq!(
                *only,
                Json::Array(vec![Json::Null, Json::Bool(true), Json::Number(3.4)])
            );
        }
        other => panic!("expected a JSON object at the root, got {:?}", other)
    }
}