// genex 0.6.4
//
// Text-expansion library
// Documentation
use std::collections::HashMap;

use pest::Parser;
use pest_derive::Parser;

use crate::Error;
use crate::Grammar;
use crate::Node;
use crate::Result;
use crate::VarRef;

/// Parser for the grammar-definition language, generated at compile time by
/// `pest_derive` from the rules in `genx.pest`.
///
/// Per pest_derive's documented behaviour, the derive also supplies the
/// `Rule` enum that the functions in this file match on.
#[derive(Parser)]
#[grammar = "genx.pest"]
struct GenxParser;

// type PestError = pest::error::Error<Rule>;

fn parse_node(pair: pest::iterators::Pair<Rule>) -> Result<Node> {
    match pair.as_rule() {
        Rule::non_terminal => Ok(Node::NonTerminal(
            pair.into_inner().next().unwrap().as_str().to_string(),
        )),
        Rule::var_ref => {
            let mut inner = pair.into_inner();
            let var = inner.next().unwrap().as_str().to_string();
            let modifier = inner.next().map(|p| p.as_str().to_string());
            Ok(Node::VarRef(VarRef { var, modifier }))
        }
        Rule::text => Ok(Node::Text(pair.as_str().to_string())),
        Rule::optional => {
            // Optional nodes contain an implicit sequence.
            Ok(Node::Optional(Box::new(parse_sequence(pair)?)))
        }
        Rule::choice => {
            // Each choice_arm child is an implicit sequence.
            let arms = pair
                .into_inner()
                .map(parse_sequence)
                .collect::<Result<Vec<_>>>()?;
            Ok(Node::Choice(arms))
        }
        Rule::sequence => parse_sequence(pair),
        _ => Err(Error::InternalParserError(format!(
            "Unrecognized rule while parsing node: {:?} {:?}",
            pair.as_rule(),
            pair.as_span()
        ))),
    }
}

fn parse_sequence(pair: pest::iterators::Pair<Rule>) -> Result<Node> {
    Ok(Node::Sequence(
        pair.into_inner()
            .map(parse_node)
            .collect::<Result<Vec<_>>>()?,
    ))
}

pub(crate) fn parse_grammar(s: &str) -> Result<Grammar> {
    let mut grammar = Grammar::new();
    let mut weights = HashMap::new();
    match GenxParser::parse(Rule::grammar, s) {
        Ok(pairs) => {
            for pair in pairs {
                match pair.as_rule() {
                    Rule::EOI => break,
                    Rule::rules => {
                        for pair in pair.into_inner() {
                            let mut inner = pair.into_inner();
                            let lhs = inner.next().unwrap().as_str();
                            let rhs_nodes: Result<Vec<Node>> =
                                inner.next().unwrap().into_inner().map(parse_node).collect();
                            let rhs = Node::Sequence(rhs_nodes?);
                            grammar.add_rule(lhs, rhs);
                        }
                    }
                    Rule::weights => {
                        for pair in pair.into_inner() {
                            let mut inner = pair.into_inner();
                            let lhs = inner.next().unwrap().as_str();
                            let weight = inner.next().unwrap().as_str().parse::<f64>().unwrap();
                            weights.insert(lhs.to_string(), weight);
                        }
                    }
                    _ => {
                        return Err(Error::InternalParserError(format!(
                            "Unexpected rule while parsing grammar: {:?}",
                            pair.as_rule()
                        )))
                    }
                }
            }
        }
        Err(e) => return Err(Error::ParseError(e.to_string())),
    };
    grammar.default_weights = weights;
    Ok(grammar)
}

// Unit tests for `parse_grammar`.
#[cfg(test)]
mod tests {
    use super::*;
    use maplit::hashmap;

    // Parses a grammar containing both a RULES and a WEIGHTS section and
    // checks the exact tree produced for every rule plus the weight map.
    #[test]
    fn test_parse() {
        let s = r#"
          RULES:
          top = hi <name>?:[, my dear #gender#,] [in <location>]
          name = #name#
          location = [city of #city|capitalize#|#county# county]

          WEIGHTS:
          city = 2
          county = 1
        "#;
        let grammar = parse_grammar(s).unwrap();
        // `top`: the `?:[...]` group is expected as an Optional wrapping a
        // Sequence, while the bracketed `[in <location>]` group (no `|`) is
        // expected as a plain nested Sequence.
        assert_eq!(
            grammar.get_rule("top").unwrap(),
            &Node::Sequence(vec![
                Node::Text("hi ".to_string()),
                Node::NonTerminal("name".to_string()),
                Node::Optional(Box::new(Node::Sequence(vec![
                    Node::Text(", my dear ".to_string()),
                    Node::VarRef(VarRef::with_variable("gender")),
                    Node::Text(",".to_string())
                ]))),
                Node::Text(" ".to_string()),
                Node::Sequence(vec![
                    Node::Text("in ".to_string()),
                    Node::NonTerminal("location".to_string()),
                ])
            ])
        );
        // `name`: a lone variable reference is still wrapped in a Sequence.
        assert_eq!(
            grammar.get_rule("name").unwrap(),
            &Node::Sequence(vec![Node::VarRef(VarRef::with_variable("name"))])
        );
        // `location`: a single Choice with two arms; `#city|capitalize#` is
        // expected to carry the `capitalize` modifier.
        assert_eq!(
            grammar.get_rule("location").unwrap(),
            &Node::Sequence(vec![Node::Choice(vec![
                Node::Sequence(vec![
                    Node::Text("city of ".to_string()),
                    Node::VarRef(VarRef::with_variable_and_modifier("city", "capitalize")),
                ]),
                Node::Sequence(vec![
                    Node::VarRef(VarRef::with_variable("county")),
                    Node::Text(" county".to_string()),
                ]),
            ])])
        );
        // Weights written as integers in the grammar are expected as f64s.
        assert_eq!(
            grammar.default_weights,
            hashmap! {
                "city".to_string() => 2.0,
                "county".to_string() => 1.0,
            }
        );
    }

    // Smoke test: a larger grammar with no `RULES:`/`WEIGHTS:` headers,
    // multi-arm choices and URL punctuation in text. Only checks that parsing
    // succeeds; the resulting tree is not inspected.
    #[test]
    fn test_advcir_grammar() {
        let s = r#"
          top = <id_and_type>[ <call_sign>] is circling over <location>[ <altitude>][ <speed>][ <squawk>]
          id_and_type = [#registration#|#registration#, #type|a#,|#militaryregistration#, a military aircraft,|#militaryregistration#, a military #type#,|Aircraft with unknown registration, ICAO #icao#|#type# with unknown registration, ICAO #icao#|Military aircraft with unknown registration, ICAO #militaryicao#]
          call_sign = call sign #call_sign#
          location = [#neighbourhood#, #locality#|#neighbourhood#, #county#|#locality#|#localadmin#|#name#]
          altitude = at #alt# feet
          speed = speed #speed# MPH
          squawk = squawking #squawk#
          landmark = #nearby_distance# miles from #nearby_landmark#
          adsbx_url = https://globe.adsbexchange.com/?icao=#icao#&zoom=13
        "#;
        let _grammar = parse_grammar(s).unwrap();
    }
}