ungrammar 1.3.0

A DSL for describing concrete syntax trees
Documentation
//! Simple hand-written ungrammar parser.
use std::collections::HashMap;

use crate::{
    error::{bail, format_err, Result},
    lexer::{self, TokenKind},
    Grammar, Node, NodeData, Rule, Token, TokenData,
};

macro_rules! bail {
    ($loc:expr, $($tt:tt)*) => {{
        let err = $crate::error::format_err!($($tt)*)
            .with_location($loc);
        return Err(err);
    }};
}

pub(crate) fn parse(tokens: Vec<lexer::Token>) -> Result<Grammar> {
    let mut p = Parser::new(tokens);
    while !p.is_eof() {
        node(&mut p)?;
    }
    p.finish()
}

#[derive(Default)]
struct Parser {
    grammar: Grammar,
    tokens: Vec<lexer::Token>,
    node_table: HashMap<String, Node>,
    token_table: HashMap<String, Token>,
}

const DUMMY_RULE: Rule = Rule::Node(Node(!0));

impl Parser {
    fn new(mut tokens: Vec<lexer::Token>) -> Parser {
        tokens.reverse();
        Parser {
            tokens,
            ..Parser::default()
        }
    }

    fn peek(&self) -> Option<&lexer::Token> {
        self.peek_n(0)
    }
    fn peek_n(&self, n: usize) -> Option<&lexer::Token> {
        self.tokens.iter().nth_back(n)
    }
    fn bump(&mut self) -> Result<lexer::Token> {
        self.tokens
            .pop()
            .ok_or_else(|| format_err!("unexpected EOF"))
    }
    fn expect(&mut self, kind: TokenKind, what: &str) -> Result<()> {
        let token = self.bump()?;
        if token.kind != kind {
            bail!(token.loc, "unexpected token, expected `{}`", what);
        }
        Ok(())
    }
    fn is_eof(&self) -> bool {
        self.tokens.is_empty()
    }
    fn finish(self) -> Result<Grammar> {
        for node_data in &self.grammar.nodes {
            if matches!(node_data.rule, DUMMY_RULE) {
                crate::error::bail!("Undefined node: {}", node_data.name)
            }
        }
        Ok(self.grammar)
    }
    fn intern_node(&mut self, name: String) -> Node {
        let len = self.node_table.len();
        let grammar = &mut self.grammar;
        *self.node_table.entry(name.clone()).or_insert_with(|| {
            grammar.nodes.push(NodeData {
                name,
                rule: DUMMY_RULE,
            });
            Node(len)
        })
    }
    fn intern_token(&mut self, name: String) -> Token {
        let len = self.token_table.len();
        let grammar = &mut self.grammar;
        *self.token_table.entry(name.clone()).or_insert_with(|| {
            grammar.tokens.push(TokenData { name });
            Token(len)
        })
    }
}

fn node(p: &mut Parser) -> Result<()> {
    let token = p.bump()?;
    let node = match token.kind {
        TokenKind::Node(it) => p.intern_node(it),
        _ => bail!(token.loc, "expected ident"),
    };
    p.expect(TokenKind::Eq, "=")?;
    if !matches!(p.grammar[node].rule, DUMMY_RULE) {
        bail!(token.loc, "duplicate rule: `{}`", p.grammar[node].name)
    }

    let rule = rule(p)?;
    p.grammar.nodes[node.0].rule = rule;
    Ok(())
}

fn rule(p: &mut Parser) -> Result<Rule> {
    let lhs = seq_rule(p)?;
    let mut alt = vec![lhs];
    while let Some(token) = p.peek() {
        if token.kind != TokenKind::Pipe {
            break;
        }
        p.bump()?;
        let rule = seq_rule(p)?;
        alt.push(rule)
    }
    let res = if alt.len() == 1 {
        alt.pop().unwrap()
    } else {
        Rule::Alt(alt)
    };
    Ok(res)
}

fn seq_rule(p: &mut Parser) -> Result<Rule> {
    let lhs = atom_rule(p)?;

    let mut seq = vec![lhs];
    while let Some(rule) = opt_atom_rule(p)? {
        seq.push(rule)
    }
    let res = if seq.len() == 1 {
        seq.pop().unwrap()
    } else {
        Rule::Seq(seq)
    };
    Ok(res)
}

fn atom_rule(p: &mut Parser) -> Result<Rule> {
    match opt_atom_rule(p)? {
        Some(it) => Ok(it),
        None => {
            let token = p.bump()?;
            bail!(token.loc, "unexpected token")
        }
    }
}

fn opt_atom_rule(p: &mut Parser) -> Result<Option<Rule>> {
    let token = match p.peek() {
        Some(it) => it,
        None => return Ok(None),
    };
    let mut res = match &token.kind {
        TokenKind::Node(name) => {
            if let Some(lookahead) = p.peek_n(1) {
                match lookahead.kind {
                    TokenKind::Eq => return Ok(None),
                    TokenKind::Colon => {
                        let label = name.clone();
                        p.bump()?;
                        p.bump()?;
                        let rule = atom_rule(p)?;
                        let res = Rule::Labeled {
                            label,
                            rule: Box::new(rule),
                        };
                        return Ok(Some(res));
                    }
                    _ => (),
                }
            }
            match p.peek_n(1) {
                Some(token) if token.kind == TokenKind::Eq => return Ok(None),
                _ => (),
            }
            let name = name.clone();
            p.bump()?;
            let node = p.intern_node(name);
            Rule::Node(node)
        }
        TokenKind::Token(name) => {
            let name = name.clone();
            p.bump()?;
            let token = p.intern_token(name);
            Rule::Token(token)
        }
        TokenKind::LParen => {
            p.bump()?;
            let rule = rule(p)?;
            p.expect(TokenKind::RParen, ")")?;
            rule
        }
        _ => return Ok(None),
    };

    if let Some(token) = p.peek() {
        match &token.kind {
            TokenKind::QMark => {
                p.bump()?;
                res = Rule::Opt(Box::new(res));
            }
            TokenKind::Star => {
                p.bump()?;
                res = Rule::Rep(Box::new(res));
            }
            _ => (),
        }
    }
    Ok(Some(res))
}