perplex 0.4.1

A grammar analyzer and LR(k)/GLR parser generator.
Documentation
// Copyright (c) 2018 Fabian Schuiki

//! Parser code generation.

use std::io::{Result, Write};
use std::collections::HashMap;

use grammar::{self, Grammar, NonterminalId, RuleId, Symbol, TerminalId};
use machine::{Action, StateId, StateMachine};

/// A code generation backend.
///
/// Holds all information necessary to generate the code for a parser.
pub struct Backend {
    term_pats: HashMap<TerminalId, String>,
    nonterm_types: HashMap<NonterminalId, String>,
    reduction_funcs: HashMap<RuleId, String>,
}

impl Backend {
    /// Create a new backend.
    pub fn new() -> Backend {
        Backend {
            term_pats: HashMap::new(),
            nonterm_types: HashMap::new(),
            reduction_funcs: HashMap::new(),
        }
    }

    /// Add a match pattern for a terminal.
    pub fn add_terminal<S: Into<String>>(&mut self, terminal: TerminalId, pattern: S) {
        self.term_pats.insert(terminal, pattern.into());
    }

    /// Add a type for a nonterminal.
    pub fn add_nonterminal<S: Into<String>>(&mut self, nonterminal: NonterminalId, ty: S) {
        self.nonterm_types.insert(nonterminal, ty.into());
    }

    /// Add a reduction function for a rule.
    pub fn add_reduction_function<S: Into<String>>(&mut self, rule: RuleId, func_name: S) {
        self.reduction_funcs.insert(rule, func_name.into());
    }
}

/// Additional information about generated code.
///
/// This structure is used inside the code generation functions and is used to
/// keep track of function and type names that were used.
struct BackendCache<'be> {
    grammar: &'be Grammar,
    backend: &'be Backend,
}

impl<'be> BackendCache<'be> {
    fn state_fn_name(&mut self, id: StateId) -> String {
        format!("state_{}", id)
    }

    fn reduced_fn_name(&mut self, id: StateId) -> String {
        format!("reduced_{}", id)
    }

    fn terminal_pattern(&mut self, terminal: TerminalId) -> String {
        match self.backend.term_pats.get(&terminal) {
            Some(s) => s.clone(),
            None => panic!(
                "no pattern defined for terminal `{}`",
                terminal.pretty(self.grammar)
            ),
        }
    }

    fn nonterminal_type(&mut self, nonterminal: NonterminalId) -> String {
        match self.backend.nonterm_types.get(&nonterminal) {
            Some(s) => s.clone(),
            None => panic!(
                "no type defined for nonterminal `{}`",
                nonterminal.pretty(self.grammar)
            ),
        }
    }

    fn nonterminal_name(&mut self, nonterminal: NonterminalId) -> String {
        format!("Nt{}", nonterminal.as_usize())
    }

    fn nonterminal_unwrap(&mut self, nonterminal: NonterminalId) -> String {
        format!("unwrap_nt{}", nonterminal.as_usize())
    }

    fn reduction_function(&mut self, rule: RuleId) -> String {
        match self.backend.reduction_funcs.get(&rule) {
            Some(s) => s.clone(),
            None => format!("reduce_rule_{}", rule.as_usize()),
            // None => panic!("no reduction function defined for rule {:?}", rule),
        }
    }
}

/// Generate the code for the parser.
pub fn generate_parser<W: Write>(
    into: &mut W,
    backend: &Backend,
    machine: &StateMachine,
    grammar: &Grammar,
) -> Result<()> {
    let mut cache = BackendCache {
        grammar: grammar,
        backend: backend,
    };
    write!(into, "// Code automatically generated by perplex.\n")?;

    // Generate the enum of nonterminals.
    write!(
        into,
        "\n/// All nonterminals that may be pushed onto the stack.\nenum Nonterminal {{\n"
    )?;
    for ntid in 0..grammar.nonterminal_id_bound() {
        let ntid = NonterminalId::from_usize(ntid);
        write!(
            into,
            "    {}({}),\n",
            cache.nonterminal_name(ntid),
            cache.nonterminal_type(ntid)
        )?;
    }
    write!(into, "}}\n")?;
    write!(into, "\nimpl Nonterminal {{\n")?;
    for ntid in 0..grammar.nonterminal_id_bound() {
        let ntid = NonterminalId::from_usize(ntid);
        write!(into, "    #[inline(always)]\n")?;
        write!(
            into,
            "    fn {}(self) -> {} {{\n",
            cache.nonterminal_unwrap(ntid),
            cache.nonterminal_type(ntid)
        )?;
        write!(into, "        match self {{\n")?;
        write!(
            into,
            "            Nonterminal::{}(nt) => nt,\n",
            cache.nonterminal_name(ntid)
        )?;
        write!(
            into,
            "            _ => panic!(\"expected nonterminal `{}`\"),\n",
            cache.nonterminal_type(ntid)
        )?;
        write!(into, "        }}\n")?;
        write!(into, "    }}\n")?;
    }
    write!(into, "}}\n")?;
    write!(into, "\nimpl ::std::fmt::Debug for Nonterminal {{\n")?;
    write!(
        into,
        "    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {{\n"
    )?;
    write!(into, "        match *self {{\n")?;
    for ntid in 0..grammar.nonterminal_id_bound() {
        let ntid = NonterminalId::from_usize(ntid);
        write!(
            into,
            "            Nonterminal::{}(..) => write!(f, \"{}\"),\n",
            cache.nonterminal_name(ntid),
            grammar.nonterminal_name(ntid),
        )?;
    }
    write!(into, "        }}\n")?;
    write!(into, "    }}\n")?;
    write!(into, "}}\n")?;

    let parser_constraint = format!("P: Parser<Terminal = Terminal, Nonterminal = Nonterminal>");

    for state in machine.states() {
        let mut ta: Vec<_> = state
            .actions()
            .filter_map(|(symbol, action)| match symbol {
                Symbol::Terminal(tid) => Some((tid, action)),
                _ => None,
            })
            .collect();

        let nta: Vec<_> = state
            .actions()
            .filter_map(|(symbol, action)| match symbol {
                Symbol::Nonterminal(ntid) => Some((ntid, action)),
                _ => None,
            })
            .collect();

        write!(
            into,
            "\nfn {}<P>(p: &mut P) where {} {{\n",
            cache.state_fn_name(state.id()),
            parser_constraint,
        )?;
        write!(into, "    match *p.peek() {{\n")?;
        for (tid, action) in ta {
            match action {
                Action::Shift(target_state) => {
                    write!(
                        into,
                        "        {} => p.shift({}, {}),\n",
                        cache.terminal_pattern(tid),
                        cache.state_fn_name(target_state),
                        cache.reduced_fn_name(target_state)
                    )?;
                }
                Action::Reduce(rule_id) => {
                    write!(into, "        {} => ", cache.terminal_pattern(tid))?;
                    if rule_id == grammar::ACCEPT {
                        write!(into, "p.accept(),\n")?;
                    } else {
                        let rule = &grammar[rule_id];
                        let (name, num) = (rule.name(), rule.symbols().len());
                        write!(into, "p.reduce({}, |args|{{\n", num)?;
                        write!(into, "            let mut args = args.into_iter();\n")?;
                        for (i, &symbol) in rule.symbols().iter().enumerate() {
                            write!(into, "            let arg{} = args.next().unwrap()", i)?;
                            match symbol {
                                Symbol::Terminal(..) => write!(into, ".unwrap_terminal();\n")?,
                                Symbol::Nonterminal(ntid) => write!(
                                    into,
                                    ".unwrap_nonterminal().unwrap_nt{}();\n",
                                    ntid.as_usize()
                                )?,
                            }
                        }
                        write!(
                            into,
                            "            let reduced: {} = {}(\n",
                            cache.nonterminal_type(name),
                            cache.reduction_function(rule_id),
                        )?;
                        for i in 0..num {
                            write!(into, "                arg{},\n", i)?;
                        }
                        write!(into, "            );\n")?;
                        write!(
                            into,
                            "            Nonterminal::{}(reduced)\n",
                            cache.nonterminal_name(name)
                        )?;
                        write!(into, "        }}),\n")?;
                    }
                }
            }
        }
        write!(
            into,
            "        _ => panic!(\"syntax error, unexpected {{:?}}\", p.peek()),\n"
        )?;
        write!(into, "    }};\n")?;
        write!(into, "}}\n")?;

        write!(
            into,
            "\nfn {}<P>(p: &mut P, nt: Nonterminal) where {} {{\n",
            cache.reduced_fn_name(state.id()),
            parser_constraint,
        )?;
        write!(into, "    match nt {{\n")?;
        for (ntid, action) in nta {
            let state_id = match action {
                Action::Shift(sid) => sid,
                _ => panic!("nonterminal must always trigger a shift action"),
            };
            write!(
                into,
                "        Nonterminal::{}(..) => p.goto(nt, {}, {}),\n",
                cache.nonterminal_name(ntid),
                cache.state_fn_name(state_id),
                cache.reduced_fn_name(state_id)
            )?;
        }
        write!(into, "        _ => unreachable!(),\n")?;
        write!(into, "    }};\n")?;
        write!(into, "}}\n")?;
    }

    Ok(())
}