#![allow(dead_code)]
use crate as gazelle;
use crate::grammar;
use crate::lexer::Scanner;
include!("meta_generated.rs");
#[doc(hidden)]
pub struct AstBuilder;
impl Types for AstBuilder {
type Error = crate::ParseError;
type Ident = String;
type Num = String;
type GrammarDef = grammar::Grammar;
type ExpectDecl = ExpectDecl<Self>;
type TerminalItem = grammar::TerminalDef;
type TypeAnnot = crate::Ignore;
type Rule = grammar::Rule;
type Alt = grammar::Alt;
type Variant = String;
type Term = grammar::Term;
}
impl gazelle::Action<Variant<Self>> for AstBuilder {
fn build(&mut self, node: Variant<Self>) -> Result<String, crate::ParseError> {
let Variant::Variant(name) = node;
Ok(name)
}
}
impl gazelle::Action<GrammarDef<Self>> for AstBuilder {
fn build(&mut self, node: GrammarDef<Self>) -> Result<grammar::Grammar, crate::ParseError> {
let GrammarDef::GrammarDef(start, expects, terminals, rules) = node;
let mut expect_rr = 0;
let mut expect_sr = 0;
for e in expects {
let ExpectDecl::ExpectDecl(count, kind) = e;
let count: usize = count.parse().unwrap_or(0);
match kind.as_str() {
"rr" => expect_rr = count,
"sr" => expect_sr = count,
_ => {}
}
}
Ok(grammar::Grammar {
start,
expect_rr,
expect_sr,
terminals,
rules,
})
}
}
impl gazelle::Action<TerminalItem<Self>> for AstBuilder {
fn build(
&mut self,
node: TerminalItem<Self>,
) -> Result<grammar::TerminalDef, crate::ParseError> {
let TerminalItem::TerminalItem(is_prec, name, has_type) = node;
Ok(grammar::TerminalDef {
name,
has_type: has_type.is_some(),
is_prec: is_prec.is_some(),
})
}
}
impl gazelle::Action<Rule<Self>> for AstBuilder {
fn build(&mut self, node: Rule<Self>) -> Result<grammar::Rule, crate::ParseError> {
let Rule::Rule(name, alts) = node;
Ok(grammar::Rule { name, alts })
}
}
impl gazelle::Action<Alt<Self>> for AstBuilder {
fn build(&mut self, node: Alt<Self>) -> Result<grammar::Alt, crate::ParseError> {
let Alt::Alt(terms, name) = node;
Ok(grammar::Alt { terms, name })
}
}
impl gazelle::Action<Term<Self>> for AstBuilder {
fn build(&mut self, node: Term<Self>) -> Result<grammar::Term, crate::ParseError> {
Ok(match node {
Term::SymSep(name, sep) => grammar::Term::SeparatedBy { symbol: name, sep },
Term::SymOpt(name) => grammar::Term::Optional(name),
Term::SymStar(name) => grammar::Term::ZeroOrMore(name),
Term::SymPlus(name) => grammar::Term::OneOrMore(name),
Term::SymPlain(name) => grammar::Term::Symbol(name),
Term::SymEmpty => grammar::Term::Empty,
})
}
}
fn lex_grammar(input: &str) -> Result<Vec<Terminal<AstBuilder>>, String> {
let mut src = Scanner::new(input);
let mut tokens = Vec::new();
loop {
src.skip_whitespace();
while src.skip_line_comment("//") || src.skip_block_comment("/*", "*/") {
src.skip_whitespace();
}
if src.at_end() {
break;
}
if let Some(span) = src.read_ident() {
let s = &input[span];
let tok = match s {
"start" => Terminal::KwStart,
"terminals" => Terminal::KwTerminals,
"prec" => Terminal::KwPrec,
"expect" => Terminal::KwExpect,
"_" => Terminal::Underscore,
_ => Terminal::Ident(s.to_string()),
};
tokens.push(tok);
continue;
}
if let Some(span) = src.read_digits() {
let s = &input[span];
tokens.push(Terminal::Num(s.to_string()));
continue;
}
if let Some(c) = src.peek() {
let tok = match c {
'=' => {
src.advance();
if src.peek() == Some('>') {
src.advance();
Terminal::FatArrow
} else {
Terminal::Eq
}
}
'|' => {
src.advance();
Terminal::Pipe
}
':' => {
src.advance();
Terminal::Colon
}
'?' => {
src.advance();
Terminal::Question
}
'*' => {
src.advance();
Terminal::Star
}
'+' => {
src.advance();
Terminal::Plus
}
'%' => {
src.advance();
Terminal::Percent
}
';' => {
src.advance();
Terminal::Semi
}
'{' => {
src.advance();
Terminal::Lbrace
}
'}' => {
src.advance();
Terminal::Rbrace
}
',' => {
src.advance();
Terminal::Comma
}
'(' => {
src.advance();
Terminal::Lparen
}
')' => {
src.advance();
Terminal::Rparen
}
_ => {
let (line, col) = src.line_col(src.offset());
return Err(format!("{}:{}: unexpected character: {:?}", line, col, c));
}
};
tokens.push(tok);
continue;
}
}
Ok(tokens)
}
pub fn parse_tokens_typed<I>(tokens: I) -> Result<grammar::Grammar, String>
where
I: IntoIterator<Item = Terminal<AstBuilder>>,
{
let mut parser = Parser::<AstBuilder>::new();
let mut actions = AstBuilder;
for tok in tokens {
if let Err(e) = parser.push(tok, &mut actions) {
return Err(parser.format_error(&e, None, None));
}
}
parser
.finish(&mut actions)
.map_err(|(p, e)| p.format_error(&e, None, None))
}
pub fn parse_grammar(input: &str) -> Result<grammar::Grammar, String> {
let tokens = lex_grammar(input)?;
if tokens.is_empty() {
return Err("Empty grammar".to_string());
}
parse_tokens_typed(tokens)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::lr::to_grammar_internal;
#[test]
fn test_lex() {
let tokens = lex_grammar("start s; terminals { A } s: S = A;").unwrap();
assert!(matches!(&tokens[0], Terminal::<AstBuilder>::KwStart));
assert!(matches!(&tokens[1], Terminal::<AstBuilder>::Ident(s) if s == "s"));
}
#[test]
fn test_parse_simple() {
let grammar = parse_grammar(
r#"
start expr;
terminals { PLUS, NUM }
expr = expr PLUS term => add | term => term;
term = NUM => num;
"#,
)
.unwrap();
assert_eq!(grammar.start, "expr");
assert_eq!(grammar.terminals.len(), 2);
assert_eq!(grammar.rules.len(), 2);
}
#[test]
fn test_parse_expr_grammar() {
let grammar = parse_grammar(
r#"
start expr;
terminals { PLUS, STAR, NUM, LPAREN, RPAREN }
expr = expr PLUS term => add | term => term;
term = term STAR factor => mul | factor => factor;
factor = NUM => num | LPAREN expr RPAREN => paren;
"#,
)
.unwrap();
assert_eq!(grammar.rules.len(), 3);
assert_eq!(grammar.rules[0].alts.len(), 2);
assert_eq!(grammar.rules[1].alts.len(), 2);
assert_eq!(grammar.rules[2].alts.len(), 2);
}
#[test]
fn test_parse_error_message() {
let result = parse_grammar(
r#"
start foo;
terminals { A }
foo = A A A => triple;
"#,
);
assert!(result.is_ok());
}
#[test]
fn test_prec_terminal() {
let grammar = parse_grammar(
r#"
start expr;
terminals { prec OP, NUM }
expr = expr OP expr => binop | NUM => num;
"#,
)
.unwrap();
assert_eq!(grammar.terminals.len(), 2);
assert!(grammar.terminals[0].is_prec);
assert!(!grammar.terminals[1].is_prec);
}
#[test]
fn test_roundtrip() {
let grammar = parse_grammar(
r#"
start s;
terminals { a }
s = a => a;
"#,
)
.unwrap();
let internal = to_grammar_internal(&grammar).unwrap();
assert_eq!(internal.rules.len(), 2);
}
#[test]
fn test_terminals_with_types() {
let grammar = parse_grammar(
r#"
start expr;
terminals { NUM: _, IDENT: _, PLUS }
expr = NUM => num | IDENT => ident | expr PLUS expr => add;
"#,
)
.unwrap();
assert_eq!(grammar.terminals.len(), 3);
assert_eq!(grammar.terminals[0].name, "NUM");
assert!(grammar.terminals[0].has_type);
assert_eq!(grammar.terminals[1].name, "IDENT");
assert!(grammar.terminals[1].has_type);
assert_eq!(grammar.terminals[2].name, "PLUS");
assert!(!grammar.terminals[2].has_type);
}
#[test]
fn test_rule_without_action() {
let grammar = parse_grammar(
r#"
start expr;
terminals { NUM }
expr = NUM => num;
"#,
)
.unwrap();
assert_eq!(grammar.rules[0].alts[0].name, "num");
}
#[test]
fn test_named_reductions() {
let grammar = parse_grammar(
r#"
start expr;
terminals { PLUS, NUM }
expr = expr PLUS expr => binop | NUM => literal;
"#,
)
.unwrap();
assert_eq!(grammar.rules[0].alts[0].name, "binop");
assert_eq!(grammar.rules[0].alts[1].name, "literal");
}
#[test]
fn test_modifier_parsing() {
let grammar = parse_grammar(
r#"
start s;
terminals { A, B, C }
s = A? B* C+ => s;
"#,
)
.unwrap();
assert_eq!(grammar.rules[0].alts[0].terms.len(), 3);
assert_eq!(
grammar.rules[0].alts[0].terms[0],
grammar::Term::Optional("A".to_string())
);
assert_eq!(
grammar.rules[0].alts[0].terms[1],
grammar::Term::ZeroOrMore("B".to_string())
);
assert_eq!(
grammar.rules[0].alts[0].terms[2],
grammar::Term::OneOrMore("C".to_string())
);
}
#[test]
fn test_named_empty_production() {
let grammar = parse_grammar(
r#"
start s;
terminals { A }
s = A => a | _ => empty;
"#,
)
.unwrap();
assert_eq!(grammar.rules[0].alts.len(), 2);
assert_eq!(grammar.rules[0].alts[1].terms.len(), 1);
assert_eq!(grammar.rules[0].alts[1].terms[0], grammar::Term::Empty);
assert_eq!(grammar.rules[0].alts[1].name, "empty");
}
#[test]
fn test_modifier_desugaring() {
use crate::lr::AltAction;
let grammar = parse_grammar(
r#"
start s;
terminals { A: _ }
s = A? => s;
"#,
)
.unwrap();
let internal = to_grammar_internal(&grammar).unwrap();
let opt_id = internal.symbols.get_id("__a_opt").unwrap();
assert_eq!(internal.types[&opt_id], Some("Option<A>".to_string()));
let opt_sym = internal.symbols.get("__a_opt").unwrap();
let opt_rules: Vec<_> = internal.rules.iter().filter(|r| r.lhs == opt_sym).collect();
assert_eq!(opt_rules.len(), 2);
assert_eq!(opt_rules[0].action, AltAction::OptSome);
assert_eq!(opt_rules[1].action, AltAction::OptNone);
let s_sym = internal.symbols.get("s").unwrap();
let s_rules: Vec<_> = internal.rules.iter().filter(|r| r.lhs == s_sym).collect();
assert_eq!(s_rules.len(), 1);
assert_eq!(s_rules[0].rhs, vec![opt_sym]);
}
#[test]
fn test_expect_declarations() {
let grammar = parse_grammar(
r#"
start s;
expect 2 sr;
expect 1 rr;
terminals { A }
s = A => a;
"#,
)
.unwrap();
assert_eq!(grammar.expect_sr, 2);
assert_eq!(grammar.expect_rr, 1);
}
#[test]
fn test_no_trailing_comma() {
let grammar = parse_grammar(
r#"
start s;
terminals { A, B, C }
s = A => a;
"#,
)
.unwrap();
assert_eq!(grammar.terminals.len(), 3);
}
#[test]
fn test_unknown_symbol_error() {
let grammar = parse_grammar(
r#"
start s;
terminals { A }
s = A B => s;
"#,
)
.unwrap();
let result = to_grammar_internal(&grammar);
assert!(result.is_err());
assert!(result.unwrap_err().contains("Unknown symbol: B"));
}
#[test]
fn test_untyped_modifier_star() {
let grammar = parse_grammar(
r#"
start s;
terminals { A }
s = A* => s;
"#,
)
.unwrap();
let internal = to_grammar_internal(&grammar).unwrap();
let star_id = internal.symbols.get_id("__a_star").unwrap();
assert_eq!(internal.types[&star_id], Some("Vec<()>".to_string()));
}
#[test]
fn test_untyped_nonterminal_modifier_optional() {
let grammar = parse_grammar(
r#"
start s;
terminals { A }
s = foo? => s;
foo = A => a;
"#,
)
.unwrap();
let internal = to_grammar_internal(&grammar).unwrap();
let opt_id = internal.symbols.get_id("__foo_opt").unwrap();
assert_eq!(internal.types[&opt_id], Some("Option<Foo>".to_string()));
}
#[test]
fn test_untyped_nonterminal_modifier_star() {
let grammar = parse_grammar(
r#"
start s;
terminals { A }
s = foo* => s;
foo = A => a;
"#,
)
.unwrap();
let internal = to_grammar_internal(&grammar).unwrap();
let star_id = internal.symbols.get_id("__foo_star").unwrap();
assert_eq!(internal.types[&star_id], Some("Vec<Foo>".to_string()));
}
#[test]
fn test_separator_modifier_parsing() {
let grammar = parse_grammar(
r#"
start s;
terminals { A, COMMA }
s = (A % COMMA) => s;
"#,
)
.unwrap();
assert_eq!(grammar.rules[0].alts[0].terms.len(), 1);
assert_eq!(
grammar.rules[0].alts[0].terms[0],
grammar::Term::SeparatedBy {
symbol: "A".to_string(),
sep: "COMMA".to_string()
}
);
}
#[test]
fn test_separator_modifier_desugaring() {
use crate::lr::AltAction;
let grammar = parse_grammar(
r#"
start s;
terminals { A: _, COMMA }
s = (A % COMMA) => s;
"#,
)
.unwrap();
let internal = to_grammar_internal(&grammar).unwrap();
let sep_id = internal.symbols.get_id("__a_sep_comma").unwrap();
assert_eq!(internal.types[&sep_id], Some("Vec<A>".to_string()));
let sep_sym = internal.symbols.get("__a_sep_comma").unwrap();
let sep_rules: Vec<_> = internal.rules.iter().filter(|r| r.lhs == sep_sym).collect();
assert_eq!(sep_rules.len(), 2);
let a_sym = internal.symbols.get("A").unwrap();
let comma_sym = internal.symbols.get("COMMA").unwrap();
assert_eq!(sep_rules[0].rhs, vec![sep_sym, comma_sym, a_sym]);
assert_eq!(sep_rules[0].action, AltAction::VecAppend);
assert_eq!(sep_rules[1].rhs, vec![a_sym]);
assert_eq!(sep_rules[1].action, AltAction::VecSingle);
let s_sym = internal.symbols.get("s").unwrap();
let s_rules: Vec<_> = internal.rules.iter().filter(|r| r.lhs == s_sym).collect();
assert_eq!(s_rules.len(), 1);
assert_eq!(s_rules[0].rhs, vec![sep_sym]);
}
#[test]
fn test_separator_end_to_end() {
let grammar = parse_grammar(
r#"
start items;
terminals { ITEM, COMMA }
items = (ITEM % COMMA) => items;
"#,
)
.unwrap();
let internal = to_grammar_internal(&grammar).unwrap();
use crate::table::CompiledTable;
let compiled = CompiledTable::build_from_internal(&internal);
assert!(!compiled.has_conflicts());
let item_id = compiled.symbol_id("ITEM").unwrap();
let comma_id = compiled.symbol_id("COMMA").unwrap();
{
use crate::runtime::{Parser, Token};
let mut parser = Parser::new(compiled.table());
let token = Token::new(item_id);
assert!(parser.maybe_reduce(Some(token)).unwrap().is_none());
parser.shift(token);
while let Some((rule, _, _)) = parser.maybe_reduce(None).unwrap() {
if rule == 0 {
break;
}
}
}
{
use crate::runtime::{Parser, Token};
let mut parser = Parser::new(compiled.table());
let tokens = vec![
Token::new(item_id),
Token::new(comma_id),
Token::new(item_id),
];
for tok in tokens {
while let Some((rule, _, _)) = parser.maybe_reduce(Some(tok)).unwrap() {
if rule == 0 {
break;
}
}
parser.shift(tok);
}
while let Some((rule, _, _)) = parser.maybe_reduce(None).unwrap() {
if rule == 0 {
break;
}
}
}
{
use crate::runtime::{Parser, Token};
let mut parser = Parser::new(compiled.table());
let tokens = vec![
Token::new(item_id),
Token::new(comma_id),
Token::new(item_id),
Token::new(comma_id),
Token::new(item_id),
];
for tok in tokens {
while let Some((rule, _, _)) = parser.maybe_reduce(Some(tok)).unwrap() {
if rule == 0 {
break;
}
}
parser.shift(tok);
}
while let Some((rule, _, _)) = parser.maybe_reduce(None).unwrap() {
if rule == 0 {
break;
}
}
}
}
}