pub(crate) mod actions;
mod arrays;
mod base;
mod functions;
use quote::format_ident;
use rustemo::{Parser, WARN};
use std::{
fs,
path::{Path, PathBuf},
};
use syn::{parse_quote, Ident};
use yansi::Paint;
use crate::{
error::{Error, Result},
index::{StateIndex, TermIndex},
lang::rustemo::RustemoParser,
settings::{BuilderType, GeneratorTableType, LexerType, Settings},
table::{Action, LRTable},
};
use crate::{grammar::builder::GrammarBuilder, ParserAlgo};
use crate::{
grammar::{
types::{to_pascal_case, to_snake_case, Choice, SymbolTypes},
Grammar, NonTerminal, Production,
},
index::NonTermIndex,
};
/// Produces the individual sections of a generated parser file.
///
/// Every section method returns a list of statements; `ParserGenerator::generate`
/// concatenates them into the output file. Each method has a default body that
/// forwards the call unchanged to the generator returned by
/// [`PartGenerator::delegate`].
trait PartGenerator<'g, 's> {
    /// Returns the generator that the default section methods forward to.
    fn delegate(&self) -> &dyn PartGenerator<'g, 's>;

    /// Statements for the `header` section; forwards to the delegate by default.
    fn header(&self, generator: &ParserGenerator<'g, 's>) -> Result<Vec<syn::Stmt>> {
        let fallback = self.delegate();
        fallback.header(generator)
    }

    /// Statements for the `parser_header` section; forwards to the delegate by default.
    fn parser_header(&self, generator: &ParserGenerator<'g, 's>) -> Result<Vec<syn::Stmt>> {
        let fallback = self.delegate();
        fallback.parser_header(generator)
    }

    /// Statements for the `symbols` section; forwards to the delegate by default.
    fn symbols(&self, generator: &ParserGenerator<'g, 's>) -> Result<Vec<syn::Stmt>> {
        let fallback = self.delegate();
        fallback.symbols(generator)
    }

    /// Statements for the `types` section; forwards to the delegate by default.
    fn types(&self, generator: &ParserGenerator<'g, 's>) -> Result<Vec<syn::Stmt>> {
        let fallback = self.delegate();
        fallback.types(generator)
    }

    /// Statements for the `parser` section; forwards to the delegate by default.
    fn parser(&self, generator: &ParserGenerator<'g, 's>) -> Result<Vec<syn::Stmt>> {
        let fallback = self.delegate();
        fallback.parser(generator)
    }

    /// Statements for the `lexer_definition` section; forwards to the delegate by default.
    fn lexer_definition(&self, generator: &ParserGenerator<'g, 's>) -> Result<Vec<syn::Stmt>> {
        let fallback = self.delegate();
        fallback.lexer_definition(generator)
    }

    /// Statements for the `parser_definition` section; forwards to the delegate by default.
    fn parser_definition(&self, generator: &ParserGenerator<'g, 's>) -> Result<Vec<syn::Stmt>> {
        let fallback = self.delegate();
        fallback.parser_definition(generator)
    }

    /// Statements for the `builder` section; forwards to the delegate by default.
    fn builder(&self, generator: &ParserGenerator<'g, 's>) -> Result<Vec<syn::Stmt>> {
        let fallback = self.delegate();
        fallback.builder(generator)
    }
}
pub fn generate_parser(
grammar_path: &Path,
out_dir: Option<&Path>,
out_dir_actions: Option<&Path>,
settings: &Settings,
) -> Result<()> {
if !grammar_path.exists() {
return Err(Error::Error("Grammar file doesn't exist.".to_string()));
}
let grammar_dir = PathBuf::from(grammar_path.parent().ok_or_else(|| {
Error::Error("Cannot deduce parent directory of the grammar file.".to_string())
})?);
let out_dir = out_dir.unwrap_or(&grammar_dir);
let out_dir_actions = out_dir_actions.unwrap_or(&grammar_dir);
let mut parser = RustemoParser::new();
let file = parser.parse_file(grammar_path)?;
let grammar: Grammar = GrammarBuilder::new().try_from_file(file, Some(grammar_path))?;
if let LexerType::Default = settings.lexer_type {
for term in &grammar.terminals {
if term.idx != TermIndex(0) && term.recognizer.is_none() {
return Err(Error::Error(format!(
"Recognizer not defined for terminal '{}'.",
term.name
)));
}
}
}
let table = LRTable::new(&grammar, settings)?;
if settings.dot {
let dot_file = grammar_path.with_extension("dot");
println!("Writting dot file: {:?}", &dot_file);
fs::write(dot_file, table.to_dot())?;
}
if let ParserAlgo::LR = settings.parser_algo {
let conflicts = table.get_conflicts();
if !conflicts.is_empty() {
println!("{}", "\nCONFLICTS:".paint(WARN));
table.print_conflicts_report(&conflicts);
return Err(Error::Error(
"Grammar is not deterministic. There are conflicts.".to_string(),
));
}
}
let generator = ParserGenerator::new(
grammar_path,
out_dir.to_owned(),
out_dir_actions.to_owned(),
&grammar,
table,
settings,
)?;
generator.generate(out_dir)?;
Ok(())
}
/// Everything the code-generation steps need for a single grammar.
///
/// Instances are built by `ParserGenerator::new`, which derives the
/// identifier fields from the grammar file name and the grammar itself.
// NOTE(review): the reason for allowing dead code is not visible in this
// file — presumably some fields are only read by the part generators in the
// submodules; confirm before removing the attribute.
#[allow(dead_code)]
struct ParserGenerator<'g, 's> {
    /// Stem of the grammar file name (no directory, no extension).
    file_name: String,
    /// Identifier made from the name of the grammar's start symbol.
    root_symbol: Ident,
    /// `<PascalCaseFileName>Parser`.
    parser: Ident,
    /// `<PascalCaseFileName>LayoutParser`.
    layout_parser: Ident,
    /// `<parser>Definition`, i.e. the `parser` identifier with a `Definition` suffix.
    parser_definition: Ident,
    /// `<file_name>_actions`.
    actions_file: Ident,
    /// `<file_name>_lexer`.
    lexer_file: Ident,
    /// `<file_name>_builder`.
    builder_file: Ident,
    /// Grammar the parser is generated for.
    grammar: &'g Grammar,
    /// Output directory passed to the constructor.
    out_dir: PathBuf,
    /// Actions output directory passed to the constructor.
    out_dir_actions: PathBuf,
    /// LR table built for `grammar`.
    table: LRTable<'g, 's>,
    /// Generator configuration.
    settings: &'s Settings,
    /// Type parsed from the `input_type` string in `settings`.
    input_type: syn::Type,
    /// Section generator selected by `settings.generator_table_type`.
    part_generator: Box<dyn PartGenerator<'g, 's>>,
    /// Symbol types computed from the grammar; `Some` only when
    /// `settings.builder_type` is `BuilderType::Default`.
    types: Option<SymbolTypes>,
}
impl<'g, 's> ParserGenerator<'g, 's> {
fn new(
grammar_path: &Path,
out_dir: PathBuf,
out_dir_actions: PathBuf,
grammar: &'g Grammar,
table: LRTable<'g, 's>,
settings: &'s Settings,
) -> Result<Self> {
let file_name = grammar_path
.file_stem()
.ok_or_else(|| {
Error::Error(format!(
"Cannot deduce base file name from {grammar_path:?}"
))
})?
.to_str()
.ok_or_else(|| {
Error::Error(format!(
"Cannot deduce base file name from {grammar_path:?}"
))
})?;
let parser_name = to_pascal_case(file_name);
let root_symbol = format_ident!("{}", grammar.symbol_name(grammar.start_index));
let parser = format_ident!("{}Parser", parser_name);
let layout_parser = format_ident!("{}LayoutParser", parser_name);
let parser_definition = format_ident!("{}Definition", parser);
let actions_file = format_ident!("{}_actions", file_name);
let lexer_file = format_ident!("{}_lexer", file_name);
let builder_file = format_ident!("{}_builder", file_name);
let part_generator: Box<dyn PartGenerator> = match settings.generator_table_type {
GeneratorTableType::Arrays => Box::new(arrays::ArrayPartGenerator::new()),
GeneratorTableType::Functions => Box::new(functions::FunctionPartGenerator::new()),
};
let input_type = syn::parse_str(&settings.input_type)?;
let types = if let BuilderType::Default = settings.builder_type {
Some(SymbolTypes::new(grammar))
} else {
None
};
Ok(Self {
file_name: file_name.to_string(),
root_symbol,
parser,
layout_parser,
parser_definition,
actions_file,
lexer_file,
builder_file,
grammar,
out_dir,
out_dir_actions,
table,
settings,
input_type,
part_generator,
types,
})
}
fn generate(&self, out_dir: &Path) -> Result<()> {
let mut ast: Vec<syn::Stmt> = vec![];
ast.extend(self.part_generator.header(self)?);
ast.extend(self.part_generator.parser_header(self)?);
ast.extend(self.part_generator.types(self)?);
ast.extend(self.part_generator.symbols(self)?);
ast.extend(self.part_generator.parser_definition(self)?);
ast.extend(self.part_generator.parser(self)?);
ast.extend(self.part_generator.lexer_definition(self)?);
ast.extend(self.part_generator.builder(self)?);
std::fs::create_dir_all(out_dir).map_err(|e| {
Error::Error(format!(
"Cannot create directories for path '{out_dir:?}': {e:?}."
))
})?;
let mut file: syn::File = parse_quote!();
file.items.extend(ast.into_iter().map(|s| match s {
syn::Stmt::Item(i) => i,
_ => panic!("Invalid item."),
}));
let out_file = out_dir.join(&self.file_name).with_extension("rs");
println!("Writing parser file {out_file:?}");
std::fs::write(&out_file, prettyplease::unparse(&file)).map_err(|e| {
Error::Error(format!("Cannot write parser file '{out_file:?}': {e:?}."))
})?;
Ok(())
}
fn prod_kind(&self, prod: &Production) -> String {
format!(
"{}{}",
prod.nonterminal(self.grammar).name,
if let Some(ref kind) = prod.kind {
kind.clone()
} else {
format!("P{}", prod.ntidx + 1)
}
)
}
fn prod_kind_ident(&self, prod: &Production) -> syn::Ident {
format_ident!("{}", self.prod_kind(prod))
}
fn term_kind_ident(&self, term: TermIndex) -> syn::Ident {
format_ident!("{}", self.grammar.term_by_index(term).name)
}
fn nonterm_kind_ident(&self, nonterm: NonTermIndex) -> syn::Ident {
format_ident!("{}", self.grammar.nonterm_by_index(nonterm).name)
}
fn state_kind_ident(&self, state: StateIndex) -> syn::Ident {
format_ident!(
"{}S{}",
self.grammar.symbol_name(self.table.states[state].symbol),
state.0
)
}
fn action_to_syntax(&self, action: &Option<Action>) -> syn::Expr {
match action {
Some(action) => match action {
Action::Shift(state) => {
let state_kind_ident = self.state_kind_ident(*state);
parse_quote! { Shift(State::#state_kind_ident) }
}
Action::Reduce(prod, len) => {
let prod_kind = self.prod_kind_ident(&self.grammar.productions[*prod]);
parse_quote! { Reduce(PK::#prod_kind, #len) }
}
Action::Accept => parse_quote! { Accept },
},
None => parse_quote! { Error },
}
}
}
/// Joins the non-terminal's name and the choice's name with an underscore and
/// converts the result to snake case.
fn action_name(nonterminal: &NonTerminal, choice: &Choice) -> String {
    let nonterminal_name = &nonterminal.name;
    let choice_name = &choice.name;
    let combined = format!("{}_{}", nonterminal_name, choice_name);
    to_snake_case(combined)
}