rustemo_compiler/generator/
mod.rs

1pub(crate) mod actions;
2mod arrays;
3mod base;
4mod functions;
5
6use colored::Colorize;
7use quote::format_ident;
8use rustemo::Parser;
9use std::{
10    fs,
11    path::{Path, PathBuf},
12};
13use syn::{parse_quote, Ident};
14
15use crate::{
16    error::{Error, Result},
17    index::{StateIndex, TermIndex},
18    lang::rustemo::RustemoParser,
19    settings::{BuilderType, GeneratorTableType, LexerType, Settings},
20    table::{Action, LRTable},
21};
22use crate::{grammar::builder::GrammarBuilder, ParserAlgo};
23use crate::{
24    grammar::{
25        types::{to_pascal_case, to_snake_case, Choice, SymbolTypes},
26        Grammar, NonTerminal, Production,
27    },
28    index::NonTermIndex,
29};
30
31/// Generator for parser implementation parts. Different types can implement
32/// different parser implementation strategies.
33trait PartGenerator<'g, 's> {
34    fn delegate(&self) -> &dyn PartGenerator<'g, 's>;
35    fn header(&self, generator: &ParserGenerator<'g, 's>) -> Result<Vec<syn::Stmt>> {
36        self.delegate().header(generator)
37    }
38    fn parser_header(&self, generator: &ParserGenerator<'g, 's>) -> Result<Vec<syn::Stmt>> {
39        self.delegate().parser_header(generator)
40    }
41    fn symbols(&self, generator: &ParserGenerator<'g, 's>) -> Result<Vec<syn::Stmt>> {
42        self.delegate().symbols(generator)
43    }
44    fn types(&self, generator: &ParserGenerator<'g, 's>) -> Result<Vec<syn::Stmt>> {
45        self.delegate().types(generator)
46    }
47    fn parser(&self, generator: &ParserGenerator<'g, 's>) -> Result<Vec<syn::Stmt>> {
48        self.delegate().parser(generator)
49    }
50    fn lexer_definition(&self, generator: &ParserGenerator<'g, 's>) -> Result<Vec<syn::Stmt>> {
51        self.delegate().lexer_definition(generator)
52    }
53    fn parser_definition(&self, generator: &ParserGenerator<'g, 's>) -> Result<Vec<syn::Stmt>> {
54        self.delegate().parser_definition(generator)
55    }
56    fn builder(&self, generator: &ParserGenerator<'g, 's>) -> Result<Vec<syn::Stmt>> {
57        self.delegate().builder(generator)
58    }
59}
60
61/// Main entry point for the parser generator.
62pub fn generate_parser(
63    grammar_path: &Path,
64    out_dir: Option<&Path>,
65    out_dir_actions: Option<&Path>,
66    settings: &Settings,
67) -> Result<()> {
68    if !grammar_path.exists() {
69        return Err(Error::Error("Grammar file doesn't exist.".to_string()));
70    }
71
72    let grammar_dir = PathBuf::from(grammar_path.parent().ok_or_else(|| {
73        Error::Error("Cannot deduce parent directory of the grammar file.".to_string())
74    })?);
75
76    let out_dir = out_dir.unwrap_or(&grammar_dir);
77    let out_dir_actions = out_dir_actions.unwrap_or(&grammar_dir);
78
79    let mut parser = RustemoParser::new();
80    let file = parser.parse_file(grammar_path)?;
81    let grammar: Grammar = GrammarBuilder::new().try_from_file(file, Some(grammar_path))?;
82
83    // Check recognizers definition. If default string lexer is used all
84    // recognizers must be defined. If custom lexer is used no recognizer should
85    // be defined.
86    if let LexerType::Default = settings.lexer_type {
87        for term in &grammar.terminals {
88            if term.idx != TermIndex(0) && term.recognizer.is_none() {
89                return Err(Error::Error(format!(
90                    "Recognizer not defined for terminal '{}'.",
91                    term.name
92                )));
93            }
94        }
95    }
96
97    let table = LRTable::new(&grammar, settings)?;
98    if settings.dot {
99        let dot_file = grammar_path.with_extension("dot");
100        println!("Writting dot file: {:?}", &dot_file);
101        fs::write(dot_file, table.to_dot())?;
102    }
103
104    if let ParserAlgo::LR = settings.parser_algo {
105        let conflicts = table.get_conflicts();
106        if !conflicts.is_empty() {
107            println!("{}", "\nCONFLICTS:".red());
108            table.print_conflicts_report(&conflicts);
109            return Err(Error::Error(
110                "Grammar is not deterministic. There are conflicts.".to_string(),
111            ));
112        }
113    }
114
115    let generator = ParserGenerator::new(
116        grammar_path,
117        out_dir.to_owned(),
118        out_dir_actions.to_owned(),
119        &grammar,
120        table,
121        settings,
122    )?;
123
124    generator.generate(out_dir)?;
125    Ok(())
126}
127
/// A generator for the parser code.
///
/// Holds everything derived from the grammar file name, the grammar itself and
/// the settings that the part generators need while producing the parser
/// source.
// Some fields are used in parse_quote macros, that's why dead code is allowed.
#[allow(dead_code)]
struct ParserGenerator<'g, 's> {
    /// Stem of the grammar file name; also the base name of the generated file.
    file_name: String,
    /// Identifier built from the name of the grammar start symbol.
    root_symbol: Ident,
    /// `<PascalCaseFileName>Parser`.
    parser: Ident,
    /// `<PascalCaseFileName>LayoutParser`.
    layout_parser: Ident,
    /// `<parser>Definition`.
    parser_definition: Ident,
    /// `<file_name>_actions`.
    actions_file: Ident,
    /// `<file_name>_lexer`.
    lexer_file: Ident,
    /// `<file_name>_builder`.
    builder_file: Ident,
    /// The grammar the parser is generated for.
    grammar: &'g Grammar,
    /// Output directory passed in for the parser file.
    out_dir: PathBuf,
    /// Output directory passed in for the actions.
    out_dir_actions: PathBuf,
    /// LR table constructed for `grammar`.
    table: LRTable<'g, 's>,
    /// Generator configuration.
    settings: &'s Settings,
    /// Type parsed from `settings.input_type`.
    input_type: syn::Type,
    /// Strategy selected by `settings.generator_table_type`.
    part_generator: Box<dyn PartGenerator<'g, 's>>,
    /// Deduced AST types; `Some` only when the default builder is used.
    types: Option<SymbolTypes>,
}
149
150impl<'g, 's> ParserGenerator<'g, 's> {
151    fn new(
152        grammar_path: &Path,
153        out_dir: PathBuf,
154        out_dir_actions: PathBuf,
155        grammar: &'g Grammar,
156        table: LRTable<'g, 's>,
157        settings: &'s Settings,
158    ) -> Result<Self> {
159        let file_name = grammar_path
160            .file_stem()
161            .ok_or_else(|| {
162                Error::Error(format!(
163                    "Cannot deduce base file name from {grammar_path:?}"
164                ))
165            })?
166            .to_str()
167            .ok_or_else(|| {
168                Error::Error(format!(
169                    "Cannot deduce base file name from {grammar_path:?}"
170                ))
171            })?;
172        let parser_name = to_pascal_case(file_name);
173        let root_symbol = format_ident!("{}", grammar.symbol_name(grammar.start_index));
174        let parser = format_ident!("{}Parser", parser_name);
175        let layout_parser = format_ident!("{}LayoutParser", parser_name);
176        let parser_definition = format_ident!("{}Definition", parser);
177        let actions_file = format_ident!("{}_actions", file_name);
178        let lexer_file = format_ident!("{}_lexer", file_name);
179        let builder_file = format_ident!("{}_builder", file_name);
180
181        // Choose parser implementation strategy.
182        let part_generator: Box<dyn PartGenerator> = match settings.generator_table_type {
183            GeneratorTableType::Arrays => Box::new(arrays::ArrayPartGenerator::new()),
184            GeneratorTableType::Functions => Box::new(functions::FunctionPartGenerator::new()),
185        };
186
187        let input_type = syn::parse_str(&settings.input_type)?;
188
189        let types = if let BuilderType::Default = settings.builder_type {
190            // Deduce AST types
191            Some(SymbolTypes::new(grammar))
192        } else {
193            None
194        };
195
196        Ok(Self {
197            file_name: file_name.to_string(),
198            root_symbol,
199            parser,
200            layout_parser,
201            parser_definition,
202            actions_file,
203            lexer_file,
204            builder_file,
205            grammar,
206            out_dir,
207            out_dir_actions,
208            table,
209            settings,
210            input_type,
211            part_generator,
212            types,
213        })
214    }
215
216    fn generate(&self, out_dir: &Path) -> Result<()> {
217        let mut ast: Vec<syn::Stmt> = vec![];
218        ast.extend(self.part_generator.header(self)?);
219        ast.extend(self.part_generator.parser_header(self)?);
220        ast.extend(self.part_generator.types(self)?);
221        ast.extend(self.part_generator.symbols(self)?);
222        ast.extend(self.part_generator.parser_definition(self)?);
223        ast.extend(self.part_generator.parser(self)?);
224        ast.extend(self.part_generator.lexer_definition(self)?);
225        ast.extend(self.part_generator.builder(self)?);
226
227        std::fs::create_dir_all(out_dir).map_err(|e| {
228            Error::Error(format!(
229                "Cannot create directories for path '{out_dir:?}': {e:?}."
230            ))
231        })?;
232
233        let mut file: syn::File = parse_quote!();
234
235        file.items.extend(ast.into_iter().map(|s| match s {
236            syn::Stmt::Item(i) => i,
237            _ => panic!("Invalid item."),
238        }));
239
240        let out_file = out_dir.join(&self.file_name).with_extension("rs");
241        println!("Writing parser file {out_file:?}");
242        std::fs::write(&out_file, prettyplease::unparse(&file)).map_err(|e| {
243            Error::Error(format!("Cannot write parser file '{out_file:?}': {e:?}."))
244        })?;
245
246        Ok(())
247    }
248
249    fn prod_kind(&self, prod: &Production) -> String {
250        format!(
251            "{}{}",
252            prod.nonterminal(self.grammar).name,
253            if let Some(ref kind) = prod.kind {
254                kind.clone()
255            } else {
256                format!("P{}", prod.ntidx + 1)
257            }
258        )
259    }
260
261    fn prod_kind_ident(&self, prod: &Production) -> syn::Ident {
262        format_ident!("{}", self.prod_kind(prod))
263    }
264
265    fn term_kind_ident(&self, term: TermIndex) -> syn::Ident {
266        format_ident!("{}", self.grammar.term_by_index(term).name)
267    }
268
269    fn nonterm_kind_ident(&self, nonterm: NonTermIndex) -> syn::Ident {
270        format_ident!("{}", self.grammar.nonterm_by_index(nonterm).name)
271    }
272
273    fn state_kind_ident(&self, state: StateIndex) -> syn::Ident {
274        format_ident!(
275            "{}S{}",
276            self.grammar.symbol_name(self.table.states[state].symbol),
277            state.0
278        )
279    }
280
281    fn action_to_syntax(&self, action: &Option<Action>) -> syn::Expr {
282        match action {
283            Some(action) => match action {
284                Action::Shift(state) => {
285                    let state_kind_ident = self.state_kind_ident(*state);
286                    parse_quote! { Shift(State::#state_kind_ident) }
287                }
288                Action::Reduce(prod, len) => {
289                    let prod_kind = self.prod_kind_ident(&self.grammar.productions[*prod]);
290                    parse_quote! { Reduce(PK::#prod_kind, #len) }
291                }
292                Action::Accept => parse_quote! { Accept },
293            },
294            None => parse_quote! { Error },
295        }
296    }
297}
298
299fn action_name(nonterminal: &NonTerminal, choice: &Choice) -> String {
300    to_snake_case(format!("{}_{}", nonterminal.name, &choice.name))
301}