rustemo_compiler/
settings.rs

1use std::fs;
2
3use clap::ValueEnum;
4
5use crate::table::TableType;
6use crate::{Error, Result};
7use std::path::{Path, PathBuf};
8
9use crate::generator::generate_parser;
10
/// The parsing algorithm used
///
/// `LR` is the default variant. Selecting `GLR` through
/// [`Settings::parser_algo`] additionally switches the table type, disables
/// both shift preferences and turns off grammar-order lexical disambiguation.
// NOTE(review): this enum derives clap's `ValueEnum`; `///` comments placed on
// variants are presumably surfaced as CLI help text, so the variant notes
// below deliberately use plain `//` comments — confirm before converting them.
#[derive(Debug, Default, Clone, ValueEnum)]
pub enum ParserAlgo {
    // Used when no algorithm is configured explicitly (`#[default]`).
    #[default]
    LR,
    // Alternative algorithm; see `Settings::parser_algo` for the extra
    // settings that are changed when it is selected.
    GLR,
}
18
/// The lexer type used during parsing to break the input into tokens
///
/// `Default` is the variant used when nothing is configured explicitly.
// NOTE(review): variant `///` comments are presumably exposed as CLI help by
// clap's `ValueEnum` derive, so they are left untouched here — confirm.
#[derive(Debug, Default, Clone, ValueEnum)]
pub enum LexerType {
    /// Default lexer if the input is `str` is based on string/regex recognizers
    #[default]
    Default,
    /// The lexer will be supplied by the user
    Custom,
}
28
/// The builder type used during parsing to construct the output
///
/// `Default` is the variant used when nothing is configured explicitly.
/// Several `Settings` options (`actions`, `builder_loc_info`,
/// `actions_in_source_tree`) are documented as applying only to the default
/// builder.
// NOTE(review): variant `///` comments are presumably exposed as CLI help by
// clap's `ValueEnum` derive, so they are left untouched here — confirm.
#[derive(Debug, Default, Clone, ValueEnum)]
pub enum BuilderType {
    /// Default builder type constructs AST using inferred node types
    #[default]
    Default,
    /// Generic builder generates CST where each node is `TreeNode`
    Generic,
    /// The builder is user provided
    Custom,
}
40
/// Different generated parser table variants with different trade-offs
///
/// `Functions` is the default variant (it carries the `#[default]` attribute).
// NOTE(review): variant `///` comments are presumably exposed as CLI help by
// clap's `ValueEnum` derive, so they are left untouched here — confirm.
#[derive(Debug, Default, Clone, ValueEnum)]
pub enum GeneratorTableType {
    /// Table is generated as nested static arrays
    /// Access time should be relatively good but it produces
    /// larger executables.
    Arrays,
    /// Table is an array of function pointers where functions contain match
    /// expression for further disambiguation. Uses less statically allocated
    /// storage but requires function call and pattern matching.
    #[default]
    Functions,
}
54
/// Provides parser settings information.
///
/// It is the main entry point in the parser generation process. It is meant to
/// be used from the project `build.rs` script. See [tests crate `build.rs`
/// script](https://github.com/igordejanovic/rustemo/blob/main/tests/build.rs)
/// for examples of various configurations.
///
/// The first step is to create a default `Settings` instance, do the necessary
/// configuration by calling methods in a builder (chain) style and, at the end,
/// call the method to process the grammar, either by directly specifying the
/// file or by recursively processing the directory.
///
/// Most of these settings are also exposed through `rcomp` CLI tool so you can
/// process grammar and generate parsers from the command line (or shell script)
/// if you prefer.
///
/// You can read more in the [Rustemo book](https://www.igordejanovic.net/rustemo/)
///
/// ## Example
///
/// ```rust
/// rustemo_compiler::Settings::new().parser_algo(ParserAlgo::GLR).process_dir()
/// ```
#[derive(Debug, Clone)]
pub struct Settings {
    /// Root of the output tree for generated parsers. Initialised from the
    /// `OUT_DIR` env variable when available; `None` otherwise or after
    /// `in_source_tree` is called.
    pub(crate) out_dir_root: Option<PathBuf>,
    /// Root of the output tree for generated actions. Initialised the same
    /// way as `out_dir_root`; reset to `None` by `actions_in_source_tree`.
    pub(crate) out_dir_actions_root: Option<PathBuf>,
    /// Directory whose prefix is stripped from the grammar's parent directory
    /// to compute output paths. Initialised from `CARGO_MANIFEST_DIR` when
    /// available.
    pub(crate) root_dir: Option<PathBuf>,

    /// Favor SHIFT over REDUCE on conflicts (`false` by default).
    pub(crate) prefer_shifts: bool,
    /// Favor SHIFT over EMPTY reductions on conflicts (`true` by default).
    pub(crate) prefer_shifts_over_empty: bool,
    /// LR table type to construct.
    pub(crate) table_type: TableType,
    /// Parsing algorithm to use.
    pub(crate) parser_algo: ParserAlgo,
    /// Value stored by the `print_table` setter (`false` by default).
    pub(crate) print_table: bool,
    /// Substrings; any visited path containing one of them is skipped by
    /// `process_dir`.
    pub(crate) exclude: Vec<String>,
    /// Should actions be generated (`true` by default).
    pub(crate) actions: bool,
    /// Value stored by the `notrace` setter (`false` by default).
    pub(crate) notrace: bool,

    /// Lexer type to use.
    pub(crate) lexer_type: LexerType,
    /// Builder type to use.
    pub(crate) builder_type: BuilderType,
    /// Should default-builder AST types carry location/layout information
    /// (`false` by default).
    pub(crate) builder_loc_info: bool,
    /// Variant of the generated parser table.
    pub(crate) generator_table_type: GeneratorTableType,
    /// Input type name (`"str"` by default).
    pub(crate) input_type: String,

    /// Lexical disambiguation strategies; all three are `true` by default.
    pub(crate) lexical_disamb_most_specific: bool,
    pub(crate) lexical_disamb_longest_match: bool,
    pub(crate) lexical_disamb_grammar_order: bool,

    /// Allow parsing to succeed without reaching the end of the input
    /// (`false` by default).
    pub(crate) partial_parse: bool,
    /// Should whitespaces be skipped (`true` by default).
    pub(crate) skip_ws: bool,

    /// Should an existing actions file be recreated (`true` by default).
    pub(crate) force: bool,
    /// Set once `force` has been called explicitly; when set,
    /// `actions_in_source_tree` no longer resets `force` to `false`.
    force_explicit: bool,

    /// Produce a `.dot` file with automata visualization (`false` by default).
    pub(crate) dot: bool,
    /// Use `fancy_regex` instead of `regex` (`false` by default).
    pub(crate) fancy_regex: bool,
}
112
113impl Default for Settings {
114    fn default() -> Self {
115        // If called from cargo build use OUT_DIR as a default out_dir directory
116        // for both parser and actions.
117        let out_dir_root = std::env::var("OUT_DIR").map_or(None, |d| Some(PathBuf::from(d)));
118
119        // By default root dir is the root of the cargo project.
120        let root_dir =
121            std::env::var("CARGO_MANIFEST_DIR").map_or(None, |d| Some(PathBuf::from(d)));
122
123        Self {
124            root_dir,
125            out_dir_root: out_dir_root.clone(),
126            out_dir_actions_root: out_dir_root,
127            prefer_shifts: false,
128            prefer_shifts_over_empty: true,
129            table_type: Default::default(),
130            parser_algo: Default::default(),
131            print_table: false,
132            actions: true,
133            notrace: false,
134            lexer_type: Default::default(),
135            builder_type: Default::default(),
136            builder_loc_info: false,
137            generator_table_type: Default::default(),
138            input_type: "str".into(),
139            lexical_disamb_most_specific: true,
140            lexical_disamb_longest_match: true,
141            lexical_disamb_grammar_order: true,
142            partial_parse: false,
143            skip_ws: true,
144            force: true, // Overwriting actions by default
145            force_explicit: false,
146            exclude: vec![],
147            dot: false,
148            fancy_regex: false,
149        }
150    }
151}
152
153impl Settings {
154    /// Creates a default instance.
155    pub fn new() -> Self {
156        Settings::default()
157    }
158
159    /// Root dir used to calculate output file path from the input grammar path
160    /// when the `out_dir_root` is not `None`.
161    /// It can be overridden explicitly or when using `process_dir` call.
162    /// It is an error if `root_dir` is `None`, `our_dir_root` is set and
163    /// `CARGO_MANIFEST_DIR` env variable cannot be found.
164    pub fn root_dir(mut self, root_dir: PathBuf) -> Self {
165        self.root_dir = Some(root_dir);
166        self
167    }
168
169    /// Sets output root for the generated parser. By default, the parser is
170    /// generated in the source tree next to the grammar.
171    pub fn out_dir_root(mut self, out_dir: PathBuf) -> Self {
172        self.out_dir_root = Some(out_dir);
173        self
174    }
175
176    /// Output root for the generated actions when default builder is used. By
177    /// default, actions are generated in the source tree next to the grammar.
178    pub fn out_dir_actions_root(mut self, out_dir: PathBuf) -> Self {
179        self.out_dir_actions_root = Some(out_dir);
180        self
181    }
182
183    /// Generate both parser and actions (for default builder) in the source
184    /// tree, next to the grammar. By default, parser and actions are generated
185    /// in out `OUT_DIR`.
186    pub fn in_source_tree(mut self) -> Self {
187        self.out_dir_root = None;
188        if matches!(self.builder_type, BuilderType::Default) {
189            self.actions_in_source_tree()
190        } else {
191            self
192        }
193    }
194
195    /// Generate actions in the source tree (if the default builder is used),
196    /// next to the grammar. By default, actions are generated in out `OUT_DIR`.
197    pub fn actions_in_source_tree(mut self) -> Self {
198        if !matches!(self.builder_type, BuilderType::Default) {
199            panic!("Settings 'actions_in_source_tree' is only available for the default builder type!");
200        }
201        self.out_dir_actions_root = None;
202        if !self.force_explicit {
203            self.force = false;
204        }
205        self
206    }
207
208    /// Excludes path from processing. If path contains any of the string given
209    /// in `exclude` vector it will be skipped.
210    pub fn exclude(mut self, exclude: Vec<String>) -> Self {
211        self.exclude = exclude;
212        self
213    }
214
215    /// When there are competing REDUCE and SHIFT operations, this settings will
216    /// always favor SHIFT.
217    pub fn prefer_shifts(mut self, prefer: bool) -> Self {
218        self.prefer_shifts = prefer;
219        self
220    }
221
222    /// When there are competing EMPTY reduction and SHIFT, this settings will
223    /// always favor SHIFT.
224    pub fn prefer_shifts_over_empty(mut self, prefer: bool) -> Self {
225        self.prefer_shifts_over_empty = prefer;
226        self
227    }
228
229    /// LR table type to construct.
230    pub fn table_type(mut self, table_type: TableType) -> Self {
231        self.table_type = table_type;
232        self
233    }
234
235    /// LR algorithm to use
236    pub fn parser_algo(mut self, parser_algo: ParserAlgo) -> Self {
237        match parser_algo {
238            ParserAlgo::LR => {}
239            ParserAlgo::GLR => {
240                // For GLR we are using RN tables
241                self.table_type = TableType::LALR_RN;
242                // For GLR we should not favour shifts at all
243                self.prefer_shifts = false;
244                self.prefer_shifts_over_empty = false;
245                // We don't use grammar order by default
246                self.lexical_disamb_grammar_order = false;
247            }
248        }
249        self.parser_algo = parser_algo;
250        self
251    }
252
253    /// Sets lexer type. Default lexer is used for string inputs and is based on
254    /// regex/string matches from the grammar.
255    pub fn lexer_type(mut self, lexer_type: LexerType) -> Self {
256        self.lexer_type = lexer_type;
257        self
258    }
259
260    /// Sets builder type. The default builder will deduce AST types and actions.
261    pub fn builder_type(mut self, builder_type: BuilderType) -> Self {
262        self.builder_type = builder_type;
263        self
264    }
265
266    /// Should generated default AST builder types contain location/layout information
267    /// This is only used if builder-type is default.
268    pub fn builder_loc_info(mut self, builder_loc_info: bool) -> Self {
269        self.builder_loc_info = builder_loc_info;
270        self
271    }
272
273    /// Sets generator table type. The default is nested static arrays.
274    pub fn generator_table_type(mut self, generator_table_type: GeneratorTableType) -> Self {
275        self.generator_table_type = generator_table_type;
276        self
277    }
278
279    /// Sets the input type. Default is `str`
280    pub fn input_type(mut self, input_type: String) -> Self {
281        self.input_type = input_type;
282        self
283    }
284
285    /// Lexical disambiguation using most specific match strategy.
286    pub fn lexical_disamb_most_specific(mut self, most_specific: bool) -> Self {
287        self.lexical_disamb_most_specific = most_specific;
288        self
289    }
290
291    /// Lexical disambiguation using longest match strategy.
292    pub fn lexical_disamb_longest_match(mut self, longest_match: bool) -> Self {
293        self.lexical_disamb_longest_match = longest_match;
294        self
295    }
296
297    /// Lexical disambiguation using grammar order.
298    pub fn lexical_disamb_grammar_order(mut self, grammar_order: bool) -> Self {
299        if let ParserAlgo::LR = self.parser_algo {
300            if !grammar_order {
301                panic!("Can't disable grammar order strategy for LR.")
302            }
303        }
304        self.lexical_disamb_grammar_order = grammar_order;
305        self
306    }
307
308    /// Set whether or not we use [`fancy_regex`](https://docs.rs/fancy-regex/latest/fancy_regex/)
309    /// instead of [`regex`](https://docs.rs/regex/latest/regex/)
310    pub fn fancy_regex(mut self, fancy_regex: bool) -> Self {
311        self.fancy_regex = fancy_regex;
312        self
313    }
314
315    pub fn print_table(mut self, print_table: bool) -> Self {
316        self.print_table = print_table;
317        self
318    }
319
320    /// If partial parse is allowed parsing can succeed even if the parser
321    /// didn't reach the end of the input. Use with care, especially with GLR
322    /// parsing as it may lead to a large number of partial solutions.
323    pub fn partial_parse(mut self, partial_parse: bool) -> Self {
324        self.partial_parse = partial_parse;
325        self
326    }
327
328    /// Should whitespaces be skipped. `true` by default. Not used if Layout
329    /// rule exists in the Grammar. Used only in the default lexer.
330    pub fn skip_ws(mut self, skip_ws: bool) -> Self {
331        self.skip_ws = skip_ws;
332        self
333    }
334
335    /// Should actions be generated. `true` by default. Used only if default
336    /// builder is used.
337    pub fn actions(mut self, actions: bool) -> Self {
338        self.actions = actions;
339        self
340    }
341
342    /// Should trace log be printed. `false` by default. Does nothing for
343    /// release builds as trace is only available in debug build. Can also be
344    /// set by `RUSTEMO_NOTRACE=1` env variable.
345    pub fn notrace(mut self, notrace: bool) -> Self {
346        let notrace = if !notrace {
347            std::env::var("RUSTEMO_NOTRACE").is_ok()
348        } else {
349            std::env::set_var("RUSTEMO_NOTRACE", "1");
350            true
351        };
352
353        self.notrace = notrace;
354        self
355    }
356
357    /// Should actions file be recreated if exist. Use with care.
358    pub fn force(mut self, force: bool) -> Self {
359        self.force = force;
360        self.force_explicit = true;
361        self
362    }
363
364    /// If this is set a .dot file with automata visualization will be produced during
365    /// compiling.
366    pub fn dot(mut self, dot: bool) -> Self {
367        self.dot = dot;
368        self
369    }
370
371    /// Recursively traverse the root dir and process each Rustemo grammar found.
372    /// Used as the last call to the configured [Settings] value.
373    pub fn process_dir(&self) -> Result<()> {
374        if let Some(root_dir) = &self.root_dir {
375            if !root_dir.exists() {
376                return Err(Error::Error(format!(
377                    "Directory/File {root_dir:?} doesn't exist."
378                )));
379            }
380
381            let visitor = |grammar: &Path| -> Result<()> {
382                self.process_grammar(grammar)?;
383                Ok(())
384            };
385
386            self.visit_dirs(root_dir, &visitor)
387        } else {
388            Err(Error::Error("Root dir must be set!".to_string()))
389        }
390    }
391
392    /// Process the given grammar and generates the parser and actions (if
393    /// default builder is used). Used as the last call to the configured
394    /// [Settings] value.
395    pub fn process_grammar(&self, grammar: &Path) -> Result<()> {
396        println!("Generating parser for grammar {grammar:?}");
397        let relative_outdir = |p: &Path| -> Result<PathBuf> {
398            Ok(p.join(
399                grammar
400                    .parent()
401                    .ok_or(Error::Error(format!(
402                        "Cannot find parent of '{grammar:?}' file."
403                    )))?
404                    .strip_prefix(self.root_dir.as_ref().expect("'root_dir' must be set!"))
405                    .or(Err(Error::Error(format!(
406                        "Cannot remove prefix '{:?}' from '{grammar:?}'.",
407                        &self.root_dir
408                    ))))?,
409            ))
410        };
411
412        let out_dir = self
413            .out_dir_root
414            .as_ref()
415            .map(|p| relative_outdir(p))
416            .transpose()?;
417
418        let out_dir_actions = self
419            .out_dir_actions_root
420            .as_ref()
421            .map(|p| relative_outdir(p))
422            .transpose()?;
423
424        if let Some(ref dir) = out_dir {
425            println!("Parser out dir: {dir:?}");
426        }
427        if let Some(ref dir) = out_dir_actions {
428            println!("Actions out dir: {dir:?}");
429        }
430
431        generate_parser(
432            grammar,
433            out_dir.as_deref(),
434            out_dir_actions.as_deref(),
435            self,
436        )
437    }
438
439    /// Recursively visits dirs starting from the given `dir` and calls
440    /// `visitor` for each Rustemo grammar found.
441    fn visit_dirs(&self, dir: &Path, visitor: &dyn Fn(&Path) -> Result<()>) -> Result<()> {
442        if dir.is_dir() {
443            for entry in fs::read_dir(dir)? {
444                let entry = entry?;
445                let path = entry.path();
446
447                // Check excluded paths
448                let path_name = path.to_string_lossy();
449                if self.exclude.iter().any(|e| path_name.contains(e)) {
450                    println!("Excluding path: {path_name:?}");
451                    continue;
452                }
453
454                if path.is_dir() {
455                    self.visit_dirs(&path, visitor)?;
456                } else if matches!(path.extension(), Some(ext) if ext == "rustemo") {
457                    visitor(&path)?
458                }
459            }
460        }
461        Ok(())
462    }
463}
464
465/// Recursively process a given dir and generate a parser for each found
466/// grammar with default settings.
467///
468/// # Example
469///
470/// ```rust
471/// rustemo_compiler::process_dir("~/my_project")
472/// ```
473///
474/// # Errors
475///
476/// In case of an error a value of [rustemo::Error] is returned.
477pub fn process_dir<P: AsRef<Path>>(dir: P) -> Result<()> {
478    Settings::new()
479        .root_dir(PathBuf::from(dir.as_ref()))
480        .process_dir()?;
481    Ok(())
482}
483
484/// A shortcut function which creates default [Settings] and use it to process
485/// the crate project directory.
486pub fn process_crate_dir() -> Result<()> {
487    Settings::new().process_dir()?;
488    Ok(())
489}
490
491/// Generates a parser from the given grammar file with default settings.
492///
493/// # Errors
494///
495/// In case of an error a value of [rustemo::Error] is returned.
496pub fn process_grammar<P: AsRef<Path>>(grammar: P) -> Result<()> {
497    Settings::new().process_grammar(grammar.as_ref())?;
498    Ok(())
499}