rustemo_compiler/
settings.rs

1use std::fs;
2
3use clap::ValueEnum;
4use rustemo::LOG;
5use yansi::Paint;
6
7use crate::table::TableType;
8use crate::{Error, Result};
9use std::path::{Path, PathBuf};
10
11use crate::generator::generate_parser;
12
13/// The parsing algorithm used
14#[derive(Debug, Default, Clone, ValueEnum)]
15pub enum ParserAlgo {
16    #[default]
17    LR,
18    GLR,
19}
20
21/// The lexer type used during parsing to break the input into tokens
22#[derive(Debug, Default, Clone, ValueEnum)]
23pub enum LexerType {
24    /// Default lexer if the input is `str` is based on string/regex recognizers
25    #[default]
26    Default,
27    /// The lexer will be supplied by the user
28    Custom,
29}
30
31/// The builder type used during parsing to construct the output
32#[derive(Debug, Default, Clone, ValueEnum)]
33pub enum BuilderType {
34    /// Default builder type constructs AST using inferred node types
35    #[default]
36    Default,
37    /// Generic builder generates CST where each node is `TreeNode`
38    Generic,
39    /// The builder is user provided
40    Custom,
41}
42
43/// Different generated parser table variants with different trade-offs
44#[derive(Debug, Default, Clone, ValueEnum)]
45pub enum GeneratorTableType {
46    /// Table is generated as nested static arrays
47    /// Access time should be relatively good but it produces
48    /// larger executables.
49    Arrays,
50    /// Table is an array of function pointers where functions contain match
51    /// expression for further disambiguation. Uses less statically allocated
52    /// storage but requires function call and pattern matching.
53    #[default]
54    Functions,
55}
56
57/// Provides parser settings information.
58///
59/// It is the main entry point in the parser generation process. It is meant to
60/// be used from the project `build.rs` script. See [tests crate `build.rs`
61/// script](https://github.com/igordejanovic/rustemo/blob/main/tests/build.rs)
62/// for examples of various configurations.
63///
64/// The first step is to create default `Settings` instance, do necessary
65/// configuration by calling methods in a builder (chain) style and, at the end,
66/// call the method to process the grammar, either by directly specifying the
67/// file or recursivelly processing the directory.
68///
69/// Most of these settings are also exposed through `rcomp` CLI tool so you can
70/// process grammar and generate parsers from the command line (or shell script)
71/// if you prefer.
72///
73/// You can read more in the [Rustemo book](https://www.igordejanovic.net/rustemo/)
74///
75/// ## Example
76///
77/// ```rust
78/// rustemo_compiler::Settings::new().parser_algo(ParserAlgo::GLR).process_crate_dir()
79/// ```
80#[derive(Debug, Clone)]
81pub struct Settings {
82    pub(crate) out_dir_root: Option<PathBuf>,
83    pub(crate) out_dir_actions_root: Option<PathBuf>,
84    pub(crate) root_dir: Option<PathBuf>,
85
86    pub(crate) prefer_shifts: bool,
87    pub(crate) prefer_shifts_over_empty: bool,
88    pub(crate) table_type: TableType,
89    pub(crate) parser_algo: ParserAlgo,
90    pub(crate) print_table: bool,
91    pub(crate) exclude: Vec<String>,
92    pub(crate) actions: bool,
93    pub(crate) trace: bool,
94
95    pub(crate) lexer_type: LexerType,
96    pub(crate) builder_type: BuilderType,
97    pub(crate) builder_loc_info: bool,
98    pub(crate) generator_table_type: GeneratorTableType,
99    pub(crate) input_type: String,
100
101    pub(crate) lexical_disamb_most_specific: bool,
102    pub(crate) lexical_disamb_longest_match: bool,
103    pub(crate) lexical_disamb_grammar_order: bool,
104
105    pub(crate) partial_parse: bool,
106    pub(crate) skip_ws: bool,
107
108    pub(crate) force: bool,
109    force_explicit: bool,
110
111    pub(crate) dot: bool,
112    pub(crate) fancy_regex: bool,
113}
114
115impl Default for Settings {
116    fn default() -> Self {
117        // If called from cargo build use OUT_DIR as a default out_dir directory
118        // for both parser and actions.
119        let out_dir_root = std::env::var("OUT_DIR").map_or(None, |d| Some(PathBuf::from(d)));
120
121        // By default root dir is the root of the cargo project.
122        let root_dir =
123            std::env::var("CARGO_MANIFEST_DIR").map_or(None, |d| Some(PathBuf::from(d)));
124
125        Self {
126            root_dir,
127            out_dir_root: out_dir_root.clone(),
128            out_dir_actions_root: out_dir_root,
129            prefer_shifts: false,
130            prefer_shifts_over_empty: true,
131            table_type: Default::default(),
132            parser_algo: Default::default(),
133            print_table: false,
134            actions: true,
135            trace: false,
136            lexer_type: Default::default(),
137            builder_type: Default::default(),
138            builder_loc_info: false,
139            generator_table_type: Default::default(),
140            input_type: "str".into(),
141            lexical_disamb_most_specific: true,
142            lexical_disamb_longest_match: true,
143            lexical_disamb_grammar_order: true,
144            partial_parse: false,
145            skip_ws: true,
146            force: true, // Overwriting actions by default
147            force_explicit: false,
148            exclude: vec![],
149            dot: false,
150            fancy_regex: false,
151        }
152    }
153}
154
155impl Settings {
156    /// Creates a default instance.
157    pub fn new() -> Self {
158        Settings::default()
159    }
160
161    /// Root dir used to calculate output file path from the input grammar path
162    /// when the `out_dir_root` is not `None`.
163    /// It can be overridden explicitly or when using `process_dir` call.
164    /// It is an error if `root_dir` is `None`, `our_dir_root` is set and
165    /// `CARGO_MANIFEST_DIR` env variable cannot be found.
166    pub fn root_dir(mut self, root_dir: PathBuf) -> Self {
167        self.root_dir = Some(root_dir);
168        self
169    }
170
171    /// Sets output root for the generated parser. By default, the parser is
172    /// generated in the source tree next to the grammar.
173    pub fn out_dir_root(mut self, out_dir: PathBuf) -> Self {
174        self.out_dir_root = Some(out_dir);
175        self
176    }
177
178    /// Output root for the generated actions when default builder is used. By
179    /// default, actions are generated in the source tree next to the grammar.
180    pub fn out_dir_actions_root(mut self, out_dir: PathBuf) -> Self {
181        self.out_dir_actions_root = Some(out_dir);
182        self
183    }
184
185    /// Generate both parser and actions (for default builder) in the source
186    /// tree, next to the grammar. By default, parser and actions are generated
187    /// in out `OUT_DIR`.
188    pub fn in_source_tree(mut self) -> Self {
189        self.out_dir_root = None;
190        if matches!(self.builder_type, BuilderType::Default) {
191            self.actions_in_source_tree()
192        } else {
193            self
194        }
195    }
196
197    /// Generate actions in the source tree (if the default builder is used),
198    /// next to the grammar. By default, actions are generated in out `OUT_DIR`.
199    pub fn actions_in_source_tree(mut self) -> Self {
200        if !matches!(self.builder_type, BuilderType::Default) {
201            panic!("Settings 'actions_in_source_tree' is only available for the default builder type!");
202        }
203        self.out_dir_actions_root = None;
204        if !self.force_explicit {
205            self.force = false;
206        }
207        self
208    }
209
210    /// Excludes path from processing. If path contains any of the string given
211    /// in `exclude` vector it will be skipped.
212    pub fn exclude(mut self, exclude: Vec<String>) -> Self {
213        self.exclude = exclude;
214        self
215    }
216
217    /// When there are competing REDUCE and SHIFT operations, this settings will
218    /// always favor SHIFT.
219    pub fn prefer_shifts(mut self, prefer: bool) -> Self {
220        self.prefer_shifts = prefer;
221        self
222    }
223
224    /// When there are competing EMPTY reduction and SHIFT, this settings will
225    /// always favor SHIFT.
226    pub fn prefer_shifts_over_empty(mut self, prefer: bool) -> Self {
227        self.prefer_shifts_over_empty = prefer;
228        self
229    }
230
231    /// LR table type to construct.
232    pub fn table_type(mut self, table_type: TableType) -> Self {
233        self.table_type = table_type;
234        self
235    }
236
237    /// LR algorithm to use
238    pub fn parser_algo(mut self, parser_algo: ParserAlgo) -> Self {
239        match parser_algo {
240            ParserAlgo::LR => {}
241            ParserAlgo::GLR => {
242                // For GLR we are using RN tables
243                self.table_type = TableType::LALR_RN;
244                // For GLR we should not favour shifts at all
245                self.prefer_shifts = false;
246                self.prefer_shifts_over_empty = false;
247                // We don't use grammar order by default
248                self.lexical_disamb_grammar_order = false;
249            }
250        }
251        self.parser_algo = parser_algo;
252        self
253    }
254
255    /// Sets lexer type. Default lexer is used for string inputs and is based on
256    /// regex/string matches from the grammar.
257    pub fn lexer_type(mut self, lexer_type: LexerType) -> Self {
258        self.lexer_type = lexer_type;
259        self
260    }
261
262    /// Sets builder type. The default builder will deduce AST types and actions.
263    pub fn builder_type(mut self, builder_type: BuilderType) -> Self {
264        self.builder_type = builder_type;
265        self
266    }
267
268    /// Should generated default AST builder types contain location/layout information
269    /// This is only used if builder-type is default.
270    pub fn builder_loc_info(mut self, builder_loc_info: bool) -> Self {
271        self.builder_loc_info = builder_loc_info;
272        self
273    }
274
275    /// Sets generator table type. The default is nested static arrays.
276    pub fn generator_table_type(mut self, generator_table_type: GeneratorTableType) -> Self {
277        self.generator_table_type = generator_table_type;
278        self
279    }
280
281    /// Sets the input type. Default is `str`
282    pub fn input_type(mut self, input_type: String) -> Self {
283        self.input_type = input_type;
284        self
285    }
286
287    /// Lexical disambiguation using most specific match strategy.
288    pub fn lexical_disamb_most_specific(mut self, most_specific: bool) -> Self {
289        self.lexical_disamb_most_specific = most_specific;
290        self
291    }
292
293    /// Lexical disambiguation using longest match strategy.
294    pub fn lexical_disamb_longest_match(mut self, longest_match: bool) -> Self {
295        self.lexical_disamb_longest_match = longest_match;
296        self
297    }
298
299    /// Lexical disambiguation using grammar order.
300    pub fn lexical_disamb_grammar_order(mut self, grammar_order: bool) -> Self {
301        if let ParserAlgo::LR = self.parser_algo {
302            if !grammar_order {
303                panic!("Can't disable grammar order strategy for LR.")
304            }
305        }
306        self.lexical_disamb_grammar_order = grammar_order;
307        self
308    }
309
310    /// Set whether or not we use [`fancy_regex`](https://docs.rs/fancy-regex/latest/fancy_regex/)
311    /// instead of [`regex`](https://docs.rs/regex/latest/regex/)
312    pub fn fancy_regex(mut self, fancy_regex: bool) -> Self {
313        self.fancy_regex = fancy_regex;
314        self
315    }
316
317    pub fn print_table(mut self, print_table: bool) -> Self {
318        self.print_table = print_table;
319        self
320    }
321
322    /// If partial parse is allowed parsing can succeed even if the parser
323    /// didn't reach the end of the input. Use with care, especially with GLR
324    /// parsing as it may lead to a large number of partial solutions.
325    pub fn partial_parse(mut self, partial_parse: bool) -> Self {
326        self.partial_parse = partial_parse;
327        self
328    }
329
330    /// Should whitespaces be skipped. `true` by default. Not used if Layout
331    /// rule exists in the Grammar. Used only in the default lexer.
332    pub fn skip_ws(mut self, skip_ws: bool) -> Self {
333        self.skip_ws = skip_ws;
334        self
335    }
336
337    /// Should actions be generated. `true` by default. Used only if default
338    /// builder is used.
339    pub fn actions(mut self, actions: bool) -> Self {
340        self.actions = actions;
341        self
342    }
343
344    /// Should trace log be printed. `false` by default. Does nothing for
345    /// release builds as trace is only available in debug build. Can also be
346    /// set by `RUSTEMO_TRACE=1` env variable.
347    pub fn trace(mut self, trace: bool) -> Self {
348        let trace = if !trace {
349            std::env::var("RUSTEMO_TRACE").is_ok()
350        } else {
351            std::env::set_var("RUSTEMO_TRACE", "1");
352            true
353        };
354
355        self.trace = trace;
356        self
357    }
358
359    /// Should actions file be recreated if exist. Use with care.
360    pub fn force(mut self, force: bool) -> Self {
361        self.force = force;
362        self.force_explicit = true;
363        self
364    }
365
366    /// If this is set a .dot file with automata visualization will be produced during
367    /// compiling.
368    pub fn dot(mut self, dot: bool) -> Self {
369        self.dot = dot;
370        self
371    }
372
373    /// Recursively traverse the root dir and process each Rustemo grammar found.
374    /// Used as the last call to the configured [Settings] value.
375    pub fn process_dir(&self) -> Result<()> {
376        if let Some(root_dir) = &self.root_dir {
377            if !root_dir.exists() {
378                return Err(Error::Error(format!(
379                    "Directory/File {root_dir:?} doesn't exist."
380                )));
381            }
382
383            let visitor = |grammar: &Path| -> Result<()> {
384                self.process_grammar(grammar)?;
385                Ok(())
386            };
387
388            self.visit_dirs(root_dir, &visitor)
389        } else {
390            Err(Error::Error("Root dir must be set!".to_string()))
391        }
392    }
393
394    /// Process the given grammar and generates the parser and actions (if
395    /// default builder is used). Used as the last call to the configured
396    /// [Settings] value.
397    pub fn process_grammar(&self, grammar_path: &Path) -> Result<()> {
398        println!(
399            "{} {:?}",
400            "Generating parser for grammar".paint(LOG),
401            grammar_path.paint(LOG)
402        );
403        let relative_outdir = |p: &Path| -> Result<PathBuf> {
404            Ok(p.join(
405                grammar_path
406                    .parent()
407                    .ok_or(Error::Error(format!(
408                        "Cannot find parent of '{grammar_path:?}' file."
409                    )))?
410                    .strip_prefix(self.root_dir.as_ref().expect("'root_dir' must be set!"))
411                    .unwrap_or(grammar_path),
412            ))
413        };
414
415        let out_dir = self
416            .out_dir_root
417            .as_ref()
418            .map(|p| relative_outdir(p))
419            .transpose()?;
420
421        let out_dir_actions = self
422            .out_dir_actions_root
423            .as_ref()
424            .map(|p| relative_outdir(p))
425            .transpose()?;
426
427        if let Some(ref dir) = out_dir {
428            println!("Parser out dir: {dir:?}");
429        }
430        if let Some(ref dir) = out_dir_actions {
431            println!("Actions out dir: {dir:?}");
432        }
433
434        generate_parser(
435            grammar_path,
436            out_dir.as_deref(),
437            out_dir_actions.as_deref(),
438            self,
439        )
440    }
441
442    /// Recursively visits dirs starting from the given `dir` and calls
443    /// `visitor` for each Rustemo grammar found.
444    fn visit_dirs(&self, dir: &Path, visitor: &dyn Fn(&Path) -> Result<()>) -> Result<()> {
445        if dir.is_dir() {
446            for entry in fs::read_dir(dir)? {
447                let entry = entry?;
448                let path = entry.path();
449
450                // Check excluded paths
451                let path_name = path.to_string_lossy();
452                if self.exclude.iter().any(|e| path_name.contains(e)) {
453                    println!("Excluding path: {path_name:?}");
454                    continue;
455                }
456
457                if path.is_dir() {
458                    self.visit_dirs(&path, visitor)?;
459                } else if matches!(path.extension(), Some(ext) if ext == "rustemo") {
460                    visitor(&path)?
461                }
462            }
463        }
464        Ok(())
465    }
466}
467
468/// Recursively process a given dir and generate a parser for each found
469/// grammar with default settings.
470///
471/// # Example
472///
473/// ```rust
474/// rustemo_compiler::process_dir("~/my_project")
475/// ```
476///
477/// # Errors
478///
479/// In case of an error a value of [rustemo::Error] is returned.
480pub fn process_dir<P: AsRef<Path>>(dir: P) -> Result<()> {
481    Settings::new()
482        .root_dir(PathBuf::from(dir.as_ref()))
483        .process_dir()?;
484    Ok(())
485}
486
487/// A shortcut function which creates default [Settings] and use it to process
488/// the crate project directory.
489pub fn process_crate_dir() -> Result<()> {
490    Settings::new().process_dir()?;
491    Ok(())
492}
493
494/// Generates a parser from the given grammar file with default settings.
495///
496/// # Errors
497///
498/// In case of an error a value of [rustemo::Error] is returned.
499pub fn process_grammar<P: AsRef<Path>>(grammar: P) -> Result<()> {
500    Settings::new().process_grammar(grammar.as_ref())?;
501    Ok(())
502}