lrpar/
ctbuilder.rs

1//! Build grammars at compile-time so that they can be statically included into a binary.
2
3use std::{
4    any::type_name,
5    borrow::Cow,
6    collections::{HashMap, HashSet},
7    env::{current_dir, var},
8    error::Error,
9    fmt::{self, Debug, Write as fmtWrite},
10    fs::{self, create_dir_all, read_to_string, File},
11    hash::Hash,
12    io::{self, Write},
13    marker::PhantomData,
14    path::{Path, PathBuf},
15    sync::Mutex,
16};
17
18use bincode::{deserialize, serialize_into};
19use cfgrammar::{
20    newlinecache::NewlineCache,
21    yacc::{ast::ASTWithValidityInfo, YaccGrammar, YaccKind, YaccOriginalActionKind},
22    RIdx, Spanned, Symbol,
23};
24use filetime::FileTime;
25use lazy_static::lazy_static;
26use lrtable::{from_yacc, statetable::Conflicts, Minimiser, StateGraph, StateTable};
27use num_traits::{AsPrimitive, PrimInt, Unsigned};
28use regex::Regex;
29use serde::{de::DeserializeOwned, Serialize};
30
31use crate::{LexerTypes, RecoveryKind};
32
// Prefixes for generated action functions / global items so they cannot collide with
// user-written identifiers in action code (presumably used by the action/wrapper
// generators later in this file — not all uses are visible in this chunk).
const ACTION_PREFIX: &str = "__gt_";
const GLOBAL_PREFIX: &str = "__GT_";
// Names for the generated enum wrapping the per-rule action return types
// (used by the code generators later in this file).
const ACTIONS_KIND: &str = "__GtActionsKind";
const ACTIONS_KIND_PREFIX: &str = "Ak";
const ACTIONS_KIND_HIDDEN: &str = "__GtActionsKindHidden";

// Extension appended to the grammar file's leaf name to form the output file name
// (e.g. `b.y` -> `b.y.rs`).
const RUST_FILE_EXT: &str = "rs";

// Names of the generated constants holding the bincode-serialised grammar and state table.
const GRM_CONST_NAME: &str = "__GRM_DATA";
const STABLE_CONST_NAME: &str = "__STABLE_DATA";

lazy_static! {
    // Matches `$<n>` positional references (e.g. `$1`) in user action code.
    static ref RE_DOL_NUM: Regex = Regex::new(r"\$([0-9]+)").unwrap();
    // Every output path a parser has been generated to in this process: `build` uses this
    // to reject two parsers being written to the same file.
    // NOTE(review): once MSRV permits, `std::sync::LazyLock` could replace `lazy_static!`.
    static ref GENERATED_PATHS: Mutex<HashSet<PathBuf>> = Mutex::new(HashSet::new());
}
48
/// Error returned (boxed) by [CTParserBuilder::build] when the grammar contains
/// Shift/Reduce or Reduce/Reduce conflicts beyond those the grammar declares as
/// expected. Wraps the state table so the conflict counts can be reported.
struct CTConflictsError<StorageT: Eq + Hash> {
    stable: StateTable<StorageT>,
}
52
53impl<StorageT> fmt::Display for CTConflictsError<StorageT>
54where
55    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
56    usize: AsPrimitive<StorageT>,
57{
58    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
59        let conflicts = self.stable.conflicts().unwrap();
60        write!(
61            f,
62            "CTConflictsError{{{} Reduce/Reduce, {} Shift/Reduce}}",
63            conflicts.rr_len(),
64            conflicts.sr_len()
65        )
66    }
67}
68
69impl<StorageT> fmt::Debug for CTConflictsError<StorageT>
70where
71    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
72    usize: AsPrimitive<StorageT>,
73{
74    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
75        let conflicts = self.stable.conflicts().unwrap();
76        write!(
77            f,
78            "CTConflictsError{{{} Reduce/Reduce, {} Shift/Reduce}}",
79            conflicts.rr_len(),
80            conflicts.sr_len()
81        )
82    }
83}
84
// `Error` with the default methods: the `Display`/`Debug` impls provide the
// human-readable description.
impl<StorageT> Error for CTConflictsError<StorageT>
where
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    usize: AsPrimitive<StorageT>,
{
}
91
/// A string which uses `Display` for its `Debug` impl.
///
/// Used to turn pre-formatted warning/error text into a `Box<dyn Error>` without
/// `Debug`'s quoting/escaping leaking into user-facing output.
struct ErrorString(String);
impl fmt::Display for ErrorString {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let ErrorString(s) = self;
        write!(f, "{}", s)
    }
}
impl fmt::Debug for ErrorString {
    // Deliberately identical to `Display`: delegate so the two cannot drift apart.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(self, f)
    }
}
impl Error for ErrorString {}
107
/// Specify the visibility of the module generated by [CTParserBuilder].
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Visibility {
    /// Module-level visibility only.
    Private,
    /// `pub`
    Public,
    /// `pub(super)`
    PublicSuper,
    /// `pub(self)`
    PublicSelf,
    /// `pub(crate)`
    PublicCrate,
    /// `pub(in {arg})`
    PublicIn(String),
}
124
/// Specifies the [Rust Edition] that will be emitted during code generation.
///
/// [Rust Edition]: https://doc.rust-lang.org/edition-guide/rust-2021/index.html
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum RustEdition {
    /// The 2015 edition of Rust.
    Rust2015,
    /// The 2018 edition of Rust.
    Rust2018,
    /// The 2021 edition of Rust.
    Rust2021,
}
134
135impl Visibility {
136    fn cow_str(&self) -> Cow<'static, str> {
137        match self {
138            Visibility::Private => Cow::from(""),
139            Visibility::Public => Cow::from("pub"),
140            Visibility::PublicSuper => Cow::from("pub(super)"),
141            Visibility::PublicSelf => Cow::from("pub(self)"),
142            Visibility::PublicCrate => Cow::from("pub(crate)"),
143            Visibility::PublicIn(data) => Cow::from(format!("pub(in {})", data)),
144        }
145    }
146}
147
/// A `CTParserBuilder` allows one to specify the criteria for building a statically generated
/// parser.
pub struct CTParserBuilder<'a, LexerTypesT: LexerTypes>
where
    LexerTypesT::StorageT: Eq + Hash,
    usize: AsPrimitive<LexerTypesT::StorageT>,
{
    // Anything stored in here (except `output_path`, `conflicts`, and `error_on_conflicts`) almost
    // certainly needs to be included as part of the rebuild_cache function below so that, if it's
    // changed, the grammar is rebuilt.
    // Path to the input `.y` grammar file.
    grammar_path: Option<PathBuf>,
    // Path the generated `.rs` module is written to.
    output_path: Option<PathBuf>,
    // Name of the generated module; derived from the grammar filename if `None`.
    mod_name: Option<&'a str>,
    // Error-recovery strategy compiled into the parser.
    recoverer: RecoveryKind,
    // Must be set before `build` is called; `Eco` is rejected at build time.
    yacckind: Option<YaccKind>,
    // If true, `build` errors on unexpected Shift/Reduce or Reduce/Reduce conflicts.
    error_on_conflicts: bool,
    // If true, `build` errors when the grammar produces warnings.
    warnings_are_errors: bool,
    // If true, warnings are printed (via cargo when running under cargo).
    show_warnings: bool,
    // Visibility of the generated module.
    visibility: Visibility,
    // Rust edition targeted by the generated code.
    rust_edition: RustEdition,
    // Ties the otherwise-unused `LexerTypesT` parameter to this type.
    phantom: PhantomData<LexerTypesT>,
}
170
171impl<
172        'a,
173        StorageT: 'static + Debug + Hash + PrimInt + Serialize + Unsigned,
174        LexerTypesT: LexerTypes<StorageT = StorageT>,
175    > CTParserBuilder<'a, LexerTypesT>
176where
177    usize: AsPrimitive<StorageT>,
178{
179    /// Create a new `CTParserBuilder`.
180    ///
181    /// `StorageT` must be an unsigned integer type (e.g. `u8`, `u16`) which is:
182    ///   * big enough to index (separately) all the tokens, rules, productions in the grammar,
183    ///   * big enough to index the state table created from the grammar,
184    ///   * less than or equal in size to `u32`.
185    ///
186    /// In other words, if you have a grammar with 256 tokens, 256 rules, and 256 productions,
187    /// which creates a state table of 256 states you can safely specify `u8` here; but if any of
188    /// those counts becomes 257 or greater you will need to specify `u16`. If you are parsing
189    /// large files, the additional storage requirements of larger integer types can be noticeable,
190    /// and in such cases it can be worth specifying a smaller type. `StorageT` defaults to `u32`
191    /// if unspecified.
192    ///
193    /// # Examples
194    ///
195    /// ```text
196    /// CTParserBuilder::<DefaultLexerTypes<u8>>::new()
197    ///     .grammar_in_src_dir("grm.y")?
198    ///     .build()?;
199    /// ```
200    pub fn new() -> Self {
201        CTParserBuilder {
202            grammar_path: None,
203            output_path: None,
204            mod_name: None,
205            recoverer: RecoveryKind::CPCTPlus,
206            yacckind: None,
207            error_on_conflicts: true,
208            warnings_are_errors: true,
209            show_warnings: true,
210            visibility: Visibility::Private,
211            rust_edition: RustEdition::Rust2021,
212            phantom: PhantomData,
213        }
214    }
215
216    /// Set the input grammar path to a file relative to this project's `src` directory. This will
217    /// also set the output path (i.e. you do not need to call [CTParserBuilder::output_path]).
218    ///
219    /// For example if `a/b.y` is passed as `inp` then [CTParserBuilder::build] will:
220    ///   * use `src/a/b.y` as the input file.
221    ///   * write output to a file which can then be imported by calling `lrpar_mod!("a/b.y")`.
222    ///   * create a module in that output file named `b_y`.
223    ///
224    /// You can override the output path and/or module name by calling [CTParserBuilder::output_path]
225    /// and/or [CTParserBuilder::mod_name], respectively, after calling this function.
226    ///
227    /// This is a convenience function that makes it easier to compile grammar files stored in a
228    /// project's `src/` directory: please see [CTParserBuilder::build] for additional constraints
229    /// and information about the generated files. Note also that each `.y` file can only be
230    /// processed once using this function: if you want to generate multiple grammars from a single
231    /// `.y` file, you will need to use [CTParserBuilder::output_path].
232    pub fn grammar_in_src_dir<P>(mut self, srcp: P) -> Result<Self, Box<dyn Error>>
233    where
234        P: AsRef<Path>,
235    {
236        if !srcp.as_ref().is_relative() {
237            return Err(format!(
238                "Grammar path '{}' must be a relative path.",
239                srcp.as_ref().to_str().unwrap_or("<invalid UTF-8>")
240            )
241            .into());
242        }
243
244        let mut grmp = current_dir()?;
245        grmp.push("src");
246        grmp.push(srcp.as_ref());
247        self.grammar_path = Some(grmp);
248
249        let mut outp = PathBuf::new();
250        outp.push(var("OUT_DIR").unwrap());
251        outp.push(srcp.as_ref().parent().unwrap().to_str().unwrap());
252        create_dir_all(&outp)?;
253        let mut leaf = srcp
254            .as_ref()
255            .file_name()
256            .unwrap()
257            .to_str()
258            .unwrap()
259            .to_owned();
260        write!(leaf, ".{}", RUST_FILE_EXT).ok();
261        outp.push(leaf);
262        Ok(self.output_path(outp))
263    }
264
265    /// Set the input grammar path to `inp`. If specified, you must also call
266    /// [CTParserBuilder::output_path]. In general it is easier to use
267    /// [CTParserBuilder::grammar_in_src_dir].
268    pub fn grammar_path<P>(mut self, inp: P) -> Self
269    where
270        P: AsRef<Path>,
271    {
272        self.grammar_path = Some(inp.as_ref().to_owned());
273        self
274    }
275
276    /// Set the output grammar path to `outp`. Note that there are no requirements on `outp`: the
277    /// file can exist anywhere you can create a valid [Path] to. However, if you wish to use
278    /// [crate::lrpar_mod!] you will need to make sure that `outp` is in
279    /// [std::env::var]`("OUT_DIR")` or one of its subdirectories.
280    pub fn output_path<P>(mut self, outp: P) -> Self
281    where
282        P: AsRef<Path>,
283    {
284        self.output_path = Some(outp.as_ref().to_owned());
285        self
286    }
287
288    /// Set the generated module name to `mod_name`. If no module name is specified,
289    /// [CTParserBuilder::build] will attempt to create a sensible default based on the grammar
290    /// filename.
291    pub fn mod_name(mut self, mod_name: &'a str) -> Self {
292        self.mod_name = Some(mod_name);
293        self
294    }
295
296    /// Set the visibility of the generated module to `vis`. Defaults to `Visibility::Private`.
297    pub fn visibility(mut self, vis: Visibility) -> Self {
298        self.visibility = vis;
299        self
300    }
301
302    /// Set the recoverer for this parser to `rk`. Defaults to `RecoveryKind::CPCTPlus`.
303    pub fn recoverer(mut self, rk: RecoveryKind) -> Self {
304        self.recoverer = rk;
305        self
306    }
307
308    /// Set the `YaccKind` for this parser to `ak`.
309    pub fn yacckind(mut self, yk: YaccKind) -> Self {
310        self.yacckind = Some(yk);
311        self
312    }
313
314    /// If set to true, [CTParserBuilder::build] will return an error if the given grammar contains
315    /// any Shift/Reduce or Reduce/Reduce conflicts. Defaults to `true`.
316    pub fn error_on_conflicts(mut self, b: bool) -> Self {
317        self.error_on_conflicts = b;
318        self
319    }
320
321    /// If set to true, [CTParserBuilder::build] will return an error if the given grammar contains
322    /// any warnings. Defaults to `true`.
323    pub fn warnings_are_errors(mut self, b: bool) -> Self {
324        self.warnings_are_errors = b;
325        self
326    }
327
328    /// If set to true, [CTParserBuilder::build] will print warnings to stderr, or via cargo when
329    /// running under cargo. Defaults to `true`.
330    pub fn show_warnings(mut self, b: bool) -> Self {
331        self.show_warnings = b;
332        self
333    }
334
335    /// Sets the rust edition to be used for generated code. Defaults to the latest edition of
336    /// rust supported by grmtools.
337    pub fn rust_edition(mut self, edition: RustEdition) -> Self {
338        self.rust_edition = edition;
339        self
340    }
341
    /// Statically compile the Yacc file specified by [CTParserBuilder::grammar_path()] into Rust,
    /// placing the output into the file specified by [CTParserBuilder::output_path()], overwriting
    /// any existing file at that path. The serialised grammar and state table are embedded into
    /// the generated file as constants (no additional files are written).
    ///
    /// If `%parse-param` is not specified, the generated module follows the form:
    ///
    /// ```text
    ///   mod <modname> {
    ///     pub fn parse<'lexer, 'input: 'lexer>(lexer: &'lexer dyn NonStreamingLexer<...>)
    ///       -> (Option<ActionT>, Vec<LexParseError<...>> { ... }
    ///
    ///     pub fn token_epp<'a>(tidx: ::cfgrammar::TIdx<StorageT>) -> ::std::option::Option<&'a str> {
    ///       ...
    ///     }
    ///
    ///     ...
    ///   }
    /// ```
    ///
    /// If `%parse-param x: t` is specified, the generated module follows the form:
    ///
    /// ```text
    ///   mod <modname> {
    ///     pub fn parse<'lexer, 'input: 'lexer>(lexer: &'lexer dyn NonStreamingLexer<...>, x: t)
    ///       -> (Option<ActionT>, Vec<LexParseError<...>> { ... }
    ///
    ///     pub fn token_epp<'a>(tidx: ::cfgrammar::TIdx<StorageT>) -> ::std::option::Option<&'a str> {
    ///       ...
    ///     }
    ///
    ///     ...
    ///   }
    /// ```
    ///
    /// where:
    ///  * `modname` is either:
    ///    * the module name specified by [CTParserBuilder::mod_name()];
    ///    * or, if no module name was explicitly specified, then for the file `/a/b/c.y` the
    ///      module name is `c_y` (i.e. the file's leaf name, minus its extension, with a prefix of
    ///      `_y`).
    ///  * `ActionT` is either:
    ///    * if the `yacckind` was set to `YaccKind::GrmTools` or
    ///      `YaccKind::Original(YaccOriginalActionKind::UserAction)`, it is
    ///      the return type of the `%start` rule;
    ///    * or, if the `yacckind` was set to
    ///      `YaccKind::Original(YaccOriginalActionKind::GenericParseTree)`, it
    ///      is [`crate::Node<StorageT>`].
    ///
    /// # Panics
    ///
    /// If `StorageT` is not big enough to index the grammar's tokens, rules, or productions;
    /// or if `grammar_path`, `output_path`, or `yacckind` have not been set.
    pub fn build(self) -> Result<CTParser<StorageT>, Box<dyn Error>> {
        // The three mandatory settings: missing any of them is a caller bug, hence panics.
        let grmp = self
            .grammar_path
            .as_ref()
            .expect("grammar_path must be specified before processing.");
        let outp = self
            .output_path
            .as_ref()
            .expect("output_path must be specified before processing.");
        let yk = match self.yacckind {
            None => panic!("yacckind must be specified before processing."),
            Some(YaccKind::Original(x)) => YaccKind::Original(x),
            Some(YaccKind::Grmtools) => YaccKind::Grmtools,
            Some(YaccKind::Eco) => panic!("Eco compile-time grammar generation not supported."),
        };

        // Refuse to generate two parsers into the same output file: the second would silently
        // overwrite the first.
        {
            let mut lk = GENERATED_PATHS.lock().unwrap();
            if lk.contains(outp.as_path()) {
                return Err(format!("Generating two parsers to the same path ('{}') is not allowed: use CTParserBuilder::output_path (and, optionally, CTParserBuilder::mod_name) to differentiate them.", &outp.to_str().unwrap()).into());
            }
            lk.insert(outp.clone());
        }

        let inc =
            read_to_string(grmp).map_err(|e| format!("When reading '{}': {e}", grmp.display()))?;
        let ast_validation = ASTWithValidityInfo::new(yk, &inc);
        let warnings = ast_validation.ast().warnings();
        // Format a warning/error together with its line and column, when the byte offset can be
        // resolved against the input.
        let spanned_fmt = |x: &dyn Spanned, inc: &str, line_cache: &NewlineCache| {
            if let Some((line, column)) =
                line_cache.byte_to_line_num_and_col_num(inc, x.spans()[0].start())
            {
                format!("{} at line {line} column {column}", x)
            } else {
                format!("{}", x)
            }
        };

        let res = YaccGrammar::<StorageT>::new_from_ast_with_validity_info(yk, &ast_validation);
        let grm = match res {
            // The grammar built, but warnings were produced and are configured to be fatal.
            Ok(_) if self.warnings_are_errors && !warnings.is_empty() => {
                let mut line_cache = NewlineCache::new();
                line_cache.feed(&inc);
                return Err(ErrorString(if warnings.len() > 1 {
                    // Indent under the "Error:" prefix.
                    format!(
                        "\n\t{}",
                        warnings
                            .iter()
                            .map(|w| spanned_fmt(w, &inc, &line_cache))
                            .collect::<Vec<_>>()
                            .join("\n\t")
                    )
                } else {
                    spanned_fmt(warnings.first().unwrap(), &inc, &line_cache)
                }))?;
            }
            Ok(grm) => {
                if !warnings.is_empty() {
                    let mut line_cache = NewlineCache::new();
                    line_cache.feed(&inc);
                    for w in warnings {
                        // Assume if this variable is set we are running under cargo.
                        if std::env::var("OUT_DIR").is_ok() && self.show_warnings {
                            println!("cargo:warning={}", spanned_fmt(&w, &inc, &line_cache));
                        } else if self.show_warnings {
                            eprintln!("{}", spanned_fmt(&w, &inc, &line_cache));
                        }
                    }
                }
                grm
            }
            // The grammar failed to build: report all errors (and any warnings) together.
            Err(errs) => {
                let mut line_cache = NewlineCache::new();
                line_cache.feed(&inc);
                return Err(ErrorString(if errs.len() + warnings.len() > 1 {
                    // Indent under the "Error:" prefix.
                    format!(
                        "\n\t{}",
                        errs.iter()
                            .map(|e| spanned_fmt(e, &inc, &line_cache))
                            .chain(warnings.iter().map(|w| spanned_fmt(w, &inc, &line_cache)))
                            .collect::<Vec<_>>()
                            .join("\n\t")
                    )
                } else {
                    spanned_fmt(errs.first().unwrap(), &inc, &line_cache)
                }))?;
            }
        };

        // Map each token name to its storage ID: this is both returned to the caller (so a lexer
        // can be kept in sync) and recorded in the cache below.
        let rule_ids = grm
            .tokens_map()
            .iter()
            .map(|(&n, &i)| (n.to_owned(), i.as_storaget()))
            .collect::<HashMap<_, _>>();
        let cache = self.rebuild_cache(&grm);

        // We don't need to go through the full rigmarole of generating an output file if all of
        // the following are true: the output file exists; it is newer than the input file; and the
        // cache hasn't changed. The last of these might be surprising, but it's vital: we don't
        // know, for example, what the IDs map might be from one run to the next, and it might
        // change for reasons beyond lrpar's control. If it does change, that means that the lexer
        // and lrpar would get out of sync, so we have to play it safe and regenerate in such
        // cases.
        if let Ok(ref inmd) = fs::metadata(grmp) {
            if let Ok(ref out_rs_md) = fs::metadata(outp) {
                if FileTime::from_last_modification_time(out_rs_md)
                    > FileTime::from_last_modification_time(inmd)
                {
                    if let Ok(outc) = read_to_string(outp) {
                        if outc.contains(&cache) {
                            return Ok(CTParser {
                                regenerated: false,
                                rule_ids,
                                conflicts: None,
                            });
                        }
                    }
                }
            }
        }

        // At this point, we know we're going to generate fresh output; however, if something goes
        // wrong in the process between now and us writing /out/blah.rs, rustc thinks that
        // everything's gone swimmingly (even if build.rs errored!), and tries to carry on
        // compilation, leading to weird errors. We therefore delete /out/blah.rs at this point,
        // which means, at worse, the user gets a "file not found" error from rustc (which is less
        // confusing than the alternatives).
        fs::remove_file(outp).ok();

        let (sgraph, stable) = from_yacc(&grm, Minimiser::Pager)?;
        if self.error_on_conflicts {
            if let Some(c) = stable.conflicts() {
                // Tolerate conflicts only if their counts exactly match the grammar's declared
                // expectations (`grm.expect()` for Shift/Reduce, `grm.expectrr()` for
                // Reduce/Reduce); anything else is an error.
                match (grm.expect(), grm.expectrr()) {
                    (Some(i), Some(j)) if i == c.sr_len() && j == c.rr_len() => (),
                    (Some(i), None) if i == c.sr_len() && 0 == c.rr_len() => (),
                    (None, Some(j)) if 0 == c.sr_len() && j == c.rr_len() => (),
                    (None, None) if 0 == c.rr_len() && 0 == c.sr_len() => (),
                    _ => return Err(Box::new(CTConflictsError { stable })),
                }
            }
        }

        let mod_name = match self.mod_name {
            Some(s) => s.to_owned(),
            None => {
                // The user hasn't specified a module name, so we create one automatically: what we
                // do is strip off all the filename extensions (note that it's likely that inp ends
                // with `y.rs`, so we potentially have to strip off more than one extension) and
                // then add `_y` to the end.
                let mut stem = grmp.to_str().unwrap();
                loop {
                    let new_stem = Path::new(stem).file_stem().unwrap().to_str().unwrap();
                    if stem == new_stem {
                        break;
                    }
                    stem = new_stem;
                }
                format!("{}_y", stem)
            }
        };

        self.output_file(&grm, &stable, &mod_name, outp, &cache)?;
        // Conflicts are only retained (for later inspection) if the state table has any.
        let conflicts = if stable.conflicts().is_some() {
            Some((grm, sgraph, stable))
        } else {
            None
        };
        Ok(CTParser {
            regenerated: true,
            rule_ids,
            conflicts,
        })
    }
569
570    /// Given the filename `a/b.y` as input, statically compile the grammar `src/a/b.y` into a Rust
571    /// module which can then be imported using `lrpar_mod!("a/b.y")`. This is a convenience
572    /// function around [`process_file`](#method.process_file) which makes it easier to compile
573    /// grammar files stored in a project's `src/` directory: please see
574    /// [`process_file`](#method.process_file) for additional constraints and information about the
575    /// generated files.
576    #[deprecated(
577        since = "0.11.0",
578        note = "Please use grammar_in_src_dir(), build(), and token_map() instead"
579    )]
580    #[allow(deprecated)]
581    pub fn process_file_in_src(
582        &mut self,
583        srcp: &str,
584    ) -> Result<HashMap<String, StorageT>, Box<dyn Error>> {
585        let mut inp = current_dir()?;
586        inp.push("src");
587        inp.push(srcp);
588        let mut outp = PathBuf::new();
589        outp.push(var("OUT_DIR").unwrap());
590        outp.push(Path::new(srcp).parent().unwrap().to_str().unwrap());
591        create_dir_all(&outp)?;
592        let mut leaf = Path::new(srcp)
593            .file_name()
594            .unwrap()
595            .to_str()
596            .unwrap()
597            .to_owned();
598        write!(leaf, ".{}", RUST_FILE_EXT).ok();
599        outp.push(leaf);
600        self.process_file(inp, outp)
601    }
602
603    /// Statically compile the Yacc file `inp` into Rust, placing the output into the file `outp`.
604    /// Note that three additional files will be created with the same name as `outp` but with the
605    /// extensions `grm`, and `stable`, overwriting any existing files with those names.
606    ///
607    /// `outp` defines a module as follows:
608    ///
609    /// ```text
610    ///   mod modname {
611    ///     pub fn parse(lexemes: &::std::vec::Vec<::lrpar::Lexeme<StorageT>>) { ... }
612    ///         -> (::std::option::Option<ActionT>,
613    ///             ::std::vec::Vec<::lrpar::LexParseError<StorageT>>)> { ...}
614    ///
615    ///     pub fn token_epp<'a>(tidx: ::cfgrammar::TIdx<StorageT>) -> ::std::option::Option<&'a str> {
616    ///       ...
617    ///     }
618    ///
619    ///     ...
620    ///   }
621    /// ```
622    ///
623    /// where:
624    ///  * `modname` is either:
625    ///    * the module name specified [`mod_name`](#method.mod_name)
626    ///    * or, if no module name was explicitly specified, then for the file `/a/b/c.y` the
627    ///      module name is `c_y` (i.e. the file's leaf name, minus its extension, with a prefix of
628    ///      `_y`).
629    ///  * `ActionT` is either:
630    ///    * the `%actiontype` value given to the grammar
631    ///    * or, if the `yacckind` was set YaccKind::Original(YaccOriginalActionKind::UserAction),
632    ///      it is [`Node<StorageT>`](../parser/enum.Node.html)
633    ///
634    /// # Panics
635    ///
636    /// If `StorageT` is not big enough to index the grammar's tokens, rules, or
637    /// productions.
638    #[deprecated(
639        since = "0.11.0",
640        note = "Please use grammar_path(), output_path(), build(), and token_map() instead"
641    )]
642    #[allow(deprecated)]
643    pub fn process_file<P, Q>(
644        &mut self,
645        inp: P,
646        outp: Q,
647    ) -> Result<HashMap<String, StorageT>, Box<dyn Error>>
648    where
649        P: AsRef<Path>,
650        Q: AsRef<Path>,
651    {
652        self.grammar_path = Some(inp.as_ref().to_owned());
653        self.output_path = Some(outp.as_ref().to_owned());
654        let cl: CTParserBuilder<LexerTypesT> = CTParserBuilder {
655            grammar_path: self.grammar_path.clone(),
656            output_path: self.output_path.clone(),
657            mod_name: self.mod_name,
658            recoverer: self.recoverer,
659            yacckind: self.yacckind,
660            error_on_conflicts: self.error_on_conflicts,
661            warnings_are_errors: self.warnings_are_errors,
662            show_warnings: self.show_warnings,
663            visibility: self.visibility.clone(),
664            rust_edition: self.rust_edition,
665            phantom: PhantomData,
666        };
667        Ok(cl.build()?.rule_ids)
668    }
669
670    fn output_file<P: AsRef<Path>>(
671        &self,
672        grm: &YaccGrammar<StorageT>,
673        stable: &StateTable<StorageT>,
674        mod_name: &str,
675        outp_rs: P,
676        cache: &str,
677    ) -> Result<(), Box<dyn Error>> {
678        let mut outs = String::new();
679        writeln!(outs, "{} mod {} {{", self.visibility.cow_str(), mod_name).ok();
680        // Emit user program section, and actions at the top so they may specify inner attributes.
681        if let Some(YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools) =
682            self.yacckind
683        {
684            outs.push_str(&self.gen_user_actions(grm)?);
685        }
686        outs.push_str("    mod _parser_ {\n");
687        outs.push_str(
688            "        #![allow(clippy::type_complexity)]
689        #![allow(clippy::unnecessary_wraps)]
690        #![deny(unsafe_code)]
691        #[allow(unused_imports)]
692        use super::*;
693",
694        );
695
696        outs.push_str(&self.gen_parse_function(grm, stable)?);
697        outs.push_str(&self.gen_rule_consts(grm));
698        outs.push_str(&self.gen_token_epp(grm));
699        match self.yacckind.unwrap() {
700            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
701                outs.push_str(&self.gen_wrappers(grm));
702            }
703            YaccKind::Original(YaccOriginalActionKind::NoAction)
704            | YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => (),
705            _ => unreachable!(),
706        }
707        outs.push_str("    } // End of `mod _parser_`\n\n");
708        outs.push_str("    #[allow(unused_imports)]\n");
709        outs.push_str("    pub use _parser_::*;\n");
710        outs.push_str("    #[allow(unused_imports)]\n");
711        outs.push_str("    use ::lrpar::Lexeme;\n");
712        outs.push_str("} // End of `mod {mod_name}` \n\n");
713
714        // Output the cache so that we can check whether the IDs map is stable.
715        outs.push_str(cache);
716
717        let mut f = File::create(outp_rs)?;
718        f.write_all(outs.as_bytes())?;
719
720        Ok(())
721    }
722
723    /// Generate the cache, which determines if anything's changed enough that we need to
724    /// regenerate outputs and force rustc to recompile.
725    fn rebuild_cache(&self, grm: &YaccGrammar<StorageT>) -> String {
726        // We don't need to be particularly clever here: we just need to record the various things
727        // that could change between builds.
728        let mut cache = String::new();
729        cache.push_str("\n/* CACHE INFORMATION\n");
730
731        // Record the time that this version of lrpar was built. If the source code changes and
732        // rustc forces a recompile, this will change this value, causing anything which depends on
733        // this build of lrpar to be recompiled too.
734        writeln!(cache, "   Build time: {:?}", env!("VERGEN_BUILD_TIMESTAMP")).ok();
735
736        writeln!(cache, "   Grammar path: {:?}", self.grammar_path).ok();
737        writeln!(cache, "   Mod name: {:?}", self.mod_name).ok();
738        writeln!(cache, "   Recoverer: {:?}", self.recoverer).ok();
739        writeln!(cache, "   YaccKind: {:?}", self.yacckind).ok();
740        writeln!(cache, "   Visibility: {:?}", self.visibility.cow_str()).ok();
741        writeln!(
742            cache,
743            "   Error on conflicts: {:?}\n",
744            self.error_on_conflicts
745        )
746        .ok();
747
748        // Record the rule IDs map
749        for tidx in grm.iter_tidxs() {
750            let n = match grm.token_name(tidx) {
751                Some(n) => format!("'{}'", n),
752                None => "<unknown>".to_string(),
753            };
754            writeln!(cache, "   {} {}", usize::from(tidx), n).ok();
755        }
756
757        cache.push_str("*/\n");
758        cache
759    }
760
    /// Generate the main parse() function for the output file.
    ///
    /// The generated function deserialises the embedded grammar and state table
    /// constants and drives an `::lrpar::RTParserBuilder`; its signature and return type
    /// depend on `self.yacckind`.
    fn gen_parse_function(
        &self,
        grm: &YaccGrammar<StorageT>,
        stable: &StateTable<StorageT>,
    ) -> Result<String, Box<dyn Error>> {
        let mut outs = String::new();

        // The grammar and state table are serialised (in bincode format) into byte-array
        // constants which are embedded in the generated source code.
        serialize_bin_output(grm, GRM_CONST_NAME, &mut outs)?;
        serialize_bin_output(stable, STABLE_CONST_NAME, &mut outs)?;

        // Emit the signature of parse(): each action kind has a different signature.
        match self.yacckind.unwrap() {
            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
                let parse_param = match grm.parse_param() {
                    Some((name, tyname)) => format!(", {}: {}", name, tyname),
                    None => "".to_owned(),
                };
                write!(outs,
                    "
    #[allow(dead_code)]
    pub fn parse<'lexer, 'input: 'lexer>(
        lexer: &'lexer dyn ::lrpar::NonStreamingLexer<'input, {lexertypest}>{parse_param})
          -> (::std::option::Option<{actiont}>, ::std::vec::Vec<::lrpar::LexParseError<{storaget}, {lexertypest}>>)
    {{",
                    storaget = type_name::<StorageT>(),
                    lexertypest = type_name::<LexerTypesT>(),
                    parse_param = parse_param,
                    actiont = grm.actiontype(self.user_start_ridx(grm)).as_ref().unwrap(),
                ).ok();
            }
            YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => {
                write!(
                    outs,
                    "
    #[allow(dead_code)]
    pub fn parse(lexer: &dyn ::lrpar::NonStreamingLexer<{lexertypest}>)
          -> (::std::option::Option<::lrpar::Node<<{lexertypest} as ::lrpar::LexerTypes>::LexemeT, {storaget}>>,
              ::std::vec::Vec<::lrpar::LexParseError<{storaget}, {lexertypest}>>)
    {{",
                    storaget = type_name::<StorageT>(),
                    lexertypest = type_name::<LexerTypesT>(),
                )
                .ok();
            }
            YaccKind::Original(YaccOriginalActionKind::NoAction) => {
                write!(
                    outs,
                    "
    #[allow(dead_code)]
    pub fn parse(lexer: &dyn ::lrpar::NonStreamingLexer<{lexertypest}>)
          -> ::std::vec::Vec<::lrpar::LexParseError<{storaget}, {lexertypest}>>
    {{",
                    storaget = type_name::<StorageT>(),
                    lexertypest = type_name::<LexerTypesT>(),
                )
                .ok();
            }
            YaccKind::Eco => unreachable!(),
        };

        // The function body starts by reconstituting the grammar and state table from
        // the embedded constants.
        write!(
            outs,
            "
        let (grm, stable) = ::lrpar::ctbuilder::_reconstitute({}, {});",
            GRM_CONST_NAME, STABLE_CONST_NAME
        )
        .ok();

        // Name of the `::lrpar::RecoveryKind` variant used in the generated code.
        let recoverer = match self.recoverer {
            RecoveryKind::CPCTPlus => "CPCTPlus",
            RecoveryKind::None => "None",
        };
        // Emit the rest of the body: build an RTParserBuilder and call the parse method
        // appropriate to the action kind.
        match self.yacckind.unwrap() {
            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
                // References to the per-production action wrapper functions (see
                // `gen_wrappers`).
                let wrappers = grm
                    .iter_pidxs()
                    .map(|pidx| {
                        format!(
                            "&{prefix}wrapper_{}",
                            usize::from(pidx),
                            prefix = ACTION_PREFIX
                        )
                    })
                    .collect::<Vec<_>>()
                    .join(",\n                        ");
                let (parse_param, parse_paramty) = match grm.parse_param() {
                    Some((name, tyname)) => (name.clone(), tyname.clone()),
                    None => ("()".to_owned(), "()".to_owned()),
                };
                write!(outs,
                    "\n        #[allow(clippy::type_complexity)]
        let actions: ::std::vec::Vec<&dyn Fn(::cfgrammar::RIdx<{storaget}>,
                       &'lexer dyn ::lrpar::NonStreamingLexer<'input, {lexertypest}>,
                       ::cfgrammar::Span,
                       ::std::vec::Drain<{edition_lifetime} ::lrpar::parser::AStackType<<{lexertypest} as ::lrpar::LexerTypes>::LexemeT, {actionskind}<'input>>>,
                       {parse_paramty})
                    -> {actionskind}<'input>> = ::std::vec![{wrappers}];\n",
                    actionskind = ACTIONS_KIND,
                    storaget = type_name::<StorageT>(),
                    lexertypest = type_name::<LexerTypesT>(),
                    parse_paramty = parse_paramty,
                    wrappers = wrappers,
                    edition_lifetime = if self.rust_edition != RustEdition::Rust2015 { "'_, " } else { "" },
                ).ok();
                // The parse result is unwrapped from the actions enum variant belonging
                // to the user's %start rule.
                write!(
                    outs,
                    "
        match ::lrpar::RTParserBuilder::new(&grm, &stable)
            .recoverer(::lrpar::RecoveryKind::{recoverer})
            .parse_actions(lexer, &actions, {parse_param}) {{
                (Some({actionskind}::{actionskindprefix}{ridx}(x)), y) => (Some(x), y),
                (None, y) => (None, y),
                _ => unreachable!()
        }}",
                    parse_param = parse_param,
                    actionskind = ACTIONS_KIND,
                    actionskindprefix = ACTIONS_KIND_PREFIX,
                    ridx = usize::from(self.user_start_ridx(grm)),
                    recoverer = recoverer,
                )
                .ok();
            }
            YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => {
                write!(
                    outs,
                    "
        ::lrpar::RTParserBuilder::new(&grm, &stable)
            .recoverer(::lrpar::RecoveryKind::{})
            .parse_generictree(lexer)\n",
                    recoverer
                )
                .ok();
            }
            YaccKind::Original(YaccOriginalActionKind::NoAction) => {
                write!(
                    outs,
                    "
        ::lrpar::RTParserBuilder::new(&grm, &stable)
            .recoverer(::lrpar::RecoveryKind::{})
            .parse_noaction(lexer)\n",
                    recoverer
                )
                .ok();
            }
            YaccKind::Eco => unreachable!(),
        };

        outs.push_str("\n    }\n\n");
        Ok(outs)
    }
914
915    fn gen_rule_consts(&self, grm: &YaccGrammar<StorageT>) -> String {
916        let mut outs = String::new();
917        for ridx in grm.iter_rules() {
918            if !grm.rule_to_prods(ridx).contains(&grm.start_prod()) {
919                write!(
920                    outs,
921                    "    #[allow(dead_code)]\n    pub const R_{}: {} = {:?};\n",
922                    grm.rule_name_str(ridx).to_ascii_uppercase(),
923                    type_name::<StorageT>(),
924                    usize::from(ridx)
925                )
926                .ok();
927            }
928        }
929        outs
930    }
931
932    fn gen_token_epp(&self, grm: &YaccGrammar<StorageT>) -> String {
933        let mut tidxs = Vec::new();
934        for tidx in grm.iter_tidxs() {
935            match grm.token_epp(tidx) {
936                Some(n) => tidxs.push(format!("Some(\"{}\")", str_escape(n))),
937                None => tidxs.push("None".to_string()),
938            }
939        }
940        format!(
941            "    const {prefix}EPP: &[::std::option::Option<&str>] = &[{}];
942
943    /// Return the %epp entry for token `tidx` (where `None` indicates \"the token has no
944    /// pretty-printed value\"). Panics if `tidx` doesn't exist.
945    #[allow(dead_code)]
946    pub fn token_epp<'a>(tidx: ::cfgrammar::TIdx<{storaget}>) -> ::std::option::Option<&'a str> {{
947        {prefix}EPP[usize::from(tidx)]
948    }}",
949            tidxs.join(", "),
950            storaget = type_name::<StorageT>(),
951            prefix = GLOBAL_PREFIX
952        )
953    }
954
    /// Generate the wrappers that call user actions
    ///
    /// One `__gt_wrapper_<pidx>` function is generated per production; each unpacks its
    /// arguments from the parse stack drain and calls the corresponding
    /// `__gt_action_<pidx>` function. The actions enum (whose variants carry each rule's
    /// action type) is generated at the end.
    fn gen_wrappers(&self, grm: &YaccGrammar<StorageT>) -> String {
        let mut outs = String::new();

        outs.push_str("\n\n    // Wrappers\n\n");

        let (parse_paramname, parse_paramdef) = match grm.parse_param() {
            Some((name, tyname)) => (name.to_owned(), format!("{}: {}", name, tyname)),
            None => ("()".to_owned(), "_: ()".to_owned()),
        };
        for pidx in grm.iter_pidxs() {
            let ridx = grm.prod_to_rule(pidx);

            // Iterate over all $-arguments and replace them with their respective
            // element from the argument vector (e.g. $1 is replaced by args[0]). At
            // the same time extract &str from tokens and actiontype from nonterminals.
            write!(outs,
                "    fn {prefix}wrapper_{}<'lexer, 'input: 'lexer>({prefix}ridx: ::cfgrammar::RIdx<{storaget}>,
                      {prefix}lexer: &'lexer dyn ::lrpar::NonStreamingLexer<'input, {lexertypest}>,
                      {prefix}span: ::cfgrammar::Span,
                      mut {prefix}args: ::std::vec::Drain<{edition_lifetime} ::lrpar::parser::AStackType<<{lexertypest} as ::lrpar::LexerTypes>::LexemeT, {actionskind}<'input>>>,
                      {parse_paramdef})
                   -> {actionskind}<'input> {{",
                usize::from(pidx),
                storaget = type_name::<StorageT>(),
                lexertypest = type_name::<LexerTypesT>(),
                prefix = ACTION_PREFIX,
                parse_paramdef = parse_paramdef,
                actionskind = ACTIONS_KIND,
                edition_lifetime = if self.rust_edition != RustEdition::Rust2015 { "'_, " } else { "" },
            ).ok();

            if grm.action(pidx).is_some() {
                // Unpack the arguments passed to us by the drain
                // NB: generated argument names are 1-based (`__gt_arg_1`, ...), matching
                // yacc-style `$1` references.
                for i in 0..grm.prod(pidx).len() {
                    match grm.prod(pidx)[i] {
                        Symbol::Rule(ref_ridx) => {
                            // Nonterminal: unwrap the action value of the referenced
                            // rule from its actions enum variant.
                            write!(outs,
                            "
        #[allow(clippy::let_unit_value)]
        let {prefix}arg_{i} = match {prefix}args.next().unwrap() {{
            ::lrpar::parser::AStackType::ActionType({actionskind}::{actionskindprefix}{ref_ridx}(x)) => x,
            _ => unreachable!()
        }};",
                            i = i + 1,
                            ref_ridx = usize::from(ref_ridx),
                            prefix = ACTION_PREFIX,
                            actionskind = ACTIONS_KIND,
                            actionskindprefix = ACTIONS_KIND_PREFIX
                        ).ok();
                        }
                        Symbol::Token(_) => {
                            // Token: pass the lexeme as `Ok(l)`, or as `Err(l)` if the
                            // lexer flagged it as faulty.
                            write!(
                                outs,
                                "
        let {prefix}arg_{} = match {prefix}args.next().unwrap() {{
            ::lrpar::parser::AStackType::Lexeme(l) => {{
                if l.faulty() {{
                    Err(l)
                }} else {{
                    Ok(l)
                }}
            }},
            ::lrpar::parser::AStackType::ActionType(_) => unreachable!()
        }};",
                                i + 1,
                                prefix = ACTION_PREFIX
                            )
                            .ok();
                        }
                    }
                }

                // Call the user code
                let args = (0..grm.prod(pidx).len())
                    .map(|i| format!("{prefix}arg_{i}", prefix = ACTION_PREFIX, i = i + 1))
                    .collect::<Vec<_>>();
                // If the rule `r` that we're calling has the unit type then Clippy will warn that
                // `enum::A(wrapper_r())` is pointless. We thus have to split it into two:
                // `wrapper_r(); enum::A(())`.
                match grm.actiontype(ridx) {
                    Some(s) if s == "()" => {
                        write!(outs, "\n        {prefix}action_{pidx}({prefix}ridx, {prefix}lexer, {prefix}span, {parse_paramname}, {args});
        {actionskind}::{actionskindprefix}{ridx}(())",
                            actionskind = ACTIONS_KIND,
                            actionskindprefix = ACTIONS_KIND_PREFIX,
                            prefix = ACTION_PREFIX,
                            ridx = usize::from(ridx),
                            pidx = usize::from(pidx),
                            parse_paramname = parse_paramname,
                            args = args.join(", ")).ok();
                    }
                    _ => {
                        write!(outs, "\n        {actionskind}::{actionskindprefix}{ridx}({prefix}action_{pidx}({prefix}ridx, {prefix}lexer, {prefix}span, {parse_paramname}, {args}))",
                            actionskind = ACTIONS_KIND,
                            actionskindprefix = ACTIONS_KIND_PREFIX,
                            prefix = ACTION_PREFIX,
                            ridx = usize::from(ridx),
                            pidx = usize::from(pidx),
                            parse_paramname = parse_paramname,
                            args = args.join(", ")).ok();
                    }
                }
            } else if pidx == grm.start_prod() {
                // The action for the start production (i.e. the extra rule/production
                // added by lrpar) will never be executed, so a dummy function is all
                // that's required. We add "unreachable" as a check in case some other
                // detail of lrpar changes in the future.
                if parse_paramname != "()" {
                    // If the parse parameter is the unit type, `let _ = ();` leads to Clippy
                    // warnings.
                    write!(outs, "\n        let _ = {parse_paramname:};").ok();
                }
                outs.push_str("\n        unreachable!()");
            } else {
                panic!(
                    "Production in rule '{}' must have an action body.",
                    grm.rule_name_str(grm.prod_to_rule(pidx))
                );
            }
            outs.push_str("\n    }\n\n");
        }

        // Wrappers enum: one variant per rule which has an action type, plus a hidden
        // PhantomData variant tying the enum to the 'input lifetime.

        write!(
            outs,
            "    #[allow(dead_code)]
    enum {}<'input> {{\n",
            ACTIONS_KIND
        )
        .ok();
        for ridx in grm.iter_rules() {
            if grm.actiontype(ridx).is_none() {
                continue;
            }

            writeln!(
                outs,
                "        {actionskindprefix}{ridx}({actiont}),",
                actionskindprefix = ACTIONS_KIND_PREFIX,
                ridx = usize::from(ridx),
                actiont = grm.actiontype(ridx).as_ref().unwrap()
            )
            .ok();
        }
        write!(
            outs,
            "    _{actionskindhidden}(::std::marker::PhantomData<&'input ()>)
    }}\n\n",
            actionskindhidden = ACTIONS_KIND_HIDDEN
        )
        .ok();

        outs
    }
1111
    /// Generate the user action functions (if any).
    ///
    /// Each production (other than the synthetic start production) becomes a
    /// `__gt_action_<pidx>` function containing the user's action code, with `$`-style
    /// variables rewritten to the generated argument names.
    fn gen_user_actions(&self, grm: &YaccGrammar<StorageT>) -> Result<String, Box<dyn Error>> {
        let mut outs = String::new();

        // The `%%`-terminated programs section (if present) is emitted verbatim.
        if let Some(s) = grm.programs() {
            outs.push_str("\n// User code from the program section\n\n");
            outs.push_str(s);
            outs.push_str("\n// End of user code from the program section\n\n");
        }

        // Convert actions to functions
        outs.push_str("\n    // User actions\n\n");
        let (parse_paramname, parse_paramdef) = match grm.parse_param() {
            Some((name, tyname)) => (name.to_owned(), format!("{}: {}", name, tyname)),
            None => ("()".to_owned(), "_: ()".to_owned()),
        };
        for pidx in grm.iter_pidxs() {
            if pidx == grm.start_prod() {
                continue;
            }

            // Work out the right type for each argument: nonterminals have their rule's
            // action type; tokens are `Result<Lexeme, Lexeme>` (Err for faulty lexemes).
            let mut args = Vec::with_capacity(grm.prod(pidx).len());
            for i in 0..grm.prod(pidx).len() {
                let argt = match grm.prod(pidx)[i] {
                    Symbol::Rule(ref_ridx) => grm.actiontype(ref_ridx).as_ref().unwrap().clone(),
                    Symbol::Token(_) => format!(
                        "::std::result::Result<{lexemet}, {lexemet}>",
                        lexemet = type_name::<LexerTypesT::LexemeT>(),
                    ),
                };
                args.push(format!("mut {}arg_{}: {}", ACTION_PREFIX, i + 1, argt));
            }

            // If this rule's `actiont` is `()` then Clippy will warn that the return type `-> ()`
            // is pointless (which is true). We therefore avoid outputting a return type if actiont
            // is the unit type.
            let returnt = {
                let actiont = grm.actiontype(grm.prod_to_rule(pidx)).as_ref().unwrap();
                if actiont == "()" {
                    "".to_owned()
                } else {
                    format!("\n                 -> {}", actiont)
                }
            };
            write!(
                outs,
                "    // {rulename}
    #[allow(clippy::too_many_arguments)]
    fn {prefix}action_{}<'lexer, 'input: 'lexer>({prefix}ridx: ::cfgrammar::RIdx<{storaget}>,
                     {prefix}lexer: &'lexer dyn ::lrpar::NonStreamingLexer<'input, {lexertypest}>,
                     {prefix}span: ::cfgrammar::Span,
                     {parse_paramdef},
                     {args}){returnt} {{\n",
                usize::from(pidx),
                rulename = grm.rule_name_str(grm.prod_to_rule(pidx)),
                storaget = type_name::<StorageT>(),
                lexertypest = type_name::<LexerTypesT>(),
                prefix = ACTION_PREFIX,
                returnt = returnt,
                parse_paramdef = parse_paramdef,
                args = args.join(",\n                     ")
            )
            .ok();

            if parse_paramname != "()" {
                // If the parse parameter is the unit type, `let _ = ();` leads to Clippy
                // warnings.
                writeln!(outs, "        let _ = {parse_paramname:};").ok();
            }

            // Iterate over all $-arguments and replace them with their respective
            // element from the argument vector (e.g. $1 is replaced by args[0]).
            let pre_action = grm.action(pidx).as_ref().ok_or_else(|| {
                format!(
                    "Rule {} has a production which is missing action code",
                    grm.rule_name_str(grm.prod_to_rule(pidx))
                )
            })?;
            let mut last = 0;
            // Scan the action string for '$' operators, copying the text between them
            // verbatim:
            //   - "$$" emits a literal '$';
            //   - "$lexer" / "$span" map to the generated `__gt_lexer` / `__gt_span` args;
            //   - "$<digits>" maps to `__gt_arg_<digits>` (the digits themselves are
            //     copied through unchanged from the source).
            // Any other text following '$' is a hard error.
            loop {
                match pre_action[last..].find('$') {
                    Some(off) => {
                        if pre_action[last + off..].starts_with("$$") {
                            outs.push_str(&pre_action[last..last + off + "$".len()]);
                            last = last + off + "$$".len();
                        } else if pre_action[last + off..].starts_with("$lexer") {
                            outs.push_str(&pre_action[last..last + off]);
                            write!(outs, "{prefix}lexer", prefix = ACTION_PREFIX).ok();
                            last = last + off + "$lexer".len();
                        } else if pre_action[last + off..].starts_with("$span") {
                            outs.push_str(&pre_action[last..last + off]);
                            write!(outs, "{prefix}span", prefix = ACTION_PREFIX).ok();
                            last = last + off + "$span".len();
                        } else if last + off + 1 < pre_action.len()
                            && pre_action[last + off + 1..].starts_with(|c: char| c.is_numeric())
                        {
                            outs.push_str(&pre_action[last..last + off]);
                            write!(outs, "{prefix}arg_", prefix = ACTION_PREFIX).ok();
                            last = last + off + "$".len();
                        } else {
                            panic!(
                                "Unknown text following '$' operator: {}",
                                &pre_action[last + off..]
                            );
                        }
                    }
                    None => {
                        // No further '$'s: copy the remainder of the action verbatim.
                        outs.push_str(&pre_action[last..]);
                        break;
                    }
                }
            }

            outs.push_str("\n    }\n\n");
        }
        Ok(outs)
    }
1230
1231    /// Return the `RIdx` of the %start rule in the grammar (which will not be the same as
1232    /// grm.start_rule_idx because the latter has an additional rule insert by cfgrammar
1233    /// which then calls the user's %start rule).
1234    fn user_start_ridx(&self, grm: &YaccGrammar<StorageT>) -> RIdx<StorageT> {
1235        debug_assert_eq!(grm.prod(grm.start_prod()).len(), 1);
1236        match grm.prod(grm.start_prod())[0] {
1237            Symbol::Rule(ridx) => ridx,
1238            _ => unreachable!(),
1239        }
1240    }
1241}
1242
/// Return a version of the string `s` which is safe to embed in source code as a string.
fn str_escape(s: &str) -> String {
    // Backslashes and double quotes are the only characters which need escaping; all
    // other characters pass through unchanged.
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            _ => out.push(c),
        }
    }
    out
}
1247
1248/// This function is called by generated files; it exists so that generated files don't require a
1249/// dependency on serde and rmps.
1250#[doc(hidden)]
1251pub fn _reconstitute<StorageT: DeserializeOwned + Hash + PrimInt + Unsigned>(
1252    grm_buf: &[u8],
1253    stable_buf: &[u8],
1254) -> (YaccGrammar<StorageT>, StateTable<StorageT>) {
1255    let grm = deserialize(grm_buf).unwrap();
1256    let stable = deserialize(stable_buf).unwrap();
1257    (grm, stable)
1258}
1259
1260fn serialize_bin_output<T: Serialize + ?Sized>(
1261    ser: &T,
1262    name: &str,
1263    buffer: &mut String,
1264) -> Result<(), Box<dyn Error>> {
1265    let mut w = ArrayWriter::new(name);
1266    serialize_into(&mut w, ser)?;
1267    let data = w.finish();
1268    buffer.push_str(&data);
1269    Ok(())
1270}
1271
/// Makes formatting bytes into a rust array relatively painless.
///
/// Bytes written through the `io::Write` impl are appended as decimal literals; `finish`
/// closes the array expression.
struct ArrayWriter {
    buffer: String,
}

impl ArrayWriter {
    /// Create a new array with the specified name.
    fn new(name: &str) -> Self {
        ArrayWriter {
            buffer: format!(r#"#[allow(dead_code)] const {}: &[u8] = &["#, name),
        }
    }

    /// Complete the array, and return the finished string.
    fn finish(mut self) -> String {
        self.buffer.push_str("];\n");
        self.buffer
    }
}

impl Write for ArrayWriter {
    #[allow(dead_code)]
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        // Each byte becomes a decimal literal followed by a comma.
        for &b in buf {
            self.buffer.push_str(&b.to_string());
            self.buffer.push(',');
        }
        Ok(buf.len())
    }

    #[allow(dead_code)]
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}
1306
/// An interface to the result of [CTParserBuilder::build()].
pub struct CTParser<StorageT = u32>
where
    StorageT: Eq + Hash,
{
    // Whether the parser outputs were regenerated on this build (see `regenerated()`).
    regenerated: bool,
    // Map from token names to their numeric IDs (returned by `token_map()`).
    rule_ids: HashMap<String, StorageT>,
    // Grammar, state graph, and state table retained for conflict inspection; `None`
    // when there is nothing to report (see `conflicts()`).
    conflicts: Option<(
        YaccGrammar<StorageT>,
        StateGraph<StorageT>,
        StateTable<StorageT>,
    )>,
}
1320
1321impl<StorageT> CTParser<StorageT>
1322where
1323    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
1324    usize: AsPrimitive<StorageT>,
1325{
1326    /// Returns `true` if this compile-time parser was regenerated or `false` if it was not.
1327    pub fn regenerated(&self) -> bool {
1328        self.regenerated
1329    }
1330
1331    /// Returns a [HashMap] from lexeme string types to numeric types (e.g. `INT: 2`), suitable for
1332    /// handing to a lexer to coordinate the IDs of lexer and parser.
1333    pub fn token_map(&self) -> &HashMap<String, StorageT> {
1334        &self.rule_ids
1335    }
1336
1337    /// If there are any conflicts in the grammar, return a tuple which allows users to inspect and
1338    /// pretty print them; otherwise returns `None`. If the grammar was not regenerated, this will
1339    /// always return `None`, even if the grammar actually has conflicts.
1340    ///
1341    /// **Note: The conflicts feature is currently unstable and may change in the future.**
1342    #[allow(private_interfaces)]
1343    pub fn conflicts(
1344        &self,
1345        _: crate::unstable::UnstableApi,
1346    ) -> Option<(
1347        &YaccGrammar<StorageT>,
1348        &StateGraph<StorageT>,
1349        &StateTable<StorageT>,
1350        &Conflicts<StorageT>,
1351    )> {
1352        if let Some((grm, sgraph, stable)) = &self.conflicts {
1353            return Some((grm, sgraph, stable, stable.conflicts().unwrap()));
1354        }
1355        None
1356    }
1357}
1358
#[cfg(test)]
mod test {
    use std::{fs::File, io::Write, path::PathBuf};

    use super::{CTConflictsError, CTParserBuilder};
    use crate::test_utils::TestLexerTypes;
    use cfgrammar::yacc::{YaccKind, YaccOriginalActionKind};
    use tempfile::TempDir;

    /// Write `grammar` to a "grm.y" file inside `temp` and return the file's path.
    ///
    /// Previously each test duplicated this boilerplate and discarded `write_all`
    /// errors with `let _ = ...`; write failures now panic immediately so a broken
    /// temp dir cannot masquerade as a grammar error.
    fn write_grammar(temp: &TempDir, grammar: &str) -> PathBuf {
        let mut file_path = PathBuf::from(temp.as_ref());
        file_path.push("grm.y");
        let mut f = File::create(&file_path).unwrap();
        f.write_all(grammar.as_bytes()).unwrap();
        file_path
    }

    #[test]
    fn test_conflicts() {
        // With error_on_conflicts(false) the build succeeds and the conflicts are
        // exposed via the (unstable) conflicts() accessor.
        let temp = TempDir::new().unwrap();
        let file_path = write_grammar(
            &temp,
            "%start A
%%
A : 'a' 'b' | B 'b';
B : 'a' | C;
C : 'a';",
        );

        match CTParserBuilder::<TestLexerTypes>::new()
            .error_on_conflicts(false)
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
            .unwrap()
            .conflicts(crate::unstable::UnstableApi)
        {
            Some((_, _, _, conflicts)) => {
                assert_eq!(conflicts.sr_len(), 1);
                assert_eq!(conflicts.rr_len(), 1);
            }
            None => panic!("Expected error data"),
        }
    }

    #[test]
    fn test_conflicts_error() {
        // By default conflicts are a hard error carrying a CTConflictsError.
        let temp = TempDir::new().unwrap();
        let file_path = write_grammar(
            &temp,
            "%start A
%%
A : 'a' 'b' | B 'b';
B : 'a' | C;
C : 'a';",
        );

        match CTParserBuilder::<TestLexerTypes>::new()
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
        {
            Ok(_) => panic!("Expected error"),
            Err(e) => {
                let cs = e.downcast_ref::<CTConflictsError<u16>>().unwrap();
                let conflicts = cs.stable.conflicts().unwrap();
                assert_eq!(conflicts.rr_len(), 1);
                assert_eq!(conflicts.sr_len(), 1);
            }
        }
    }

    #[test]
    fn test_expect_error() {
        // %expect 2 does not match the single shift/reduce conflict, so the build fails.
        let temp = TempDir::new().unwrap();
        let file_path = write_grammar(
            &temp,
            "%start A
%expect 2
%%
A: 'a' 'b' | B 'b';
B: 'a';",
        );

        match CTParserBuilder::<TestLexerTypes>::new()
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
        {
            Ok(_) => panic!("Expected error"),
            Err(e) => {
                let cs = e.downcast_ref::<CTConflictsError<u16>>().unwrap();
                let conflicts = cs.stable.conflicts().unwrap();
                assert_eq!(conflicts.rr_len(), 0);
                assert_eq!(conflicts.sr_len(), 1);
            }
        }
    }

    #[test]
    fn test_expectrr_error() {
        // %expect-rr 2 does not match the single reduce/reduce conflict, so the build
        // fails.
        let temp = TempDir::new().unwrap();
        let file_path = write_grammar(
            &temp,
            "%start A
%expect 1
%expect-rr 2
%%
A : 'a' 'b' | B 'b';
B : 'a' | C;
C : 'a';",
        );

        match CTParserBuilder::<TestLexerTypes>::new()
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
        {
            Ok(_) => panic!("Expected error"),
            Err(e) => {
                let cs = e.downcast_ref::<CTConflictsError<u16>>().unwrap();
                let conflicts = cs.stable.conflicts().unwrap();
                assert_eq!(conflicts.rr_len(), 1);
                assert_eq!(conflicts.sr_len(), 1);
            }
        }
    }
}