use bincode::Encode;
use cfgrammar::{
header::{
GrmtoolsSectionParser, Header, HeaderError, HeaderErrorKind, HeaderValue, Namespaced,
Setting, Value,
},
markmap::MergeBehavior,
span::{Location, Span},
};
use glob::glob;
use lrpar::{
CTParserBuilder, LexerTypes,
diagnostics::{DiagnosticFormatter, SpannedDiagnosticFormatter},
};
use num_traits::{AsPrimitive, PrimInt, Unsigned};
use proc_macro2::{Ident, TokenStream};
use quote::{ToTokens, TokenStreamExt, format_ident, quote};
use regex::Regex;
use std::{
    any::type_name,
    borrow::Borrow,
    collections::{HashMap, HashSet},
    env::{current_dir, var},
    error::Error,
    fmt::{self, Debug, Display, Write as _},
    fs::{self, File, create_dir_all, read_to_string},
    hash::Hash,
    io::Write,
    marker::PhantomData,
    path::{Path, PathBuf},
    sync::{LazyLock, Mutex},
};
use crate::{DefaultLexerTypes, LRNonStreamingLexer, LRNonStreamingLexerDef, LexFlags, LexerDef};
const RUST_FILE_EXT: &str = "rs";
const ERROR: &str = "[Error]";
const WARNING: &str = "[Warning]";
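// Token names must match this pattern (i.e. be valid Rust identifiers) in order to be
// emitted as constants in the generated module.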
static RE_TOKEN_ID: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^[a-zA-Z_][a-zA-Z_0-9]*$").unwrap());
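// Output paths already generated to during this build, used to reject two lexers being
// written to the same file.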
static GENERATED_PATHS: LazyLock<Mutex<HashSet<PathBuf>>> =
LazyLock::new(|| Mutex::new(HashSet::new()));
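/// The kind of lexer to generate. In a `%grmtools` header this is set via e.g.
/// `lexerkind: LRNonStreamingLexer` (case-insensitive, optionally prefixed with
/// `LexerKind::`).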
#[non_exhaustive]
pub enum LexerKind {
LRNonStreamingLexer,
}
impl<T: Clone> TryFrom<&Value<T>> for LexerKind {
type Error = cfgrammar::header::HeaderError<T>;
fn try_from(it: &Value<T>) -> Result<LexerKind, Self::Error> {
match it {
Value::Flag(_, loc) => Err(HeaderError {
kind: HeaderErrorKind::ConversionError(
"LexerKind",
"Expected `LexerKind` found bool",
),
locations: vec![loc.clone()],
}),
Value::Setting(Setting::Num(_, loc)) => Err(HeaderError {
kind: HeaderErrorKind::ConversionError(
"LexerKind",
"Expected `LexerKind` found numeric",
),
locations: vec![loc.clone()],
}),
Value::Setting(Setting::String(_, loc)) => Err(HeaderError {
kind: HeaderErrorKind::ConversionError(
"LexerKind",
"Expected `LexerKind` found string",
),
locations: vec![loc.clone()],
}),
Value::Setting(Setting::Constructor {
ctor:
Namespaced {
namespace: _,
member: (_, loc),
},
arg: _,
}) => Err(HeaderError {
kind: HeaderErrorKind::ConversionError(
"LexerKind",
"Expected `LexerKind` found constructor",
),
locations: vec![loc.clone()],
}),
Value::Setting(Setting::Array(_, arr_loc, _)) => Err(HeaderError {
kind: HeaderErrorKind::ConversionError(
"LexerKind",
"Expected `LexerKind` found array",
),
locations: vec![arr_loc.clone()],
}),
Value::Setting(Setting::Unitary(Namespaced {
namespace,
member: (member, member_loc),
})) => {
if let Some((ns, loc)) = namespace {
if ns.to_lowercase() != "lexerkind" {
return Err(HeaderError {
kind: HeaderErrorKind::ConversionError(
"LexerKind",
"Expected namespace `LexerKind`",
),
locations: vec![loc.clone()],
});
}
}
if member.to_lowercase() != "lrnonstreaminglexer" {
return Err(HeaderError {
kind: HeaderErrorKind::ConversionError(
"LexerKind",
"Unknown `LexerKind` Variant",
),
locations: vec![member_loc.clone()],
});
}
Ok(LexerKind::LRNonStreamingLexer)
}
}
}
}
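/// The Rust visibility that the generated module is declared with.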
#[derive(Clone, PartialEq, Eq, Debug)]
#[non_exhaustive]
pub enum Visibility {
Private,
Public,
PublicSuper,
PublicSelf,
PublicCrate,
PublicIn(String),
}
impl ToTokens for Visibility {
fn to_tokens(&self, tokens: &mut TokenStream) {
tokens.extend(match self {
Visibility::Private => quote!(),
Visibility::Public => quote! {pub},
Visibility::PublicSuper => quote! {pub(super)},
Visibility::PublicSelf => quote! {pub(self)},
Visibility::PublicCrate => quote! {pub(crate)},
Visibility::PublicIn(data) => {
let other = str::parse::<TokenStream>(data).unwrap();
quote! {pub(in #other)}
}
})
}
}
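/// The Rust edition that the generated code will target.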
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
#[non_exhaustive]
pub enum RustEdition {
Rust2015,
Rust2018,
Rust2021,
}
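// Small wrappers which control how values are interpolated by `quote!`: an `Option` as
// `::std::option::Option`, a 2-tuple as `(a, b)`, and a `&str` as a `String` expression.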
struct QuoteOption<T>(Option<T>);
impl<T: ToTokens> ToTokens for QuoteOption<T> {
fn to_tokens(&self, tokens: &mut TokenStream) {
tokens.append_all(match self.0 {
Some(ref t) => quote! { ::std::option::Option::Some(#t) },
None => quote! { ::std::option::Option::None },
});
}
}
struct QuoteTuple<T>(T);
impl<A: ToTokens, B: ToTokens> ToTokens for QuoteTuple<(A, B)> {
fn to_tokens(&self, tokens: &mut TokenStream) {
let (a, b) = &self.0;
tokens.append_all(quote!((#a, #b)));
}
}
struct QuoteToString<'a>(&'a str);
impl ToTokens for QuoteToString<'_> {
fn to_tokens(&self, tokens: &mut TokenStream) {
let x = &self.0;
tokens.append_all(quote! { #x.to_string() });
}
}
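// A string-only error type whose `Debug` output matches its `Display` output, so that
// messages propagated via `Box<dyn Error>` print cleanly.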
struct ErrorString(String);
impl fmt::Display for ErrorString {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let ErrorString(s) = self;
write!(f, "{}", s)
}
}
impl fmt::Debug for ErrorString {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let ErrorString(s) = self;
write!(f, "{}", s)
}
}
impl Error for ErrorString {}
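/// A `CTLexerBuilder` allows one to specify the criteria for building a statically
/// generated lexer at compile time, typically from a `build.rs` script.
///
/// A minimal `build.rs` sketch (the path `calc.l`, relative to `src/`, is illustrative):
///
/// ```text
/// CTLexerBuilder::new()
///     .lexer_in_src_dir("calc.l")?
///     .build()?;
/// ```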
pub struct CTLexerBuilder<'a, LexerTypesT: LexerTypes = DefaultLexerTypes<u32>>
where
LexerTypesT::StorageT: Debug + Eq + Hash + ToTokens,
usize: num_traits::AsPrimitive<LexerTypesT::StorageT>,
{
lrpar_config:
Option<Box<dyn Fn(CTParserBuilder<LexerTypesT>) -> CTParserBuilder<LexerTypesT> + 'a>>,
lexer_path: Option<PathBuf>,
output_path: Option<PathBuf>,
lexerkind: Option<LexerKind>,
mod_name: Option<&'a str>,
visibility: Visibility,
rust_edition: RustEdition,
rule_ids_map: Option<HashMap<String, LexerTypesT::StorageT>>,
allow_missing_terms_in_lexer: bool,
allow_missing_tokens_in_parser: bool,
warnings_are_errors: bool,
show_warnings: bool,
header: Header<Location>,
#[cfg(test)]
inspect_lexerkind_cb: Option<Box<dyn Fn(&LexerKind) -> Result<(), Box<dyn Error>>>>,
}
impl CTLexerBuilder<'_, DefaultLexerTypes<u32>> {
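    /// Create a new [`CTLexerBuilder`] using the default storage type (`u32`).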
pub fn new() -> Self {
CTLexerBuilder::<DefaultLexerTypes<u32>>::new_with_lexemet()
}
}
impl<'a, LexerTypesT: LexerTypes<LexErrorT = crate::LRLexError> + 'static>
CTLexerBuilder<'a, LexerTypesT>
where
LexerTypesT::StorageT:
'static + Debug + Eq + Hash + PrimInt + Encode + TryFrom<usize> + Unsigned + ToTokens,
usize: AsPrimitive<LexerTypesT::StorageT>,
{
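    /// Create a new [`CTLexerBuilder`] with a user-defined [`LexerTypes`] instance.
    /// `LexerTypesT::StorageT` must be an unsigned integer type large enough to index
    /// every token.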
pub fn new_with_lexemet() -> Self {
let mut header = Header::new();
header.set_default_merge_behavior(MergeBehavior::Ours);
CTLexerBuilder {
lrpar_config: None,
lexer_path: None,
output_path: None,
lexerkind: None,
mod_name: None,
visibility: Visibility::Private,
rust_edition: RustEdition::Rust2021,
rule_ids_map: None,
allow_missing_terms_in_lexer: false,
allow_missing_tokens_in_parser: false,
warnings_are_errors: false,
show_warnings: true,
header,
#[cfg(test)]
inspect_lexerkind_cb: None,
}
}
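    /// Set a callback which will be used to configure the [`CTParserBuilder`] that is
    /// built alongside this lexer, so that the lexer's rule IDs are taken automatically
    /// from the parser's token map. A rough sketch (the grammar path is illustrative):
    ///
    /// ```text
    /// CTLexerBuilder::new()
    ///     .lrpar_config(|ctp| ctp.grammar_in_src_dir("calc.y").unwrap())
    ///     .lexer_in_src_dir("calc.l")?
    ///     .build()?;
    /// ```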
pub fn lrpar_config<F>(mut self, config_func: F) -> Self
where
F: Fn(CTParserBuilder<LexerTypesT>) -> CTParserBuilder<LexerTypesT> + 'a,
{
self.lrpar_config = Some(Box::new(config_func));
self
}
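    /// Set the input lexer path to a file relative to this project's `src` directory,
    /// and derive the output path from it automatically: `src/a/b/c.l` is compiled to
    /// `$OUT_DIR/a/b/c.l.rs`. The given path must be relative.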
pub fn lexer_in_src_dir<P>(mut self, srcp: P) -> Result<Self, Box<dyn Error>>
where
P: AsRef<Path>,
{
if !srcp.as_ref().is_relative() {
return Err(format!(
"Lexer path '{}' must be a relative path.",
srcp.as_ref().to_str().unwrap_or("<invalid UTF-8>")
)
.into());
}
let mut lexp = current_dir()?;
lexp.push("src");
lexp.push(srcp.as_ref());
self.lexer_path = Some(lexp);
let mut outp = PathBuf::new();
outp.push(var("OUT_DIR").unwrap());
outp.push(srcp.as_ref().parent().unwrap().to_str().unwrap());
create_dir_all(&outp)?;
let mut leaf = srcp
.as_ref()
.file_name()
.unwrap()
.to_str()
.unwrap()
.to_owned();
write!(leaf, ".{}", RUST_FILE_EXT).ok();
outp.push(leaf);
Ok(self.output_path(outp))
}
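    /// Set the input lexer path to an arbitrary path. In most cases
    /// [`Self::lexer_in_src_dir`] is more convenient, since it also derives the output
    /// path.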
pub fn lexer_path<P>(mut self, inp: P) -> Self
where
P: AsRef<Path>,
{
self.lexer_path = Some(inp.as_ref().to_owned());
self
}
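    /// Set the path the generated Rust source will be written to.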
pub fn output_path<P>(mut self, outp: P) -> Self
where
P: AsRef<Path>,
{
self.output_path = Some(outp.as_ref().to_owned());
self
}
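    /// Force a particular [`LexerKind`], overriding any `lexerkind` entry in the
    /// lexer's `%grmtools` header.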
pub fn lexerkind(mut self, lexerkind: LexerKind) -> Self {
self.lexerkind = Some(lexerkind);
self
}
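    /// Set the name of the generated module. If unset, the name defaults to the lexer
    /// file's stem followed by `_l` (e.g. `calc.l` becomes `calc_l`).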
pub fn mod_name(mut self, mod_name: &'a str) -> Self {
self.mod_name = Some(mod_name);
self
}
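    /// Set the visibility of the generated module. Defaults to
    /// [`Visibility::Private`].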
pub fn visibility(mut self, vis: Visibility) -> Self {
self.visibility = vis;
self
}
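    /// Set the Rust edition the generated code targets. Defaults to
    /// [`RustEdition::Rust2021`].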
pub fn rust_edition(mut self, edition: RustEdition) -> Self {
self.rust_edition = edition;
self
}
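    /// Set this lexer's rule IDs from a map of token names to IDs, typically obtained
    /// from lrpar's token map. When [`Self::lrpar_config`] is used, this map is filled
    /// in automatically from the built parser.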
pub fn rule_ids_map<T: std::borrow::Borrow<HashMap<String, LexerTypesT::StorageT>> + Clone>(
mut self,
rule_ids_map: T,
) -> Self {
self.rule_ids_map = Some(rule_ids_map.borrow().to_owned());
self
}
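    /// Statically compile the `.l` file specified by [`Self::lexer_path`] into Rust,
    /// placing the output in the file specified by [`Self::output_path`]. Returns a
    /// [`CTLexer`] recording which tokens, if any, were missing from the lexer or
    /// unused by the parser.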
pub fn build(mut self) -> Result<CTLexer, Box<dyn Error>> {
let lexerp = self
.lexer_path
.as_ref()
.expect("lexer_path must be specified before processing.");
let outp = self
.output_path
.as_ref()
.expect("output_path must be specified before processing.");
{
let mut lk = GENERATED_PATHS.lock().unwrap();
if lk.contains(outp.as_path()) {
return Err(format!("Generating two lexers to the same path ('{}') is not allowed: use CTLexerBuilder::output_path (and, optionally, CTLexerBuilder::mod_name) to differentiate them.", &outp.to_str().unwrap()).into());
}
lk.insert(outp.clone());
}
let lex_src = read_to_string(lexerp)
.map_err(|e| format!("When reading '{}': {e}", lexerp.display()))?;
let lex_diag = SpannedDiagnosticFormatter::new(&lex_src, lexerp);
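        // Parse the lexer's optional `%grmtools` header and merge it with the settings
        // supplied via the builder.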
let mut header = self.header;
let (parsed_header, _) = GrmtoolsSectionParser::new(&lex_src, false)
.parse()
.map_err(|es| {
let mut out = String::new();
out.push_str(&format!(
"\n{ERROR}{}\n",
lex_diag.file_location_msg(" parsing the `%grmtools` section", None)
));
for e in es {
out.push_str(&indent(" ", &lex_diag.format_error(e).to_string()));
out.push('\n');
}
ErrorString(out)
})?;
header.merge_from(parsed_header)?;
header.mark_used(&"lexerkind".to_string());
let lexerkind = match self.lexerkind {
Some(lexerkind) => lexerkind,
None => {
if let Some(HeaderValue(_, lk_val)) = header.get("lexerkind") {
LexerKind::try_from(lk_val)?
} else {
LexerKind::LRNonStreamingLexer
}
}
};
#[cfg(test)]
if let Some(inspect_lexerkind_cb) = self.inspect_lexerkind_cb {
inspect_lexerkind_cb(&lexerkind)?
}
let (lexerdef, lex_flags): (LRNonStreamingLexerDef<LexerTypesT>, LexFlags) =
match lexerkind {
LexerKind::LRNonStreamingLexer => {
let lex_flags = LexFlags::try_from(&mut header)?;
let lexerdef = LRNonStreamingLexerDef::<LexerTypesT>::new_with_options(
&lex_src, lex_flags,
)
.map_err(|errs| {
let mut out = String::new();
out.push_str(&format!(
"\n{ERROR}{}\n",
lex_diag.file_location_msg("", None)
));
for e in errs {
out.push_str(&indent(" ", &lex_diag.format_error(e).to_string()));
out.push('\n');
}
ErrorString(out)
})?;
let lex_flags = lexerdef.lex_flags().cloned();
(lexerdef, lex_flags.unwrap())
}
};
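        // If an lrpar configuration callback was supplied, build the parser alongside
        // the lexer. The `inspect_rt` hook lexes and parses every file matched by the
        // grammar header's `test_files` globs, collecting any parse errors.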
let ct_parser = if let Some(ref lrcfg) = self.lrpar_config {
let mut closure_lexerdef = lexerdef.clone();
let mut ctp = CTParserBuilder::<LexerTypesT>::new().inspect_rt(Box::new(
move |yacc_header, rtpb, rule_ids_map, grm_path| {
let owned_map = rule_ids_map
.iter()
.map(|(x, y)| (&**x, *y))
.collect::<HashMap<_, _>>();
closure_lexerdef.set_rule_ids(&owned_map);
yacc_header.mark_used(&"test_files".to_string());
let grammar = rtpb.grammar();
let test_glob = yacc_header.get("test_files");
let mut err_str = None;
                    let add_error_line = |err_str: &mut Option<String>, line: String| {
                        err_str
                            .get_or_insert_with(String::new)
                            .push_str(&format!("{}\n", line));
                    };
match test_glob {
Some(HeaderValue(_, Value::Setting(Setting::Array(test_globs, _, _)))) => {
for setting in test_globs {
match setting {
Setting::String(test_files, _) => {
let path_joined = grm_path.parent().unwrap().join(test_files);
let path_str = &path_joined.to_string_lossy();
let mut glob_paths = glob(path_str).map_err(|e| e.to_string())?.peekable();
if glob_paths.peek().is_none() {
return Err(format!("'test_files' glob '{}' matched no paths", path_str)
.to_string()
.into(),
);
}
for path in glob_paths {
let path = path?;
if let Some(ext) = path.extension() {
if let Some(ext) = ext.to_str() {
if ext.starts_with("grm") {
add_error_line(&mut err_str, "test_files extensions beginning with `grm` are reserved.".into());
}
}
}
let input = fs::read_to_string(&path)?;
let l: LRNonStreamingLexer<LexerTypesT> =
closure_lexerdef.lexer(&input);
let errs = rtpb.parse_map(&l, &|_| (), &|_, _| ()).1;
if !errs.is_empty() {
add_error_line(&mut err_str, format!("While parsing {}:", path.display()));
for e in errs {
let e_pp = e.pp(&l, &|t| grammar.token_epp(t));
                                                let e_lines = e_pp.split('\n');
for e in e_lines {
add_error_line(&mut err_str, format!("\t{}", e));
}
}
}
}
}
_ => return Err("Invalid value for setting 'test_files'".into()),
}
}
if let Some(err_str) = err_str {
Err(ErrorString(err_str))?
} else {
Ok(())
}
}
Some(_) => Err("Invalid value for setting 'test_files'".into()),
None => Ok(()),
}
},
));
ctp = lrcfg(ctp);
let ct_parser = ctp.build()?;
self.rule_ids_map = Some(ct_parser.token_map().to_owned());
Some(ct_parser)
} else {
None
};
let mut lexerdef = Box::new(lexerdef);
let unused_header_values = header.unused();
if !unused_header_values.is_empty() {
return Err(
format!("Unused header values: {}", unused_header_values.join(", ")).into(),
);
}
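        // Apply the rule IDs to the lexer, recording tokens used by the grammar but
        // missing from the lexer, and lexer rules never referenced by the grammar.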
let (missing_from_lexer, missing_from_parser) = match self.rule_ids_map {
Some(ref rim) => {
let owned_map = rim
.iter()
.map(|(x, y)| (&**x, *y))
.collect::<HashMap<_, _>>();
let (x, y) = lexerdef.set_rule_ids_spanned(&owned_map);
(
x.map(|a| a.iter().map(|&b| b.to_string()).collect::<HashSet<_>>()),
y.map(|a| {
a.iter()
.map(|(b, span)| (b.to_string(), *span))
.collect::<HashSet<_>>()
}),
)
}
None => (None, None),
};
let mut has_unallowed_missing = false;
let err_indent = " ".repeat(ERROR.len());
if !self.allow_missing_terms_in_lexer {
if let Some(ref mfl) = missing_from_lexer {
if let Some(ct_parser) = &ct_parser {
let grm = ct_parser.yacc_grammar();
let token_spans = mfl
.iter()
.map(|name| {
ct_parser
.yacc_grammar()
.token_span(*grm.tokens_map().get(name.as_str()).unwrap())
.expect("Given token should have a span")
})
.collect::<Vec<_>>();
let yacc_diag = SpannedDiagnosticFormatter::new(
ct_parser.grammar_src(),
ct_parser.grammar_path(),
);
eprintln!(
"{ERROR} these tokens are not referenced in the lexer but defined as follows"
);
eprintln!(
"{err_indent} {}",
yacc_diag.file_location_msg("in the grammar", None)
);
for span in token_spans {
eprintln!(
"{}",
yacc_diag.underline_span_with_text(
span,
"Missing from lexer".to_string(),
'^'
)
);
}
eprintln!();
} else {
eprintln!(
"{ERROR} the following tokens are used in the grammar but are not defined in the lexer:"
);
for n in mfl {
eprintln!(" {}", n);
}
}
has_unallowed_missing = true;
}
}
if !self.allow_missing_tokens_in_parser && self.show_warnings {
if let Some(ref mfp) = missing_from_parser {
let error_prefix = if self.warnings_are_errors {
ERROR
} else {
WARNING
};
let err_indent = " ".repeat(error_prefix.len());
let mut outs = Vec::new();
outs.push(format!("{error_prefix} these tokens are not referenced in the grammar but defined as follows"));
outs.push(format!(
"{err_indent} {}",
lex_diag.file_location_msg("in the lexer", None)
));
for (_, span) in mfp {
let error_contents = lex_diag.underline_span_with_text(
*span,
"Missing from parser".to_string(),
'^',
);
outs.extend(error_contents.lines().map(|s| s.to_string()));
}
for s in outs {
if !self.warnings_are_errors && std::env::var("OUT_DIR").is_ok() {
println!("cargo:warning={}", s)
} else {
eprintln!("{}", s);
}
}
has_unallowed_missing |= self.warnings_are_errors;
}
}
if has_unallowed_missing {
fs::remove_file(outp).ok();
            panic!("Lexer generation failed: errors are listed above.");
}
let mod_name = match self.mod_name {
Some(s) => s.to_owned(),
None => {
let mut stem = lexerp.to_str().unwrap();
loop {
let new_stem = Path::new(stem).file_stem().unwrap().to_str().unwrap();
if stem == new_stem {
break;
}
stem = new_stem;
}
format!("{}_l", stem)
}
};
let mod_name =
match syn::parse_str::<proc_macro2::Ident>(&mod_name) {
Ok(s) => s,
Err(e) => return Err(format!(
"CTLexerBuilder::mod_name(\"{}\") is not a valid rust identifier due to '{}'",
mod_name, e
)
.into()),
};
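        // Generate the body of the `lexerdef()` function, starting with code which
        // rebuilds the lex flags, falling back to the defaults for any flag left unset.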
let mut lexerdef_func_impl = {
let LexFlags {
allow_wholeline_comments,
dot_matches_new_line,
multi_line,
octal,
posix_escapes,
case_insensitive,
unicode,
swap_greed,
ignore_whitespace,
size_limit,
dfa_size_limit,
nest_limit,
} = lex_flags;
let allow_wholeline_comments = QuoteOption(allow_wholeline_comments);
let dot_matches_new_line = QuoteOption(dot_matches_new_line);
let multi_line = QuoteOption(multi_line);
let octal = QuoteOption(octal);
let posix_escapes = QuoteOption(posix_escapes);
let case_insensitive = QuoteOption(case_insensitive);
let unicode = QuoteOption(unicode);
let swap_greed = QuoteOption(swap_greed);
let ignore_whitespace = QuoteOption(ignore_whitespace);
let size_limit = QuoteOption(size_limit);
let dfa_size_limit = QuoteOption(dfa_size_limit);
let nest_limit = QuoteOption(nest_limit);
quote! {
let mut lex_flags = ::lrlex::DEFAULT_LEX_FLAGS;
lex_flags.allow_wholeline_comments = #allow_wholeline_comments.or(::lrlex::DEFAULT_LEX_FLAGS.allow_wholeline_comments);
lex_flags.dot_matches_new_line = #dot_matches_new_line.or(::lrlex::DEFAULT_LEX_FLAGS.dot_matches_new_line);
lex_flags.multi_line = #multi_line.or(::lrlex::DEFAULT_LEX_FLAGS.multi_line);
lex_flags.octal = #octal.or(::lrlex::DEFAULT_LEX_FLAGS.octal);
lex_flags.posix_escapes = #posix_escapes.or(::lrlex::DEFAULT_LEX_FLAGS.posix_escapes);
lex_flags.case_insensitive = #case_insensitive.or(::lrlex::DEFAULT_LEX_FLAGS.case_insensitive);
lex_flags.unicode = #unicode.or(::lrlex::DEFAULT_LEX_FLAGS.unicode);
lex_flags.swap_greed = #swap_greed.or(::lrlex::DEFAULT_LEX_FLAGS.swap_greed);
lex_flags.ignore_whitespace = #ignore_whitespace.or(::lrlex::DEFAULT_LEX_FLAGS.ignore_whitespace);
lex_flags.size_limit = #size_limit.or(::lrlex::DEFAULT_LEX_FLAGS.size_limit);
lex_flags.dfa_size_limit = #dfa_size_limit.or(::lrlex::DEFAULT_LEX_FLAGS.dfa_size_limit);
lex_flags.nest_limit = #nest_limit.or(::lrlex::DEFAULT_LEX_FLAGS.nest_limit);
let lex_flags = lex_flags;
}
};
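        // Quote the start states and rules into the generated source.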
{
let start_states = lexerdef.iter_start_states();
let rules = lexerdef.iter_rules().map(|r| {
let tok_id = QuoteOption(r.tok_id);
let n = QuoteOption(r.name().map(QuoteToString));
let target_state =
QuoteOption(r.target_state().map(|(x, y)| QuoteTuple((x, y))));
let n_span = r.name_span();
let regex = QuoteToString(&r.re_str);
let start_states = r.start_states();
                quote! {
                    Rule::new(::lrlex::unstable_api::InternalPublicApi, #tok_id, #n, #n_span, #regex,
                        vec![#(#start_states),*], #target_state, &lex_flags).unwrap()
                }
});
lexerdef_func_impl.append_all(quote! {
let start_states: Vec<StartState> = vec![#(#start_states),*];
let rules = vec![#(#rules),*];
});
}
let lexerdef_ty = match lexerkind {
LexerKind::LRNonStreamingLexer => {
quote!(::lrlex::LRNonStreamingLexerDef)
}
};
lexerdef_func_impl.append_all(quote! {
#lexerdef_ty::from_rules(start_states, rules)
});
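        // Emit a `pub const N_<NAME>` token ID constant for every token whose name is a
        // valid Rust identifier.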
let mut token_consts = TokenStream::new();
if let Some(rim) = self.rule_ids_map {
let mut rim_sorted = Vec::from_iter(rim.iter());
rim_sorted.sort_by_key(|(k, _)| *k);
for (name, id) in rim_sorted {
if RE_TOKEN_ID.is_match(name) {
let tok_ident = format_ident!("N_{}", name.to_ascii_uppercase());
let storaget =
str::parse::<TokenStream>(type_name::<LexerTypesT::StorageT>()).unwrap();
let tok_const = quote! {
#[allow(dead_code)]
pub const #tok_ident: #storaget = #id;
};
token_consts.extend(tok_const)
}
}
}
let token_consts = token_consts.into_iter();
let out_tokens = {
let lexerdef_param = str::parse::<TokenStream>(type_name::<LexerTypesT>()).unwrap();
let mod_vis = self.visibility;
quote! {
#mod_vis mod #mod_name {
use ::lrlex::{LexerDef, Rule, StartState};
#[allow(dead_code)]
pub fn lexerdef() -> #lexerdef_ty<#lexerdef_param> {
#lexerdef_func_impl
}
#(#token_consts)*
}
}
};
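        // Pretty-print the generated module if possible, and avoid rewriting the output
        // file when its contents are unchanged so that builds stay incremental.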
let unformatted = out_tokens.to_string();
let mut outs = String::new();
let timestamp = env!("VERGEN_BUILD_TIMESTAMP");
write!(outs, "// lrlex build time: {}\n\n", quote!(#timestamp),).ok();
outs.push_str(
&syn::parse_str(&unformatted)
.map(|syntax_tree| prettyplease::unparse(&syntax_tree))
.unwrap_or(unformatted),
);
if let Ok(curs) = read_to_string(outp) {
if curs == outs {
return Ok(CTLexer {
missing_from_lexer,
missing_from_parser,
});
}
}
let mut f = File::create(outp)?;
f.write_all(outs.as_bytes())?;
Ok(CTLexer {
missing_from_lexer,
missing_from_parser,
})
}
#[deprecated(
since = "0.11.0",
note = "Please use lexer_in_src_dir() and build() instead"
)]
#[allow(deprecated)]
pub fn process_file_in_src(
self,
srcp: &str,
) -> Result<(Option<HashSet<String>>, Option<HashSet<String>>), Box<dyn Error>> {
let mut inp = current_dir()?;
inp.push("src");
inp.push(srcp);
let mut outp = PathBuf::new();
outp.push(var("OUT_DIR").unwrap());
outp.push(Path::new(srcp).parent().unwrap().to_str().unwrap());
create_dir_all(&outp)?;
let mut leaf = Path::new(srcp)
.file_name()
.unwrap()
.to_str()
.unwrap()
.to_owned();
write!(leaf, ".{}", RUST_FILE_EXT).ok();
outp.push(leaf);
self.process_file(inp, outp)
}
#[deprecated(
since = "0.11.0",
note = "Please use lexer_in_src_dir() and build() instead"
)]
pub fn process_file<P, Q>(
mut self,
inp: P,
outp: Q,
) -> Result<(Option<HashSet<String>>, Option<HashSet<String>>), Box<dyn Error>>
where
P: AsRef<Path>,
Q: AsRef<Path>,
{
self.lexer_path = Some(inp.as_ref().to_owned());
self.output_path = Some(outp.as_ref().to_owned());
let cl = self.build()?;
Ok((
cl.missing_from_lexer().map(|x| x.to_owned()),
cl.missing_from_parser()
.map(|x| x.iter().map(|(n, _)| n.to_owned()).collect::<HashSet<_>>()),
))
}
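    /// If `true`, tokens used in the grammar but not defined in the lexer do not cause
    /// [`Self::build`] to fail. Defaults to `false`.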
pub fn allow_missing_terms_in_lexer(mut self, allow: bool) -> Self {
self.allow_missing_terms_in_lexer = allow;
self
}
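    /// If `true`, lexer rules not referenced by the grammar are accepted without
    /// warning. Defaults to `false`.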
pub fn allow_missing_tokens_in_parser(mut self, allow: bool) -> Self {
self.allow_missing_tokens_in_parser = allow;
self
}
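    /// If `true`, warnings are treated as errors. Defaults to `false`.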
pub fn warnings_are_errors(mut self, flag: bool) -> Self {
self.warnings_are_errors = flag;
self
}
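    /// If `false`, suppress warnings. Defaults to `true`.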
pub fn show_warnings(mut self, flag: bool) -> Self {
self.show_warnings = flag;
self
}
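    // Each of the following settings mirrors a lex flag: the value is recorded in the
    // header under the matching key, where it takes part in the usual header merging
    // with the lexer's `%grmtools` section.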
pub fn allow_wholeline_comments(mut self, flag: bool) -> Self {
let key = "allow_wholeline_comments".to_string();
self.header.insert(
key,
HeaderValue(
Location::Other("CTLexerBuilder".to_string()),
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
),
);
self
}
pub fn dot_matches_new_line(mut self, flag: bool) -> Self {
let key = "dot_matches_new_line".to_string();
self.header.insert(
key,
HeaderValue(
Location::Other("CTLexerBuilder".to_string()),
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
),
);
self
}
pub fn multi_line(mut self, flag: bool) -> Self {
let key = "multi_line".to_string();
self.header.insert(
key,
HeaderValue(
Location::Other("CTLexerBuilder".to_string()),
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
),
);
self
}
pub fn posix_escapes(mut self, flag: bool) -> Self {
let key = "posix_escapes".to_string();
self.header.insert(
key,
HeaderValue(
Location::Other("CTLexerBuilder".to_string()),
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
),
);
self
}
pub fn octal(mut self, flag: bool) -> Self {
let key = "octal".to_string();
self.header.insert(
key,
HeaderValue(
Location::Other("CTLexerBuilder".to_string()),
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
),
);
self
}
pub fn swap_greed(mut self, flag: bool) -> Self {
let key = "swap_greed".to_string();
self.header.insert(
key,
HeaderValue(
Location::Other("CTLexerBuilder".to_string()),
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
),
);
self
}
pub fn ignore_whitespace(mut self, flag: bool) -> Self {
let key = "ignore_whitespace".to_string();
self.header.insert(
key,
HeaderValue(
Location::Other("CTLexerBuilder".to_string()),
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
),
);
self
}
pub fn unicode(mut self, flag: bool) -> Self {
let key = "unicode".to_string();
self.header.insert(
key,
HeaderValue(
Location::Other("CTLexerBuilder".to_string()),
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
),
);
self
}
pub fn case_insensitive(mut self, flag: bool) -> Self {
let key = "case_insensitive".to_string();
self.header.insert(
key,
HeaderValue(
Location::Other("CTLexerBuilder".to_string()),
Value::Flag(flag, Location::Other("CTLexerBuilder".to_string())),
),
);
self
}
pub fn size_limit(mut self, sz: usize) -> Self {
let key = "size_limit".to_string();
self.header.insert(
key,
HeaderValue(
Location::Other("CTLexerBuilder".to_string()),
Value::Setting(Setting::Num(
sz as u64,
Location::Other("CTLexerBuilder".to_string()),
)),
),
);
self
}
pub fn dfa_size_limit(mut self, sz: usize) -> Self {
let key = "dfa_size_limit".to_string();
self.header.insert(
key,
HeaderValue(
Location::Other("CTLexerBuilder".to_string()),
Value::Setting(Setting::Num(
sz as u64,
Location::Other("CTLexerBuilder".to_string()),
)),
),
);
self
}
pub fn nest_limit(mut self, lim: u32) -> Self {
let key = "nest_limit".to_string();
self.header.insert(
key,
HeaderValue(
Location::Other("CTLexerBuilder".to_string()),
Value::Setting(Setting::Num(
lim as u64,
Location::Other("CTLexerBuilder".to_string()),
)),
),
);
self
}
#[cfg(test)]
pub fn inspect_lexerkind(
mut self,
cb: Box<dyn Fn(&LexerKind) -> Result<(), Box<dyn Error>>>,
) -> Self {
self.inspect_lexerkind_cb = Some(cb);
self
}
}
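/// The result of [`CTLexerBuilder::build`]: records tokens referenced by the grammar
/// but missing from the lexer, and lexer rules never referenced by the grammar.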
pub struct CTLexer {
missing_from_lexer: Option<HashSet<String>>,
missing_from_parser: Option<HashSet<(String, Span)>>,
}
impl CTLexer {
fn missing_from_lexer(&self) -> Option<&HashSet<String>> {
self.missing_from_lexer.as_ref()
}
fn missing_from_parser(&self) -> Option<&HashSet<(String, Span)>> {
self.missing_from_parser.as_ref()
}
}
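/// A builder which generates a Rust module exposing each token's ID as a `T_*`
/// constant, written to `$OUT_DIR/<mod_name>.rs`.
///
/// A rough sketch (the module name is illustrative; `ct_parser` is a built parser):
///
/// ```text
/// CTTokenMapBuilder::<u32>::new("token_map", ct_parser.token_map())
///     .build()?;
/// ```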
#[derive(Debug, Clone)]
pub struct CTTokenMapBuilder<StorageT: Display + ToTokens> {
mod_name: String,
token_map: Vec<(String, TokenStream)>,
rename_map: Option<HashMap<String, String>>,
allow_dead_code: bool,
_marker: PhantomData<StorageT>,
}
impl<StorageT: Display + ToTokens> CTTokenMapBuilder<StorageT> {
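    /// Create a builder for a module named `mod_name` from a map of token names to
    /// token IDs.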
pub fn new(
mod_name: impl Into<String>,
token_map: impl Borrow<HashMap<String, StorageT>>,
) -> Self {
Self {
mod_name: mod_name.into(),
token_map: token_map
.borrow()
.iter()
.map(|(tok_name, tok_value)| (tok_name.clone(), tok_value.to_token_stream()))
.collect(),
rename_map: None,
allow_dead_code: false,
_marker: PhantomData,
}
}
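    /// Provide a map of replacement names for tokens whose names are not valid Rust
    /// identifiers (e.g. mapping `+` to `PLUS`). Unmapped tokens keep their original
    /// names.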
pub fn rename_map<M, I, K, V>(mut self, rename_map: Option<M>) -> Self
where
M: IntoIterator<Item = I>,
I: Borrow<(K, V)>,
K: AsRef<str>,
V: AsRef<str>,
{
self.rename_map = rename_map.map(|rename_map| {
rename_map
.into_iter()
.map(|it| {
let (k, v) = it.borrow();
let k = k.as_ref().into();
let v = v.as_ref().into();
(k, v)
})
.collect()
});
self
}
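    /// If `true`, annotate the generated module with `#[allow(dead_code)]` so that
    /// unreferenced constants do not produce warnings. Defaults to `false`.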
pub fn allow_dead_code(mut self, allow_dead_code: bool) -> Self {
self.allow_dead_code = allow_dead_code;
self
}
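    /// Generate the module, writing it to `$OUT_DIR/<mod_name>.rs`. The file is only
    /// rewritten if its contents have changed.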
pub fn build(&self) -> Result<(), Box<dyn Error>> {
let mut outs = String::new();
let timestamp = env!("VERGEN_BUILD_TIMESTAMP");
let mod_ident = format_ident!("{}", self.mod_name);
write!(outs, "// lrlex build time: {}\n\n", quote!(#timestamp),).ok();
let storaget = str::parse::<TokenStream>(type_name::<StorageT>()).unwrap();
let mut token_map_sorted = self.token_map.clone();
token_map_sorted.sort_by(|(l, _), (r, _)| l.cmp(r));
let (token_array, tokens) = token_map_sorted
.iter()
.map(|(k, id)| {
let name = match &self.rename_map {
Some(rmap) => rmap.get(k).unwrap_or(k),
_ => k,
};
let tok_ident: Ident = syn::parse_str(&format!("T_{}", name.to_ascii_uppercase()))
.map_err(|e| {
format!(
"token name {:?} is not a valid Rust identifier: {}; \
consider renaming it via `CTTokenMapBuilder::rename_map`.",
name, e
)
})?;
Ok((
quote! {
#id,
},
quote! {
pub const #tok_ident: #storaget = #id;
},
))
})
.collect::<Result<(TokenStream, TokenStream), Box<dyn Error>>>()?;
let unused_annotation = if self.allow_dead_code {
quote! {#[allow(dead_code)]}
} else {
quote! {}
};
let unformatted = quote! {
#unused_annotation
mod #mod_ident {
#tokens
#[allow(dead_code)]
pub const TOK_IDS: &[#storaget] = &[#token_array];
}
}
.to_string();
let out_mod = syn::parse_str(&unformatted)
.map(|syntax_tree| prettyplease::unparse(&syntax_tree))
.unwrap_or(unformatted);
outs.push_str(&out_mod);
let mut outp = PathBuf::from(var("OUT_DIR")?);
outp.push(&self.mod_name);
outp.set_extension("rs");
if let Ok(curs) = read_to_string(&outp) {
if curs == outs {
return Ok(());
}
}
let mut f = File::create(outp)?;
f.write_all(outs.as_bytes())?;
Ok(())
}
}
#[deprecated(since = "0.14.0", note = "use `lrlex::CTTokenMapBuilder` instead")]
pub fn ct_token_map<StorageT: Display + ToTokens>(
mod_name: &str,
token_map: impl Borrow<HashMap<String, StorageT>>,
rename_map: Option<&HashMap<&str, &str>>,
) -> Result<(), Box<dyn Error>> {
CTTokenMapBuilder::new(mod_name, token_map)
.rename_map(rename_map)
.allow_dead_code(true)
.build()
}
/// Indent each line of `s` with `indent`, dropping any trailing newline from `s` and
/// terminating every line (including the last) with `\n`.
fn indent(indent: &str, s: &str) -> String {
    s.trim_end_matches('\n')
        .lines()
        .map(|line| format!("{indent}{line}\n"))
        .collect()
}
#[cfg(all(not(target_arch = "wasm32"), test))]
mod test {
use std::fs::File;
use std::io::Write;
use super::{CTLexerBuilder, LexerKind};
#[test]
fn test_grmtools_section_lexerkind() {
let lexerkinds = [
"LRNonStreamingLexer",
"lrnonstreaminglexer",
"LexerKind::lrnonstreaminglexer",
"lexerkind::LRNonStreamingLexer",
];
for (i, kind) in lexerkinds.iter().enumerate() {
let lex_src = format!(
"
%grmtools{{lexerkind: {}}}
%%
. ;
",
kind
);
let lex_path = format!(
"{}/test_grmtools_section_lexerkind_{}.l",
env!("OUT_DIR"),
i
);
let mut l_file = File::create(lex_path.clone()).unwrap();
l_file.write_all(lex_src.as_bytes()).unwrap();
CTLexerBuilder::new()
.output_path(format!("{}.rs", lex_path.clone()))
.lexer_path(lex_path.clone())
.inspect_lexerkind(Box::new(move |lexerkind| {
assert!(matches!(lexerkind, &LexerKind::LRNonStreamingLexer));
Ok(())
}))
.build()
.unwrap();
}
}
#[test]
fn test_invalid_identifier_in_derived_mod_name() {
let mut lex_path = std::path::PathBuf::from(env!("OUT_DIR"));
lex_path.push("contains-a-dash.l");
let mut f = File::create(&lex_path).unwrap();
let _ = f.write_all(
r#"
%%
A "A"
"#
.as_bytes(),
);
match CTLexerBuilder::new()
.output_path(format!("{}.rs", lex_path.display()))
.lexer_path(lex_path.clone())
.build()
{
Ok(_) => panic!("Expected error"),
Err(e) => {
let err_string = e.to_string();
assert_eq!(
err_string,
"CTLexerBuilder::mod_name(\"contains-a-dash_l\") is not a valid rust identifier due to 'unexpected token'"
);
}
}
}
}