Skip to main content

parol/generators/
lexer_generator.rs

1use crate::generators::{GrammarConfig, NamingHelper, generate_terminal_name};
2use crate::generators::lexer_ir::{build_scanner_mode_data, ScannerModeBuildData};
3use crate::{CommonGeneratorConfig, generate_name};
4use anyhow::Result;
5use parol_runtime::TerminalIndex;
6
7use crate::StrVec;
8use std::fmt::Debug;
9
10#[derive(Debug, Default)]
11struct ScannerBuildInfo {
12    scanner_name: String,
13    terminal_mappings: Vec<crate::generators::lexer_ir::TerminalMapping>,
14    transitions: Vec<crate::generators::lexer_ir::ScannerTransition>,
15}
16
17impl ScannerBuildInfo {
18    fn from_mode_data(mode_data: ScannerModeBuildData) -> Self {
19        Self {
20            scanner_name: mode_data.scanner_name,
21            terminal_mappings: mode_data.terminal_mappings,
22            transitions: mode_data.transitions,
23        }
24    }
25}
26
/// Computes the `#` padding required to embed `e` in a raw string literal.
///
/// Returns the shortest run of `#` characters (possibly empty) for which a `"`
/// immediately followed by that run does not occur in `e`. Wrapping `e` as
/// `r<hashes>"<e>"<hashes>` therefore cannot be terminated early by its content.
fn determine_hashes_for_raw_string(e: &str) -> String {
    let mut hashes = String::new();
    // `"` followed by the current hash run is exactly the sequence that would
    // close the raw string prematurely; grow the run until it no longer occurs.
    while e.contains(&format!("\"{hashes}")) {
        hashes.push('#');
    }
    hashes
}
36
37#[derive(Debug, Default)]
38struct LexerData {
39    terminal_names: StrVec,
40    terminal_count: usize,
41    scanner_macro: StrVec,
42}
43
44// ---------------------------------------------------
45// Part of the Public API
46// *Changes will affect crate's version according to semver*
47// ---------------------------------------------------
48///
49/// Generates the lexer part of the parser output file.
50///
51pub fn generate_lexer_source<C: CommonGeneratorConfig>(
52    grammar_config: &GrammarConfig,
53    config: &C,
54) -> Result<String> {
55    let terminal_names = generate_terminal_names(grammar_config);
56    generate_lexer_source_with_terminal_names(grammar_config, config, &terminal_names)
57}
58
59pub(crate) fn generate_lexer_source_with_terminal_names<C: CommonGeneratorConfig>(
60    grammar_config: &GrammarConfig,
61    config: &C,
62    terminal_names: &[String],
63) -> Result<String> {
64    let terminal_count = terminal_names.len();
65    let width = (terminal_count as f32).log10() as usize + 1;
66
67    let mode_data = build_scanner_mode_data(grammar_config, terminal_names)?;
68
69    let macro_start =
70        StrVec::from_iter(vec![format!("\n    {} {{", get_scanner_type_name(config))]);
71    let mut scanner_macro = mode_data
72        .into_iter()
73        .map(ScannerBuildInfo::from_mode_data)
74        .fold(macro_start, |mut acc, e| {
75            acc.push(format!("{e}"));
76            acc
77        });
78    scanner_macro.push("    }".to_string());
79
80    let terminal_names =
81        terminal_names
82            .iter()
83            .enumerate()
84            .fold(StrVec::new(4), |mut acc, (i, e)| {
85                acc.push(format!(r#"/* {i:width$} */ "{e}","#));
86                acc
87            });
88
89    let lexer_data = LexerData {
90        terminal_names,
91        terminal_count,
92        scanner_macro,
93    };
94
95    Ok(format!("{lexer_data}"))
96}
97
98/// Generates all terminal names of a given grammar
99pub fn generate_terminal_names(grammar_config: &GrammarConfig) -> Vec<String> {
100    grammar_config
101        .generate_augmented_terminals()
102        .iter()
103        .enumerate()
104        .fold(Vec::new(), |mut acc, (i, e)| {
105            let n = generate_name(
106                acc.iter(),
107                generate_terminal_name(
108                    &e.0,
109                    Some(i as TerminalIndex),
110                    e.1.as_ref(),
111                    &grammar_config.cfg,
112                ),
113            );
114            acc.push(n);
115            acc
116        })
117}
118
119fn get_scanner_type_name<C: CommonGeneratorConfig>(config: &C) -> String {
120    let scanner_type_name = NamingHelper::to_upper_camel_case(config.user_type_name());
121    scanner_type_name + "Scanner"
122}
123
124impl std::fmt::Display for LexerData {
125    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
126        let LexerData {
127            terminal_names,
128            terminal_count,
129            scanner_macro,
130        } = self;
131
132        let blank_line = "\n\n";
133        f.write_fmt(ume::ume! {
134        #blank_line
135        // pub const TERMINALS: &[(&str, Option<(bool, &str)>); #terminal_count] = &[
136        // #augmented_terminals];
137        #blank_line
138        pub const TERMINAL_NAMES: &[&str; #terminal_count] = &[
139        #terminal_names];
140        #blank_line
141        })?;
142        f.write_fmt(format_args!("scanner! {{{scanner_macro}}}"))
143    }
144}
145
146impl std::fmt::Display for ScannerBuildInfo {
147    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
148        let ScannerBuildInfo {
149            scanner_name,
150            terminal_mappings,
151            transitions,
152        } = self;
153
154        let tokens = terminal_mappings
155            .iter()
156            .fold(StrVec::new(12), |mut acc, (rx, i, l, tn)| {
157                // Generate the token definition
158                //   No lookahead expression
159                //     token r"World" => 10;
160                //   With positive lookahead expression
161                //     token r"World" followed by r"!" => 11;
162                //   With negative lookahead expression
163                //     token r"!" not followed by r"!" => 12;
164
165                let hashes = determine_hashes_for_raw_string(rx);
166                let terminal_name_comment = if tn.is_empty() {
167                    String::new()
168                } else {
169                    format!(r#" // "{tn}""#)
170                };
171                let lookahead = if let Some((is_positive, pattern)) = l {
172                    let hashes = determine_hashes_for_raw_string(pattern);
173                    if *is_positive {
174                        format!(" followed by r{hashes}\"{pattern}\"{hashes}")
175                    } else {
176                        format!(" not followed by r{hashes}\"{pattern}\"{hashes}")
177                    }
178                } else {
179                    String::new()
180                };
181
182                let token = format!(
183                    r#"token r{hashes}"{rx}"{hashes} {lookahead}=> {i};{terminal_name_comment}"#
184                );
185
186                acc.push(token);
187                acc
188            });
189
190        let transitions = transitions.iter().fold(StrVec::new(12), |mut acc, (i, e)| {
191            // Generate the transition definition
192            //   on 10 enter World;
193            acc.push(format!(r#"on {i} {e};"#));
194            acc
195        });
196
197        // Generate the scanner's part of the macro code
198        f.write_fmt(format_args!("        mode {scanner_name} {{\n"))?;
199        f.write_fmt(format_args!("{tokens}"))?;
200        f.write_fmt(format_args!("{transitions}"))?;
201        f.write_str("        }")
202    }
203}