parol/generators/
lexer_generator.rs1use crate::generators::{GrammarConfig, NamingHelper, generate_terminal_name};
2use crate::generators::lexer_ir::{build_scanner_mode_data, ScannerModeBuildData};
3use crate::{CommonGeneratorConfig, generate_name};
4use anyhow::Result;
5use parol_runtime::TerminalIndex;
6
7use crate::StrVec;
8use std::fmt::Debug;
9
10#[derive(Debug, Default)]
11struct ScannerBuildInfo {
12 scanner_name: String,
13 terminal_mappings: Vec<crate::generators::lexer_ir::TerminalMapping>,
14 transitions: Vec<crate::generators::lexer_ir::ScannerTransition>,
15}
16
17impl ScannerBuildInfo {
18 fn from_mode_data(mode_data: ScannerModeBuildData) -> Self {
19 Self {
20 scanner_name: mode_data.scanner_name,
21 terminal_mappings: mode_data.terminal_mappings,
22 transitions: mode_data.transitions,
23 }
24 }
25}
26
/// Returns the `#` sequence required to embed `e` in a Rust raw string literal.
///
/// A raw string `r#…#"…"#…#` ends at the first `"` that is followed by as many
/// hashes as the opening delimiter has. The result therefore contains one hash
/// more than the longest run of `#` that directly follows any `"` in `e`, and
/// is empty when `e` contains no `"` at all.
fn determine_hashes_for_raw_string(e: &str) -> String {
    let hash_count = e
        .match_indices('"')
        .map(|(quote_pos, _)| {
            // `"` is a single byte, so `quote_pos + 1` is always a char boundary.
            let trailing_hashes = e[quote_pos + 1..]
                .bytes()
                .take_while(|byte| *byte == b'#')
                .count();
            trailing_hashes + 1
        })
        .max()
        .unwrap_or(0);

    "#".repeat(hash_count)
}
36
37#[derive(Debug, Default)]
38struct LexerData {
39 terminal_names: StrVec,
40 terminal_count: usize,
41 scanner_macro: StrVec,
42}
43
44pub fn generate_lexer_source<C: CommonGeneratorConfig>(
52 grammar_config: &GrammarConfig,
53 config: &C,
54) -> Result<String> {
55 let terminal_names = generate_terminal_names(grammar_config);
56 generate_lexer_source_with_terminal_names(grammar_config, config, &terminal_names)
57}
58
59pub(crate) fn generate_lexer_source_with_terminal_names<C: CommonGeneratorConfig>(
60 grammar_config: &GrammarConfig,
61 config: &C,
62 terminal_names: &[String],
63) -> Result<String> {
64 let terminal_count = terminal_names.len();
65 let width = (terminal_count as f32).log10() as usize + 1;
66
67 let mode_data = build_scanner_mode_data(grammar_config, terminal_names)?;
68
69 let macro_start =
70 StrVec::from_iter(vec![format!("\n {} {{", get_scanner_type_name(config))]);
71 let mut scanner_macro = mode_data
72 .into_iter()
73 .map(ScannerBuildInfo::from_mode_data)
74 .fold(macro_start, |mut acc, e| {
75 acc.push(format!("{e}"));
76 acc
77 });
78 scanner_macro.push(" }".to_string());
79
80 let terminal_names =
81 terminal_names
82 .iter()
83 .enumerate()
84 .fold(StrVec::new(4), |mut acc, (i, e)| {
85 acc.push(format!(r#"/* {i:width$} */ "{e}","#));
86 acc
87 });
88
89 let lexer_data = LexerData {
90 terminal_names,
91 terminal_count,
92 scanner_macro,
93 };
94
95 Ok(format!("{lexer_data}"))
96}
97
98pub fn generate_terminal_names(grammar_config: &GrammarConfig) -> Vec<String> {
100 grammar_config
101 .generate_augmented_terminals()
102 .iter()
103 .enumerate()
104 .fold(Vec::new(), |mut acc, (i, e)| {
105 let n = generate_name(
106 acc.iter(),
107 generate_terminal_name(
108 &e.0,
109 Some(i as TerminalIndex),
110 e.1.as_ref(),
111 &grammar_config.cfg,
112 ),
113 );
114 acc.push(n);
115 acc
116 })
117}
118
119fn get_scanner_type_name<C: CommonGeneratorConfig>(config: &C) -> String {
120 let scanner_type_name = NamingHelper::to_upper_camel_case(config.user_type_name());
121 scanner_type_name + "Scanner"
122}
123
124impl std::fmt::Display for LexerData {
125 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
126 let LexerData {
127 terminal_names,
128 terminal_count,
129 scanner_macro,
130 } = self;
131
132 let blank_line = "\n\n";
133 f.write_fmt(ume::ume! {
134 #blank_line
135 #blank_line
138 pub const TERMINAL_NAMES: &[&str; #terminal_count] = &[
139 #terminal_names];
140 #blank_line
141 })?;
142 f.write_fmt(format_args!("scanner! {{{scanner_macro}}}"))
143 }
144}
145
146impl std::fmt::Display for ScannerBuildInfo {
147 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
148 let ScannerBuildInfo {
149 scanner_name,
150 terminal_mappings,
151 transitions,
152 } = self;
153
154 let tokens = terminal_mappings
155 .iter()
156 .fold(StrVec::new(12), |mut acc, (rx, i, l, tn)| {
157 let hashes = determine_hashes_for_raw_string(rx);
166 let terminal_name_comment = if tn.is_empty() {
167 String::new()
168 } else {
169 format!(r#" // "{tn}""#)
170 };
171 let lookahead = if let Some((is_positive, pattern)) = l {
172 let hashes = determine_hashes_for_raw_string(pattern);
173 if *is_positive {
174 format!(" followed by r{hashes}\"{pattern}\"{hashes}")
175 } else {
176 format!(" not followed by r{hashes}\"{pattern}\"{hashes}")
177 }
178 } else {
179 String::new()
180 };
181
182 let token = format!(
183 r#"token r{hashes}"{rx}"{hashes} {lookahead}=> {i};{terminal_name_comment}"#
184 );
185
186 acc.push(token);
187 acc
188 });
189
190 let transitions = transitions.iter().fold(StrVec::new(12), |mut acc, (i, e)| {
191 acc.push(format!(r#"on {i} {e};"#));
194 acc
195 });
196
197 f.write_fmt(format_args!(" mode {scanner_name} {{\n"))?;
199 f.write_fmt(format_args!("{tokens}"))?;
200 f.write_fmt(format_args!("{transitions}"))?;
201 f.write_str(" }")
202 }
203}