use crate::generators::{GrammarConfig, NamingHelper, generate_terminal_name};
use crate::generators::lexer_ir::{build_scanner_mode_data, ScannerModeBuildData};
use crate::{CommonGeneratorConfig, generate_name};
use anyhow::Result;
use parol_runtime::TerminalIndex;
use crate::StrVec;
use std::fmt::Debug;
/// Data needed to render a single `mode` block of the generated `scanner!` macro.
///
/// Instances are created from a `ScannerModeBuildData` and turned into text via
/// the `Display` implementation of this type.
#[derive(Debug, Default)]
struct ScannerBuildInfo {
/// Name written after the `mode` keyword in the rendered block.
scanner_name: String,
/// One entry per `token` line. Each entry is read as a 4-tuple whose parts are
/// emitted as the raw-string pattern, the value after `=>`, an optional
/// `followed by` / `not followed by` pattern and a trailing name comment.
terminal_mappings: Vec<crate::generators::lexer_ir::TerminalMapping>,
/// One entry per `on ... ...;` line. Each entry is read as a pair of displayable values.
transitions: Vec<crate::generators::lexer_ir::ScannerTransition>,
}
impl ScannerBuildInfo {
fn from_mode_data(mode_data: ScannerModeBuildData) -> Self {
Self {
scanner_name: mode_data.scanner_name,
terminal_mappings: mode_data.terminal_mappings,
transitions: mode_data.transitions,
}
}
}
/// Computes the run of `#` characters needed to wrap `e` in a raw string literal.
///
/// The probe starts as a lone `"` and gains one `#` for as long as it still
/// occurs somewhere in `e`. The result contains one `#` per growth step, so the
/// sequence `"` followed by the returned hashes does not occur in `e`.
/// An input without any `"` yields an empty string.
fn determine_hashes_for_raw_string(e: &str) -> String {
    let mut closing_probe = String::from("\"");
    let mut hashes = String::new();
    loop {
        if !e.contains(closing_probe.as_str()) {
            return hashes;
        }
        // The current candidate delimiter clashes with the content: lengthen it.
        closing_probe.push('#');
        hashes.push('#');
    }
}
/// Pre-rendered pieces of the generated lexer source.
///
/// The `Display` implementation of this type assembles them into the final text.
#[derive(Debug, Default)]
struct LexerData {
/// Lines placed inside the `TERMINAL_NAMES` array literal.
terminal_names: StrVec,
/// Array length written into the type of `TERMINAL_NAMES`.
terminal_count: usize,
/// Text placed between the braces of the `scanner!` macro invocation.
scanner_macro: StrVec,
}
/// Generates the lexer source for the given grammar configuration.
///
/// The terminal names are computed with [`generate_terminal_names`] and passed
/// on to `generate_lexer_source_with_terminal_names`, whose result is returned
/// unchanged.
///
/// # Errors
///
/// Returns whatever error `generate_lexer_source_with_terminal_names` reports.
pub fn generate_lexer_source<C: CommonGeneratorConfig>(
    grammar_config: &GrammarConfig,
    config: &C,
) -> Result<String> {
    generate_lexer_source_with_terminal_names(
        grammar_config,
        config,
        &generate_terminal_names(grammar_config),
    )
}
/// Renders the lexer source from an already computed list of terminal names.
///
/// The result is the `Display` output of a `LexerData` value: the
/// `TERMINAL_NAMES` table followed by the `scanner!` macro invocation, which
/// contains one rendered `mode` block per entry returned by
/// `build_scanner_mode_data`.
///
/// # Errors
///
/// Propagates any error returned by `build_scanner_mode_data`.
pub(crate) fn generate_lexer_source_with_terminal_names<C: CommonGeneratorConfig>(
    grammar_config: &GrammarConfig,
    config: &C,
    terminal_names: &[String],
) -> Result<String> {
    let terminal_count = terminal_names.len();
    // Padding width of the index comments; derived from the number of
    // terminals, not from the largest index.
    let width = (terminal_count as f32).log10() as usize + 1;

    // Build the mode data first so that a failure returns before anything is rendered.
    let modes = build_scanner_mode_data(grammar_config, terminal_names)?;

    // Macro body: opening line, one block per scanner mode, closing brace.
    let mut scanner_macro =
        StrVec::from_iter(vec![format!("\n {} {{", get_scanner_type_name(config))]);
    modes
        .into_iter()
        .map(ScannerBuildInfo::from_mode_data)
        .for_each(|mode_info| scanner_macro.push(mode_info.to_string()));
    scanner_macro.push(" }".to_string());

    // One array entry per terminal name, prefixed with its index as a comment.
    let mut name_table = StrVec::new(4);
    for (i, e) in terminal_names.iter().enumerate() {
        name_table.push(format!(r#"/* {i:width$} */ "{e}","#));
    }

    let lexer_data = LexerData {
        terminal_names: name_table,
        terminal_count,
        scanner_macro,
    };
    Ok(lexer_data.to_string())
}
/// Builds one name per augmented terminal of the grammar, in terminal order.
///
/// For every terminal a candidate is produced by `generate_terminal_name` and
/// then passed through `generate_name` together with the names collected so
/// far (presumably to avoid clashes with earlier names; confirm against
/// `generate_name`). The outcome is appended to the result.
pub fn generate_terminal_names(grammar_config: &GrammarConfig) -> Vec<String> {
    let augmented_terminals = grammar_config.generate_augmented_terminals();
    let mut names: Vec<String> = Vec::new();
    for (i, e) in augmented_terminals.iter().enumerate() {
        let name = generate_name(
            names.iter(),
            generate_terminal_name(
                &e.0,
                Some(i as TerminalIndex),
                e.1.as_ref(),
                &grammar_config.cfg,
            ),
        );
        names.push(name);
    }
    names
}
/// Returns the scanner type name: the configured user type name converted by
/// `NamingHelper::to_upper_camel_case`, with the suffix `Scanner` appended.
fn get_scanner_type_name<C: CommonGeneratorConfig>(config: &C) -> String {
    let mut type_name = NamingHelper::to_upper_camel_case(config.user_type_name());
    type_name.push_str("Scanner");
    type_name
}
// Renders the complete lexer source text: the `TERMINAL_NAMES` constant first,
// then the `scanner!` macro invocation.
impl std::fmt::Display for LexerData {
// Writes both parts to `f` and returns the first formatting error, if any.
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// Bind every field to a local of the same name so the templates below can
// refer to them directly.
let LexerData {
terminal_names,
terminal_count,
scanner_macro,
} = self;
// Note: despite its name this holds two newline characters.
let blank_line = "\n\n";
// Part 1: the `TERMINAL_NAMES` array, produced by the `ume::ume!` template macro.
// NOTE(review): presumably `#name` inside the template interpolates the local
// of that name — confirm against the `ume` crate documentation.
f.write_fmt(ume::ume! {
#blank_line
#blank_line
pub const TERMINAL_NAMES: &[&str; #terminal_count] = &[
#terminal_names];
#blank_line
})?;
// Part 2: the pre-rendered macro body wrapped in `scanner! { ... }`
// (`{{` / `}}` are escaped literal braces).
f.write_fmt(format_args!("scanner! {{{scanner_macro}}}"))
}
}
/// Renders one scanner mode as a `mode <name> { ... }` block: first all
/// `token` lines, then all `on` transition lines, then the closing brace.
impl std::fmt::Display for ScannerBuildInfo {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let scanner_name = &self.scanner_name;

        // One `token` line per terminal mapping.
        let mut token_lines = StrVec::new(12);
        for (rx, i, l, tn) in self.terminal_mappings.iter() {
            // Optional lookahead clause; its pattern gets its own raw-string hashes.
            let lookahead = match l {
                Some((is_positive, pattern)) => {
                    let hashes = determine_hashes_for_raw_string(pattern);
                    if *is_positive {
                        format!(" followed by r{hashes}\"{pattern}\"{hashes}")
                    } else {
                        format!(" not followed by r{hashes}\"{pattern}\"{hashes}")
                    }
                }
                None => String::new(),
            };
            // The trailing comment is only emitted for a non-empty terminal name.
            let terminal_name_comment = if tn.is_empty() {
                String::new()
            } else {
                format!(r#" // "{tn}""#)
            };
            let hashes = determine_hashes_for_raw_string(rx);
            // NOTE(review): with a lookahead present the emitted line has two
            // spaces before `followed`/`not followed` and none before `=>`.
            // The spacing is kept as is so the generated text does not change.
            token_lines.push(format!(
                r#"token r{hashes}"{rx}"{hashes} {lookahead}=> {i};{terminal_name_comment}"#
            ));
        }

        // One `on` line per transition.
        let mut transition_lines = StrVec::new(12);
        for (i, e) in self.transitions.iter() {
            transition_lines.push(format!(r#"on {i} {e};"#));
        }

        writeln!(f, " mode {scanner_name} {{")?;
        write!(f, "{token_lines}")?;
        write!(f, "{transition_lines}")?;
        f.write_str(" }")
    }
}