#![deny(missing_docs)]
use std::collections::{BTreeMap, BTreeSet};
use std::convert::TryFrom;
use std::path::{Path, PathBuf};
use std::{env, fs};
use crate::config::{CommonGeneratorConfig, ParserGeneratorConfig, UserTraitGeneratorConfig};
use crate::generators::export_node_types::{NodeTypesExporter, NodeTypesInfo};
use crate::generators::lexer_backend::{
CSharpLexerBackend, RustLexerBackend, generate_lexer_source_for_language,
};
use crate::generators::lexer_ir::LexerGenerationIR;
use crate::generators::node_kind_enum_generator::NodeKindTypesGenerator;
use crate::generators::parser_backend::{
CSharpParserBackend, RustParserBackend, generate_parser_source_for_language,
};
use crate::generators::parser_ir::{ParserAlgorithmIR, ParserGenerationIR};
use crate::generators::user_trait_backend::{
CSharpUserTraitBackend, RustUserTraitBackend, generate_user_trait_source_for_language,
};
use crate::generators::user_trait_ir::UserTraitGenerationIR;
use crate::parser::GrammarType;
use crate::{GrammarConfig, GrammarTypeInfo, LRParseTable, LookaheadDFA, MAX_K, ParolGrammar};
use clap::{Parser, ValueEnum};
use parol_macros::parol;
use parol_runtime::{ParseTree, Result};
/// Inner attributes that can be requested from the user trait generator.
///
/// The selected attributes are exposed through `UserTraitGeneratorConfig::inner_attributes`;
/// the `Display` implementation of this type renders a variant as the attribute's source text.
#[derive(Clone, Debug, Parser, ValueEnum)]
pub enum InnerAttributes {
/// Displayed as `#![allow(clippy::too_many_arguments)]`.
AllowTooManyArguments,
}
impl std::fmt::Display for InnerAttributes {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
InnerAttributes::AllowTooManyArguments => {
write!(f, "#![allow(clippy::too_many_arguments)]")
}
}
}
}
/// Maximum lookahead a freshly created [`Builder`] starts with.
pub const DEFAULT_MAX_LOOKAHEAD: usize = 5;
/// Module name a freshly created [`Builder`] starts with.
pub const DEFAULT_MODULE_NAME: &str = "grammar";
/// User type name a freshly created [`Builder`] starts with.
pub const DEFAULT_USER_TYPE_NAME: &str = "Grammar";
/// Tells whether the current process looks like a cargo build script.
///
/// Returns `true` only if both the `OUT_DIR` and the `CARGO_MANIFEST_DIR` environment
/// variables are set (their values are not inspected).
fn is_build_script() -> bool {
    ["OUT_DIR", "CARGO_MANIFEST_DIR"]
        .iter()
        .all(|name| env::var_os(name).is_some())
}
/// Collects the configuration for generating a parser from a grammar file.
///
/// A builder is created with [`Builder::with_explicit_output_dir`] or
/// [`Builder::with_cargo_script_output`], adjusted through its setter methods and turned
/// into a [`GrammarGenerator`] by [`Builder::begin_generation_with`].
#[derive(Clone)]
pub struct Builder {
/// Directory onto which the output file names given to the setters are joined.
output_dir: PathBuf,
/// Grammar file to read; generation cannot begin while this is `None`.
grammar_file: Option<PathBuf>,
/// Target of the rendered grammar written during the expansion step, if set.
expanded_grammar_output_file: Option<PathBuf>,
/// Target of the generated parser source, if set.
parser_output_file: Option<PathBuf>,
/// Target of the generated user trait source, if set.
actions_output_file: Option<PathBuf>,
/// Target of the generated node kind types, if set.
node_kind_enum_output_file: Option<PathBuf>,
/// User type name; handed to `GrammarTypeInfo::try_new` and reported through
/// `CommonGeneratorConfig::user_type_name`.
pub(crate) user_type_name: String,
/// Module name reported through `CommonGeneratorConfig::module_name`.
pub(crate) module_name: String,
/// When `true`, a `cargo:rerun-if-changed` line is printed for the grammar file.
cargo_integration: bool,
/// Maximum lookahead handed to the lookahead DFA calculation of LL(k) grammars.
max_lookahead: usize,
/// When `true`, beginning a generation fails if the parser or the actions output
/// file has not been configured.
output_sanity_checks: bool,
/// Flag reported through `CommonGeneratorConfig::minimize_boxed_types`.
pub(crate) minimize_boxed_types: bool,
/// When `true`, intermediate results and unwritten sources are printed to stdout.
debug_verbose: bool,
/// Flag reported through `CommonGeneratorConfig::range`.
range: bool,
/// Flag reported through `CommonGeneratorConfig::node_kind_enums`.
enum_kind: bool,
/// Attributes reported through `UserTraitGeneratorConfig::inner_attributes`.
inner_attributes: Vec<InnerAttributes>,
/// Flag reported through `ParserGeneratorConfig::trim_parse_tree`.
pub(crate) trim_parse_tree: bool,
/// Flag reported through `ParserGeneratorConfig::recovery_disabled`.
pub(crate) disable_recovery: bool,
/// Target language; selects the lexer, parser and user trait backends.
pub(crate) language: crate::config::Language,
}
impl Builder {
    /// Creates a builder preconfigured for use from a cargo build script.
    ///
    /// The output directory is taken from the `OUT_DIR` environment variable and the output
    /// files are named `parser.rs`, `grammar_trait.rs`, `node_kind.rs` and `grammar-exp.par`.
    ///
    /// # Panics
    ///
    /// Panics unless both `OUT_DIR` and `CARGO_MANIFEST_DIR` are set in the environment.
    pub fn with_cargo_script_output() -> Self {
        assert!(is_build_script(), "Cannot use outside of a cargo script");
        let mut configured = Self::with_explicit_output_dir(env::var_os("OUT_DIR").unwrap());
        configured.parser_output_file("parser.rs");
        configured.actions_output_file("grammar_trait.rs");
        configured.node_kind_enums_output_file("node_kind.rs");
        configured.expanded_grammar_output_file("grammar-exp.par");
        // The constructor derives this flag from the same environment check as above.
        assert!(configured.cargo_integration);
        configured
    }

    /// Joins `p` onto the configured output directory.
    fn resolve_output_path(&self, p: impl AsRef<Path>) -> PathBuf {
        let mut resolved = self.output_dir.clone();
        resolved.push(p);
        resolved
    }

    /// Creates a builder that resolves all output files against the directory `output`.
    ///
    /// No grammar file and no output files are configured yet. Cargo integration is
    /// enabled if the process looks like a cargo build script, the language is Rust,
    /// output sanity checks are on and all remaining flags are off.
    pub fn with_explicit_output_dir(output: impl AsRef<Path>) -> Self {
        Builder {
            // Input and output locations
            output_dir: output.as_ref().to_path_buf(),
            grammar_file: None,
            expanded_grammar_output_file: None,
            parser_output_file: None,
            actions_output_file: None,
            node_kind_enum_output_file: None,
            // Naming
            user_type_name: DEFAULT_USER_TYPE_NAME.to_owned(),
            module_name: DEFAULT_MODULE_NAME.to_owned(),
            // Generation options
            cargo_integration: is_build_script(),
            max_lookahead: DEFAULT_MAX_LOOKAHEAD,
            output_sanity_checks: true,
            minimize_boxed_types: false,
            debug_verbose: false,
            range: false,
            enum_kind: false,
            inner_attributes: Vec::new(),
            trim_parse_tree: false,
            disable_recovery: false,
            language: crate::config::Language::Rust,
        }
    }

    /// Turns off the check that a parser and an actions output file are configured.
    pub fn disable_output_sanity_checks(&mut self) -> &mut Self {
        self.output_sanity_checks = false;
        self
    }

    /// Sets the file the generated parser source is written to.
    ///
    /// `p` is joined onto the output directory.
    pub fn parser_output_file(&mut self, p: impl AsRef<Path>) -> &mut Self {
        let target = self.resolve_output_path(p);
        self.parser_output_file = Some(target);
        self
    }

    /// Sets the file the generated user trait source is written to.
    ///
    /// `p` is joined onto the output directory.
    pub fn actions_output_file(&mut self, p: impl AsRef<Path>) -> &mut Self {
        let target = self.resolve_output_path(p);
        self.actions_output_file = Some(target);
        self
    }

    /// Sets the file the rendered grammar is written to during expansion.
    ///
    /// `p` is joined onto the output directory.
    pub fn expanded_grammar_output_file(&mut self, p: impl AsRef<Path>) -> &mut Self {
        let target = self.resolve_output_path(p);
        self.expanded_grammar_output_file = Some(target);
        self
    }

    /// Sets the file the generated node kind types are written to.
    ///
    /// `p` is joined onto the output directory.
    pub fn node_kind_enums_output_file(&mut self, p: impl AsRef<Path>) -> &mut Self {
        let target = self.resolve_output_path(p);
        self.node_kind_enum_output_file = Some(target);
        self
    }

    /// Enables or disables printing of `cargo:rerun-if-changed` for the grammar file.
    pub fn set_cargo_integration(&mut self, enabled: bool) -> &mut Self {
        self.cargo_integration = enabled;
        self
    }

    /// Sets the grammar file to generate from. The path is stored as given.
    pub fn grammar_file(&mut self, grammar: impl AsRef<Path>) -> &mut Self {
        self.grammar_file = Some(grammar.as_ref().to_path_buf());
        self
    }

    /// Sets the user type name.
    pub fn user_type_name(&mut self, name: &str) -> &mut Self {
        self.user_type_name = name.to_owned();
        self
    }

    /// Sets the module name.
    pub fn user_trait_module_name(&mut self, name: &str) -> &mut Self {
        self.module_name = name.to_owned();
        self
    }

    /// Sets the maximum lookahead used for LL(k) grammars.
    ///
    /// # Errors
    ///
    /// Returns [`BuilderError::LookaheadTooLarge`] and leaves the builder unchanged
    /// if `k` exceeds `MAX_K`.
    pub fn max_lookahead(&mut self, k: usize) -> std::result::Result<&mut Self, BuilderError> {
        if k <= MAX_K {
            self.max_lookahead = k;
            Ok(self)
        } else {
            Err(BuilderError::LookaheadTooLarge)
        }
    }

    /// Enables verbose debug printing to stdout.
    #[doc(hidden)]
    pub fn debug_verbose(&mut self) -> &mut Self {
        self.debug_verbose = true;
        self
    }

    /// Switches on the `range` option reported through `CommonGeneratorConfig::range`.
    pub fn range(&mut self) -> &mut Self {
        self.range = true;
        self
    }

    /// Switches on the option reported through `CommonGeneratorConfig::node_kind_enums`.
    pub fn node_kind_enums(&mut self) -> &mut Self {
        self.enum_kind = true;
        self
    }

    /// Replaces the list of inner attributes handed to the user trait generator.
    pub fn inner_attributes(&mut self, inner_attributes: Vec<InnerAttributes>) -> &mut Self {
        self.inner_attributes = inner_attributes;
        self
    }

    /// Switches on the option reported through `CommonGeneratorConfig::minimize_boxed_types`.
    pub fn minimize_boxed_types(&mut self) -> &mut Self {
        self.minimize_boxed_types = true;
        self
    }

    /// Switches on the option reported through `ParserGeneratorConfig::trim_parse_tree`.
    pub fn trim_parse_tree(&mut self) -> &mut Self {
        self.trim_parse_tree = true;
        self
    }

    /// Switches on the option reported through `ParserGeneratorConfig::recovery_disabled`.
    pub fn disable_recovery(&mut self) -> &mut Self {
        self.disable_recovery = true;
        self
    }

    /// Selects the target language of the generated sources.
    pub fn language(&mut self, language: crate::config::Language) -> &mut Self {
        self.language = language;
        self
    }

    /// Validates the configuration and creates a [`GrammarGenerator`] from it.
    ///
    /// The generator works on a clone of this builder, so later changes to the builder
    /// do not affect it. `listener`, if given, is notified during generation.
    ///
    /// # Errors
    ///
    /// * [`BuilderError::MissingGrammarFile`] if no grammar file was set.
    /// * [`BuilderError::MissingParserOutputFile`] or
    ///   [`BuilderError::MissingActionOutputFile`] if output sanity checks are enabled
    ///   and the respective output file was not set.
    pub fn begin_generation_with<'l>(
        &mut self,
        listener: Option<&'l mut dyn BuildListener>,
    ) -> std::result::Result<GrammarGenerator<'l>, BuilderError> {
        let grammar_file = self
            .grammar_file
            .clone()
            .ok_or(BuilderError::MissingGrammarFile)?;
        if self.output_sanity_checks {
            // The parser output is checked first, so it wins when both are missing.
            if self.parser_output_file.is_none() {
                return Err(BuilderError::MissingParserOutputFile);
            }
            if self.actions_output_file.is_none() {
                return Err(BuilderError::MissingActionOutputFile);
            }
        }
        let generator = GrammarGenerator {
            listener: MaybeBuildListener(listener),
            grammar_file,
            builder: self.clone(),
            state: None,
            grammar_config: None,
            lookahead_dfa_s: None,
            parse_table: None,
            type_info: None,
        };
        Ok(generator)
    }

    /// Runs the complete generation without a listener.
    ///
    /// # Errors
    ///
    /// Fails if the builder is misconfigured or if any generation step fails.
    pub fn generate_parser(&mut self) -> Result<()> {
        let mut generator = self
            .begin_generation_with(None)
            .map_err(|e| parol!("Misconfigured parol generation: {}", e))?;
        generator.generate_parser()
    }

    /// Runs the complete generation without a listener and returns the node type infos.
    ///
    /// # Errors
    ///
    /// Fails if the builder is misconfigured or if any generation step fails.
    pub fn generate_parser_and_export_node_infos(&mut self) -> Result<NodeTypesInfo> {
        let mut generator = self
            .begin_generation_with(None)
            .map_err(|e| parol!("Misconfigured parol generation: {}", e))?;
        generator.generate_parser_and_export_node_infos()
    }
}
/// Exposes the builder's general settings to the generators.
impl CommonGeneratorConfig for Builder {
    /// The configured user type name.
    fn user_type_name(&self) -> &str {
        self.user_type_name.as_str()
    }

    /// The configured module name.
    fn module_name(&self) -> &str {
        self.module_name.as_str()
    }

    /// Whether `Builder::minimize_boxed_types` was called.
    fn minimize_boxed_types(&self) -> bool {
        self.minimize_boxed_types
    }

    /// Whether `Builder::range` was called.
    fn range(&self) -> bool {
        self.range
    }

    /// Whether `Builder::node_kind_enums` was called.
    fn node_kind_enums(&self) -> bool {
        self.enum_kind
    }

    /// The configured target language.
    fn language(&self) -> crate::config::Language {
        self.language
    }
}
/// Exposes the builder's parser-related settings to the parser generator.
impl ParserGeneratorConfig for Builder {
/// Whether `Builder::trim_parse_tree` was called.
fn trim_parse_tree(&self) -> bool {
self.trim_parse_tree
}
/// Whether `Builder::disable_recovery` was called.
fn recovery_disabled(&self) -> bool {
self.disable_recovery
}
}
/// Exposes the builder's user-trait-related settings to the user trait generator.
impl UserTraitGeneratorConfig for Builder {
    /// The inner attributes configured via `Builder::inner_attributes`, in the given order.
    fn inner_attributes(&self) -> &[InnerAttributes] {
        self.inner_attributes.as_slice()
    }
}
/// Drives the generation steps for one grammar file.
///
/// The steps are `parse`, `expand`, `post_process` and `write_output`, in this order;
/// each step asserts that the previous one has completed.
pub struct GrammarGenerator<'l> {
/// Optional listener that is notified about intermediate results.
listener: MaybeBuildListener<'l>,
/// Path of the grammar file read by the `parse` step.
pub(crate) grammar_file: PathBuf,
/// Clone of the builder configuration taken when the generation began.
builder: Builder,
/// Last completed step; `None` until `parse` has succeeded.
state: Option<State>,
/// Grammar configuration; set by `parse` and updated by the later steps.
pub(crate) grammar_config: Option<GrammarConfig>,
/// Lookahead DFAs; set by `post_process` for `GrammarType::LLK` grammars.
lookahead_dfa_s: Option<BTreeMap<String, LookaheadDFA>>,
/// Parse table; set by `post_process` for `GrammarType::LALR1` grammars.
parse_table: Option<LRParseTable>,
/// Type information; set by `write_output` and read when exporting node infos.
type_info: Option<GrammarTypeInfo>,
}
impl GrammarGenerator<'_> {
pub fn generate_parser(&mut self) -> Result<()> {
self.parse()?;
self.expand()?;
self.post_process()?;
self.write_output()?;
Ok(())
}
pub fn generate_parser_and_export_node_infos(&mut self) -> Result<NodeTypesInfo> {
self.parse()?;
self.expand()?;
self.post_process()?;
self.write_output()?;
self.export_node_infos()
}
#[doc(hidden)]
pub fn parse(&mut self) -> Result<()> {
assert_eq!(self.state, None);
let input = fs::read_to_string(&self.grammar_file).map_err(|e| {
parol!(
"Can't read grammar file {}: {}",
self.grammar_file.display(),
e
)
})?;
if self.builder.cargo_integration {
println!("cargo:rerun-if-changed={}", self.grammar_file.display());
}
let mut parol_grammar = ParolGrammar::new();
let syntax_tree = crate::parser::parse(&input, &self.grammar_file, &mut parol_grammar)?;
self.listener
.on_initial_grammar_parse(&syntax_tree, &input, &parol_grammar)?;
self.grammar_config = Some(GrammarConfig::try_from(parol_grammar)?);
let _grammar_config = self.grammar_config.as_ref().unwrap();
self.state = Some(State::Parsed);
Ok(())
}
#[doc(hidden)]
pub fn expand(&mut self) -> Result<()> {
assert_eq!(self.state, Some(State::Parsed));
let grammar_config = self.grammar_config.as_mut().unwrap();
self.listener
.on_intermediate_grammar(IntermediateGrammar::Untransformed, &*grammar_config)?;
let ignored_unreachable_non_terminals = grammar_config
.unreachable_non_terminals_to_ignore
.iter()
.cloned()
.collect::<BTreeSet<String>>();
let cfg = crate::generators::grammar_trans::check_and_transform_grammar_with_ignored(
&grammar_config.cfg,
grammar_config.grammar_type,
&ignored_unreachable_non_terminals,
)?;
if let Some(ref expanded_file) = self.builder.expanded_grammar_output_file {
fs::write(
expanded_file,
crate::render_par_string(grammar_config, true)?,
)
.map_err(|e| parol!("Error writing left-factored grammar! {}", e))?;
}
grammar_config.update_cfg(cfg);
self.listener
.on_intermediate_grammar(IntermediateGrammar::Transformed, &*grammar_config)?;
if let Some(ref expanded_file) = self.builder.expanded_grammar_output_file {
fs::write(
expanded_file,
crate::render_par_string(grammar_config, true)?,
)
.map_err(|e| parol!("Error writing left-factored grammar!: {}", e))?;
}
self.state = Some(State::Expanded);
Ok(())
}
#[doc(hidden)]
pub fn post_process(&mut self) -> Result<()> {
assert_eq!(self.state, Some(State::Expanded));
let grammar_config = self.grammar_config.as_mut().unwrap();
match grammar_config.grammar_type {
GrammarType::LLK => {
self.lookahead_dfa_s = Some(
crate::calculate_lookahead_dfas(grammar_config, self.builder.max_lookahead)
.map_err(|e| {
parol!("Lookahead calculation for the given grammar failed!: {}", e)
})?,
);
if self.builder.debug_verbose {
print!(
"Lookahead DFAs:\n{:?}",
self.lookahead_dfa_s.as_ref().unwrap()
);
}
grammar_config.update_lookahead_size(
self.lookahead_dfa_s
.as_ref()
.unwrap()
.iter()
.max_by_key(|(_, dfa)| dfa.k)
.unwrap()
.1
.k,
);
}
GrammarType::LALR1 => {
self.parse_table = Some(crate::calculate_lalr1_parse_table(grammar_config)?.0);
grammar_config.update_lookahead_size(1);
}
}
if self.builder.debug_verbose {
print!("\nGrammar config:\n{grammar_config:?}");
}
self.state = Some(State::PostProcessed);
Ok(())
}
#[doc(hidden)]
pub fn write_output(&mut self) -> Result<()> {
assert_eq!(self.state, Some(State::PostProcessed));
let grammar_config = self.grammar_config.as_mut().unwrap();
let language = self.builder.language();
let lexer_ir = LexerGenerationIR::new(grammar_config, &self.builder);
let lexer_source = match language {
crate::config::Language::Rust => {
generate_lexer_source_for_language(&RustLexerBackend, &lexer_ir)
.map_err(|e| parol!("Failed to generate lexer source!: {}", e))?
}
crate::config::Language::CSharp => {
generate_lexer_source_for_language(&CSharpLexerBackend, &lexer_ir)
.map_err(|e| parol!("Failed to generate C# lexer source!: {}", e))?
}
};
let mut type_info: GrammarTypeInfo =
GrammarTypeInfo::try_new(&self.builder.user_type_name)?;
let mut user_trait_ir = UserTraitGenerationIR::new(
grammar_config,
&self.builder,
grammar_config.grammar_type,
&mut type_info,
);
let user_trait_source = match language {
crate::config::Language::Rust => {
generate_user_trait_source_for_language(&RustUserTraitBackend, &mut user_trait_ir)?
}
crate::config::Language::CSharp => generate_user_trait_source_for_language(
&CSharpUserTraitBackend,
&mut user_trait_ir,
)?,
};
if let Some(ref user_trait_file_out) = self.builder.actions_output_file {
fs::write(user_trait_file_out, user_trait_source)
.map_err(|e| parol!("Error writing generated user trait source!: {}", e))?;
if language == crate::config::Language::Rust {
crate::try_format(user_trait_file_out)?;
}
} else if self.builder.debug_verbose {
println!("\nSource for semantic actions:\n{user_trait_source}");
}
let parser_source = match grammar_config.grammar_type {
GrammarType::LLK => {
let ast_type_has_lifetime =
type_info.symbol_table.has_lifetime(type_info.ast_enum_type);
let parser_ir = ParserGenerationIR::new(
grammar_config,
&lexer_source,
&self.builder,
ast_type_has_lifetime,
ParserAlgorithmIR::Llk(self.lookahead_dfa_s.as_ref().unwrap()),
)
.map_err(|e| parol!("Failed to build parser generation IR!: {}", e))?;
match language {
crate::config::Language::Rust => {
generate_parser_source_for_language(&RustParserBackend, &parser_ir)?
}
crate::config::Language::CSharp => {
generate_parser_source_for_language(&CSharpParserBackend, &parser_ir)?
}
}
}
GrammarType::LALR1 => {
let ast_type_has_lifetime =
type_info.symbol_table.has_lifetime(type_info.ast_enum_type);
let parser_ir = ParserGenerationIR::new(
grammar_config,
&lexer_source,
&self.builder,
ast_type_has_lifetime,
ParserAlgorithmIR::Lalr1(self.parse_table.as_ref().unwrap()),
)
.map_err(|e| parol!("Failed to build parser generation IR!: {}", e))?;
match language {
crate::config::Language::Rust => {
generate_parser_source_for_language(&RustParserBackend, &parser_ir)?
}
crate::config::Language::CSharp => {
generate_parser_source_for_language(&CSharpParserBackend, &parser_ir)?
}
}
}
};
if let Some(ref parser_file_out) = self.builder.parser_output_file {
fs::write(parser_file_out, parser_source)
.map_err(|e| parol!("Error writing generated lexer source!: {}", e))?;
if language == crate::config::Language::Rust {
crate::try_format(parser_file_out)?;
}
} else if self.builder.debug_verbose {
println!("\nParser source:\n{parser_source}");
}
if let Some(ref syntree_node_wrappers_output_file) = self.builder.node_kind_enum_output_file
{
let mut f = fs::OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(syntree_node_wrappers_output_file)
.map_err(|e| parol!("Error opening generated syntree node wrappers!: {}", e))?;
let syntree_node_types_generator =
NodeKindTypesGenerator::new(grammar_config, &type_info);
syntree_node_types_generator
.generate(&mut f)
.map_err(|e| parol!("Error generating syntree node wrappers!: {}", e))?;
crate::try_format(syntree_node_wrappers_output_file)?;
}
self.state = Some(State::Finished);
self.type_info = Some(type_info);
Ok(())
}
fn export_node_infos(&self) -> Result<NodeTypesInfo> {
let node_types_exporter = NodeTypesExporter::new(
self.grammar_config.as_ref().unwrap(),
self.type_info.as_ref().unwrap(),
);
Ok(node_types_exporter.generate())
}
}
/// The generation step a [`GrammarGenerator`] has completed most recently.
///
/// The generator stores this in an `Option`; `None` means no step has completed yet.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum State {
/// `parse` has completed.
Parsed,
/// `expand` has completed.
Expanded,
/// `post_process` has completed.
PostProcessed,
/// `write_output` has completed.
Finished,
}
/// Callbacks that a [`GrammarGenerator`] invokes while it processes a grammar.
///
/// Every method has a default implementation that does nothing and returns `Ok(())`.
/// An error returned from a callback is propagated by the generation step that invoked it.
#[allow(
unused_variables, // All these variables are going to be unused because these are NOP impls....
missing_docs, // This is fine because this is internal.
)]
pub trait BuildListener {
/// Called by the `parse` step after the grammar file has been parsed, with the syntax
/// tree, the text of the grammar file and the parsed grammar.
fn on_initial_grammar_parse(
&mut self,
syntax_tree: &ParseTree,
input: &str,
grammar: &ParolGrammar,
) -> Result<()> {
Ok(())
}
/// Called by the `expand` step with the grammar configuration at the given `stage`.
fn on_intermediate_grammar(
&mut self,
stage: IntermediateGrammar,
config: &GrammarConfig,
) -> Result<()> {
Ok(())
}
}
/// An optional [`BuildListener`]; its `BuildListener` implementation forwards to the
/// wrapped listener if there is one and does nothing otherwise.
#[derive(Default)]
struct MaybeBuildListener<'l>(Option<&'l mut dyn BuildListener>);
/// Forwards every callback to the wrapped listener, or succeeds silently without one.
impl BuildListener for MaybeBuildListener<'_> {
    fn on_initial_grammar_parse(
        &mut self,
        syntax_tree: &ParseTree,
        input: &str,
        grammar: &ParolGrammar,
    ) -> Result<()> {
        match self.0.as_mut() {
            Some(wrapped) => wrapped.on_initial_grammar_parse(syntax_tree, input, grammar),
            None => Ok(()),
        }
    }

    fn on_intermediate_grammar(
        &mut self,
        stage: IntermediateGrammar,
        config: &GrammarConfig,
    ) -> Result<()> {
        match self.0.as_mut() {
            Some(wrapped) => wrapped.on_intermediate_grammar(stage, config),
            None => Ok(()),
        }
    }
}
/// Identifies the stage of the grammar passed to [`BuildListener::on_intermediate_grammar`].
#[non_exhaustive]
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum IntermediateGrammar {
/// The grammar as parsed, before it has been checked and transformed.
Untransformed,
/// The grammar after the transformed CFG has been installed.
Transformed,
}
impl IntermediateGrammar {
pub const LAST: IntermediateGrammar = IntermediateGrammar::Transformed;
}
/// Errors reported for an invalid [`Builder`] configuration.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum BuilderError {
/// No grammar file was set before the generation began.
#[error("Missing an input grammar file")]
MissingGrammarFile,
/// Output sanity checks are enabled but no parser output file was set.
#[error("No parser output file specified")]
MissingParserOutputFile,
/// Output sanity checks are enabled but no actions output file was set.
#[error("No action output file specified")]
MissingActionOutputFile,
/// The requested maximum lookahead exceeds `MAX_K`.
#[error("Maximum lookahead is {}", MAX_K)]
LookaheadTooLarge,
}