use std::{
any::type_name,
borrow::Cow,
collections::{HashMap, HashSet},
convert::AsRef,
env::{current_dir, var},
error::Error,
fmt::{self, Debug, Write as fmtWrite},
fs::{self, create_dir_all, read_to_string, File},
hash::Hash,
io::{self, Write},
marker::PhantomData,
path::{Path, PathBuf},
sync::Mutex,
};
use bincode::{deserialize, serialize_into};
use cfgrammar::{
newlinecache::NewlineCache,
yacc::{ast::ASTWithValidityInfo, YaccGrammar, YaccKind, YaccOriginalActionKind},
RIdx, Spanned, Symbol,
};
use filetime::FileTime;
use lazy_static::lazy_static;
use lrtable::{from_yacc, statetable::Conflicts, Minimiser, StateGraph, StateTable};
use num_traits::{AsPrimitive, PrimInt, Unsigned};
use regex::Regex;
use serde::{de::DeserializeOwned, Serialize};
use crate::{LexerTypes, RecoveryKind};
// Prefixes used to namespace identifiers generated into the output module, so
// that generated names cannot collide with identifiers the user writes in
// action code.
const ACTION_PREFIX: &str = "__gt_";
const GLOBAL_PREFIX: &str = "__GT_";
// Name (and per-rule variant prefix) of the generated enum which wraps each
// rule's action return type.
const ACTIONS_KIND: &str = "__GtActionsKind";
const ACTIONS_KIND_PREFIX: &str = "Ak";
// Extra variant used to tie the `'input` lifetime to the generated enum.
const ACTIONS_KIND_HIDDEN: &str = "__GtActionsKindHidden";
// File extension of the generated Rust module.
const RUST_FILE_EXT: &str = "rs";
// Names of the generated byte-array constants holding the serialised grammar
// and state table (see `gen_parse_function` / `_reconstitute`).
const GRM_CONST_NAME: &str = "__GRM_DATA";
const STABLE_CONST_NAME: &str = "__STABLE_DATA";
lazy_static! {
    // Matches `$<digits>` argument references (e.g. `$1`) in action code.
    // NOTE(review): not referenced in this chunk — presumably used elsewhere
    // in the file; confirm before removing.
    static ref RE_DOL_NUM: Regex = Regex::new(r"\$([0-9]+)").unwrap();
    // Every output path a parser has been generated to in this process;
    // `build` uses this to reject two parsers writing to the same path.
    static ref GENERATED_PATHS: Mutex<HashSet<PathBuf>> = Mutex::new(HashSet::new());
}
/// Error returned (boxed) by `CTParserBuilder::build` when the state table
/// contains conflicts that were not covered by `%expect`/`%expect-rr`.
struct CTConflictsError<StorageT: Eq + Hash> {
    // Invariant: `stable.conflicts()` is `Some` — the `Display`/`Debug`
    // impls unwrap it.
    stable: StateTable<StorageT>,
}
impl<StorageT> fmt::Display for CTConflictsError<StorageT>
where
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    usize: AsPrimitive<StorageT>,
{
    /// Summarise how many conflicts of each kind the state table contains.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // This error is only constructed for tables with conflicts, so
        // `conflicts()` cannot be `None` here.
        let c = self.stable.conflicts().unwrap();
        write!(
            f,
            "CTConflictsError{{{} Reduce/Reduce, {} Shift/Reduce}}",
            c.rr_len(),
            c.sr_len()
        )
    }
}
impl<StorageT> fmt::Debug for CTConflictsError<StorageT>
where
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    usize: AsPrimitive<StorageT>,
{
    /// `Debug` output is defined to be identical to `Display` output
    /// (the original impl duplicated the formatting code verbatim), so
    /// delegate instead of maintaining two copies.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(self, f)
    }
}
// `CTConflictsError` has no underlying cause, so the default `Error`
// method implementations suffice.
impl<StorageT> Error for CTConflictsError<StorageT>
where
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    usize: AsPrimitive<StorageT>,
{
}
/// A minimal `Error` type wrapping a plain message string; used to surface
/// grammar warnings/errors from `build` as `Box<dyn Error>`.
struct ErrorString(String);
impl fmt::Display for ErrorString {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(&self.0)
    }
}
impl fmt::Debug for ErrorString {
    // Debug output is deliberately the same as Display output.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(self, f)
    }
}
impl Error for ErrorString {}
/// The Rust visibility the generated module should have.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Visibility {
    /// Non-`pub` (module-private).
    Private,
    /// `pub`.
    Public,
    /// `pub(super)`.
    PublicSuper,
    /// `pub(self)`.
    PublicSelf,
    /// `pub(crate)`.
    PublicCrate,
    /// `pub(in <path>)`.
    PublicIn(String),
}
/// The Rust edition the generated code should target.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum RustEdition {
    Rust2015,
    Rust2018,
    Rust2021,
}
impl Visibility {
    /// Render this visibility as the exact source text spliced in front of
    /// the generated `mod` (the empty string for `Private`).
    fn cow_str(&self) -> Cow<'static, str> {
        // Every variant except `PublicIn` maps to a static string; only
        // `PublicIn` needs an allocation.
        let s: &'static str = match self {
            Visibility::Private => "",
            Visibility::Public => "pub",
            Visibility::PublicSuper => "pub(super)",
            Visibility::PublicSelf => "pub(self)",
            Visibility::PublicCrate => "pub(crate)",
            Visibility::PublicIn(path) => return Cow::from(format!("pub(in {})", path)),
        };
        Cow::from(s)
    }
}
/// Builds a parser module from a Yacc grammar at compile time (typically
/// from a `build.rs` script). Configure via the setter methods, then call
/// `build`.
pub struct CTParserBuilder<'a, LexerTypesT: LexerTypes>
where
    LexerTypesT::StorageT: Eq + Hash,
    usize: AsPrimitive<LexerTypesT::StorageT>,
{
    // Where the grammar is read from (`grammar_path`/`grammar_in_src_dir`).
    grammar_path: Option<PathBuf>,
    // Where the generated module is written (`output_path`).
    output_path: Option<PathBuf>,
    // Module name override; derived from the grammar file stem when `None`.
    mod_name: Option<&'a str>,
    // Error-recovery strategy baked into the generated parser.
    recoverer: RecoveryKind,
    // Must be set before `build` is called (build panics otherwise).
    yacckind: Option<YaccKind>,
    // If true, unexpected (non-%expect'd) conflicts make `build` fail.
    error_on_conflicts: bool,
    // If true, grammar warnings are reported as errors by `build`.
    warnings_are_errors: bool,
    // If true (and warnings aren't errors), warnings are printed.
    show_warnings: bool,
    // Visibility of the generated module.
    visibility: Visibility,
    // Rust edition targeted by the generated code.
    rust_edition: RustEdition,
    // Ties the otherwise-unused `LexerTypesT` parameter to this struct.
    phantom: PhantomData<LexerTypesT>,
}
impl<
'a,
StorageT: 'static + Debug + Hash + PrimInt + Serialize + Unsigned,
LexerTypesT: LexerTypes<StorageT = StorageT>,
> CTParserBuilder<'a, LexerTypesT>
where
usize: AsPrimitive<StorageT>,
{
pub fn new() -> Self {
CTParserBuilder {
grammar_path: None,
output_path: None,
mod_name: None,
recoverer: RecoveryKind::CPCTPlus,
yacckind: None,
error_on_conflicts: true,
warnings_are_errors: true,
show_warnings: true,
visibility: Visibility::Private,
rust_edition: RustEdition::Rust2021,
phantom: PhantomData,
}
}
/// Given a grammar file at `srcp`, relative to this project's `src`
/// directory, configure the builder to read that grammar and to write the
/// generated module to the mirrored path under `OUT_DIR`, with `.rs`
/// appended to the file name.
///
/// # Errors
/// Returns an error if `srcp` is not relative, the current directory
/// cannot be determined, `OUT_DIR` is not set (i.e. this is not being run
/// from a build script), or the output directory cannot be created.
pub fn grammar_in_src_dir<P>(mut self, srcp: P) -> Result<Self, Box<dyn Error>>
where
    P: AsRef<Path>,
{
    if !srcp.as_ref().is_relative() {
        return Err(format!(
            "Grammar path '{}' must be a relative path.",
            srcp.as_ref().to_str().unwrap_or("<invalid UTF-8>")
        )
        .into());
    }
    let mut grmp = current_dir()?;
    grmp.push("src");
    grmp.push(srcp.as_ref());
    self.grammar_path = Some(grmp);
    // Mirror the grammar's relative directory structure under OUT_DIR.
    // Originally this `unwrap()`ed OUT_DIR, panicking when run outside a
    // build script; return a descriptive error instead, since this
    // function already returns Result.
    let out_dir = var("OUT_DIR").map_err(|e| {
        format!(
            "Cannot read OUT_DIR (is this being run from build.rs?): {}",
            e
        )
    })?;
    let mut outp = PathBuf::new();
    outp.push(out_dir);
    // `srcp` is relative, so `parent()` is `Some` here; `to_str` assumes
    // the path is valid UTF-8.
    outp.push(srcp.as_ref().parent().unwrap().to_str().unwrap());
    create_dir_all(&outp)?;
    let mut leaf = srcp
        .as_ref()
        .file_name()
        .unwrap()
        .to_str()
        .unwrap()
        .to_owned();
    // e.g. "grm.y" -> "grm.y.rs".
    write!(leaf, ".{}", RUST_FILE_EXT).ok();
    outp.push(leaf);
    Ok(self.output_path(outp))
}
/// Set the path the grammar will be read from. Most users should prefer
/// `grammar_in_src_dir`, which also derives the output path.
pub fn grammar_path<P>(mut self, inp: P) -> Self
where
P: AsRef<Path>,
{
self.grammar_path = Some(inp.as_ref().to_owned());
self
}
/// Set the path the generated Rust module will be written to.
pub fn output_path<P>(mut self, outp: P) -> Self
where
P: AsRef<Path>,
{
self.output_path = Some(outp.as_ref().to_owned());
self
}
/// Override the generated module's name (default: `<grammar stem>_y`).
pub fn mod_name(mut self, mod_name: &'a str) -> Self {
self.mod_name = Some(mod_name);
self
}
/// Set the visibility of the generated module (default: private).
pub fn visibility(mut self, vis: Visibility) -> Self {
self.visibility = vis;
self
}
/// Set the error-recovery strategy (default: `CPCTPlus`).
pub fn recoverer(mut self, rk: RecoveryKind) -> Self {
self.recoverer = rk;
self
}
/// Set the Yacc variant of the grammar. Must be called before `build`.
pub fn yacckind(mut self, yk: YaccKind) -> Self {
self.yacckind = Some(yk);
self
}
/// If `true` (the default), `build` fails on conflicts not covered by
/// `%expect`/`%expect-rr`.
pub fn error_on_conflicts(mut self, b: bool) -> Self {
self.error_on_conflicts = b;
self
}
/// If `true` (the default), `build` fails if the grammar has warnings.
pub fn warnings_are_errors(mut self, b: bool) -> Self {
self.warnings_are_errors = b;
self
}
/// If `true` (the default), warnings are printed (as cargo warnings when
/// run from a build script) when they are not treated as errors.
pub fn show_warnings(mut self, b: bool) -> Self {
self.show_warnings = b;
self
}
/// Set the Rust edition the generated code targets (default: 2021).
pub fn rust_edition(mut self, edition: RustEdition) -> Self {
self.rust_edition = edition;
self
}
/// Statically compile the grammar into Rust, writing the generated module
/// to the configured output path (unless a cached, up-to-date copy already
/// exists). Returns a `CTParser` describing the outcome.
///
/// # Panics
/// Panics if `grammar_path`, `output_path` or `yacckind` were not set, or
/// if `yacckind` is `YaccKind::Eco` (unsupported at compile time).
pub fn build(self) -> Result<CTParser<StorageT>, Box<dyn Error>> {
let grmp = self
.grammar_path
.as_ref()
.expect("grammar_path must be specified before processing.");
let outp = self
.output_path
.as_ref()
.expect("output_path must be specified before processing.");
let yk = match self.yacckind {
None => panic!("yacckind must be specified before processing."),
Some(YaccKind::Original(x)) => YaccKind::Original(x),
Some(YaccKind::Grmtools) => YaccKind::Grmtools,
Some(YaccKind::Eco) => panic!("Eco compile-time grammar generation not supported."),
};
{
// Reject two parsers generated to the same output path: the second
// would silently clobber the first.
let mut lk = GENERATED_PATHS.lock().unwrap();
if lk.contains(outp.as_path()) {
return Err(format!("Generating two parsers to the same path ('{}') is not allowed: use CTParserBuilder::output_path (and, optionally, CTParserBuilder::mod_name) to differentiate them.", &outp.to_str().unwrap()).into());
}
lk.insert(outp.clone());
}
let inc = read_to_string(grmp).unwrap();
let ast_validation = ASTWithValidityInfo::new(yk, &inc);
let warnings = ast_validation.ast().warnings();
// Format a warning/error with line/column when the span can be resolved.
let spanned_fmt = |x: &dyn Spanned, inc: &str, line_cache: &NewlineCache| {
if let Some((line, column)) =
line_cache.byte_to_line_num_and_col_num(inc, x.spans()[0].start())
{
format!("{} at line {line} column {column}", x)
} else {
format!("{}", x)
}
};
let res = YaccGrammar::<StorageT>::new_from_ast_with_validity_info(yk, &ast_validation);
let grm = match res {
// Valid grammar, but warnings are being promoted to errors.
Ok(_) if self.warnings_are_errors && !warnings.is_empty() => {
let mut line_cache = NewlineCache::new();
line_cache.feed(&inc);
// `return Err(..)?` (rather than `return Err(..)`) converts the
// ErrorString into the fn's Box<dyn Error> via `?`'s From impl.
return Err(ErrorString(if warnings.len() > 1 {
format!(
"\n\t{}",
warnings
.iter()
.map(|w| spanned_fmt(w, &inc, &line_cache))
.collect::<Vec<_>>()
.join("\n\t")
)
} else {
spanned_fmt(warnings.first().unwrap(), &inc, &line_cache)
}))?;
}
Ok(grm) => {
// Valid grammar: report (but don't fail on) any warnings.
if !warnings.is_empty() {
let mut line_cache = NewlineCache::new();
line_cache.feed(&inc);
for w in warnings {
// Inside a build script, use cargo's warning channel.
if std::env::var("OUT_DIR").is_ok() && self.show_warnings {
println!("cargo:warning={}", spanned_fmt(&w, &inc, &line_cache));
} else if self.show_warnings {
eprintln!("{}", spanned_fmt(&w, &inc, &line_cache));
}
}
}
grm
}
Err(errs) => {
let mut line_cache = NewlineCache::new();
line_cache.feed(&inc);
// Report all errors followed by all warnings.
return Err(ErrorString(if errs.len() + warnings.len() > 1 {
format!(
"\n\t{}",
errs.iter()
.map(|e| spanned_fmt(e, &inc, &line_cache))
.chain(warnings.iter().map(|w| spanned_fmt(w, &inc, &line_cache)))
.collect::<Vec<_>>()
.join("\n\t")
)
} else {
spanned_fmt(errs.first().unwrap(), &inc, &line_cache)
}))?;
}
};
// Map token names to the IDs the lexer must use for them.
let rule_ids = grm
.tokens_map()
.iter()
.map(|(&n, &i)| (n.to_owned(), i.as_storaget()))
.collect::<HashMap<_, _>>();
let cache = self.rebuild_cache(&grm);
// If the output file is newer than the grammar and embeds an identical
// cache comment (i.e. all settings are unchanged), skip regeneration.
if let Ok(ref inmd) = fs::metadata(grmp) {
if let Ok(ref out_rs_md) = fs::metadata(outp) {
if FileTime::from_last_modification_time(out_rs_md)
> FileTime::from_last_modification_time(inmd)
{
if let Ok(outc) = read_to_string(outp) {
if outc.contains(&cache) {
return Ok(CTParser {
regenerated: false,
rule_ids,
conflicts: None,
});
}
}
}
}
}
// Stale or missing output: remove it (best-effort) and regenerate.
fs::remove_file(outp).ok();
let (sgraph, stable) = from_yacc(&grm, Minimiser::Pager)?;
if self.error_on_conflicts {
if let Some(c) = stable.conflicts() {
// Conflicts are only acceptable if they exactly match the
// grammar's %expect/%expect-rr declarations.
match (grm.expect(), grm.expectrr()) {
(Some(i), Some(j)) if i == c.sr_len() && j == c.rr_len() => (),
(Some(i), None) if i == c.sr_len() && 0 == c.rr_len() => (),
(None, Some(j)) if 0 == c.sr_len() && j == c.rr_len() => (),
(None, None) if 0 == c.rr_len() && 0 == c.sr_len() => (),
_ => return Err(Box::new(CTConflictsError { stable })),
}
}
}
let mod_name = match self.mod_name {
Some(s) => s.to_owned(),
None => {
// Derive the module name from the grammar file name, stripping
// *all* extensions (e.g. "grm.y" -> "grm" -> "grm_y").
let mut stem = grmp.to_str().unwrap();
loop {
let new_stem = Path::new(stem).file_stem().unwrap().to_str().unwrap();
if stem == new_stem {
break;
}
stem = new_stem;
}
format!("{}_y", stem)
}
};
self.output_file(&grm, &stable, &mod_name, outp, &cache)?;
// Retain the grammar/table so callers can inspect accepted conflicts.
let conflicts = if stable.conflicts().is_some() {
Some((grm, sgraph, stable))
} else {
None
};
Ok(CTParser {
regenerated: true,
rule_ids,
conflicts,
})
}
/// Deprecated forerunner of `grammar_in_src_dir` + `build` + `token_map`:
/// reads `srcp` (relative to `src/`), generates the parser under `OUT_DIR`,
/// and returns the token-name -> ID map.
#[deprecated(
since = "0.11.0",
note = "Please use grammar_in_src_dir(), build(), and token_map() instead"
)]
#[allow(deprecated)]
pub fn process_file_in_src(
&mut self,
srcp: &str,
) -> Result<HashMap<String, StorageT>, Box<dyn Error>> {
let mut inp = current_dir()?;
inp.push("src");
inp.push(srcp);
let mut outp = PathBuf::new();
// Panics if OUT_DIR is unset (i.e. not run from a build script).
outp.push(var("OUT_DIR").unwrap());
outp.push(Path::new(srcp).parent().unwrap().to_str().unwrap());
create_dir_all(&outp)?;
let mut leaf = Path::new(srcp)
.file_name()
.unwrap()
.to_str()
.unwrap()
.to_owned();
// e.g. "grm.y" -> "grm.y.rs".
write!(leaf, ".{}", RUST_FILE_EXT).ok();
outp.push(leaf);
self.process_file(inp, outp)
}
/// Deprecated forerunner of `grammar_path` + `output_path` + `build` +
/// `token_map`: generates a parser from `inp` into `outp` and returns the
/// token-name -> ID map.
#[deprecated(
since = "0.11.0",
note = "Please use grammar_path(), output_path(), build(), and token_map() instead"
)]
#[allow(deprecated)]
pub fn process_file<P, Q>(
&mut self,
inp: P,
outp: Q,
) -> Result<HashMap<String, StorageT>, Box<dyn Error>>
where
P: AsRef<Path>,
Q: AsRef<Path>,
{
self.grammar_path = Some(inp.as_ref().to_owned());
self.output_path = Some(outp.as_ref().to_owned());
// `build` consumes the builder, but this deprecated API only borrows
// `self`, so run `build` on a field-by-field copy instead.
let cl: CTParserBuilder<LexerTypesT> = CTParserBuilder {
grammar_path: self.grammar_path.clone(),
output_path: self.output_path.clone(),
mod_name: self.mod_name,
recoverer: self.recoverer,
yacckind: self.yacckind,
error_on_conflicts: self.error_on_conflicts,
warnings_are_errors: self.warnings_are_errors,
show_warnings: self.show_warnings,
visibility: self.visibility.clone(),
rust_edition: self.rust_edition,
phantom: PhantomData,
};
Ok(cl.build()?.rule_ids)
}
/// Assemble the complete generated module (parse function, rule constants,
/// token %epp table and — for action-based grammars — wrappers and user
/// actions), append the cache comment, and write it all to `outp_rs`.
fn output_file<P: AsRef<Path>>(
&self,
grm: &YaccGrammar<StorageT>,
stable: &StateTable<StorageT>,
mod_name: &str,
outp_rs: P,
cache: &str,
) -> Result<(), Box<dyn Error>> {
let mut outs = String::new();
// Module header with the configured visibility, plus lint settings for
// the generated code.
writeln!(outs, "{} mod {} {{", self.visibility.cow_str(), mod_name).ok();
outs.push_str(
" #![allow(clippy::type_complexity)]
#![allow(clippy::unnecessary_wraps)]
#![deny(unsafe_code)]
#[allow(unused_imports)]
use ::lrpar::Lexeme;
",
);
outs.push_str(&self.gen_parse_function(grm, stable)?);
outs.push_str(&self.gen_rule_consts(grm));
outs.push_str(&self.gen_token_epp(grm));
// Only action-based grammars need wrappers and user-action functions.
match self.yacckind.unwrap() {
YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
outs.push_str(&self.gen_wrappers(grm));
outs.push_str(&self.gen_user_actions(grm));
}
YaccKind::Original(YaccOriginalActionKind::NoAction)
| YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => (),
_ => unreachable!(),
}
outs.push_str("}\n\n");
// The cache comment is placed outside the module; `build` scans for it
// to decide whether regeneration can be skipped.
outs.push_str(cache);
let mut f = File::create(outp_rs)?;
f.write_all(outs.as_bytes())?;
Ok(())
}
/// Generate the `/* CACHE INFORMATION ... */` comment appended to the
/// output file. It records every setting that influences generation; if an
/// up-to-date output file contains exactly this text, `build` skips
/// regeneration.
fn rebuild_cache(&self, grm: &YaccGrammar<StorageT>) -> String {
    let mut cache = String::new();
    cache.push_str("\n/* CACHE INFORMATION\n");
    // All builder settings that affect the generated output.
    writeln!(cache, " Build time: {:?}", env!("VERGEN_BUILD_TIMESTAMP")).ok();
    writeln!(cache, " Grammar path: {:?}", self.grammar_path).ok();
    writeln!(cache, " Mod name: {:?}", self.mod_name).ok();
    writeln!(cache, " Recoverer: {:?}", self.recoverer).ok();
    writeln!(cache, " YaccKind: {:?}", self.yacckind).ok();
    writeln!(cache, " Visibility: {:?}", self.visibility.cow_str()).ok();
    writeln!(
        cache,
        " Error on conflicts: {:?}\n",
        self.error_on_conflicts
    )
    .ok();
    // The token index -> name mapping, which the lexer must agree with.
    for tidx in grm.iter_tidxs() {
        let n = grm
            .token_name(tidx)
            .map(|n| format!("'{}'", n))
            .unwrap_or_else(|| "<unknown>".to_string());
        writeln!(cache, " {} {}", usize::from(tidx), n).ok();
    }
    cache.push_str("*/\n");
    cache
}
/// Generate the serialised grammar/state-table constants and the public
/// `parse` function, whose signature depends on the grammar's action kind.
fn gen_parse_function(
&self,
grm: &YaccGrammar<StorageT>,
stable: &StateTable<StorageT>,
) -> Result<String, Box<dyn Error>> {
let mut outs = String::new();
// Embed the grammar and state table as byte-array constants; the
// generated parse function reconstitutes them at runtime.
serialize_bin_output(grm, GRM_CONST_NAME, &mut outs)?;
serialize_bin_output(stable, STABLE_CONST_NAME, &mut outs)?;
// Emit the `parse` function header; its return type varies by kind.
match self.yacckind.unwrap() {
YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
// Optional extra user parameter declared via %parse-param.
let parse_param = match grm.parse_param() {
Some((name, tyname)) => format!(", {}: {}", name, tyname),
None => "".to_owned(),
};
write!(outs,
"
#[allow(dead_code)]
pub fn parse<'lexer, 'input: 'lexer>(
lexer: &'lexer dyn ::lrpar::NonStreamingLexer<'input, {lexertypest}>{parse_param})
-> (::std::option::Option<{actiont}>, ::std::vec::Vec<::lrpar::LexParseError<{storaget}, {lexertypest}>>)
{{",
storaget = type_name::<StorageT>(),
lexertypest = type_name::<LexerTypesT>(),
parse_param = parse_param,
actiont = grm.actiontype(self.user_start_ridx(grm)).as_ref().unwrap(),
).ok();
}
YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => {
write!(
outs,
"
#[allow(dead_code)]
pub fn parse(lexer: &dyn ::lrpar::NonStreamingLexer<{lexertypest}>)
-> (::std::option::Option<::lrpar::Node<<{lexertypest} as ::lrpar::LexerTypes>::LexemeT, {storaget}>>,
::std::vec::Vec<::lrpar::LexParseError<{storaget}, {lexertypest}>>)
{{",
storaget = type_name::<StorageT>(),
lexertypest = type_name::<LexerTypesT>(),
)
.ok();
}
YaccKind::Original(YaccOriginalActionKind::NoAction) => {
write!(
outs,
"
#[allow(dead_code)]
pub fn parse(lexer: &dyn ::lrpar::NonStreamingLexer<{lexertypest}>)
-> ::std::vec::Vec<::lrpar::LexParseError<{storaget}, {lexertypest}>>
{{",
storaget = type_name::<StorageT>(),
lexertypest = type_name::<LexerTypesT>(),
)
.ok();
}
// Eco is rejected earlier, in `build`.
YaccKind::Eco => unreachable!(),
};
// Body: reconstitute the grammar/table from the embedded constants...
write!(
outs,
"
let (grm, stable) = ::lrpar::ctbuilder::_reconstitute({}, {});",
GRM_CONST_NAME, STABLE_CONST_NAME
)
.ok();
let recoverer = match self.recoverer {
RecoveryKind::CPCTPlus => "CPCTPlus",
RecoveryKind::None => "None",
};
// ...then run the runtime parser in the mode matching the action kind.
match self.yacckind.unwrap() {
YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
// One wrapper function reference per production, indexed by pidx.
let wrappers = grm
.iter_pidxs()
.map(|pidx| {
format!(
"&{prefix}wrapper_{}",
usize::from(pidx),
prefix = ACTION_PREFIX
)
})
.collect::<Vec<_>>()
.join(",\n ");
let (parse_param, parse_paramty) = match grm.parse_param() {
Some((name, tyname)) => (name.clone(), tyname.clone()),
None => ("()".to_owned(), "()".to_owned()),
};
write!(outs,
"\n #[allow(clippy::type_complexity)]
let actions: ::std::vec::Vec<&dyn Fn(::cfgrammar::RIdx<{storaget}>,
&'lexer dyn ::lrpar::NonStreamingLexer<'input, {lexertypest}>,
::cfgrammar::Span,
::std::vec::Drain<{edition_lifetime} ::lrpar::parser::AStackType<<{lexertypest} as ::lrpar::LexerTypes>::LexemeT, {actionskind}<'input>>>,
{parse_paramty})
-> {actionskind}<'input>> = ::std::vec![{wrappers}];\n",
actionskind = ACTIONS_KIND,
storaget = type_name::<StorageT>(),
lexertypest = type_name::<LexerTypesT>(),
parse_paramty = parse_paramty,
wrappers = wrappers,
edition_lifetime = if self.rust_edition != RustEdition::Rust2015 { "'_, " } else { "" },
).ok();
// Unwrap the start rule's variant of the actions enum to produce
// the user-visible return value.
write!(
outs,
"
match ::lrpar::RTParserBuilder::new(&grm, &stable)
.recoverer(::lrpar::RecoveryKind::{recoverer})
.parse_actions(lexer, &actions, {parse_param}) {{
(Some({actionskind}::{actionskindprefix}{ridx}(x)), y) => (Some(x), y),
(None, y) => (None, y),
_ => unreachable!()
}}",
parse_param = parse_param,
actionskind = ACTIONS_KIND,
actionskindprefix = ACTIONS_KIND_PREFIX,
ridx = usize::from(self.user_start_ridx(grm)),
recoverer = recoverer,
)
.ok();
}
YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => {
write!(
outs,
"
::lrpar::RTParserBuilder::new(&grm, &stable)
.recoverer(::lrpar::RecoveryKind::{})
.parse_generictree(lexer)\n",
recoverer
)
.ok();
}
YaccKind::Original(YaccOriginalActionKind::NoAction) => {
write!(
outs,
"
::lrpar::RTParserBuilder::new(&grm, &stable)
.recoverer(::lrpar::RecoveryKind::{})
.parse_noaction(lexer)\n",
recoverer
)
.ok();
}
YaccKind::Eco => unreachable!(),
};
outs.push_str("\n }\n\n");
Ok(outs)
}
/// Generate one `pub const R_<NAME>` per user-visible rule, mapping the
/// (upper-cased) rule name to its numeric rule index.
fn gen_rule_consts(&self, grm: &YaccGrammar<StorageT>) -> String {
    let mut outs = String::new();
    for ridx in grm.iter_rules() {
        // The synthetic start rule is an implementation detail: skip it.
        if grm.rule_to_prods(ridx).contains(&grm.start_prod()) {
            continue;
        }
        write!(
            outs,
            " #[allow(dead_code)]\n pub const R_{}: {} = {:?};\n",
            grm.rule_name_str(ridx).to_ascii_uppercase(),
            type_name::<StorageT>(),
            usize::from(ridx)
        )
        .ok();
    }
    outs
}
/// Generate the `%epp` (pretty-printed token name) lookup table plus the
/// public `token_epp` accessor function.
fn gen_token_epp(&self, grm: &YaccGrammar<StorageT>) -> String {
    // One `Some("...")`/`None` entry per token, in token-index order.
    let tidxs = grm
        .iter_tidxs()
        .map(|tidx| match grm.token_epp(tidx) {
            Some(n) => format!("Some(\"{}\")", str_escape(n)),
            None => "None".to_string(),
        })
        .collect::<Vec<_>>();
    format!(
        " const {prefix}EPP: &[::std::option::Option<&str>] = &[{}];
/// Return the %epp entry for token `tidx` (where `None` indicates \"the token has no
/// pretty-printed value\"). Panics if `tidx` doesn't exist.
#[allow(dead_code)]
pub fn token_epp<'a>(tidx: ::cfgrammar::TIdx<{storaget}>) -> ::std::option::Option<&'a str> {{
{prefix}EPP[usize::from(tidx)]
}}",
        tidxs.join(", "),
        storaget = type_name::<StorageT>(),
        prefix = GLOBAL_PREFIX
    )
}
/// Generate one wrapper function per production plus the actions enum. A
/// wrapper pops that production's symbols off the parse stack, unwraps them
/// into the types the user action expects, calls the user action, and
/// re-wraps its result in the actions enum.
fn gen_wrappers(&self, grm: &YaccGrammar<StorageT>) -> String {
let mut outs = String::new();
outs.push_str("\n\n // Wrappers\n\n");
// %parse-param, if any: its name (for forwarding) and its declaration.
let (parse_paramname, parse_paramdef) = match grm.parse_param() {
Some((name, tyname)) => (name.to_owned(), format!("{}: {}", name, tyname)),
None => ("()".to_owned(), "_: ()".to_owned()),
};
for pidx in grm.iter_pidxs() {
let ridx = grm.prod_to_rule(pidx);
// Wrapper signature: mirrors the `actions` vec's Fn type in
// `gen_parse_function`.
write!(outs,
" fn {prefix}wrapper_{}<'lexer, 'input: 'lexer>({prefix}ridx: ::cfgrammar::RIdx<{storaget}>,
{prefix}lexer: &'lexer dyn ::lrpar::NonStreamingLexer<'input, {lexertypest}>,
{prefix}span: ::cfgrammar::Span,
mut {prefix}args: ::std::vec::Drain<{edition_lifetime} ::lrpar::parser::AStackType<<{lexertypest} as ::lrpar::LexerTypes>::LexemeT, {actionskind}<'input>>>,
{parse_paramdef})
-> {actionskind}<'input> {{",
usize::from(pidx),
storaget = type_name::<StorageT>(),
lexertypest = type_name::<LexerTypesT>(),
prefix = ACTION_PREFIX,
parse_paramdef = parse_paramdef,
actionskind = ACTIONS_KIND,
edition_lifetime = if self.rust_edition != RustEdition::Rust2015 { "'_, " } else { "" },
).ok();
if grm.action(pidx).is_some() {
// Unpack each RHS symbol into `<prefix>arg_<n>` (1-based).
for i in 0..grm.prod(pidx).len() {
match grm.prod(pidx)[i] {
// A sub-rule's value: extract it from its enum variant.
Symbol::Rule(ref_ridx) => {
write!(outs,
"
let {prefix}arg_{i} = match {prefix}args.next().unwrap() {{
::lrpar::parser::AStackType::ActionType({actionskind}::{actionskindprefix}{ref_ridx}(x)) => x,
_ => unreachable!()
}};",
i = i + 1,
ref_ridx = usize::from(ref_ridx),
prefix = ACTION_PREFIX,
actionskind = ACTIONS_KIND,
actionskindprefix = ACTIONS_KIND_PREFIX
).ok();
}
// A lexeme: pass `Err(l)` for lexemes inserted during error
// recovery, `Ok(l)` otherwise.
Symbol::Token(_) => {
write!(
outs,
"
let {prefix}arg_{} = match {prefix}args.next().unwrap() {{
::lrpar::parser::AStackType::Lexeme(l) => {{
if l.faulty() {{
Err(l)
}} else {{
Ok(l)
}}
}},
::lrpar::parser::AStackType::ActionType(_) => unreachable!()
}};",
i + 1,
prefix = ACTION_PREFIX
)
.ok();
}
}
}
let args = (0..grm.prod(pidx).len())
.map(|i| format!("{prefix}arg_{i}", prefix = ACTION_PREFIX, i = i + 1))
.collect::<Vec<_>>();
// Call the user action and wrap its result in the actions enum.
match grm.actiontype(ridx) {
// Unit action type: call for side effects, then wrap `()`.
Some(s) if s == "()" => {
write!(outs, "\n {prefix}action_{pidx}({prefix}ridx, {prefix}lexer, {prefix}span, {parse_paramname}, {args});
{actionskind}::{actionskindprefix}{ridx}(())",
actionskind = ACTIONS_KIND,
actionskindprefix = ACTIONS_KIND_PREFIX,
prefix = ACTION_PREFIX,
ridx = usize::from(ridx),
pidx = usize::from(pidx),
parse_paramname = parse_paramname,
args = args.join(", ")).ok();
}
_ => {
write!(outs, "\n {actionskind}::{actionskindprefix}{ridx}({prefix}action_{pidx}({prefix}ridx, {prefix}lexer, {prefix}span, {parse_paramname}, {args}))",
actionskind = ACTIONS_KIND,
actionskindprefix = ACTIONS_KIND_PREFIX,
prefix = ACTION_PREFIX,
ridx = usize::from(ridx),
pidx = usize::from(pidx),
parse_paramname = parse_paramname,
args = args.join(", ")).ok();
}
}
} else if pidx == grm.start_prod() {
// The synthetic start production has no action and is never
// executed; silence the unused %parse-param if present.
if parse_paramname != "()" {
write!(outs, "\n let _ = {parse_paramname:};").ok();
}
outs.push_str("\n unreachable!()");
} else {
panic!(
"Production in rule '{}' must have an action body.",
grm.rule_name_str(grm.prod_to_rule(pidx))
);
}
outs.push_str("\n }\n\n");
}
// The actions enum: one variant per rule that has an action type...
write!(
outs,
" #[allow(dead_code)]
enum {}<'input> {{\n",
ACTIONS_KIND
)
.ok();
for ridx in grm.iter_rules() {
if grm.actiontype(ridx).is_none() {
continue;
}
writeln!(
outs,
" {actionskindprefix}{ridx}({actiont}),",
actionskindprefix = ACTIONS_KIND_PREFIX,
ridx = usize::from(ridx),
actiont = grm.actiontype(ridx).as_ref().unwrap()
)
.ok();
}
// ...plus a hidden variant anchoring the `'input` lifetime.
write!(
outs,
" _{actionskindhidden}(::std::marker::PhantomData<&'input ()>)
}}\n\n",
actionskindhidden = ACTIONS_KIND_HIDDEN
)
.ok();
outs
}
/// Generate one Rust function per production from the user's action code,
/// substituting the `$`-operators (`$$`, `$lexer`, `$span`, `$<n>`) with
/// their generated equivalents.
fn gen_user_actions(&self, grm: &YaccGrammar<StorageT>) -> String {
let mut outs = String::new();
// The `%%`-terminated program section, if any, is copied through as-is.
if let Some(s) = grm.programs() {
outs.push_str("\n// User code from the program section\n\n");
outs.push_str(s);
}
outs.push_str("\n // User actions\n\n");
let (parse_paramname, parse_paramdef) = match grm.parse_param() {
Some((name, tyname)) => (name.to_owned(), format!("{}: {}", name, tyname)),
None => ("()".to_owned(), "_: ()".to_owned()),
};
for pidx in grm.iter_pidxs() {
// The synthetic start production has no user action.
if pidx == grm.start_prod() {
continue;
}
// Build the parameter list: one argument per RHS symbol, typed by
// the referenced rule's action type or (for tokens) a Result over
// the lexeme type (Err = inserted during error recovery).
let mut args = Vec::with_capacity(grm.prod(pidx).len());
for i in 0..grm.prod(pidx).len() {
let argt = match grm.prod(pidx)[i] {
Symbol::Rule(ref_ridx) => grm.actiontype(ref_ridx).as_ref().unwrap().clone(),
Symbol::Token(_) => format!(
"::std::result::Result<{lexemet}, {lexemet}>",
lexemet = type_name::<LexerTypesT::LexemeT>(),
),
};
args.push(format!("mut {}arg_{}: {}", ACTION_PREFIX, i + 1, argt));
}
// Omit the return type entirely when the action type is unit.
let returnt = {
let actiont = grm.actiontype(grm.prod_to_rule(pidx)).as_ref().unwrap();
if actiont == "()" {
"".to_owned()
} else {
format!("\n -> {}", actiont)
}
};
write!(
outs,
" // {rulename}
#[allow(clippy::too_many_arguments)]
#[allow(unsafe_code)] // Allow an action to embed unsafe blocks within it.
fn {prefix}action_{}<'lexer, 'input: 'lexer>({prefix}ridx: ::cfgrammar::RIdx<{storaget}>,
{prefix}lexer: &'lexer dyn ::lrpar::NonStreamingLexer<'input, {lexertypest}>,
{prefix}span: ::cfgrammar::Span,
{parse_paramdef},
{args}){returnt} {{\n",
usize::from(pidx),
rulename = grm.rule_name_str(grm.prod_to_rule(pidx)),
storaget = type_name::<StorageT>(),
lexertypest = type_name::<LexerTypesT>(),
prefix = ACTION_PREFIX,
returnt = returnt,
parse_paramdef = parse_paramdef,
args = args.join(",\n ")
)
.ok();
// Silence "unused" warnings for actions that ignore the parse param.
if parse_paramname != "()" {
writeln!(outs, " let _ = {parse_paramname:};").ok();
}
// Copy the action body, rewriting each `$`-operator as we meet it.
let pre_action = grm.action(pidx).as_ref().unwrap();
let mut last = 0;
loop {
match pre_action[last..].find('$') {
Some(off) => {
if pre_action[last + off..].starts_with("$$") {
// `$$` is an escaped literal `$`.
outs.push_str(&pre_action[last..last + off + "$".len()]);
last = last + off + "$$".len();
} else if pre_action[last + off..].starts_with("$lexer") {
outs.push_str(&pre_action[last..last + off]);
write!(outs, "{prefix}lexer", prefix = ACTION_PREFIX).ok();
last = last + off + "$lexer".len();
} else if pre_action[last + off..].starts_with("$span") {
outs.push_str(&pre_action[last..last + off]);
write!(outs, "{prefix}span", prefix = ACTION_PREFIX).ok();
last = last + off + "$span".len();
} else if last + off + 1 < pre_action.len()
&& pre_action[last + off + 1..].starts_with(|c: char| c.is_numeric())
{
// `$<n>` becomes `<prefix>arg_<n>`; the digits themselves
// are copied through on the next iteration.
outs.push_str(&pre_action[last..last + off]);
write!(outs, "{prefix}arg_", prefix = ACTION_PREFIX).ok();
last = last + off + "$".len();
} else {
panic!(
"Unknown text following '$' operator: {}",
&pre_action[last + off..]
);
}
}
None => {
// No more `$`s: copy the remainder verbatim.
outs.push_str(&pre_action[last..]);
break;
}
}
}
outs.push_str("\n }\n\n");
}
outs
}
/// Return the index of the user's start rule, i.e. the single rule the
/// synthetic start production references.
fn user_start_ridx(&self, grm: &YaccGrammar<StorageT>) -> RIdx<StorageT> {
    let start_prod = grm.prod(grm.start_prod());
    // The synthetic start production always has exactly one symbol: the
    // user's start rule.
    debug_assert_eq!(start_prod.len(), 1);
    if let Symbol::Rule(ridx) = start_prod[0] {
        ridx
    } else {
        unreachable!()
    }
}
}
/// Escape `s` so it can be embedded in a generated double-quoted Rust
/// string literal: backslashes and double quotes are backslash-escaped.
fn str_escape(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            _ => out.push(c),
        }
    }
    out
}
/// Reconstitute a grammar and state table from the binary blobs that
/// `CTParserBuilder` embeds in the generated module. Not part of the
/// public API (called only by generated code).
///
/// # Panics
/// Panics if either buffer fails to deserialize. The buffers are produced
/// at build time by this same crate, so a failure indicates a bug or a
/// corrupted build artefact, not a user error.
#[doc(hidden)]
pub fn _reconstitute<StorageT: DeserializeOwned + Hash + PrimInt + Unsigned>(
    grm_buf: &[u8],
    stable_buf: &[u8],
) -> (YaccGrammar<StorageT>, StateTable<StorageT>) {
    let grm = deserialize(grm_buf)
        .expect("Corrupt grammar data: was the generated module built by a different version?");
    let stable = deserialize(stable_buf)
        .expect("Corrupt state table data: was the generated module built by a different version?");
    (grm, stable)
}
/// Serialise `ser` with bincode and append it to `buffer` as a Rust
/// byte-array constant named `name` (see `ArrayWriter`).
fn serialize_bin_output<T: Serialize + ?Sized>(
    ser: &T,
    name: &str,
    buffer: &mut String,
) -> Result<(), Box<dyn Error>> {
    let mut writer = ArrayWriter::new(name);
    serialize_into(&mut writer, ser)?;
    buffer.push_str(&writer.finish());
    Ok(())
}
/// An `io::Write` sink that renders the bytes written to it as the Rust
/// source text of a `const <name>: &[u8] = &[...];` declaration.
struct ArrayWriter {
    buffer: String,
}
impl ArrayWriter {
    /// Begin a byte-array constant named `name`.
    fn new(name: &str) -> Self {
        ArrayWriter {
            buffer: format!(r#"#[allow(dead_code)] const {}: &[u8] = &["#, name),
        }
    }
    /// Close the array literal and return the complete declaration.
    fn finish(mut self) -> String {
        self.buffer.push_str("];\n");
        self.buffer
    }
}
impl Write for ArrayWriter {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        // Each byte becomes a decimal literal with a trailing comma (valid
        // Rust even for the final element).
        for &b in buf {
            write!(self.buffer, "{},", b).unwrap();
        }
        Ok(buf.len())
    }
    fn flush(&mut self) -> io::Result<()> {
        // Everything is buffered in memory; nothing to flush.
        Ok(())
    }
}
/// The result of a successful `CTParserBuilder::build`: reports whether the
/// output was regenerated, the token ID map, and any (expected) conflicts.
pub struct CTParser<StorageT = u32>
where
    StorageT: Eq + Hash,
{
    // True if the output file was (re)written; false if the cached copy
    // was reused.
    regenerated: bool,
    // Token name -> token ID, as the generated parser expects them.
    rule_ids: HashMap<String, StorageT>,
    // Populated only when the state table has conflicts (which, at this
    // point, were either accepted via %expect or not treated as errors).
    conflicts: Option<(
        YaccGrammar<StorageT>,
        StateGraph<StorageT>,
        StateTable<StorageT>,
    )>,
}
impl<StorageT> CTParser<StorageT>
where
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    usize: AsPrimitive<StorageT>,
{
    /// Was the parser module regenerated (`true`), or was an up-to-date
    /// cached copy reused (`false`)?
    pub fn regenerated(&self) -> bool {
        self.regenerated
    }
    /// The map from token names to the IDs the lexer must use for them.
    pub fn token_map(&self) -> &HashMap<String, StorageT> {
        &self.rule_ids
    }
    /// If the state table contains conflicts, return the grammar, state
    /// graph, state table, and the conflicts themselves; `None` otherwise.
    pub fn conflicts(
        &self,
    ) -> Option<(
        &YaccGrammar<StorageT>,
        &StateGraph<StorageT>,
        &StateTable<StorageT>,
        &Conflicts<StorageT>,
    )> {
        // `conflicts` is only `Some` when the table has conflicts, so the
        // inner `unwrap` cannot fail.
        self.conflicts
            .as_ref()
            .map(|(grm, sgraph, stable)| (grm, sgraph, stable, stable.conflicts().unwrap()))
    }
}
#[cfg(test)]
mod test {
use std::{fs::File, io::Write, path::PathBuf};
use super::{CTConflictsError, CTParserBuilder};
use crate::test_utils::TestLexerTypes;
use cfgrammar::yacc::{YaccKind, YaccOriginalActionKind};
use tempfile::TempDir;
// With error_on_conflicts(false), build succeeds and the conflicts are
// available for inspection via `conflicts()`.
#[test]
fn test_conflicts() {
let temp = TempDir::new().unwrap();
let mut file_path = PathBuf::from(temp.as_ref());
file_path.push("grm.y");
let mut f = File::create(&file_path).unwrap();
// Grammar with one shift/reduce and one reduce/reduce conflict.
let _ = f.write_all(
"%start A
%%
A : 'a' 'b' | B 'b';
B : 'a' | C;
C : 'a';"
.as_bytes(),
);
match CTParserBuilder::<TestLexerTypes>::new()
.error_on_conflicts(false)
.yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
.grammar_path(file_path.to_str().unwrap())
.output_path(file_path.with_extension("ignored"))
.build()
.unwrap()
.conflicts()
{
Some((_, _, _, conflicts)) => {
assert_eq!(conflicts.sr_len(), 1);
assert_eq!(conflicts.rr_len(), 1);
}
None => panic!("Expected error data"),
}
}
// By default, unexpected conflicts cause build to fail with a
// CTConflictsError carrying the conflicted state table.
#[test]
fn test_conflicts_error() {
let temp = TempDir::new().unwrap();
let mut file_path = PathBuf::from(temp.as_ref());
file_path.push("grm.y");
let mut f = File::create(&file_path).unwrap();
let _ = f.write_all(
"%start A
%%
A : 'a' 'b' | B 'b';
B : 'a' | C;
C : 'a';"
.as_bytes(),
);
match CTParserBuilder::<TestLexerTypes>::new()
.yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
.grammar_path(file_path.to_str().unwrap())
.output_path(file_path.with_extension("ignored"))
.build()
{
Ok(_) => panic!("Expected error"),
Err(e) => {
let cs = e.downcast_ref::<CTConflictsError<u16>>();
assert_eq!(cs.unwrap().stable.conflicts().unwrap().rr_len(), 1);
assert_eq!(cs.unwrap().stable.conflicts().unwrap().sr_len(), 1);
}
}
}
// A %expect count that doesn't match the actual conflicts still errors.
#[test]
fn test_expect_error() {
let temp = TempDir::new().unwrap();
let mut file_path = PathBuf::from(temp.as_ref());
file_path.push("grm.y");
let mut f = File::create(&file_path).unwrap();
// Grammar has 1 shift/reduce conflict but declares %expect 2.
let _ = f.write_all(
"%start A
%expect 2
%%
A: 'a' 'b' | B 'b';
B: 'a';"
.as_bytes(),
);
match CTParserBuilder::<TestLexerTypes>::new()
.yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
.grammar_path(file_path.to_str().unwrap())
.output_path(file_path.with_extension("ignored"))
.build()
{
Ok(_) => panic!("Expected error"),
Err(e) => {
let cs = e.downcast_ref::<CTConflictsError<u16>>();
assert_eq!(cs.unwrap().stable.conflicts().unwrap().rr_len(), 0);
assert_eq!(cs.unwrap().stable.conflicts().unwrap().sr_len(), 1);
}
}
}
// Likewise for a mismatched %expect-rr declaration.
#[test]
fn test_expectrr_error() {
let temp = TempDir::new().unwrap();
let mut file_path = PathBuf::from(temp.as_ref());
file_path.push("grm.y");
let mut f = File::create(&file_path).unwrap();
// Grammar has 1 SR + 1 RR conflict but declares %expect-rr 2.
let _ = f.write_all(
"%start A
%expect 1
%expect-rr 2
%%
A : 'a' 'b' | B 'b';
B : 'a' | C;
C : 'a';"
.as_bytes(),
);
match CTParserBuilder::<TestLexerTypes>::new()
.yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
.grammar_path(file_path.to_str().unwrap())
.output_path(file_path.with_extension("ignored"))
.build()
{
Ok(_) => panic!("Expected error"),
Err(e) => {
let cs = e.downcast_ref::<CTConflictsError<u16>>();
assert_eq!(cs.unwrap().stable.conflicts().unwrap().rr_len(), 1);
assert_eq!(cs.unwrap().stable.conflicts().unwrap().sr_len(), 1);
}
}
}
}