use std::{
collections::HashMap,
convert::AsRef,
env::{current_dir, var},
error::Error,
fmt::{self, Debug},
fs::{self, read_to_string, File},
hash::Hash,
io::Write,
marker::PhantomData,
path::{Path, PathBuf}
};
use bincode::{deserialize, serialize_into};
use cfgrammar::{
yacc::{YaccGrammar, YaccKind},
Symbol
};
use filetime::FileTime;
use lrtable::{from_yacc, statetable::Conflicts, Minimiser, StateGraph, StateTable};
use num_traits::{AsPrimitive, PrimInt, Unsigned};
use regex::Regex;
use serde::{Deserialize, Serialize};
use typename::TypeName;
use RecoveryKind;
// Appended to the grammar file's stem to form the base name of every generated output file.
const YACC_SUFFIX: &str = "_y";
// Prefix for the names of generated action functions, their parameters and their locals.
const ACTION_PREFIX: &str = "__gt_";
// Prefix for generated module-level items (currently the `EPP` table).
const GLOBAL_PREFIX: &str = "__GT_";
// Extension of the file holding the serialized grammar.
const GRM_FILE_EXT: &str = "grm";
// Extension of the generated Rust source file.
const RUST_FILE_EXT: &str = "rs";
// Extension of the file holding the serialized state graph.
const SGRAPH_FILE_EXT: &str = "sgraph";
// Extension of the file holding the serialized state table.
const STABLE_FILE_EXT: &str = "stable";
lazy_static! {
// Matches `$<digits>` in user action code; `process_file` rewrites each match to the generated
// argument variable with the same number.
static ref RE_DOL_NUM: Regex = Regex::new(r"\$([0-9]+)").unwrap();
// Matches `$lexer` in user action code; `process_file` rewrites each match to the generated
// lexer parameter name.
static ref RE_DOL_LEXER: Regex = Regex::new(r"\$lexer").unwrap();
}
/// Selects what kind of `parse` function the builder generates.
///
/// The type is a plain tag, so it derives the usual value-type traits: callers can copy it,
/// compare it and print it in diagnostics.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ActionKind {
    /// Generate one function per production from the action code in the grammar; the generated
    /// `parse` function returns a value of the grammar's declared action type.
    CustomAction,
    /// Generate a `parse` function that returns a generic parse tree (`Node`), ignoring any
    /// action code.
    GenericParseTree
}
/// Error returned by `CTParserBuilder::process_file` when the grammar's state table contains
/// conflicts and the builder is configured to treat conflicts as errors. It carries everything
/// that was built so that the conflicts can be inspected.
struct CTConflictsError<StorageT: Eq + Hash> {
/// The grammar that was being compiled.
pub grm: YaccGrammar<StorageT>,
/// The state graph built from `grm`.
pub sgraph: StateGraph<StorageT>,
/// The state table built from `grm`; the `Display`/`Debug` impls unwrap its `conflicts()`.
pub stable: StateTable<StorageT>
}
impl<StorageT> fmt::Display for CTConflictsError<StorageT>
where
    StorageT: 'static + Debug + Hash + PrimInt + Serialize + TypeName + Unsigned,
    usize: AsPrimitive<StorageT>,
    u32: AsPrimitive<StorageT>
{
    /// Writes a one-line summary giving the number of Shift/Reduce and Reduce/Reduce conflicts.
    ///
    /// Panics if the stored state table reports no conflicts.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let found = self.stable.conflicts().unwrap();
        let (shift_reduce, reduce_reduce) = (found.sr_len(), found.rr_len());
        write!(
            f,
            "CTConflictsError{{{} Shift/Reduce, {} Reduce/Reduce}}",
            shift_reduce, reduce_reduce
        )
    }
}
impl<StorageT> fmt::Debug for CTConflictsError<StorageT>
where
    StorageT: 'static + Debug + Hash + PrimInt + Serialize + TypeName + Unsigned,
    usize: AsPrimitive<StorageT>,
    u32: AsPrimitive<StorageT>
{
    /// Produces exactly the same one-line conflict summary as the `Display` impl.
    ///
    /// Panics if the stored state table reports no conflicts.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // The debug and display renderings are identical, so share one implementation.
        fmt::Display::fmt(self, f)
    }
}
// Marker impl: no `Error` methods are overridden. It lets `CTConflictsError` be boxed as the
// `Box<Error>` that `process_file` returns.
impl<StorageT> Error for CTConflictsError<StorageT>
where
StorageT: 'static + Debug + Hash + PrimInt + Serialize + TypeName + Unsigned,
usize: AsPrimitive<StorageT>,
u32: AsPrimitive<StorageT>
{
}
/// A builder which compiles a Yacc grammar file into Rust source code plus serialized grammar,
/// state graph and state table files. Configure it with the chained setter methods, then call
/// `process_file` or `process_file_in_src`.
pub struct CTParserBuilder<StorageT = u32>
where
StorageT: Eq + Hash
{
// The recovery kind written into the generated parser.
recoverer: RecoveryKind,
// Ties the builder to `StorageT` without storing a value of that type.
phantom: PhantomData<StorageT>,
// Which flavour of `parse` function to generate.
actionkind: ActionKind,
// If true, `process_file` returns an error when the state table has conflicts.
error_on_conflicts: bool,
// Set by `process_file` when it completes on a grammar whose state table has conflicts.
conflicts: Option<(
YaccGrammar<StorageT>,
StateGraph<StorageT>,
StateTable<StorageT>
)>
}
impl CTParserBuilder<u32> {
pub fn new() -> Self {
CTParserBuilder::<u32>::new_with_storaget()
}
}
impl<StorageT> CTParserBuilder<StorageT>
where
StorageT: 'static + Debug + Hash + PrimInt + Serialize + TypeName + Unsigned,
usize: AsPrimitive<StorageT>,
u32: AsPrimitive<StorageT>
{
pub fn new_with_storaget() -> Self {
CTParserBuilder {
recoverer: RecoveryKind::MF,
phantom: PhantomData,
actionkind: ActionKind::GenericParseTree,
error_on_conflicts: true,
conflicts: None
}
}
pub fn recoverer(mut self, rk: RecoveryKind) -> Self {
self.recoverer = rk;
self
}
/// Compiles the grammar file `srcp`, resolved relative to `<current directory>/src`, writing the
/// output into the directory named by the `OUT_DIR` environment variable. This is a
/// convenience wrapper around `process_file`.
///
/// Because the path is built with `PathBuf::push`, an absolute `srcp` replaces the
/// `<current directory>/src` prefix entirely.
///
/// # Errors
///
/// Returns an error if the current directory cannot be determined, if `OUT_DIR` is unset or not
/// valid Unicode, or if `process_file` fails.
pub fn process_file_in_src(
    &mut self,
    srcp: &str
) -> Result<HashMap<String, StorageT>, Box<Error>> {
    let mut inp = current_dir()?;
    inp.push("src");
    inp.push(srcp);
    // A missing `OUT_DIR` is an environment problem, not a bug in this code: report it through
    // the `Result` instead of panicking.
    let outd = PathBuf::from(var("OUT_DIR")?);
    self.process_file(inp, outd)
}
pub fn action_kind(mut self, ak: ActionKind) -> Self {
self.actionkind = ak;
self
}
pub fn error_on_conflicts(mut self, b: bool) -> Self {
self.error_on_conflicts = b;
self
}
/// Returns the grammar, state graph, state table and conflicts stored by the last
/// `process_file` call, or `None` if nothing has been stored.
pub fn conflicts(
    &self
) -> Option<(
    &YaccGrammar<StorageT>,
    &StateGraph<StorageT>,
    &StateTable<StorageT>,
    &Conflicts<StorageT>
)> {
    match &self.conflicts {
        // The triple is only ever stored when the state table has conflicts, so the `unwrap`
        // is not expected to fail.
        Some((grm, sgraph, stable)) => {
            Some((grm, sgraph, stable, &stable.conflicts().unwrap()))
        }
        None => None
    }
}
/// Compiles the Yacc grammar file `inp` into Rust, writing all output files into the directory
/// `outd`.
///
/// For an input whose file stem is `<stem>`, up to four files named `<stem>_y.<ext>` are
/// written: the serialized grammar, state graph and state table, and a Rust source file
/// defining a module `<stem>_y` with a `parse` function. If an up-to-date Rust file already
/// exists (it is newer than `inp` and contains the current cache information), nothing is
/// regenerated.
///
/// On success, returns a map from token names to their identifiers. If the state table has
/// conflicts and conflicts are not treated as errors, the grammar, state graph and state table
/// are stored so that `conflicts` can return them.
///
/// # Errors
///
/// Returns an error if `inp` cannot be read or has no UTF-8 file stem, if the grammar or its
/// state table cannot be built, if the state table has conflicts while `error_on_conflicts` is
/// set (a `CTConflictsError`), or if an output file cannot be written.
///
/// # Panics
///
/// Panics if `ActionKind::CustomAction` is selected and the grammar declares no action type or
/// has a non-start production without an action body, or if an output path is not valid UTF-8.
pub fn process_file<P, Q>(
    &mut self,
    inp: P,
    outd: Q
) -> Result<HashMap<String, StorageT>, Box<Error>>
where
    P: AsRef<Path>,
    Q: AsRef<Path>
{
    // An unreadable grammar file is an ordinary failure: report it through the `Result`.
    let inc = read_to_string(&inp)?;
    let grm = YaccGrammar::<StorageT>::new_with_storaget(YaccKind::Eco, &inc)?;
    let rule_ids = grm
        .tokens_map()
        .iter()
        .map(|(&n, &i)| (n.to_owned(), i.as_storaget()))
        .collect::<HashMap<_, _>>();
    let cache = self.rebuild_cache(&grm);

    // The file stem names both the output files and the generated module.
    let mod_name = inp
        .as_ref()
        .file_stem()
        .and_then(|s| s.to_str())
        .ok_or_else(|| {
            format!(
                "Grammar path '{}' has no UTF-8 file stem",
                inp.as_ref().display()
            )
        })?;
    let mut outp_base = outd.as_ref().to_path_buf();
    outp_base.push(format!("{}{}", mod_name, YACC_SUFFIX));
    let mut outp_rs = outp_base.clone();
    outp_rs.set_extension(RUST_FILE_EXT);

    // Skip regeneration if the existing output is newer than the input and was produced with
    // the same cache information.
    if let Ok(ref inmd) = fs::metadata(&inp) {
        if let Ok(ref out_rs_md) = fs::metadata(&outp_rs) {
            if FileTime::from_last_modification_time(out_rs_md)
                > FileTime::from_last_modification_time(inmd)
            {
                if let Ok(outc) = read_to_string(&outp_rs) {
                    if outc.contains(&cache) {
                        return Ok(rule_ids);
                    }
                }
            }
        }
    }

    // Remove any stale output first, so that a failure below cannot leave an out-of-date file
    // behind. It is fine if there was nothing to remove.
    fs::remove_file(&outp_rs).ok();

    let (sgraph, stable) = from_yacc(&grm, Minimiser::Pager)?;
    if stable.conflicts().is_some() && self.error_on_conflicts {
        return Err(Box::new(CTConflictsError {
            grm,
            sgraph,
            stable
        }));
    }

    let out_grm = self.bin_output(&outp_base, GRM_FILE_EXT, &grm)?;
    let out_sgraph = self.bin_output(&outp_base, SGRAPH_FILE_EXT, &sgraph)?;
    let out_stable = self.bin_output(&outp_base, STABLE_FILE_EXT, &stable)?;

    let mut outs = String::new();
    let actiontype = match grm.actiontype() {
        Some(t) => t.clone(),
        None => match self.actionkind {
            ActionKind::CustomAction => panic!("Action return type not defined!"),
            // The generic parse tree output never interpolates the action type.
            ActionKind::GenericParseTree => String::new()
        }
    };

    // Module header. This literal is NOT passed through `format!`, so braces are written
    // singly.
    outs.push_str(&format!("mod {}_y {{\n", mod_name));
    outs.push_str(
        " use lrpar::{Lexer, LexParseError, RecoveryKind, RTParserBuilder};
use lrpar::ctbuilder::_reconstitute;
use cfgrammar::TIdx;
"
    );

    // Signature of the generated `parse` function.
    match self.actionkind {
        ActionKind::CustomAction => {
            outs.push_str(&format!(
                " use lrpar::{{Lexeme, parser::AStackType}};
use cfgrammar::RIdx;
use std::vec;
pub fn parse(lexer: &mut Lexer<{storaget}>)
-> (Option<{actiont}>, Vec<LexParseError<{storaget}>>)
{{",
                storaget = StorageT::type_name(),
                actiont = actiontype
            ));
        }
        ActionKind::GenericParseTree => {
            outs.push_str(&format!(
                "use lrpar::Node;
pub fn parse(lexer: &mut Lexer<{storaget}>)
-> (Option<Node<{storaget}>>, Vec<LexParseError<{storaget}>>)
{{",
                storaget = StorageT::type_name()
            ));
        }
    };

    let recoverer = match self.recoverer {
        RecoveryKind::CPCTPlus => "CPCTPlus",
        RecoveryKind::MF => "MF",
        RecoveryKind::Panic => "Panic",
        RecoveryKind::None => "None"
    };

    // The generated parser embeds the serialized files and rebuilds the data structures from
    // them at run time.
    outs.push_str(&format!(
        "
let (grm, sgraph, stable) = _reconstitute(include_bytes!(\"{}\"),
include_bytes!(\"{}\"),
include_bytes!(\"{}\"));",
        out_grm.to_str().unwrap(),
        out_sgraph.to_str().unwrap(),
        out_stable.to_str().unwrap()
    ));

    // Body of the generated `parse` function.
    match self.actionkind {
        ActionKind::CustomAction => {
            outs.push_str(&format!(
                "\n let mut actions: Vec<&Fn(RIdx<{storaget}>,
&Lexer<{storaget}>,
vec::Drain<AStackType<{actiont}, {storaget}>>)
-> {actiont}> = Vec::new();\n",
                storaget = StorageT::type_name(),
                actiont = actiontype
            ));
            for pidx in grm.iter_pidxs() {
                outs.push_str(&format!(
                    " actions.push(&{prefix}action_{});\n",
                    usize::from(pidx),
                    prefix = ACTION_PREFIX
                ))
            }
            outs.push_str(&format!(
                "
RTParserBuilder::new(&grm, &sgraph, &stable)
.recoverer(RecoveryKind::{})
.parse_actions(lexer, &actions)\n",
                recoverer,
            ));
        }
        ActionKind::GenericParseTree => {
            outs.push_str(&format!(
                "
RTParserBuilder::new(&grm, &sgraph, &stable)
.recoverer(RecoveryKind::{})
.parse_generictree(lexer)\n",
                recoverer
            ));
        }
    };
    outs.push_str(" }\n\n");

    // One `R_<NAME>` constant per rule, except the rule containing the start production.
    for ridx in grm.iter_rules() {
        if !grm.rule_to_prods(ridx).contains(&grm.start_prod()) {
            outs.push_str(&format!(
                " #[allow(dead_code)]\n pub const R_{}: {} = {:?};\n",
                grm.rule_name(ridx).to_ascii_uppercase(),
                StorageT::type_name(),
                usize::from(ridx)
            ));
        }
    }

    outs.push_str(&self.gen_token_epp(&grm));

    match self.actionkind {
        ActionKind::CustomAction => {
            if let Some(s) = grm.programs() {
                outs.push_str("\n/* User code */\n\n");
                outs.push_str(s);
            }

            // One function per production, wrapping the user's action code.
            outs.push_str("\n/* Converted actions */\n\n");
            for pidx in grm.iter_pidxs() {
                outs.push_str(&format!(
                    "fn {prefix}action_{}({prefix}ridx: RIdx<{storaget}>,
{prefix}lexer: &Lexer<{storaget}>,
mut {prefix}args: vec::Drain<AStackType<{actiont}, {storaget}>>)
-> {actiont} {{\n",
                    usize::from(pidx),
                    storaget = StorageT::type_name(),
                    prefix = ACTION_PREFIX,
                    actiont = actiontype
                ));

                // Unpack one argument per symbol of the production; arguments are numbered
                // from 1 so that they line up with `$1`, `$2`, ... in the action code.
                for (i, sym) in grm.prod(pidx).iter().enumerate() {
                    match sym {
                        Symbol::Rule(_) => outs.push_str(&format!(
                            "
let {prefix}arg_{} = match {prefix}args.next().unwrap() {{
AStackType::ActionType(v) => v,
AStackType::Lexeme(_) => unreachable!()
}};
",
                            i + 1,
                            prefix = ACTION_PREFIX
                        )),
                        Symbol::Token(_) => outs.push_str(&format!(
                            "
let {prefix}arg_{}: Result<Lexeme<_>, Lexeme<_>> = match {prefix}args.next().unwrap() {{
AStackType::ActionType(_) => unreachable!(),
AStackType::Lexeme(l) => {{
if l.len().is_some() {{
Ok(l)
}} else {{
Err(l)
}}
}}
}};
",
                            i + 1,
                            prefix = ACTION_PREFIX
                        ))
                    }
                }

                if let Some(s) = grm.action(pidx) {
                    // Rewrite `$<n>` and `$lexer` into the generated variable names.
                    let s = RE_DOL_NUM
                        .replace_all(
                            s,
                            format!("{prefix}arg_$1", prefix = ACTION_PREFIX).as_str()
                        )
                        .into_owned();
                    let s = RE_DOL_LEXER.replace_all(
                        &s,
                        format!("{prefix}lexer", prefix = ACTION_PREFIX).as_str()
                    );
                    outs.push_str(&format!(" {}", &s));
                } else if pidx == grm.start_prod() {
                    // The start production is allowed to have no action body; its generated
                    // function body is a bare `unreachable!()`.
                    outs.push_str(" unreachable!()");
                } else {
                    panic!(
                        "Production in rule '{}' must have an action body.",
                        grm.rule_name(grm.prod_to_rule(pidx))
                    );
                }
                outs.push_str("\n}\n\n");
            }
        }
        ActionKind::GenericParseTree => ()
    };
    outs.push_str("}\n\n");

    // The cache information goes last; the up-to-date check above searches for it.
    outs.push_str(&cache);

    let mut f = File::create(outp_rs)?;
    f.write_all(outs.as_bytes())?;

    if stable.conflicts().is_some() {
        self.conflicts = Some((grm, sgraph, stable));
    }
    Ok(rule_ids)
}
fn rebuild_cache(&self, grm: &YaccGrammar<StorageT>) -> String {
let mut cache = String::new();
cache.push_str("\n/* CACHE INFORMATION\n");
cache.push_str(&format!(
" Build time: {:?}",
env!("VERGEN_BUILD_TIMESTAMP")
));
cache.push_str(&format!(" Recoverer: {:?}\n", self.recoverer));
for tidx in grm.iter_tidxs() {
let n = match grm.token_name(tidx) {
Some(n) => format!("'{}'", n),
None => "<unknown>".to_string()
};
cache.push_str(&format!(" {} {}\n", usize::from(tidx), n));
}
cache.push_str("*/\n");
cache
}
/// Generates the source of the `EPP` lookup table (one `Option<&str>` entry per token, in token
/// index order) together with the public `token_epp` accessor that indexes into it.
fn gen_token_epp(&self, grm: &YaccGrammar<StorageT>) -> String {
    let entries = grm
        .iter_tidxs()
        .into_iter()
        .map(|tidx| match grm.token_epp(tidx) {
            Some(n) => format!("Some(\"{}\")", n),
            None => "None".to_string()
        })
        .collect::<Vec<_>>();
    let table = entries.join(", ");
    format!(
        " const {prefix}EPP: &[Option<&str>] = &[{}];
/// Return the %epp entry for token `tidx` (where `None` indicates \"the token has no
/// pretty-printed value\"). Panics if `tidx` doesn't exist.
#[allow(dead_code)]
pub fn token_epp<'a>(tidx: TIdx<{storaget}>) -> Option<&'a str> {{
{prefix}EPP[usize::from(tidx)]
}}",
        table,
        storaget = StorageT::type_name(),
        prefix = GLOBAL_PREFIX
    )
}
/// Serializes `d` into the file obtained by giving `outp_base` the extension `ext`, and returns
/// that file's path.
///
/// Fails if the file cannot be created or the value cannot be serialized.
fn bin_output<P: AsRef<Path>, T: Serialize>(
    &self,
    outp_base: P,
    ext: &str,
    d: &T
) -> Result<PathBuf, Box<Error>> {
    let target = outp_base.as_ref().with_extension(ext);
    serialize_into(File::create(&target)?, d)?;
    Ok(target)
}
}
// Rebuilds the grammar, state graph and state table from their serialized forms. The code
// generated by `CTParserBuilder::process_file` calls this with the embedded contents of the
// files written at build time; it is public only for that reason.
//
// Panics if any of the three buffers fails to deserialize.
#[doc(hidden)]
pub fn _reconstitute<'a, StorageT: Deserialize<'a> + Hash + PrimInt + Unsigned>(
    grm_buf: &'a [u8],
    sgraph_buf: &'a [u8],
    stable_buf: &'a [u8]
) -> (
    YaccGrammar<StorageT>,
    StateGraph<StorageT>,
    StateTable<StorageT>
) {
    // Each element's type is inferred from the return type.
    (
        deserialize(grm_buf).unwrap(),
        deserialize(sgraph_buf).unwrap(),
        deserialize(stable_buf).unwrap()
    )
}
#[cfg(test)]
mod test {
    extern crate temp_testdir;

    use std::{fs::File, io::Write, path::PathBuf};

    use self::temp_testdir::TempDir;
    use super::{ActionKind, CTConflictsError, CTParserBuilder};

    // Grammar shared by both tests, which expect it to produce exactly one Shift/Reduce and one
    // Reduce/Reduce conflict.
    const CONFLICTING_GRM: &str = "%start A
%%
A : 'a' 'b' | B 'b';
B : 'a' | C;
C : 'a';";

    // Writes the conflicting grammar to `grm.y` in a fresh temporary directory. The directory
    // handle is returned alongside the path so the caller can keep it alive for the whole test.
    fn conflicting_grammar_file() -> (TempDir, PathBuf) {
        let dir = TempDir::default();
        let mut path = PathBuf::from(dir.as_ref());
        path.push("grm.y");
        let mut f = File::create(&path).unwrap();
        let _ = f.write_all(CONFLICTING_GRM.as_bytes());
        (dir, path)
    }

    // With conflicts tolerated, processing succeeds and the conflicts can be queried afterwards.
    #[test]
    fn test_conflicts() {
        let (_temp, file_path) = conflicting_grammar_file();
        let mut ct = CTParserBuilder::new()
            .error_on_conflicts(false)
            .action_kind(ActionKind::GenericParseTree);
        ct.process_file_in_src(file_path.to_str().unwrap()).unwrap();
        if let Some((_, _, _, conflicts)) = ct.conflicts() {
            assert_eq!(conflicts.sr_len(), 1);
            assert_eq!(conflicts.rr_len(), 1);
        } else {
            panic!("Expected error data")
        }
    }

    // With the default settings, conflicts surface as a `CTConflictsError`.
    #[test]
    fn test_conflicts_error() {
        let (_temp, file_path) = conflicting_grammar_file();
        let res = CTParserBuilder::new()
            .action_kind(ActionKind::GenericParseTree)
            .process_file_in_src(file_path.to_str().unwrap());
        let e = match res {
            Ok(_) => panic!("Expected error"),
            Err(e) => e
        };
        let cs = e.downcast_ref::<CTConflictsError<u32>>();
        assert_eq!(cs.unwrap().stable.conflicts().unwrap().sr_len(), 1);
        assert_eq!(cs.unwrap().stable.conflicts().unwrap().rr_len(), 1);
    }
}