use std::{
    any::type_name,
    borrow::Cow,
    collections::{HashMap, HashSet},
    convert::AsRef,
    env::{current_dir, var},
    error::Error,
    fmt::Debug,
    fs::{self, create_dir_all, read_to_string, File},
    hash::Hash,
    io::Write,
    path::{Path, PathBuf},
};

use lazy_static::lazy_static;
use num_traits::{PrimInt, Unsigned};
use regex::Regex;
use try_from::TryFrom;

use crate::lexer::{LRNonStreamingLexerDef, LexerDef};

const RUST_FILE_EXT: &str = "rs";

lazy_static! {
    static ref RE_TOKEN_ID: Regex = Regex::new(r"^[a-zA-Z_][a-zA-Z_0-9]*$").unwrap();
}
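
/// The particular kind of lexer to generate. Currently only one kind is
/// supported.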
pub enum LexerKind {
    LRNonStreamingLexer,
}
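
/// The Rust visibility of the generated module: each variant maps directly
/// onto one of Rust's visibility modifiers (e.g. `PublicCrate` becomes
/// `pub(crate)`).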
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Visibility {
    Private,
    Public,
    PublicSuper,
    PublicSelf,
    PublicCrate,
    PublicIn(String),
}
impl Visibility {
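    /// Return the Rust source representation of this visibility (e.g.
    /// "pub(crate)"), suitable for splicing into generated code.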
    fn cow_str(&self) -> Cow<'static, str> {
        match self {
            Visibility::Private => Cow::from(""),
            Visibility::Public => Cow::from("pub"),
            Visibility::PublicSuper => Cow::from("pub(super)"),
            Visibility::PublicSelf => Cow::from("pub(self)"),
            Visibility::PublicCrate => Cow::from("pub(crate)"),
            Visibility::PublicIn(data) => Cow::from(format!("pub(in {})", data)),
        }
    }
}
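
/// A `LexerBuilder` specifies the criteria for building a statically
/// generated lexer: the lexer kind, the generated module's name and
/// visibility, a map of rule names to token IDs, and whether mismatches
/// between the lexer and the grammar are fatal. `StorageT` is the unsigned
/// integer type used to store token IDs (`u32` by default).
///
/// A typical use is from a `build.rs` file. The following is a sketch:
/// `calc.l` is a hypothetical lex file in `src/`, and `rule_ids` a map
/// obtained from a parser builder:
///
/// ```text
/// LexerBuilder::new()
///     .rule_ids_map(rule_ids)
///     .process_file_in_src("calc.l")
///     .unwrap();
/// ```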
pub struct LexerBuilder<'a, StorageT = u32> {
    lexerkind: LexerKind,
    mod_name: Option<&'a str>,
    visibility: Visibility,
    rule_ids_map: Option<HashMap<String, StorageT>>,
    allow_missing_terms_in_lexer: bool,
    allow_missing_tokens_in_parser: bool,
}
impl<'a, StorageT> LexerBuilder<'a, StorageT>
where
    StorageT: Copy + Debug + Eq + Hash + PrimInt + TryFrom<usize> + Unsigned,
{
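    /// Create a new `LexerBuilder` with default settings: a private module
    /// whose name is derived from the input filename, no rule IDs map, and
    /// missing lexer tokens treated as fatal.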
    pub fn new() -> Self {
        LexerBuilder {
            lexerkind: LexerKind::LRNonStreamingLexer,
            mod_name: None,
            visibility: Visibility::Private,
            rule_ids_map: None,
            allow_missing_terms_in_lexer: false,
            allow_missing_tokens_in_parser: true,
        }
    }
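
    /// Set the kind of lexer to be generated to `lexerkind`.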
    pub fn lexerkind(mut self, lexerkind: LexerKind) -> Self {
        self.lexerkind = lexerkind;
        self
    }
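
    /// Set the name of the generated module to `mod_name`. If not set, the
    /// name is derived from the input filename (see `process_file`).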
    pub fn mod_name(mut self, mod_name: &'a str) -> Self {
        self.mod_name = Some(mod_name);
        self
    }
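
    /// Set the visibility of the generated module to `vis`. Defaults to
    /// `Visibility::Private`.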
    pub fn visibility(mut self, vis: Visibility) -> Self {
        self.visibility = vis;
        self
    }
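
    /// Set this builder's map of rule names to token IDs. Typically this
    /// map comes from a parser builder, ensuring that lexer and parser
    /// agree on token IDs.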
    pub fn rule_ids_map(mut self, rule_ids_map: HashMap<String, StorageT>) -> Self {
        self.rule_ids_map = Some(rule_ids_map);
        self
    }
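
    /// Given a filename `x.l` relative to the project's `src` directory,
    /// statically compile it into a Rust module stored as `x.l.rs` under
    /// `OUT_DIR`. This is a convenience wrapper around `process_file`.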
    pub fn process_file_in_src(
        self,
        srcp: &str,
    ) -> Result<(Option<HashSet<String>>, Option<HashSet<String>>), Box<dyn Error>> {
        let mut inp = current_dir()?;
        inp.push("src");
        inp.push(srcp);
        let mut outp = PathBuf::new();
        outp.push(var("OUT_DIR").unwrap());
        outp.push(Path::new(srcp).parent().unwrap().to_str().unwrap());
        create_dir_all(&outp)?;
        let mut leaf = Path::new(srcp)
            .file_name()
            .unwrap()
            .to_str()
            .unwrap()
            .to_owned();
        leaf.push_str(&format!(".{}", RUST_FILE_EXT));
        outp.push(leaf);
        self.process_file(inp, outp)
    }
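
    /// Statically compile the `.l` file `inp` into the Rust module `outp`.
    /// On success, return `(missing_from_lexer, missing_from_parser)`: the
    /// rule names referenced by the parser but not defined in the lexer,
    /// and those defined in the lexer but unused by the parser (each `None`
    /// if no rule IDs map was supplied).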
    pub fn process_file<P, Q>(
        self,
        inp: P,
        outp: Q,
    ) -> Result<(Option<HashSet<String>>, Option<HashSet<String>>), Box<dyn Error>>
    where
        P: AsRef<Path>,
        Q: AsRef<Path>,
    {
        let mut lexerdef: Box<dyn LexerDef<StorageT>> = match self.lexerkind {
            LexerKind::LRNonStreamingLexer => {
                Box::new(LRNonStreamingLexerDef::from_str(&read_to_string(&inp)?)?)
            }
        };
        let (missing_from_lexer, missing_from_parser) = match self.rule_ids_map {
            Some(ref rim) => {
                // Convert from HashMap<String, StorageT> to HashMap<&str, StorageT>,
                // which is what set_rule_ids expects.
                let owned_map = rim
                    .iter()
                    .map(|(x, y)| (&**x, *y))
                    .collect::<HashMap<_, _>>();
                let (x, y) = lexerdef.set_rule_ids(&owned_map);
                (
                    x.map(|a| a.iter().map(|&b| b.to_string()).collect::<HashSet<_>>()),
                    y.map(|a| a.iter().map(|&b| b.to_string()).collect::<HashSet<_>>()),
                )
            }
            None => (None, None),
        };
        if !self.allow_missing_terms_in_lexer {
            if let Some(ref mfl) = missing_from_lexer {
                eprintln!("Error: the following tokens are used in the grammar but are not defined in the lexer:");
                for n in mfl {
                    eprintln!(" {}", n);
                }
                // Remove the stale output file so that this error isn't
                // masked by a cached module on the next build.
                fs::remove_file(&outp).ok();
                panic!();
            }
        }
        if !self.allow_missing_tokens_in_parser {
            if let Some(ref mfp) = missing_from_parser {
                eprintln!("Error: the following tokens are defined in the lexer but not used in the grammar:");
                for n in mfp {
                    eprintln!(" {}", n);
                }
                fs::remove_file(&outp).ok();
                panic!();
            }
        }
        let mod_name = match self.mod_name {
            Some(s) => s.to_owned(),
            None => {
                // No module name was specified: derive one by repeatedly
                // stripping filename extensions from inp (so e.g. `calc.l`
                // becomes `calc`) and appending `_l`.
                let mut stem = inp.as_ref().to_str().unwrap();
                loop {
                    let new_stem = Path::new(stem).file_stem().unwrap().to_str().unwrap();
                    if stem == new_stem {
                        break;
                    }
                    stem = new_stem;
                }
                format!("{}_l", stem)
            }
        };
        let mut outs = String::new();
        let (lexerdef_name, lexerdef_type) = match self.lexerkind {
            LexerKind::LRNonStreamingLexer => (
                "LRNonStreamingLexerDef",
                format!("LRNonStreamingLexerDef<{}>", type_name::<StorageT>()),
            ),
        };
        outs.push_str(&format!(
            "{mod_vis} mod {mod_name} {{
use lrlex::{{LexerDef, LRNonStreamingLexerDef, Rule}};
#[allow(dead_code)]
pub fn lexerdef() -> {lexerdef_type} {{
let rules = vec![",
            mod_vis = self.visibility.cow_str(),
            mod_name = mod_name,
            lexerdef_type = lexerdef_type
        ));
        for r in lexerdef.iter_rules() {
            let tok_id = match r.tok_id {
                Some(ref t) => format!("Some({:?})", t),
                None => "None".to_owned(),
            };
            let n = match r.name {
                Some(ref n) => format!("Some({:?}.to_string())", n),
                None => "None".to_owned(),
            };
            // Escape backslashes and double quotes so that the regex can be
            // embedded in a string literal in the generated source.
            outs.push_str(&format!(
                "
Rule::new({}, {}, \"{}\".to_string()).unwrap(),",
                tok_id,
                n,
                r.re_str.replace("\\", "\\\\").replace("\"", "\\\"")
            ));
        }
        outs.push_str(&format!(
            "
];
{lexerdef_name}::from_rules(rules)
}}
",
            lexerdef_name = lexerdef_name
        ));
        // Generate a `T_<NAME>` constant for each rule name that is a valid
        // Rust identifier, so user code can refer to token IDs by name.
        if let Some(ref rim) = self.rule_ids_map {
            for (n, id) in rim {
                if RE_TOKEN_ID.is_match(n) {
                    outs.push_str(&format!(
                        "#[allow(dead_code)]\npub const T_{}: {} = {:?};\n",
                        n.to_ascii_uppercase(),
                        type_name::<StorageT>(),
                        *id
                    ));
                }
            }
        }
        outs.push_str("}");
        // If the output file already exists with identical contents, don't
        // rewrite it: touching the file would force an unnecessary recompile
        // of everything which depends on it.
        if let Ok(curs) = read_to_string(&outp) {
            if curs == outs {
                return Ok((missing_from_lexer, missing_from_parser));
            }
        }
        let mut f = File::create(outp)?;
        f.write_all(outs.as_bytes())?;
        Ok((missing_from_lexer, missing_from_parser))
    }
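
    /// If `allow` is `false` (the default), `process_file` panics when
    /// tokens are used in the grammar but not defined in the lexer.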
    pub fn allow_missing_terms_in_lexer(mut self, allow: bool) -> Self {
        self.allow_missing_terms_in_lexer = allow;
        self
    }
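
    /// If `allow` is `false`, `process_file` panics when tokens are defined
    /// in the lexer but not used in the grammar. Defaults to `true`.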
    pub fn allow_missing_tokens_in_parser(mut self, allow: bool) -> Self {
        self.allow_missing_tokens_in_parser = allow;
        self
    }
}