#![allow(dead_code)]
#![allow(unused_variables)]
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]
#![allow(unused_parens)]
#![allow(unused_mut)]
#![allow(unused_assignments)]
#![allow(unused_doc_comments)]
#![allow(unused_imports)]
use std::collections::{HashMap,HashSet,BTreeSet};
use std::cell::{RefCell,Ref,RefMut};
use std::hash::{Hash,Hasher};
use std::io::{self,Read,Write,BufReader,BufRead};
use std::fs::File;
use std::io::prelude::*;
/// Precedence given to symbols and rules that carry no explicit declaration.
pub const DEFAULTPRECEDENCE: i32 = 0;
/// Base value used to encode non-associative precedence: a `nonassoc`
/// declaration at level `n` is stored as `NONASSOCBIT - n`.
pub const NONASSOCBIT: i32 = -0x4000_0001;
/// Trace level constant.
/// NOTE(review): no use of this constant is visible in this section — confirm
/// where it is consumed.
pub const TRACE: usize = 0;
/// A grammar symbol, either a terminal or a non-terminal.
#[derive(Clone, Debug)]
pub struct Gsym {
    /// Name of the symbol as written in the grammar.
    pub sym: String,
    /// Type string associated with the symbol; empty when none is recorded.
    pub rusttype: String,
    /// `true` for terminals, `false` for non-terminals.
    pub terminal: bool,
    /// Label attached to the symbol (printed as `sym:label`); empty when
    /// the symbol is unlabeled.
    pub label: String,
    /// Precedence value; `DEFAULTPRECEDENCE` unless a declaration changes it.
    pub precedence: i32,
    /// Position of the symbol in the grammar's `Symbols` vector.
    pub index: usize,
}
impl Gsym {
    /// Creates a symbol named `s`, flagged as terminal or non-terminal.
    ///
    /// The label and type start out empty, the precedence is
    /// `DEFAULTPRECEDENCE`, and the index is 0 until the caller assigns it.
    pub fn new(s: &str, isterminal: bool) -> Gsym {
        Gsym {
            sym: String::from(s),
            rusttype: String::new(),
            terminal: isterminal,
            label: String::new(),
            precedence: DEFAULTPRECEDENCE,
            index: 0,
        }
    }

    /// Replaces the symbol's label with a copy of `la`.
    pub fn setlabel(&mut self, la: &str) {
        self.label = la.to_owned();
    }

    /// Replaces the symbol's type string with a copy of `rt`.
    pub fn settype(&mut self, rt: &str) {
        self.rusttype = rt.to_owned();
    }

    /// Sets the symbol's precedence value to `p`.
    pub fn setprecedence(&mut self, p: i32) {
        self.precedence = p;
    }

    /// Returns the type string stored in `Gmr.Symbols` at this symbol's
    /// `index` (not this value's own `rusttype` field).
    ///
    /// # Panics
    /// Panics if `self.index` is out of bounds for `Gmr.Symbols`.
    pub fn gettype<'t>(&self, Gmr: &'t Grammar) -> &'t str {
        Gmr.Symbols[self.index].rusttype.as_str()
    }
}
/// A single production rule of the grammar.
#[derive(Clone)]
pub struct Grule {
    /// Symbol on the left-hand side of the production.
    pub lhs: Gsym,
    /// Symbols on the right-hand side, in order.
    pub rhs: Vec<Gsym>,
    /// Text of the action attached to the rule; empty when there is none.
    pub action: String,
    /// Precedence value of the rule.
    pub precedence: i32,
    /// Both constructors set this to `false`.
    /// NOTE(review): presumably marks rules synthesized by the generator — confirm.
    pub autogenerated: bool,
}
impl Grule {
    /// Creates an empty rule whose left-hand side is a fresh non-terminal
    /// named `lh`.
    ///
    /// The right-hand side and action are empty, the precedence is
    /// `DEFAULTPRECEDENCE`, and `autogenerated` is `false`.
    pub fn new_skeleton(lh: &str) -> Grule {
        let lhs = Gsym::new(lh, false);
        Grule {
            lhs,
            rhs: Vec::new(),
            action: String::new(),
            precedence: DEFAULTPRECEDENCE,
            autogenerated: false,
        }
    }

    /// Creates an empty rule whose left-hand side is a clone of `nt`.
    ///
    /// All other fields take the same defaults as in `new_skeleton`.
    pub fn from_lhs(nt: &Gsym) -> Grule {
        let lhs = nt.clone();
        Grule {
            lhs,
            rhs: Vec::new(),
            action: String::new(),
            precedence: DEFAULTPRECEDENCE,
            autogenerated: false,
        }
    }
}
/// Formats production `rule`, numbered `ri`, as a single newline-terminated
/// line and returns it.
///
/// Each right-hand-side symbol is written as `sym` or `sym:label`, followed
/// by a space; the trimmed action text and the rule precedence come last.
pub fn printruleb(rule: &Grule, ri: usize) -> String {
    let mut out = format!("PRODUCTION_{}: {} --> ", ri, rule.lhs.sym);
    for sym in rule.rhs.iter() {
        out += &sym.sym;
        if !sym.label.is_empty() {
            out.push(':');
            out += &sym.label;
        }
        out.push(' ');
    }
    out += &format!(" action{{ {}, precedence {}\n", rule.action.trim(), rule.precedence);
    out
}
/// Prints production `rule`, numbered `ri`, to stdout as one line.
///
/// Each right-hand-side symbol is written as `sym` or `sym:label`, followed
/// by a space; the trimmed action text and the rule precedence come last.
pub fn printrule(rule: &Grule, ri: usize) {
    print!("PRODUCTION_{}: {} --> ", ri, rule.lhs.sym);
    rule.rhs.iter().for_each(|sym| {
        print!("{}", sym.sym);
        if !sym.label.is_empty() {
            print!(":{}", sym.label);
        }
        print!(" ");
    });
    println!(" action{{ {}, precedence {}", rule.action.trim(), rule.precedence);
}
/// In-memory representation of a grammar: its symbols, production rules and
/// the options collected from grammar directives.
pub struct Grammar {
    /// Grammar name, taken from the `grammarname` directive.
    pub name: String,
    /// All grammar symbols; a symbol's `index` is its position here.
    pub Symbols: Vec<Gsym>,
    /// Maps a symbol name to its position in `Symbols`.
    pub Symhash: HashMap<String, usize>,
    /// Production rules.
    pub Rules: Vec<Grule>,
    /// Position of the start symbol in `Symbols`; `usize::MAX` until a
    /// `topsym`/`startsymbol` directive sets it.
    pub topsym: usize,
    /// NOTE(review): presumably the indices of nullable non-terminals; not
    /// populated in this section — confirm.
    pub Nullable: HashSet<usize>,
    /// NOTE(review): presumably FIRST sets keyed by symbol index — confirm.
    pub First: HashMap<usize, HashSet<usize>>,
    /// Keyed by non-terminal index; an empty set is registered when the
    /// non-terminal is declared. Presumably holds indices into `Rules` —
    /// confirm.
    pub Rulesfor: HashMap<usize, HashSet<usize>>,
    /// Type named by the `absyntype`/`valuetype` directive; `()` by default.
    pub Absyntype: String,
    /// Type named by the `externtype`/`externaltype` directive; `()` by default.
    pub Externtype: String,
    /// Names of terminals listed in `resynch`/`resync` directives.
    pub Resynch: HashSet<String>,
    /// Name of the terminal given by the `errsym`/`errorsymbol` directive.
    pub Errsym: String,
    /// Maps a lexical form to the name of the terminal declared for it
    /// (`lexname`/`lexterminal`).
    pub Lexnames: HashMap<String, String>,
    /// Maps a terminal's symbol index to its lexical form.
    pub Nameslex: HashMap<usize, String>,
    /// Text copied verbatim from grammar lines that start with `!`.
    pub Extras: String,
    /// Stays `true` while every declared symbol type equals `Absyntype`.
    pub sametype: bool,
    /// Lifetime name including the leading `'`; empty when none is set.
    pub lifetime: String,
    /// When greater than zero the log methods print immediately; otherwise
    /// they append to `genlog`.
    pub tracelev: usize,
    /// `(terminal name, token form, value form)` triples from `lexvalue`,
    /// `valueterminal` and `valterminal` declarations.
    pub Lexvals: Vec<(String, String, String)>,
    /// Pairs split on `~` from `lexconditional` declarations.
    pub Lexconditionals: Vec<(String, String)>,
    /// Names of terminals that have a lexical name or value declaration.
    pub Haslexval: HashSet<String>,
    /// Property strings from `lexattribute` declarations.
    pub Lexextras: Vec<String>,
    /// Maps a type string to a numeric id assigned while declarations are
    /// parsed.
    pub enumhash: HashMap<String, usize>,
    /// Set by the `auto` family of directives and by lexer-related
    /// declarations such as `lexname` and `lexvalue`.
    pub genlex: bool,
    /// Set by the `auto`/`genabsyn`/`auto-bump` directives.
    pub genabsyn: bool,
    /// NOTE(review): not populated in this section; presumably a
    /// reachability relation between symbol indices — confirm.
    pub Reachable: HashMap<usize, HashSet<usize>>,
    /// Type names that `basictype` accepts directly.
    pub basictypes: HashSet<&'static str>,
    /// Text copied verbatim from grammar lines that start with `$`.
    pub ASTExtras: String,
    /// Indices of terminals whose type carries the grammar's lifetime.
    pub haslt_base: HashSet<usize>,
    /// NOTE(review): not populated in this section; presumably related to
    /// the `%` markers in rule right-hand sides — confirm.
    pub delaymarkers: HashMap<usize, BTreeSet<(usize, usize)>>,
    /// Indices of non-terminals named in `flatten` directives.
    pub flattentypes: HashSet<usize>,
    /// NOTE(review): meaning not visible in this section — confirm.
    pub ntcxmax: usize,
    /// NOTE(review): presumably the index of the start non-terminal — confirm.
    pub startnti: usize,
    /// NOTE(review): presumably the index of the end-of-input terminal — confirm.
    pub eoftermi: usize,
    /// NOTE(review): presumably the index of the start rule — confirm.
    pub startrulei: usize,
    /// Mode selector; `valterminal` handling treats 0 differently from
    /// positive values when choosing types.
    pub mode: i32,
    /// Set by the `auto-bump` directive.
    pub bumpast: bool,
    /// Maps a rule index to the right-hand-side position of its `!%` marker.
    pub sdcuts: HashMap<usize, usize>,
    /// Group names given in `variant-group`/`operator-group` declarations
    /// (including the `-for` forms).
    pub vargroupnames: Vec<String>,
    /// Maps `(non-terminal index, symbol index)` to a position in
    /// `vargroupnames`; the first component is `usize::MAX` for groups that
    /// are not tied to a particular non-terminal.
    pub vargroups: HashMap<(usize, usize), usize>,
    /// Messages accumulated by the log methods when `tracelev` is zero.
    pub genlog: String,
    /// NOTE(review): not written in this section; presumably a path for the
    /// generated table — confirm.
    pub tablefile: String,
    /// Maps a non-terminal's index to the text given in its `default`
    /// directive.
    pub defaults: HashMap<usize, String>,
}
impl Default for Grammar {
fn default() -> Self { Grammar::new() }
}
impl Grammar
{
/// Creates an empty grammar with default settings.
///
/// Notable defaults: `Absyntype` and `Externtype` are `()`, `topsym` is
/// `usize::MAX` (unset), `sametype` is `true`, and `tracelev` is 1 so that
/// log messages are printed rather than buffered.
pub fn new() -> Grammar {
    // Type names that `basictype` accepts without further inspection.
    let btypes: HashSet<&'static str> = [
        "()", "bool", "i64", "u64", "usize", "f64", "i32", "u32", "u8", "u16", "i8", "i16",
        "f32", "char", "(usize,usize)", "isize",
    ]
    .iter()
    .copied()
    .collect();

    Grammar {
        name: String::new(),
        Symbols: Vec::new(),
        Symhash: HashMap::new(),
        Rules: Vec::new(),
        topsym: usize::MAX,
        Nullable: HashSet::new(),
        First: HashMap::new(),
        Rulesfor: HashMap::new(),
        Absyntype: "()".to_owned(),
        Externtype: "()".to_owned(),
        Resynch: HashSet::new(),
        Errsym: String::new(),
        Lexnames: HashMap::new(),
        Nameslex: HashMap::new(),
        Extras: String::new(),
        sametype: true,
        lifetime: String::new(),
        tracelev: 1,
        Lexvals: Vec::new(),
        Lexconditionals: Vec::new(),
        Haslexval: HashSet::new(),
        Lexextras: Vec::new(),
        enumhash: HashMap::new(),
        genlex: false,
        genabsyn: false,
        Reachable: HashMap::new(),
        basictypes: btypes,
        ASTExtras: String::new(),
        haslt_base: HashSet::new(),
        delaymarkers: HashMap::new(),
        flattentypes: HashSet::new(),
        ntcxmax: 0,
        startnti: 0,
        eoftermi: 0,
        startrulei: 0,
        mode: 0,
        bumpast: false,
        sdcuts: HashMap::new(),
        vargroupnames: Vec::new(),
        vargroups: HashMap::new(),
        genlog: String::new(),
        tablefile: String::new(),
        defaults: HashMap::new(),
    }
}
/// Reports `msg` as one line: printed to stdout when `tracelev` is above
/// zero, otherwise appended (with a trailing newline) to the buffered log.
pub fn logprint(&mut self, msg: &str) {
    if self.tracelev == 0 {
        self.genlog.push_str(msg);
        self.genlog.push('\n');
    } else {
        println!("{}", msg);
    }
}
/// Reports `msg` without adding a newline: printed to stdout when
/// `tracelev` is above zero, otherwise appended to the buffered log.
pub fn logprint0(&mut self, msg: &str) {
    if self.tracelev == 0 {
        self.genlog.push_str(msg);
    } else {
        print!("{}", msg);
    }
}
/// Reports `msg` as one line: printed to stderr when `tracelev` is above
/// zero, otherwise appended (with a trailing newline) to the buffered log.
pub fn logeprint(&mut self, msg: &str) {
    if self.tracelev == 0 {
        self.genlog.push_str(msg);
        self.genlog.push('\n');
    } else {
        eprintln!("{}", msg);
    }
}
/// Returns the text accumulated in the buffered log.
pub fn getlog(&self) -> &str {
    self.genlog.as_str()
}
/// Exchanges the buffered log with `other`: afterwards `other` holds the
/// previous log and the grammar's log holds what `other` held.
pub fn swap_log(&mut self, other: &mut String) {
    std::mem::swap(&mut self.genlog, other);
}
/// Tells whether the type string `ty0` counts as a basic type.
///
/// After trimming whitespace, a type is basic when it is listed in
/// `basictypes`, or when it starts with `&` and does not contain the text
/// `mut ` anywhere.
pub fn basictype(&self, ty0: &str) -> bool {
    let trimmed = ty0.trim();
    self.basictypes.contains(trimmed)
        || (trimmed.starts_with('&') && !trimmed.contains("mut "))
}
/// Looks up the symbol named `s`; returns `None` when no symbol with that
/// name is registered in `Symhash`.
///
/// # Panics
/// Panics if the index stored in `Symhash` is out of bounds for `Symbols`.
pub fn getsym(&self, s: &str) -> Option<&Gsym> {
    self.Symhash.get(s).map(|&idx| &self.Symbols[idx])
}
/// Returns the name of the symbol stored at position `i`.
///
/// # Panics
/// Panics if `i` is out of bounds for `Symbols`.
pub fn symref(&self, i: usize) -> &str {
    self.Symbols[i].sym.as_str()
}
/// Returns the symbol stored at position `i`.
///
/// # Panics
/// Panics if `i` is out of bounds for `Symbols`.
pub fn Symref(&self, i: usize) -> &Gsym {
    let table = &self.Symbols;
    &table[i]
}
/// Returns `true` when `s` names a registered symbol that is a
/// non-terminal; unknown names yield `false`.
pub fn nonterminal(&self, s: &str) -> bool {
    self.Symhash
        .get(s)
        .map_or(false, |&idx| !self.Symbols[idx].terminal)
}
/// Returns `true` when position `s` holds a non-terminal; an out-of-range
/// position yields `false`.
pub fn nonterminali(&self, s: usize) -> bool {
    self.Symbols.get(s).map_or(false, |sym| !sym.terminal)
}
/// Returns `true` when `s` names a registered symbol that is a terminal;
/// unknown names yield `false`.
pub fn terminal(&self, s: &str) -> bool {
    self.Symhash
        .get(s)
        .map_or(false, |&idx| self.Symbols[idx].terminal)
}
/// Returns `true` when position `s` holds a terminal; an out-of-range
/// position yields `false`.
pub fn terminali(&self, s: usize) -> bool {
    matches!(self.Symbols.get(s), Some(sym) if sym.terminal)
}
/// Returns the type string recorded for the symbol named `t`, or the empty
/// string when no such symbol is registered.
pub fn lookuptype(&self, t: &str) -> &str {
    match self.Symhash.get(t) {
        Some(&idx) => self.Symbols[idx].rusttype.as_str(),
        None => "",
    }
}
pub fn parse_grammar(&mut self, filename:&str) -> bool {
let mut reader = match File::open(filename) {
Ok(f) => { Some(BufReader::new(f)) },
_ => { self.logeprint("cannot open file, reading from stdin..."); None},
};
let mut line=String::new();
let mut atEOF = false;
let mut linenum = 0;
let mut linelen = 0;
let mut stage = 0;
let mut multiline = false; let mut foundeol = false;
let mut NEWNTs:HashMap<String,usize> = HashMap::new();
let mut enumindex = 0; let mut ltopt = String::new();
let mut ntcx = 2; self.enumhash.insert("()".to_owned(), 1); let mut wildcard = Gsym::new("_WILDCARD_TOKEN_",true); wildcard.rusttype="(usize,usize)".to_owned(); self.enumhash.insert("(usize,usize)".to_owned(),ntcx);
ntcx+=1;
wildcard.index = self.Symbols.len();
self.Symhash.insert(String::from("_WILDCARD_TOKEN_"),self.Symbols.len());
self.Symbols.push(wildcard); let mut markersexist = false; let mut inttypes = HashSet::with_capacity(10);
for x in ["i8","i16","i32","i64","u8","u16","u32","u64","isize","usize"] {
inttypes.insert(x);
}
let mut usednum = false; let mut usedfloat = false;
let mut usedstrlit = false;
let mut usedalphanum = false;
while !atEOF
{
if !multiline { line.clear(); }
if foundeol { multiline=false;} else {
let result = if let Some(br)=&mut reader {br.read_line(&mut line)}
else {std::io::stdin().read_line(&mut line)};
match result {
Ok(0) | Err(_) => { line = String::from("EOF"); },
Ok(n) => {linenum+=1;},
} }
linelen = line.len();
if multiline && linelen>1 && &line[0..1]!="#" {
if linelen==3 && &line[0..3]=="EOF" {
self.logeprint(&format!("MULTI-LINE GRAMMAR PRODUCTION DID NOT END WITH <==, line {}",linenum)); return false;
}
match line.rfind("<==") {
None => {}, Some(eoli) => {
line.truncate(eoli);
foundeol = true;
}
} }
else if linelen>1 && &line[0..1]=="!" {
self.Extras.push_str(&line[1..]);
if line[1..].trim().starts_with("pub ") {
self.logeprint(&format!("WARNING: this public declaration may result in redundancy and conflicts, line {}",linenum));
}
}
else if linelen>1 && &line[0..1]=="$" {
self.ASTExtras.push_str(&line[1..]);
}
else if linelen>1 && &line[0..1]!="#" {
let rbpos = line.rfind(|c|{c=='\"' || c=='#'});
if let Some(rh) = rbpos {
if &line[rh..rh+1]=="#"
&& !line.trim().starts_with("lexterminal")
&& !line.trim().starts_with("lexname") {
line.truncate(rh);
}
}
let toksplit = line.split_whitespace();
let mut stokens:Vec<&str> = toksplit.collect();
if stokens.len()<1 {continue;}
match stokens[0] {
"!" => { let pbi = line.find('!').unwrap();
self.Extras.push_str(&line[pbi+1..]);
self.Extras.push_str("\n");
},
"$" => { let pbi = line.find('$').unwrap();
self.ASTExtras.push_str(&line[pbi+1..]);
self.ASTExtras.push_str("\n");
},
"grammarname" => {
self.name = String::from(stokens[1]);
},
"auto" | "genabsyn" => {
if stage==0 {self.genabsyn=true; self.genlex=true;}
else if !self.genabsyn {
self.logeprint("ERROR: Place 'auto' at beginning of the grammar or run with -auto option, directive may not be effective.");
}
},
"auto-bump" => {
if stage==0 {self.bumpast=true; self.genabsyn=true; self.genlex=true;}
else if !self.genabsyn {
self.logeprint("ERROR: Place 'auto' or 'auto-bump' at beginning of the grammar or run with -auto option, directive may not be effective.");
}
},
"EOF" => {atEOF=true},
("terminal" | "terminals") if stage==0 => {
for i in 1..stokens.len() {
if self.Symhash.contains_key(stokens[i]) {
self.logeprint(&format!("WARNING: REDEFINITION OF SYMBOL {} SKIPPED, line {} of grammar",stokens[i],linenum));
continue;
}
let mut newterm = Gsym::new(stokens[i],true);
if self.genabsyn {
newterm.rusttype = "()".to_owned();
}
else {
newterm.rusttype = self.Absyntype.clone();
}
newterm.index = self.Symbols.len();
self.Symhash.insert(stokens[i].to_owned(),self.Symbols.len());
self.Symbols.push(newterm);
}
}, "typedterminal" if stage==0 && stokens.len()>2 => {
if self.Symhash.contains_key(stokens[1]) {
self.logeprint(&format!("WARNING: REDEFINITION OF SYMBOL {} SKIPPED, line {} of grammar",stokens[1],linenum));
continue;
}
let mut newterm = Gsym::new(stokens[1],true);
let mut tokentype = String::new();
for i in 2..stokens.len() {
tokentype.push_str(&stokens[i][..]);
tokentype.push(' ');
}
let mut nttype = tokentype.trim();
if nttype.len()<1 {nttype = &self.Absyntype}
else if nttype!=&self.Absyntype {self.sametype=false;}
newterm.settype(nttype);
self.enumhash.insert(nttype.to_owned(), ntcx); ntcx+=1;
newterm.index = self.Symbols.len();
self.Symhash.insert(stokens[1].to_owned(),self.Symbols.len());
if self.lifetime.len()>0 && nttype.contains(&self.lifetime) {
self.haslt_base.insert(newterm.index);
}
self.Symbols.push(newterm);
}, "nonterminal" | "typednonterminal" if stage==0 && stokens.len()>1 => { if self.Symhash.get(stokens[1]).is_some() {
self.logeprint(&format!("WARNING: REDEFINITION OF SYMBOL {} SKIPPED, line {} of grammar",stokens[1],linenum));
continue;
}
let mut newterm = Gsym::new(stokens[1],false);
if !self.genabsyn {newterm.rusttype = self.Absyntype.clone();}
if stokens.len()>2 { let mut tokentype = String::new();
for i in 2..stokens.len() {
tokentype.push_str(&stokens[i][..]);
tokentype.push(' ');
}
let mut nttype = tokentype.trim().to_owned();
if nttype.starts_with(':') {
let mut limit = self.Symbols.len();
loop {
let copynt = nttype[1..].trim();
let copyntiopt = self.Symhash.get(copynt);
if copyntiopt.is_none() {
self.logeprint(&format!("ERROR: EXTENSION TYPE {} NOT DEFINED YET, LINE {}\n\n",copynt,linenum));
return false;
}
let copynti = *copyntiopt.unwrap();
if self.Symbols[copynti].rusttype.starts_with(':') {
nttype = self.Symbols[copynti].rusttype.clone();
}
else if self.Symbols[copynti].rusttype.len()>0 && !self.Symbols[copynti].rusttype.contains('@') {
self.logeprint(&format!("ERROR: TYPE DEPENDENCIES ARE ONLY ALLOWED BETWEEN AUTO-GENERATED TYPES. TYPE {} CANNOT BE EXTENDED, line {}",&self.Symbols[copynti].rusttype,linenum));
return false;
}
else {break;}
limit -=1;
if limit==0 {
self.logeprint(&format!("WARNING: CIRCULARITY DETECTED IN TYPE DEPENDENCIES; TYPE RESET, LINE {}",linenum));
nttype = String::new();
break;
}
} }
if nttype.contains('@') { let mut limit =self.Symbols.len()+1;
loop {
let mut copynt="";
let (mut start,mut end) = (0,0);
if nttype.starts_with('@') {
copynt = nttype[1..].trim();
start = 0; end = nttype.len();
}
if let Some(pos1)=nttype.find("<@") {
if let Some(pos2)=nttype[pos1+2..].find('>') {
copynt = &nttype[pos1+2..pos1+2+pos2];
start = pos1+1; end = pos1+2+pos2;
}
}
if copynt.len()>0 {
let onti = *self.Symhash.get(copynt).expect(&format!("UNRECOGNIZED NON-TERMINAL SYMBOL {} TO COPY TYPE FROM (ORDER OF DECLARATION MATTERS), line {} of grammar",copynt,linenum));
if !self.genabsyn {
nttype.replace_range(start..end,&self.Symbols[onti].rusttype);
} }
limit -= 1;
if !nttype.contains('@') || limit==0 {break;}
} } if nttype.len()<1 && !self.genabsyn {nttype = self.Absyntype.clone()};
if !nttype.contains('@') && !nttype.starts_with(':') {self.enumhash.insert(nttype.clone(), ntcx); ntcx+=1;}
if &nttype!=&self.Absyntype {self.sametype=false;}
newterm.rusttype = nttype;
} newterm.index = self.Symbols.len();
self.Symhash.insert(stokens[1].to_owned(),self.Symbols.len());
self.Symbols.push(newterm);
self.Rulesfor.insert(self.Symbols.len()-1,HashSet::new());
}, "nonterminals" if stage==0 => {
for i in 1..stokens.len() {
if self.Symhash.contains_key(stokens[i]) {
self.logeprint(&format!("WARNING: REDEFINITION OF SYMBOL {} SKIPPED, line {} of grammar",stokens[i],linenum));
continue;
}
let mut newterm = Gsym::new(stokens[i],false);
newterm.index = self.Symbols.len();
self.Symhash.insert(stokens[i].to_owned(),self.Symbols.len());
if !self.genabsyn {newterm.rusttype = self.Absyntype.clone();}
ntcx+=1;
self.Symbols.push(newterm);
self.Rulesfor.insert(self.Symbols.len()-1,HashSet::new());
}
},
"topsym" | "startsymbol" => {
if stage>1 { self.logeprint(&format!("Grammar start symbol must be defined before production rules, line {}",linenum)); return false;} else {stage=1;}
match self.Symhash.get(stokens[1]) {
Some(tsi) if *tsi<self.Symbols.len() && !self.Symbols[*tsi].terminal => {
self.topsym = *tsi; let toptype = &self.Symbols[*tsi].rusttype;
if toptype != &self.Absyntype && !self.genabsyn && toptype.len()>0 {
let msg = format!("WARNING: Type of Grammar start symbol {} set to {}; you should declare the valuetype unless using -auto mode.\n",stokens[1],&self.Absyntype);
if self.tracelev>0 {eprint!("{}",msg);}
else {self.genlog.push_str(&msg);}
if !self.genabsyn {self.Symbols[*tsi].rusttype = self.Absyntype.clone();}
}
},
_ => { let msg = format!("top symbol {} not found in declared non-terminals; check ordering of declarations, line {}\n",stokens[1],linenum);
if self.tracelev>0 {eprint!("{}",msg);}
else {self.genlog.push_str(&msg);}
return false;
},
} }, "flatten" if stokens.len()>=2 => {
for tok in stokens[1..].iter() {
let fnti = *self.Symhash.get(&tok[..]).expect(&format!("UNDEFINED GRAMMAR SYMBOL {}, LINE {}\n",tok,linenum));
if self.Symbols[fnti].terminal {
self.logeprint(&format!("WARNING: ONLY NON-TERMINALS CAN HAVE THEIR ASTS FLATTENED ({}), LINE {}\n",tok,linenum));
}
else {self.flattentypes.insert(fnti);}
} },
"errsym" | "errorsymbol" => {
if stage>1 {
self.logeprint(&format!("!!! Error recover symbol must be declared before production rules, line {}",linenum));
return false;
}
if stage==0 {stage=1;}
if !self.terminal(stokens[1]) {
self.logeprint(&format!("!!!Error recover symbol {} is not a terminal, line {} ",stokens[1],linenum));
return false;
}
self.Errsym = stokens[1].to_owned();
},
"resynch" | "resync" => {
if stage==0 {stage=1;}
for i in 1..stokens.len()
{
if !self.terminal(stokens[i]) {
self.logeprint(&format!("!!!Error recovery re-synchronization symbol {} is not a declared terminal, line {}",stokens[i],linenum));
return false;
}
self.Resynch.insert(stokens[i].trim().to_owned());
} },
"lifetime" if stokens.len()==2 && stokens[1].len()>0 && stage==0 => {
self.lifetime = if &stokens[1][0..1]=="'" && stokens[1].len()>1
{String::from(stokens[1])} else {format!("'{}",stokens[1])};
ltopt = format!("<{}>",&self.lifetime);
},
"absyntype" | "valuetype" => {
if stage>0 {self.logeprint(&format!("The grammar's abstract syntax type must be declared before production rules, line {}",linenum)); return false;}
if self.genabsyn {
self.logeprint(&format!("WARNING: absyntype/valuetype declaration ignored in -auto (genabsyn) mode, line {}", linenum));
continue;
}
let pos = line.find(stokens[0]).unwrap() + stokens[0].len();
self.Absyntype = String::from(line[pos..].trim());
},
"externtype" | "externaltype" if stage==0 => {
let pos = line.find(stokens[0]).unwrap() + stokens[0].len();
self.Externtype = String::from(line[pos..].trim());
},
"left" | "right" | "nonassoc" if stage<2 && stokens.len()>2 => {
if stage==0 {stage=1;}
if stokens.len()<3 {
self.logeprint(&format!("MALFORMED ASSOCIATIVITY/PRECEDENCE DECLARATION SKIPPED ON LINE {}",linenum));
continue;
}
let mut preclevel:i32 = DEFAULTPRECEDENCE;
if let Ok(n)=stokens[2].parse::<i32>() {
if n>0 && n<=0x40000000 {preclevel = n;}
else {self.logeprint(&format!("ERROR: PRECEDENCE VALUE MUST BE BETWEEN 1 AND {}, LINE {}\n",0x40000000,linenum)); return false;}
}
else {self.logeprint(&format!("ERROR: Did not read precedence level on line {}\n",linenum)); return false;}
if stokens[0]=="nonassoc" && preclevel>0 { preclevel = NONASSOCBIT-preclevel;}
else if stokens[0]=="right" && preclevel>0 {preclevel = -1 * preclevel;}
let mut targetsym = stokens[1];
if targetsym=="_" {targetsym = "_WILDCARD_TOKEN_";}
if let Some(index) = self.Symhash.get(targetsym) {
self.Symbols[*index].precedence = preclevel;
} else {self.logeprint(&format!("UNDEFINED GRAMMAR SYMBOL {}, LINE {}\n",targetsym,linenum)); return false;}
}, "lexname" => {
if stokens.len()<3 {
self.logeprint(&format!("MALFORMED lexname declaration line {} skipped",linenum));
continue;
}
self.Lexnames.insert(stokens[2].to_string(),stokens[1].to_string());
self.Haslexval.insert(stokens[1].to_string());
self.Symhash.get(stokens[1]).map(|sind|{
self.Nameslex.insert(*sind,stokens[2].to_string());
});
self.genlex = true;
},
"lexvalue" => {
let pos = line.find("lexvalue").unwrap()+9;
let declaration = &line[pos..];
let dtokens:Vec<_>=declaration.split_whitespace().collect();
if dtokens.len()<3 {
self.logeprint(&format!("MALFORMED lexvalue declaration skipped, line {}",linenum));
continue;
} let mut valform = String::new();
for i in 2 .. dtokens.len()
{
valform.push_str(dtokens[i]);
if (i<dtokens.len()-1) {valform.push(' ');}
}
let tokform = dtokens[1].to_owned();
self.Lexvals.push((dtokens[0].to_string(),tokform,valform));
self.Haslexval.insert(dtokens[0].to_string());
self.genlex = true;
},
"valueterminal" => {
let pos = line.find("valueterminal").unwrap()+14;
let declaration = &line[pos..];
let mut usingcolon = true;
let mut dtokens:Vec<_> = declaration.split('~').collect();
if dtokens.len()>1 && dtokens.len()<4 {
self.logeprint(&format!("ERROR ON LINE {}. MISSING ~",linenum));
return false;
}
if dtokens.len()<4 {dtokens=declaration.split_whitespace().collect(); usingcolon=false;}
if dtokens.len()<4 {
self.logeprint(&format!("MALFORMED valueterminal declaration skipped, line {}",linenum));
continue;
} let termname = dtokens[0].trim();
if self.Symhash.contains_key(termname) {
self.logeprint(&format!("WARNING: REDEFINITION OF SYMBOL {} IGNORED, line {} of grammar",termname,linenum));
continue;
}
let mut newterm = Gsym::new(termname,true);
let termtype = dtokens[1].trim();
if termtype.len()<1 {newterm.settype(&self.Absyntype);}
else {newterm.settype(termtype);}
if &newterm.rusttype!=&self.Absyntype {self.sametype=false;}
self.enumhash.insert(newterm.rusttype.clone(),ntcx); ntcx+=1;
newterm.index = self.Symbols.len();
self.Symhash.insert(termname.to_owned(),self.Symbols.len());
if self.lifetime.len()>0 && newterm.rusttype.contains(&self.lifetime) {
self.haslt_base.insert(newterm.index);
}
self.Symbols.push(newterm);
let mut valform = String::new(); for i in 3 .. dtokens.len()
{
valform.push_str(dtokens[i]);
if (i<dtokens.len()-1 && !usingcolon) {valform.push(' ');}
else if (i<dtokens.len()-1) {valform.push('~');}
}
let tokform = dtokens[2].to_owned();
self.Lexvals.push((termname.to_string(),tokform,valform));
self.Haslexval.insert(termname.to_string());
self.genlex = true;
}, "valterminal" => { if stokens.len()<3 || stage!=0 {
self.logeprint(&format!("\nWARNING: Invalid valterminal declaration on line {} ignored", linenum));
continue;
}
let pos = line.find(stokens[1]).unwrap()+stokens[1].len();
let termname = stokens[1]; let termtype0 = line[pos..].trim();
let mut termtype = termtype0.to_lowercase(); let mut tokenform = "Num(_tt)"; let mut valform = "_tt".to_owned();
if self.Symhash.contains_key(termname) {
self.logeprint(&format!("\nWARNING: REDEFINITION OF SYMBOL {} IGNORED, line {} of grammar",termname,linenum));
continue;
} let mut newterm = Gsym::new(termname,true);
newterm.index = self.Symbols.len();
if termtype.starts_with("alphanum") {
if usedalphanum {
self.logeprint(&format!("\nWARNING for line {}: only the first 'alphanumeric' valterminal declaration is recognized. Consider using 'valueterminal' or define custom token type.",linenum));
continue;
}
else {usedalphanum=true;}
if self.lifetime.len()==0 { self.lifetime="'input_lt".to_owned(); }
if self.mode==0 {termtype = format!("&{} str",&self.lifetime);}
else {termtype="string".to_owned();} newterm.rusttype = termtype;
tokenform = "Alphanum(_tt)"; self.haslt_base.insert(newterm.index);
} else if &termtype=="string literal" || &termtype=="strlit" {
if usedstrlit {
self.logeprint(&format!("\nWARNING for line {}: only the first 'string literal' valterminal declaration is recognized. Consider using 'valueterminal' or define custom token type.",linenum));
continue;
}
else {usedstrlit=true;}
if self.lifetime.len()==0 {self.lifetime="'input_lt".to_owned();}
if self.mode==0 {termtype = format!("&{} str",&self.lifetime);}
else {termtype="string".to_owned();}
newterm.rusttype = termtype;
tokenform = "Strlit(_tt)";
self.haslt_base.insert(newterm.index);
}
else if &termtype=="f32" || &termtype=="f64" || (self.mode>0 && termtype0=="float") {
if usedfloat {
self.logeprint(&format!("\nWARNING for line {}: valterminal declarations may only specify one floating point type as there is only one type of lexical token for all floating point values. Consider using 'valueterminal' or define custom token type.",linenum));
continue;
}
else {usedfloat=true;}
tokenform = "Float(_tt)";
if &termtype=="f32" {valform = "_tt as f32".to_owned();}
newterm.rusttype = termtype;
}
else if inttypes.contains(&termtype[..]) || (self.mode>0 && termtype0=="int") {
if usednum {
self.logeprint(&format!("\nWARNING for line {}: only the first 'valterminal' declarations for an integer type is recognized as there is only one type of lexical token for all integer values. Consider using 'valueterminal' or define custom token type.",linenum));
continue;
}
else {usednum=true;}
if &termtype!="i64" {
valform=format!("_tt as {}",&termtype);
}
newterm.rusttype = termtype;
}
else {
self.logeprint(&format!("\nERROR: type '{}' on line {} cannot be used with 'valterminal'; consider using 'valueterminal' or define custom token type with 'lexattribute add_custom'",termtype0,linenum));
return false;
}
if &newterm.rusttype!=&self.Absyntype {self.sametype=false;}
self.enumhash.insert(newterm.rusttype.clone(),ntcx); ntcx+=1;
self.Symhash.insert(termname.to_owned(),self.Symbols.len());
self.Symbols.push(newterm);
self.Lexvals.push((termname.to_owned(),tokenform.to_owned(),valform));
self.Haslexval.insert(termname.to_string());
self.genlex = true;
}, "lexterminal" => {
if stokens.len()!=3 {
self.logeprint(&format!("MALFORMED lexterminal declaration line {}: a terminal name and a lexical form are required",linenum)); return false;
}
let termname = stokens[1].trim();
if self.Symhash.contains_key(termname) {
self.logeprint(&format!("WARNING: REDEFINITION OF SYMBOL {} SKIPPED, line {} of grammar",termname,linenum));
continue;
}
let mut newterm = Gsym::new(termname,true);
if self.genabsyn { newterm.settype("()"); }
else {newterm.settype(&self.Absyntype);}
newterm.index = self.Symbols.len();
self.Symhash.insert(termname.to_owned(),self.Symbols.len());
self.Symbols.push(newterm);
self.Lexnames.insert(stokens[2].to_string(),termname.to_string());
self.Nameslex.insert(self.Symbols.len()-1,stokens[2].to_string());
self.Haslexval.insert(termname.to_string());
self.genlex = true;
}, "lexattribute" => {
let mut prop = String::new();
for i in 1 .. stokens.len()
{
prop.push_str(stokens[i]); prop.push(' ');
}
self.Lexextras.push(prop);
self.genlex = true;
},
"lexconditional" if stokens.len() > 2 => {
let pos = line.find("lexconditional").unwrap()+15;
let mut dtokens:Vec<_> = line[pos..].split('~').collect();
self.Lexconditionals.push((dtokens[0].trim().to_owned(),dtokens[1].trim().to_owned()));
},
"default" => {
if let Some(symi) = self.Symhash.get(stokens[1]) {
if !self.Symbols[*symi].terminal && stokens.len()>2 {
let mut defstring = String::new();
for i in 2..stokens.len() {
defstring.push_str(stokens[i]);
defstring.push(' ');
}
self.defaults.insert(*symi,defstring);
}
else {
self.logeprint(&format!("Malformed default definition, line {}",linenum));
}
}
else {
self.logeprint(&format!("Symbol {} not found, line {}",stokens[1],linenum));
}
}, "variant-group" | "operator-group" if stokens.len()>2 => {
let groupfornt = usize::MAX;
self.vargroupnames.push(stokens[1].to_owned());
for tok in &stokens[2..] {
let tokopt = self.Symhash.get(&tok[..]);
match tokopt {
Some(toki) if !self.vargroups.contains_key(&(groupfornt,*toki)) => {
self.vargroups.insert((groupfornt,*toki),self.vargroupnames.len()-1);
},
Some(_) => {
self.logeprint(&format!("WARNING: duplicate variant-group declaration for {} ignored, line {}",tok,linenum));
},
_ => {
self.logeprint(&format!("WARNING: {} is not recognized as symbol of the grammar; declaration ignore, line {}",tok,linenum));
},
} }
}, "variant-group-for" | "operator-group-for" if stokens.len()>3 => {
let mut groupfornt = usize::MAX;
match self.Symhash.get(stokens[1]) {
Some(i) if *i<self.Symbols.len() && !self.Symbols[*i].terminal => {
groupfornt = self.Symbols[*i].index;
},
_ => {
self.logeprint(&format!("ERROR: {} is not a declared non-terminal symbol",stokens[1]));
return false;
},
} self.vargroupnames.push(stokens[2].to_owned());
for tok in &stokens[3..] {
let tokopt = self.Symhash.get(&tok[..]);
match tokopt {
Some(toki) if !self.vargroups.contains_key(&(groupfornt,*toki)) => {
self.vargroups.insert((groupfornt,*toki),self.vargroupnames.len()-1);
},
Some(_) => {
self.logeprint(&format!("WARNING: duplicate variant-group declaration for {} ignored, line {}",tok,linenum));
},
None if self.Lexnames.contains_key(&tok[..]) => {
let gsymname = self.Lexnames.get(&tok[..]).unwrap();
if let Some(ti) = self.Symhash.get(gsymname) {
if !self.vargroups.contains_key(&(groupfornt,*ti)) {
self.vargroups.insert((groupfornt,*ti),self.vargroupnames.len()-1);
}
else {
self.logeprint(&format!("WARNING: duplicate variant-group declaration for {} ignored, line {}",tok,linenum));
}
}
else {
self.logeprint(&format!("WARNING: {} is not recognized as symbol of the grammar; declaration ignore, line {}",tok,linenum));
}
}, _ => {
self.logeprint(&format!("WARNING: {} is not recognized as symbol of the grammar; declaration ignore, line {}",tok,linenum));
},
} }
},
LHS0 if stokens.len()>1 => {
let mut separator = "-->";
let sepposition;
if let Some(spos) = line.find("-->") {
sepposition = spos;
}
else if let Some(mpos) = line.find("==>") { sepposition = mpos;
separator = "==>";
}
else {
self.logeprint(&format!("ERROR PARSING GRAMMAR LINE {}, unexpected declaration at grammar stage {}",linenum,stage));
return false;
}
if !foundeol && separator=="==>" {multiline=true; continue;}
else if foundeol {foundeol=false;}
if sepposition < stokens[0].len() {
stokens[0] = &stokens[0][..sepposition];
}
if stage<2 {stage=2;}
let LBC = "LC";
if self.bumpast && self.lifetime.len()==0 {self.lifetime="'src_lt".to_owned();}
let bltref = if self.bumpast {format!("&{} ",&self.lifetime)} else {String::new()};
let LBCref = if self.bumpast {format!("&{} LC",&self.lifetime)}
else {"LBox".to_owned()};
let findcsplit:Vec<_> = stokens[0].split(':').collect();
let mut LHS = findcsplit[0];
let mut manual_precedence = 0;
let (lb,rb)=findmatch(LHS0,'(',')');
if rb!=0 && lb+1<rb {
let parseopt = LHS0[lb+1..rb].parse::<i32>();
if let Ok(lev)=parseopt {manual_precedence=lev;}
else {self.logeprint(&format!("ERROR: Precedence Level ({}) must be numeric, line {}\n",&LHS[lb+1..rb],linenum)); return false;}
LHS = &stokens[0][..lb]; }
else if (lb,rb)!=(0,0) {
self.logeprint(&format!("MALFORMED LEFT HAND SIDE LINE {}\n",linenum));
return false;
} let symindex = match self.Symhash.get(LHS) {
Some(smi) if *smi<self.Symbols.len() && !self.Symbols[*smi].terminal => smi,
_ => { self.logeprint(&format!("unrecognized non-terminal symbol {}, line {}",LHS,linenum)); return false;},
};
let symind2 = *symindex;
let mut ntcnt = 0;
let pos0 = sepposition + 3; let mut linec = &line[pos0..]; let mut barsplit = Vec::new();
let mut linecs = linec;
while let Some(barpos) = findskip(linecs,'|') {
let (scar,scdr) = linecs.split_at(barpos);
barsplit.push(scar.trim());
linecs = &scdr[1..];
} barsplit.push(linecs.trim());
if barsplit.len()>1 && findcsplit.len()>1 {
self.logeprint(&format!("ERROR: the '|' symbol is not accepted in rules that has an labeled non-terminal on the left-hand side ({}) as it becomes ambiguous as to how to autmatically generate abstract syntax, line {}",findcsplit[1],linenum));
return false;
}
for rul in &barsplit
{ let bstokens:Vec<_> = rul.trim().split_whitespace().collect();
let mut rhsyms:Vec<Gsym> = Vec::new();
let mut semaction = "}";
let mut i:usize = 0; let mut maxprec:i32 = 0;
let mut seenerrsym = false;
let mut iadjust = 0;
let mut markers = Vec::new();
let reserved_rindex = self.Rules.len(); self.Rules.push(Grule::new_skeleton(LHS));
while i<bstokens.len() {
let mut strtok = bstokens[i];
i+=1;
if strtok.len()>0 && &strtok[0..1]=="{" {
let position = rul.find('{').unwrap();
semaction = rul.split_at(position+1).1;
if self.genabsyn && semaction.contains("return ") {
self.logeprint(&format!("WARNING: USING \"return\" INSIDE SEMANTIC ACTIONS COULD CAUSE CONFLICTS WITH AUTOMATIC CODE GENERATION, LINE {}\n",linenum));
}
break;
}
if strtok=="%" {
markers.push(i-1-iadjust); iadjust+=1;
markersexist=true; continue;
}
else if strtok=="!%" && !self.sdcuts.contains_key(&reserved_rindex) {
self.sdcuts.insert(reserved_rindex,i-1-iadjust);
iadjust+=1;
continue;
}
let newtok2;
if strtok.len()>1 && strtok.starts_with('(') {
let mut ntname2 = format!("NEWSEQNT_{}_{}",self.Rules.len(),ntcnt);
ntcnt+=1;
let mut newnt2 = Gsym::new(&ntname2,false);
let mut newrule2 = Grule::new_skeleton(&ntname2);
let mut defaultrelab2 = String::new(); let mut retoki = &strtok[1..]; let mut passthru:i64 = -1;
let mut jk = 0; let mut suffix="";
let mut precd = 0; while i<=bstokens.len() {
let retokisplit:Vec<&str> = retoki.split(':').collect();
let mut breakpoint = false;
if retokisplit[0].ends_with('>') {
if let Some(rpp) = retokisplit[0].rfind(')') {
breakpoint = true;
retoki = &retokisplit[0][..rpp];
if (retoki.len()<1) {self.logeprint(&format!("INVALID EXPRESSION IN GRAMMAR LINE {}: DO NOT SEPARATE TOKEN FROM `)`\n",linenum)); return false;}
if retokisplit.len()>1 {
defaultrelab2=retokisplit[1].to_owned();
if !is_alphanum(checkboxlabel(&defaultrelab2)) {
self.logeprint(&format!("ERROR: LABELS FOR RE EXPRESSIONS CANNOT BE PATTERNS, LINE {}\n",linenum)); return false;
}
}
}
else {self.logeprint(&format!("INVALID EXPRESSION IN GRAMMAR LINE {}: DO NOT SEPARATE TOKEN FROM `)`\n",linenum)); return false;}
}
else
if retokisplit[0].ends_with(")*") || retokisplit[0].ends_with(")+") || retokisplit[0].ends_with(")?") {
breakpoint=true;
retoki = &retokisplit[0][..retokisplit[0].len()-2];
if (retoki.len()<1) {self.logeprint(&format!("INVALID EXPRESSION IN GRAMMAR LINE {}: DO NOT SEPARATE TOKEN FROM `)`\n",linenum)); return false;}
suffix = &retokisplit[0][retokisplit[0].len()-1..];
if retokisplit.len()>1 {defaultrelab2=retokisplit[1].to_owned();}
} else if retokisplit.len()>1 {
self.logeprint(&format!("LABELS (:{}) ARE NOT ALLOWED INSIDE (..) GROUPINGS, LINE {}",retokisplit[1],linenum)); return false;
}
if retoki.ends_with("*") || retoki.ends_with("+") || retoki.ends_with("?") || retoki.ends_with(">") {
self.logeprint(&format!("NESTED *, +, ? and <> EXPRESSIONS ARE NOT ALLOWED, LINE {}\n",linenum)); return false;
}
let errmsg = format!("unrecognized grammar symbol '{}', line {}",retoki,linenum);
let gsymi = *self.Symhash.get(retoki).expect(&errmsg);
let igsym = &self.Symbols[gsymi];
if prec_level(igsym.precedence).abs()>prec_level(precd).abs() {precd =igsym.precedence;}
if passthru==-1 && (!igsym.terminal || igsym.rusttype!="()") {
passthru=jk;
newnt2.rusttype = format!("@{}",&igsym.sym);
}
else if passthru>=0 && (!igsym.terminal || igsym.rusttype!="()" || igsym.precedence!=0)
{passthru=-2; newnt2.rusttype=String::new();}
newrule2.rhs.push(self.Symbols[gsymi].clone());
if breakpoint {break;}
else if bstokens[i-1].starts_with('{') {i=bstokens.len()+1; break;}
jk += 1; i+=1; retoki = bstokens[i-1];
} if i>bstokens.len() {self.logeprint(&format!("INVALID EXPRESSION IN GRAMMER, line {}",linenum)); return false;}
iadjust += jk as usize;
if passthru>=0 { newrule2.action = format!(" _item{}_ }}",passthru);
}
let mut hashkey = String::from("(");
for s in &newrule2.rhs {
hashkey.push_str(&s.sym); hashkey.push(' ');
}
hashkey.push(')');
if let Some(snti) = NEWNTs.get(&hashkey) { ntname2 = self.Symbols[*snti].sym.clone();
} else { newrule2.precedence = precd;
newnt2.index = self.Symbols.len();
newrule2.lhs.index = newnt2.index;
self.Symhash.insert(ntname2.clone(),self.Symbols.len());
self.Symbols.push(newnt2);
if self.tracelev>3 {
printrule(&newrule2,self.Rules.len());
}
self.Rules.push(newrule2);
let mut rulesforset = HashSet::new();
rulesforset.insert(self.Rules.len()-1);
self.Rulesfor.insert(self.Symbols.len()-1,rulesforset);
if defaultrelab2.len()<1 && !markersexist {defaultrelab2=format!("_item{}_",i-1-iadjust);}
else if defaultrelab2.len()<1 {defaultrelab2=format!("_itemre{}_{}",i-1-iadjust,ntcx); ntcx+=1;}
NEWNTs.insert(hashkey,self.Symbols.len()-1); } newtok2 = format!("{}{}:{}",&ntname2,suffix,&defaultrelab2);
strtok = &newtok2;
}
let newtok; let retoks:Vec<&str> = strtok.split(':').collect();
if retoks.len()>0 && retoks[0].len()>1 && (retoks[0].ends_with('*') || retoks[0].ends_with('+') || retoks[0].ends_with('?')) {
strtok = retoks[0]; let defaultrelab;
if !markersexist {
defaultrelab = format!("_item{}_",i-1-iadjust);
} else {
defaultrelab = format!("_itemre{}_{}",i-1-iadjust,ntcx);
ntcx+=1;
}
let relabel = if retoks.len()>1 && retoks[1].len()>0
{
if !is_alphanum(checkboxlabel(retoks[1])) {
self.logeprint(&format!("ERROR: LABELS FOR RE EXPRESSIONS CANNOT BE PATTERNS, LINE {}\n",linenum)); return false;
}
retoks[1]
}
else {&defaultrelab};
let mut gsympart = strtok[0..strtok.len()-1].trim(); if gsympart=="_" {gsympart="_WILDCARD_TOKEN_";}
let errmsg = format!("unrecognized grammar symbol '{}', line {}",gsympart,linenum);
let gsymi = *self.Symhash.get(gsympart).expect(&errmsg);
if let Some(enti) = NEWNTs.get(retoks[0]) {
newtok = format!("{}:{}",&self.Symbols[*enti].sym,relabel);
strtok = &newtok;
}
else {
let newntname = format!("NEWRENT_{}_{}",self.Rules.len(),ntcnt); ntcnt+=1;
let mut newnt = Gsym::new(&newntname,false);
newnt.rusttype = "()".to_owned();
if &self.Symbols[gsymi].rusttype!="()" || (retoks.len()>1 && retoks[1].len()>0) {
newnt.rusttype = if strtok.ends_with('?') {
if self.basictype(&self.Symbols[gsymi].rusttype[..]) || self.Symbols[gsymi].rusttype.starts_with("Vec<") || self.Symbols[gsymi].rusttype.starts_with(LBC) {
if self.genabsyn {format!("Option<@{}>",&self.Symbols[gsymi].sym)} else {format!("Option<{}>",&self.Symbols[gsymi].rusttype)} }
else {
if self.genabsyn {format!("Option<{}<@{}>>",&LBCref,&self.Symbols[gsymi].sym)} else {format!("Option<LBox<{}>>",&self.Symbols[gsymi].rusttype)} }
} else {
if self.genabsyn {format!("Vec<{}{}<@{}>>",&bltref,LBC,&self.Symbols[gsymi].sym)} else {format!("Vec<LC<{}>>",&self.Symbols[gsymi].rusttype)} };
}
newnt.index = self.Symbols.len();
self.Symhash.insert(newntname.clone(),self.Symbols.len());
self.Symbols.push(newnt.clone());
let mut newrule1 = Grule::new_skeleton(&newntname);
newrule1.lhs.index = newnt.index;
let nr1type = &self.Symbols[newnt.index].rusttype;
newrule1.precedence = self.Symbols[gsymi].precedence;
if strtok.ends_with('?') {
newrule1.rhs.push(self.Symbols[gsymi].clone());
if nr1type.starts_with("Option<LBox<") {
newrule1.action=String::from(" Some(parser.lbx(0,_item0_)) }");
}
else if self.bumpast && nr1type.starts_with(&format!("Option<{}<",&LBCref)) {
newrule1.action=String::from(" Some(parser.exstate.make(parser.lc(0,_item0_))) }");
}
else if nr1type.starts_with("Option<") {
newrule1.action = String::from(" Some(_item0_) }");
}
} else { newrule1.rhs.push(newnt.clone());
newrule1.rhs.push(self.Symbols[gsymi].clone());
if nr1type!="()" {
if self.bumpast {
newrule1.action = String::from(" _item0_.push(parser.exstate.make(parser.lc(1,_item1_))); _item0_ }");
} else {
newrule1.action = String::from(" _item0_.push(parser.lc(1,_item1_)); _item0_ }");
}
} } let mut newrule0 = Grule::new_skeleton(&newntname);
let nr0type = &self.Symbols[newnt.index].rusttype;
newrule0.lhs.index = newnt.index;
if strtok.ends_with('+') {
newrule0.rhs.push(self.Symbols[gsymi].clone());
if nr0type!="()" {
if self.bumpast {
newrule0.action=String::from(" vec![parser.exstate.make(parser.lc(0,_item0_))] }");
} else {
newrule0.action=String::from(" vec![parser.lc(0,_item0_)] }");
}
} } else if strtok.ends_with('*') && nr0type!="()" {
newrule0.action = String::from(" Vec::new() }");
}
else if strtok.ends_with('?') && nr0type!="()" {
newrule0.action = String::from(" None }");
}
if self.tracelev>3 {
printrule(&newrule0,self.Rules.len());
printrule(&newrule1,self.Rules.len()+1);
}
self.Rules.push(newrule0);
self.Rules.push(newrule1);
let mut rulesforset = HashSet::with_capacity(2);
rulesforset.insert(self.Rules.len()-2);
rulesforset.insert(self.Rules.len()-1);
newtok = format!("{}:{}",&newntname,relabel);
self.Rulesfor.insert(self.Symbols.len()-1,rulesforset);
NEWNTs.insert(retoks[0].to_owned(),newnt.index);
strtok = &newtok;
} }
let mut newtok3; let septoks:Vec<&str> = strtok.split(':').collect();
if septoks.len()>0 && septoks[0].len()>2 && (septoks[0].ends_with("*>") || septoks[0].ends_with("+>")) {
let (lb,rb) = findmatch(strtok,'<','>');
let termi;
if lb!=0 && lb+2<rb {
let termsym = &strtok[lb+1..rb-1]; let termiopt = self.Symhash.get(termsym);
if !self.terminal(termsym) {
self.logeprint(&format!("ERROR ON LINE {}, {} is not a terminal symbol of this grammar\n",linenum,termsym)); return false;
}
termi = *termiopt.unwrap();
} else {self.logeprint(&format!("MALFORMED EXPRESSION LINE {}\n",linenum)); return false;}
strtok = septoks[0]; let defaultrelab3;
if !markersexist {
defaultrelab3 = format!("_item{}_",i-1-iadjust);
} else {
defaultrelab3 = format!("_itemre{}_{}",i-1-iadjust,ntcx);
ntcx+=1;
}
let relabel3 = if septoks.len()>1 && septoks[1].len()>0 {
if !is_alphanum(checkboxlabel(septoks[1])) {
self.logeprint(&format!("ERROR: LABELS FOR RE EXPRESSIONS CANNOT BE PATTERNS, LINE {}\n",linenum)); return false;
}
septoks[1]
} else {&defaultrelab3};
let mut gsympart3 = strtok[0..lb].trim(); if gsympart3=="_" {gsympart3="_WILDCARD_TOKEN_";}
let errmsg = format!("UNRECOGNIZED GRAMMAR SYMBOL '{}', LINE {}\n",gsympart3,linenum);
let gsymi = *self.Symhash.get(gsympart3).expect(&errmsg);
let hashkey = format!("{}{}",gsympart3,&strtok[lb..rb+1]);
if let Some(enti) = NEWNTs.get(&hashkey) {
newtok3 = format!("{}:{}",&self.Symbols[*enti].sym,relabel3);
strtok = &newtok3;
}
else { let newntname3 = format!("NEWSEPNT_{}_{}",self.Rules.len(),ntcnt); ntcnt+=1;
let mut newnt3 = Gsym::new(&newntname3,false);
newnt3.rusttype = "()".to_owned();
if &self.Symbols[gsymi].rusttype!="()" || (septoks.len()>1 && septoks[1].len()>0) {
newnt3.rusttype = format!("Vec<{}{}<@{}>>",&bltref,LBC,&self.Symbols[gsymi].sym);
}
newnt3.index = self.Symbols.len();
self.Symhash.insert(newntname3.clone(),self.Symbols.len());
self.Symbols.push(newnt3.clone()); let mut newrule3 = Grule::new_skeleton(&newntname3);
let mut newrule4 = Grule::new_skeleton(&newntname3);
newrule3.lhs.index = newnt3.index;
newrule4.lhs.index = newnt3.index;
newrule3.precedence = self.Symbols[termi].precedence;
newrule4.precedence = self.Symbols[termi].precedence;
newrule3.rhs.push(self.Symbols[gsymi].clone()); newrule4.rhs.push(newnt3.clone());
newrule4.rhs.push(self.Symbols[termi].clone());
newrule4.rhs.push(self.Symbols[gsymi].clone()); if newnt3.rusttype.starts_with("Vec") {
if self.bumpast {
newrule3.action=String::from(" vec![parser.exstate.make(parser.lc(0,_item0_))] }");
newrule4.action=String::from(" _item0_.push(parser.exstate.make(parser.lc(2,_item2_))); _item0_ }");
} else {
newrule3.action=String::from(" vec![parser.lc(0,_item0_)] }");
newrule4.action=String::from(" _item0_.push(parser.lc(2,_item2_)); _item0_ }");
} } if self.tracelev>3 {
printrule(&newrule3,self.Rules.len());
printrule(&newrule4,self.Rules.len()+1);
}
self.Rules.push(newrule3);
self.Rules.push(newrule4);
let mut rulesforset3 = HashSet::with_capacity(2);
rulesforset3.insert(self.Rules.len()-2);
rulesforset3.insert(self.Rules.len()-1);
newtok3 = format!("{}:{}",&newntname3,relabel3);
self.Rulesfor.insert(newnt3.index,rulesforset3);
if !strtok.ends_with("*>") {
NEWNTs.insert(hashkey,newnt3.index);
} else { let hashkey2 = format!("{}+>",&hashkey[..hashkey.len()-2]);
NEWNTs.insert(hashkey2,newnt3.index);
let newntname5 = format!("NEWSEPNT2_{}_{}",self.Rules.len(),ntcnt); ntcnt+=1;
let mut newnt5 = Gsym::new(&newntname5,false);
newnt5.rusttype = newnt3.rusttype.clone();
newnt5.index = self.Symbols.len();
self.Symhash.insert(newntname5.clone(),self.Symbols.len());
self.Symbols.push(newnt5.clone()); let mut newrule5 = Grule::new_skeleton(&newntname5);
let mut newrule6 = Grule::new_skeleton(&newntname5);
newrule5.lhs.index = newnt5.index; newrule6.lhs.index = newnt5.index;
newrule6.rhs.push(newnt3.clone());
if newnt5.rusttype.starts_with("Vec") {
newrule5.action = String::from(" vec![] }");
newrule6.action = String::from("_item0_ }");
}
if self.tracelev>3 {
printrule(&newrule5,self.Rules.len());
printrule(&newrule6,self.Rules.len()+1);
}
self.Rules.push(newrule5);
self.Rules.push(newrule6);
let mut rulesforset5 = HashSet::with_capacity(2);
rulesforset5.insert(self.Rules.len()-2);
rulesforset5.insert(self.Rules.len()-1);
newtok3 = format!("{}:{}",&newntname5,relabel3);
self.Rulesfor.insert(newnt5.index,rulesforset5);
NEWNTs.insert(hashkey,newnt5.index);
} strtok = &newtok3;
} }
let mut toks:Vec<&str> = strtok.split(':').collect();
if toks[0]=="_" {toks[0] = "_WILDCARD_TOKEN_";}
match self.Symhash.get(toks[0]) {
None => {self.logeprint(&format!("Unrecognized grammar symbol '{}', line {} of grammar",toks[0],linenum)); return false; },
Some(symi) => {
let sym = &self.Symbols[*symi];
if self.Errsym.len()>0 && &sym.sym == &self.Errsym {
if !seenerrsym { seenerrsym = true; }
else { self.logeprint(&format!("Error symbol {} can only appear once in a production, line {}",&self.Errsym,linenum)); return false; }
}
let mut newsym = sym.clone();
if newsym.rusttype.len()<1 && !self.genabsyn {newsym.rusttype = self.Absyntype.clone();}
if toks.len()>1 && toks[1].trim().len()==0 {
self.logeprint(&format!("WARNING: EMPTY LABEL FOR {}, LINE {}; remove whitespaces between ':' and the label\n",toks[0],linenum));
}
else
if toks.len()>1 && toks[1].trim().len()>0 { let mut label = String::new();
if let Some(atindex) = toks[1].find('@') { label.push_str(toks[1]);
while !label.ends_with('@') && i<bstokens.len()
{ label.push(' '); label.push_str(bstokens[i]); i+=1;
}
if !label.ends_with('@') { self.logeprint(&format!("pattern labels must be closed with @, line {}",linenum)); return false;}
} else { label = toks[1].trim().to_string(); }
newsym.setlabel(label.trim_end_matches('@'));
}
if prec_level(maxprec).abs() < prec_level(newsym.precedence).abs() { maxprec=newsym.precedence; }
rhsyms.push(newsym);
},
} }
if markers.len()%2==1 {self.logeprint(&format!("ERROR: DELAY MARKERS MUST COME IN PAIRS, LINE {}\n",linenum)); return false;}
else if markers.len()>=2 {
self.delaymarkers.insert(reserved_rindex,BTreeSet::new());
}
let mut i = 0;
while i+1<markers.len()
{
let dbegin = markers[i];
let dend = markers[i+1];
i += 2;
if dend>dbegin+1 {
self.delaymarkers.get_mut(&reserved_rindex).unwrap().insert((dbegin,dend));
}
}
let mut newlhs = self.Symbols[symind2].clone(); if findcsplit.len()>1 {newlhs.label = findcsplit[1].to_owned();}
if manual_precedence!=0 {maxprec=manual_precedence;} let rule = Grule {
lhs : newlhs,
rhs : rhsyms,
action: semaction.to_owned(),
precedence : maxprec,
autogenerated : false,
};
if self.tracelev>3 {printrule(&rule,self.Rules.len());}
self.Rules[reserved_rindex] = rule;
if let None = self.Rulesfor.get(&symind2) { self.Rulesfor.insert(symind2,HashSet::new());
}
let rulesforset = self.Rulesfor.get_mut(&symind2).unwrap();
rulesforset.insert(reserved_rindex);
} },
_ => {self.logeprint(&format!("ERROR parsing grammar on line {}, unexpected declaration at grammar stage {}",linenum,stage)); return false;},
} } }
if self.Symhash.contains_key("START") || self.Symhash.contains_key("EOF") || self.Symhash.contains_key("ANY_ERROR")
{
self.logeprint(&format!("Error in grammar: START and EOF are reserved symbols"));
return false;
}
let mut startnt = Gsym::new("START",false);
let mut eofterm = Gsym::new("EOF",true);
if self.genabsyn || !self.sametype {
startnt.rusttype="()".to_owned(); }
else {startnt.rusttype = self.Absyntype.clone();}
if self.genabsyn || !self.sametype {eofterm.rusttype = "()".to_owned();}
else {eofterm.rusttype = self.Absyntype.clone();}
let mut wildcard = Gsym::new("_WILDCARD_TOKEN_",true);
startnt.index = self.Symbols.len();
eofterm.index = self.Symbols.len()+1;
self.startnti = startnt.index;
self.eoftermi = eofterm.index;
self.Symhash.insert(String::from("START"),self.startnti);
self.Symhash.insert(String::from("EOF"),self.eoftermi);
self.Symbols.push(startnt.clone());
self.Symbols.push(eofterm.clone());
if self.topsym == usize::MAX {
self.logeprint("GRAMMAR START SYMBOL NOT DECLARED");
return false;
}
let topgsym = &self.Symbols[self.topsym]; let startrule = Grule { lhs:startnt,
rhs:vec![topgsym.clone()], action: String::default(),
precedence : DEFAULTPRECEDENCE,
autogenerated : false,
};
self.Rules.push(startrule); self.startrulei = self.Rules.len()-1;
let mut startrfset = HashSet::new();
startrfset.insert(self.Rules.len()-1); self.Rulesfor.insert(self.startnti,startrfset); if self.Externtype.len()<1 {self.Externtype = self.Absyntype.clone();}
if self.bumpast {
if self.lifetime.len()==0 {self.lifetime="'src_lt".to_owned();}
self.Externtype = format!("Bumper<{},{}>",&self.lifetime,&self.Externtype);
}
if &self.Absyntype!="()" && &topgsym.rusttype!=&self.Absyntype && topgsym.rusttype.len()>0 {
let msg = format!("\nWARNING: THE TYPE FOR THE START SYMBOL ({}) IS NOT THE SAME AS THE VALUETYPE ({})\n",&topgsym.rusttype,&self.Absyntype);
if self.tracelev>0 {eprint!("{}",msg);}
else { self.genlog.push_str(&msg); }
self.Absyntype = topgsym.rusttype.clone();
}
if self.lifetime.len()>0 {
let wildtype = format!("&{} str",&self.lifetime);
self.Symbols[0].rusttype = wildtype.clone();
self.enumhash.insert(wildtype,ntcx); ntcx+=1;
self.haslt_base.insert(0);
}
if self.sametype && !self.genabsyn {self.Symbols[0].rusttype = self.Absyntype.clone();} if !self.genabsyn {self.enumhash.insert(self.Absyntype.clone(),0);}
self.reachability(); let startreach = self.Reachable.get(&(self.Symbols.len()-2)).unwrap();
for sym in &self.Symbols {
if sym.index>0 && sym.index<self.Symbols.len()-2 && !startreach.contains(&sym.index) {
let msg = format!("WARNING: The symbol {} is not reachable from the grammar's start symbol.\n\n",&sym.sym);
if self.tracelev>0 {eprint!("{}",msg);}
else {self.genlog.push_str(&msg);}
}
if !sym.terminal {
if let Some(rset) = self.Rulesfor.get(&sym.index) {
if rset.len()<1 {
let msg = format!("WARNING: The symbol {}, which was declared non-terminal, does not occur on the left-hand side of any production rule.\n\n",&sym.sym);
if self.tracelev>0 {eprint!("{}",msg);}
else {self.genlog.push_str(&msg);}
}
} else {
self.Rulesfor.insert(sym.index,HashSet::new());
}
} }
if self.tracelev>0 {self.logprint(&format!("{} rules in grammar",self.Rules.len()));}
self.ntcxmax = ntcx;
true
}}
impl Grammar
{
pub fn compute_NullableRf(&mut self)
{
let mut changed = true;
while changed
{
changed = false;
for rule in &self.Rules
{
let mut addornot = true;
for gs in &rule.rhs {
if gs.terminal || !self.Nullable.contains(&gs.index)
{addornot=false; break;}
} if (addornot) {
changed = self.Nullable.insert(rule.lhs.index) || changed;
}
} } }
/// Reports whether the whole symbol sequence `Gs` can derive the empty string.
///
/// True exactly when no symbol is a terminal and every symbol's index is in
/// `self.Nullable`; an empty slice therefore yields `true`.
pub fn Nullableseq(&self, Gs:&[Gsym]) -> bool
{
    Gs.iter()
        .all(|sym| !sym.terminal && self.Nullable.contains(&sym.index))
}
/// Computes the FIRST sets of all rule left-hand sides by fixed-point iteration.
///
/// For each rule the right-hand side is scanned left to right: a terminal
/// contributes its own index and stops the scan; a non-terminal contributes
/// its currently known FIRST set (skipped when it is the rule's own
/// left-hand side) and the scan continues only if that non-terminal is in
/// `self.Nullable`.  The collected indices are merged into
/// `self.First[lhs]`, creating the entry if it does not exist yet.  Passes
/// repeat until no set grows.  Reads `self.Nullable`, so that set must
/// already be populated for the result to be complete.
pub fn compute_First(&mut self)
{
    // Scratch set reused for every rule to avoid reallocating.
    let mut pending:HashSet<usize> = HashSet::new();
    loop {
        let mut grew = false;
        for production in &self.Rules {
            let lhs_index = production.lhs.index;
            pending.clear();
            for sym in &production.rhs {
                if sym.terminal {
                    pending.insert(sym.index);
                    break;
                }
                if sym.index != lhs_index {
                    if let Some(known) = self.First.get(&sym.index) {
                        pending.extend(known.iter().copied());
                    }
                }
                // Only a nullable non-terminal lets later symbols contribute.
                if !self.Nullable.contains(&sym.index) { break; }
            }
            let first_of_lhs = self.First.entry(lhs_index).or_default();
            for &candidate in &pending {
                if first_of_lhs.insert(candidate) { grew = true; }
            }
        }
        if !grew { break; }
    }
}
pub fn Firstseq(&self, Gs:&[Gsym], la:usize) -> HashSet<usize>
{
let mut Fseq = HashSet::new();
let mut i = 0;
let mut nullable = true;
while nullable && i<Gs.len()
{
if (Gs[i].terminal) {Fseq.insert(Gs[i].index); nullable=false; }
else {
let firstgsym = self.First.get(&Gs[i].index).unwrap();
for s in firstgsym { Fseq.insert(*s); }
if !self.Nullable.contains(&Gs[i].index) {nullable=false;}
}
i += 1;
} if nullable {Fseq.insert(la);}
Fseq
}
/// Writes the Rust source of a lexical scanner for this grammar to `fd`.
///
/// The emitted code declares a struct named `<self.name>lexer` that wraps a
/// `StrTokenizer`, gives it `from_str`, `from_source` and `new`
/// constructors, and implements `Tokenizer` for it.
///
/// * `fd` - destination of the generated text.
/// * `fraw` - inserted verbatim after `TerminalToken::` in every generated
///   match arm, i.e. the name of the constructor the generated lexer calls.
///
/// Returns the first I/O error reported by `write!`, otherwise `Ok(())`.
///
/// Panics (through `expect`/`unwrap`) when the grammar has more than one
/// value type and a `Lexvals` terminal is missing from `Symhash`, or its
/// type is missing from `enumhash`, or the type of `Symbols[0]` is missing
/// from `enumhash`.
pub fn genlexer(&self,fd:&mut File, fraw:&str) -> Result<(),std::io::Error>
{
let ref absyn = self.Absyntype;
let ref extype = self.Externtype;
// "<lifetime>" when the grammar declares a lifetime, otherwise empty.
let ltopt = if self.lifetime.len()>0 {format!("<{}>",&self.lifetime)}
else {String::new()};
let retenum = format!("RetTypeEnum{}",&ltopt);
// Value type named in the generated tokens: the single abstract-syntax
// type when `sametype` is set, otherwise the generated return enum.
let retype = if self.sametype {absyn} else {&retenum};
// Lifetime parameter of the generated lexer; falls back to 't.
let lifetime = if (self.lifetime.len()>0) {&self.lifetime} else {"'t"};
write!(fd,"\n// Lexical Scanner using RawToken and StrTokenizer\n")?;
let lexername = format!("{}lexer",&self.name);
// Classify terminals: alphanumeric ones become keywords, the others are
// bucketed by byte length (1, 2 or 3) for the tokenizer's symbol tables.
let mut keywords:HashSet<&str> = HashSet::new();
let mut singles:Vec<char> = Vec::new();
let mut doubles:Vec<&str> = Vec::new();
let mut triples:Vec<&str> = Vec::new();
for symbol in &self.Symbols
{
if !symbol.terminal {continue;}
// EOF, ANY_ERROR and terminals listed in Haslexval are never keywords.
if is_alphanum(&symbol.sym) && &symbol.sym!="EOF" && &symbol.sym!="ANY_ERROR" && !self.Haslexval.contains(&symbol.sym) {
keywords.insert(&symbol.sym);
}
else if symbol.sym.len()==1 && !is_alphanum(&symbol.sym) {
singles.push(symbol.sym.chars().next().unwrap());
}
else if symbol.sym.len()==2 && !is_alphanum(&symbol.sym) {
doubles.push(&symbol.sym);
}
else if symbol.sym.len()==3 && !is_alphanum(&symbol.sym) {
triples.push(&symbol.sym);
}
} for (sym,symmap) in self.Lexnames.iter()
{
// Lexnames pairs a textual form (`sym`) with a grammar symbol name
// (`symmap`).  For an alphanumeric form, the textual form replaces the
// mapped name in the keyword set; other forms are bucketed by length.
if is_alphanum(sym) {
keywords.remove(&symmap[..]);
keywords.insert(sym);
continue;
}
if sym.len()==1 {
singles.push(sym.chars().next().unwrap());
}
else if sym.len()==2 {
doubles.push(&sym);
}
else if sym.len()==3 {
triples.push(&sym);
}
}
// Struct declaration; {0}=lexer name, {1}=external state type, {2}=lifetime.
write!(fd,"pub struct {0}<{2}> {{
stk: StrTokenizer<{2}>,
keywords: HashSet<&'static str>,
lexnames: HashMap<&'static str,&'static str>,
shared_state: Rc<RefCell<{1}>>,",&lexername,extype,lifetime)?;
// Bump-allocated ASTs add an optional allocator reference to the struct.
if self.bumpast {
write!(fd,"\n bump: Option<&{} bumpalo::Bump>,",lifetime)?;
}
// Close the struct and emit `from_str` plus the header of `from_source`.
// Argument {1} is the empty string here.
write!(fd,"
}}
impl<{2}> {0}<{2}>
{{
pub fn from_str(s:&{2} str) -> {0}<{2}> {{
Self::new(StrTokenizer::from_str(s))
}}
pub fn from_source(s:&{2} LexSource<{2}>) -> {0}<{2}> {1} {{
",&lexername,"",lifetime)?;
// Body of `from_source`: with bumpast the allocator is taken from the source.
if self.bumpast {
write!(fd," let mut st = Self::new(StrTokenizer::from_source(s));
st.bump = s.get_bump();
st")?;
} else {
write!(fd," Self::new(StrTokenizer::from_source(s))")?;
}
// Start of the generated `new`, up to the opening of the keyword array.
write!(fd,"
}}
pub fn new(mut stk:StrTokenizer<{2}>) -> {0}<{2}> {{
let mut lexnames = HashMap::with_capacity(64);
let mut keywords = HashSet::with_capacity(64);
let shared_state = Rc::new(RefCell::new(<{1}>::default()));
for kw in [",&lexername,extype,lifetime)?;
// `keywords` is a HashSet, so the order of the emitted literals is unspecified.
for kw in &keywords {write!(fd,"\"{}\",",kw)?;}
write!(fd,"] {{keywords.insert(kw);}}
for c in [")?;
// NOTE(review): `c` is written between single quotes without escaping; a
// `'` or `\` here would presumably yield an invalid char literal in the
// generated code - confirm whether such symbols can reach this point.
for c in singles {write!(fd,"'{}',",c)?;}
write!(fd,"] {{stk.add_single(c);}}
for d in [")?;
for d in doubles {write!(fd,"\"{}\",",d)?;}
write!(fd,"] {{stk.add_double(d);}}
for d in [")?;
for d in triples {write!(fd,"\"{}\",",d)?;}
write!(fd,"] {{stk.add_triple(d);}}
for (k,v) in [")?;
// Textual forms are emitted as raw string literals, symbol names as plain ones.
for (kl,vl) in &self.Lexnames {
write!(fd,"(r#\"{}\"#,\"{}\"),",kl,vl)?;
}
write!(fd,"] {{lexnames.insert(k,v);}}\n")?;
// Each Lexextras entry is emitted as a method call on the tokenizer.
for attr in &self.Lexextras {write!(fd," stk.{};\n",attr.trim())?;}
// Final struct expression of the generated `new`, closing fn and impl.
if self.bumpast {
write!(fd," let bump:Option<&{} bumpalo::Bump> = None;
{} {{stk,keywords,lexnames,shared_state,bump,}}\n }}\n}}\n",&self.lifetime,&lexername)?;
} else {
write!(fd," {} {{stk,keywords,lexnames,shared_state,}}\n }}\n}}\n",&lexername)?;
}
// Tokenizer implementation; {0}=lifetime, {1}=value type, {2}=lexer name.
write!(fd,"impl<{0}> Tokenizer<{0},{1}> for {2}<{0}>
{{
fn nextsym(&mut self) -> Option<TerminalToken<{0},{1}>> {{
",lifetime,retype,&lexername)?;
// Conditional tokenizer actions run at the top of every `nextsym` call.
for (condition,action) in self.Lexconditionals.iter() {
write!(fd," if {} {{ self.stk.{} }}\n",condition,action)?;
}
write!(fd," let tokopt = self.stk.next_token();
if let None = tokopt {{return None;}}
let token = tokopt.unwrap();
match token.0 {{
")?;
// The keyword arm is only generated when the grammar has keywords.
if keywords.len()>0 {
write!(fd," RawToken::Alphanum(sym) if self.keywords.contains(sym) => {{
let truesym = self.lexnames.get(sym).unwrap_or(&sym);
Some(TerminalToken::{}(token,truesym,<{}>::default()))
}},\n",fraw,retype)?;
} for (tname,raw,val) in &self.Lexvals {
// One arm per value-carrying terminal: `raw` is the RawToken pattern,
// `val` the expression producing the token's value.
let mut Finalval = val.clone();
// With several value types the expression is wrapped in the enum
// variant registered in `enumhash` for the terminal's type.
if !self.sametype {
let emsg = format!("FATAL ERROR: '{}' IS NOT A SYMBOL IN THIS GRAMMAR",tname);
let symi = *self.Symhash.get(tname).expect(&emsg);
let ttype = &self.Symbols[symi].rusttype;
let ei = self.enumhash.get(ttype).expect("FATAL ERROR: GRAMMAR CORRUPTED");
Finalval = format!("RetTypeEnum::Enumvariant_{}({})",ei,val);
}
write!(fd," RawToken::{} => Some(TerminalToken::{}(token,\"{}\",{})),\n",raw,fraw,tname,&Finalval)?;
}
// Fallback arms: renamed symbols, plain symbols, plain alphanumerics, and
// finally any other raw token, named by its static string.
write!(fd," RawToken::Symbol(s) if self.lexnames.contains_key(s) => {{
let tname = self.lexnames.get(s).unwrap();
Some(TerminalToken::{}(token,tname,<{}>::default()))
}},\n",fraw,retype)?;
write!(fd," RawToken::Symbol(s) => Some(TerminalToken::{}(token,s,<{}>::default())),\n",fraw,retype)?;
write!(fd," RawToken::Alphanum(s) => Some(TerminalToken::{}(token,s,<{}>::default())),\n",fraw,retype)?;
write!(fd," _ => {{ let _rrodb=token.0.to_staticstr(); Some(TerminalToken::{}(token,_rrodb,<{}>::default())) }},\n }}\n }}",fraw,retype)?;
// Remaining Tokenizer methods simply delegate to the wrapped tokenizer.
write!(fd,"
fn linenum(&self) -> usize {{self.stk.line()}}
fn column(&self) -> usize {{self.stk.column()}}
fn position(&self) -> usize {{self.stk.current_position()}}
fn current_line(&self) -> &str {{self.stk.current_line()}}
fn get_line(&self,i:usize) -> Option<&str> {{self.stk.get_line(i)}}
fn add_priority_symbol(&mut self, s:&'static str) {{self.stk.add_priority_symbol(s);}}
fn get_slice(&self,s:usize,l:usize) -> &str {{self.stk.get_slice(s,l)}}")?;
// `transform_wildcard` is generated only with multiple value types or
// auto-generated abstract syntax.  The wildcard value is the current text
// when a lifetime exists, otherwise a (previous,current) position pair.
if (!self.sametype) || self.genabsyn {
let wildcardvar = self.enumhash.get(&self.Symbols[0].rusttype).unwrap();
if self.lifetime.len()>0 { write!(fd,"
fn transform_wildcard(&self,t:TerminalToken<{},{}>) -> TerminalToken<{},{}> {{ TerminalToken::new(t.sym,RetTypeEnum::Enumvariant_{}(self.stk.current_text()),t.line,t.column) }}",lifetime,retype,lifetime,retype,wildcardvar)?;
} else { write!(fd,"
fn transform_wildcard(&self,t:TerminalToken<{},{}>) -> TerminalToken<{},{}> {{ TerminalToken::new(t.sym,RetTypeEnum::Enumvariant_{}((self.stk.previous_position(),self.stk.current_position())),t.line,t.column) }}",lifetime,retype,lifetime,retype,wildcardvar)?;
}
}
write!(fd,"
}}//impl Tokenizer
\n")?;
Ok(())
}
pub fn gen_enum(&self,fd:&mut File) -> Result<(),std::io::Error>
{
let ref absyn = self.Absyntype;
let ref extype = self.Externtype;
let ref lifetime = self.lifetime;
let has_lt = lifetime.len()>0;
let ltopt = if has_lt {format!("<{}>",lifetime)} else {String::from("")};
let enumname = format!("RetTypeEnum{}",<opt); let symlen = self.Symbols.len();
write!(fd,"\n//Enum for return values \npub enum {} {{\n",&enumname)?;
for (typesym,eindex) in self.enumhash.iter()
{
write!(fd," Enumvariant_{}({}),\n",eindex,typesym)?;
}
write!(fd,"}}\n")?;
write!(fd,"impl{} Default for {} {{ fn default()->Self {{RetTypeEnum::Enumvariant_0(<{}>::default())}} }}\n\n",<opt,&enumname,&self.Absyntype)?;
Ok(())
}
}
/// Unwraps a label written in square brackets.
///
/// When `s` starts with `[` and ends with `]`, returns the text between the
/// brackets with surrounding whitespace trimmed; otherwise returns `s`
/// unchanged.
pub fn checkboxlabel(s:&str) -> &str
{
    match s.strip_prefix('[').and_then(|rest| rest.strip_suffix(']')) {
        Some(inner) => inner.trim(),
        None => s,
    }
}
/// Reports whether `s` is a bracketed label with nothing but whitespace
/// between the `[` and the `]`.
pub fn emptybox(s:&str) -> bool {
    s.strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))
        .map_or(false, |inner| inner.trim().is_empty())
}
/// Unwraps a bracketed label, substituting `e` when the brackets are empty.
///
/// * `s` not wrapped in `[`..`]`: returns `s` unchanged.
/// * brackets containing only whitespace: returns `e`.
/// * otherwise: returns the trimmed text between the brackets.
pub fn checkboxexp<'t>(s:&'t str, e:&'t str) -> &'t str {
    let boxed = s
        .strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))
        .map(str::trim);
    match boxed {
        Some("") => e,
        Some(inner) => inner,
        None => s,
    }
}
/// Reports whether `x` has the shape of an identifier: non-empty, first
/// character `_` or alphabetic, every later character `_` or alphanumeric.
pub fn is_alphanum(x:&str) -> bool
{
    let mut rest = x.chars();
    match rest.next() {
        Some(head) if head=='_' || head.is_alphabetic() => {
            rest.all(|c| c=='_' || c.is_alphanumeric())
        }
        // Empty input or an invalid leading character.
        _ => false,
    }
}
/// Finds the first occurrence of `key` in `s` that is not enclosed in braces.
///
/// Brace depth is tracked while scanning: `{` increases it, `}` decreases
/// it, and `key` only matches at depth zero.  The `key` test comes first, so
/// a `key` of `{` or `}` matches at depth zero before it is counted.
///
/// Returns the **byte offset** of the match, which is what `str::split_at`
/// and string slicing require, or `None` when there is no match.  For
/// all-ASCII input this equals the character position.
fn findskip(s:&str, key:char) -> Option<usize>
{
    let mut depth:i32 = 0;
    for (offset,c) in s.char_indices() {
        match c {
            found if found==key && depth==0 => return Some(offset),
            '{' => depth += 1,
            '}' => depth -= 1,
            _ => {},
        }
    }
    None
}
/// Locates the first `left` delimiter in `s` and the `right` delimiter that
/// balances it, honouring nesting.
///
/// Returns `(start, end)` as **byte offsets** into `s`, suitable for
/// slicing; for all-ASCII input these equal the character positions.
/// Special results:
/// * `(0, 0)` when `left` never occurs;
/// * `(start, 0)` when `left` occurs but is never balanced.
fn findmatch(s:&str, left:char, right:char) -> (usize,usize)
{
    let mut start:Option<usize> = None;
    let mut depth:i32 = 0;
    for (offset,c) in s.char_indices() {
        if c==left {
            depth += 1;
            if start.is_none() { start = Some(offset); }
        }
        else if c==right { depth -= 1; }
        // Balanced again after the opening delimiter has been seen.
        if let Some(begin) = start {
            if depth==0 { return (begin,offset); }
        }
    }
    (start.unwrap_or(0),0)
}
/// Reports whether the encoded precedence `lev` lies strictly below
/// `NONASSOCBIT`, the range used for non-associative levels.
pub fn nonassoc(lev:i32)-> bool {
    NONASSOCBIT > lev
}
/// Reports whether the encoded precedence `lev` is strictly positive, the
/// encoding used for left-associative levels.
pub fn leftassoc(lev:i32)->bool {
    lev.is_positive()
}
/// Reports whether the encoded precedence `lev` is negative but still
/// strictly above `NONASSOCBIT`, the range used for right-associative levels.
pub fn rightassoc(lev:i32) -> bool {
    lev.is_negative() && NONASSOCBIT < lev
}
/// Encodes `lev` as a non-associative precedence by reflecting it below
/// `NONASSOCBIT` (the result is `NONASSOCBIT - lev`).
pub fn make_nonassoc(lev:i32) -> i32 {
    let encoded = NONASSOCBIT - lev;
    encoded
}
/// Recovers the level stored in an encoded precedence value.
///
/// Values strictly below `NONASSOCBIT` are mapped back to
/// `NONASSOCBIT - lev`; every other value is returned unchanged, sign
/// included.
pub fn prec_level(lev:i32) -> i32
{
    if lev >= NONASSOCBIT {
        lev
    } else {
        NONASSOCBIT - lev
    }
}