#![allow(dead_code)]
#![allow(unused_variables)]
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]
#![allow(unused_parens)]
#![allow(unused_mut)]
#![allow(unused_assignments)]
#![allow(unused_doc_comments)]
#![allow(unused_imports)]
use std::fmt::Display;
use std::default::Default;
use std::collections::{HashMap,HashSet,BTreeSet};
use std::io::{self,Read,Write,BufReader,BufRead};
use std::cell::{RefCell,Ref,RefMut};
use std::hash::{Hash,Hasher};
use std::fs::File;
use std::io::prelude::*;
use std::path::Path;
use std::mem;
use crate::{TRACE,Lexer,Lextoken,Stateaction,Statemachine};
use crate::Stateaction::*;
/// Runtime representation of a grammar production rule.
///
/// This structure is only exported because it is required by the generated
/// parsers. There is no reason to use it in other programs.
#[derive(Clone)]
pub struct RProduction<AT:Default,ET:Default>
{
    /// left-hand side nonterminal of the rule
    pub lhs: &'static str,
    /// semantic action of the rule: it is handed the parser itself as its
    /// argument and produces a semantic value of type AT
    pub Ruleaction: fn(&mut RuntimeParser<AT,ET>) -> AT,
}
impl<AT:Default,ET:Default> RProduction<AT,ET>
{
pub fn new_skeleton(lh:&'static str) -> RProduction<AT,ET>
{
RProduction {
lhs : lh,
Ruleaction : |p|{AT::default()},
}
}
}//impl RProduction
/// One entry of the parse stack: a state index paired with a semantic value.
pub struct Stackelement<AT:Default>
{
    /// state index
    pub si: usize,
    /// semantic value (grammar symbols themselves are not cloned onto the stack)
    pub value: AT,
}
/// This is the structure created by the generated parser. The generated parser
/// program will contain a make_parser function that returns this structure.
/// Most of the pub items are, however, only exported to support the operation
/// of the parser, and should not be accessed directly. Only the functions
/// [RuntimeParser::parse] and [RuntimeParser::abort] should be called directly
/// from user programs. Only the field [RuntimeParser::exstate] should be accessed
/// by user programs.
pub struct RuntimeParser<AT:Default,ET:Default>
{
    /// This is the "external state" structure, with type ET defined by the grammar.
    /// The semantic actions associated with each grammar rule, which are written
    /// in the grammar, have ref mut access to the RuntimeParser structure, which
    /// allows them to read and change the external state object. This gives
    /// the parsers greater flexibility and capability, including the ability to
    /// parse some non-context free languages. See the sample grammar at
    /// <https://cs.hofstra.edu/~cscccl/rustlr_project/ncf.grammar>.
    /// The exstate is initialized to ET::default(); its usage is optional.
    pub exstate: ET,
    /// Runtime state machine: one map per state, from grammar symbol to the
    /// action to take on that symbol.
    /// Used only by the generated parser: do not reference.
    pub RSM: Vec<HashMap<&'static str,Stateaction>>,
    /// Grammar rules, reduced to their left-hand side and delegate function.
    /// Do not reference.
    pub Rules: Vec<RProduction<AT,ET>>,
    // when set, the main loop of parse terminates; abort sets it
    stopparsing: bool,
    /// The parse stack. Do not reference.
    pub stack: Vec<Stackelement<AT>>,
    // pub recover : HashSet<&'static str>, // for error recovery
    // pub resynch : HashSet<&'static str>,
    /// Initialized to the empty string by [RuntimeParser::new].
    // NOTE(review): presumably the name of the grammar's error symbol - confirm
    pub Errsym: &'static str,
    /// Initialized to 0 by [RuntimeParser::new].
    pub linenum: usize,
    /// Initialized to 0 by [RuntimeParser::new]; not used for now.
    pub column: usize,
}//struct RuntimeParser
impl<AT:Default,ET:Default> RuntimeParser<AT,ET>
{
/// this is only called by the make_parser function in the machine-generated
/// parser program. *Do not call this function in other places* as it
/// only generate a skeleton.
pub fn new(rlen:usize, slen:usize) -> RuntimeParser<AT,ET>
{ // given number of rules and number states
let mut p = RuntimeParser {
RSM : Vec::with_capacity(slen),
Rules : Vec::with_capacity(rlen),
stopparsing : false,
exstate : ET::default(),
stack : Vec::with_capacity(1024),
Errsym : "",
linenum : 0,
column : 0,
// recover : HashSet::new(),
// resynch : HashSet::new(),
};
for _ in 0..slen {p.RSM.push(HashMap::new());}
p
}//new
/// this function can be called from with the "semantic" actions attached
/// to grammar production rules that are executed for each
/// "reduce" action of the parser.
pub fn abort(&mut self, msg:&str)
{
println!("!!!Parsing Aborted: {}",msg);
self.stopparsing=true;
}
// parse does not reset state stack
/// this function is used to invoke the generated parser returned by
/// make_function.
pub fn parse(&mut self, tokenizer:&mut dyn Lexer<AT>) -> AT
{
let mut result = AT::default();
// push state 0 on stack:
self.stack.push(Stackelement {si:0, value:AT::default()});
let unexpected = Stateaction::Error(String::from("unexpected end of input"));
let mut action = unexpected; //Stateaction::Error(String::from("get started"));
self.stopparsing = false;
// if !tokenizer.has_next() { self.stopparsing=true; }
// let mut lookahead = tokenizer.nextsym(); // initial, this is a Lextoken
let mut lookahead = Lextoken{sym:"EOF".to_owned(),value:AT::default()};
if let Some(tok) = tokenizer.nextsym() {lookahead=tok;}
else {self.stopparsing=true;}
while !self.stopparsing
{
let currentstate = self.stack[self.stack.len()-1].si;
if TRACE>1 {print!(" current state={}, lookahead={}, ",¤tstate,&lookahead.sym);}
let actionopt = self.RSM[currentstate].get(lookahead.sym.as_str());//.unwrap();
if TRACE>1 {println!("RSM action : {:?}",actionopt);}
if let None = actionopt {
panic!("!!PARSE ERROR: no action at state {}, lookahead {}, line {}",currentstate,&lookahead.sym,tokenizer.linenum());
}
action = actionopt.unwrap().clone(); // cloning stateaction is ok
match &action {
Stateaction::Shift(i) => { // shift to state si
// self.stack.push(Stackelement{si:*i,value:lookahead.value.clone()});
self.stack.push(Stackelement{si:*i,value:mem::replace(&mut lookahead.value,AT::default())});
// cloning here ok because it's just a token, like an int or string
// if !tokenizer.has_next() { self.stopparsing=true; }
// else {lookahead = tokenizer.nextsym();} // ADVANCE LOOKAHEAD HERE ONLY!
if let Some(tok) = tokenizer.nextsym() {lookahead=tok;}
else {
lookahead=Lextoken{sym:"EOF".to_owned(), value:AT::default()};
}
}, //shift
Stateaction::Reduce(ri) => { //reduce by rule i
let rulei = &self.Rules[*ri];
let ruleilhs = rulei.lhs; // &'static : Copy
let val = (rulei.Ruleaction)(self); // calls delegate function
let newtop = self.stack[self.stack.len()-1].si;
let goton = self.RSM[newtop].get(ruleilhs).unwrap();
if TRACE>1 {println!(" ..performing Reduce({}), new state {}, action on {}: {:?}..",ri,newtop,ruleilhs,goton);}
if let Stateaction::Gotonext(nsi) = goton {
self.stack.push(Stackelement{si:*nsi,value:val});
// DO NOT CHANGE LOOKAHEAD AFTER REDUCE!
}// goto next state after reduce
else { self.stopparsing=true; }
},
Stateaction::Accept => {
result = self.stack.pop().unwrap().value;
self.stopparsing = true;
},
Stateaction::Error(msg) => {
self.stopparsing = true;
},
Stateaction::Gotonext(_) => { //should not see this here
self.stopparsing = true;
},
}//match & action
} // main parser loop
if let Stateaction::Error(msg) = &action {
panic!("!!!Parsing failed on line {}, next symbol {}: {}",tokenizer.linenum(),&lookahead.sym,msg);
}
return result;
}//parse
}// impl RuntimeParser
////////////////////////////////////////////////////////////////
//// new version of write_fsm:
impl Statemachine
{
pub fn writeparser(&self, filename:&str)->Result<(),std::io::Error>
{
let mut fd = File::create(filename)?;
write!(fd,"//Parser generated by RustLr\n
#![allow(dead_code)]
#![allow(unused_variables)]
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]
#![allow(unused_parens)]
#![allow(unused_mut)]
#![allow(unused_assignments)]
extern crate RustLr;
use RustLr::{{RuntimeParser,RProduction,Stateaction,decode_action}};\n")?;
write!(fd,"{}\n",&self.Gmr.Extras)?; // use clauses
// write static array of symbols
write!(fd,"const SYMBOLS:[&'static str;{}] = [",self.Gmr.Symbols.len())?;
for i in 0..self.Gmr.Symbols.len()-1
{
write!(fd,"\"{}\",",&self.Gmr.Symbols[i].sym)?;
}
write!(fd,"\"{}\"];\n\n",&self.Gmr.Symbols[self.Gmr.Symbols.len()-1].sym)?;
// position of symbols must be inline with self.Gmr.Symhash
// record table entries in a static array
let mut totalsize = 0;
for i in 0..self.FSM.len() { totalsize+=self.FSM[i].len(); }
write!(fd,"const TABLE:[u64;{}] = [",totalsize)?;
// generate table to represent FSM
let mut encode:u64 = 0;
for i in 0..self.FSM.len() // for each state index i
{
let row = &self.FSM[i];
for key in row.keys()
{ // see function decode for opposite translation
let k = *self.Gmr.Symhash.get(key).unwrap(); // index of symbol
encode = ((i as u64) << 48) + ((k as u64) << 32);
match row.get(key) {
Some(Shift(statei)) => { encode += (*statei as u64) << 16; },
Some(Gotonext(statei)) => { encode += ((*statei as u64) << 16)+1; },
Some(Reduce(rulei)) => { encode += ((*rulei as u64) << 16)+2; },
Some(Accept) => {encode += 3; },
_ => {encode += 4; }, // 4 indicates Error
}//match
write!(fd,"{},",encode)?;
} //for symbol index k
}//for each state index i
write!(fd,"];\n\n")?;
// must know what absyn type is when generating code.
let ref absyn = self.Gmr.Absyntype;
let ref extype = self.Gmr.Externtype;
write!(fd,"pub fn make_parser() -> RuntimeParser<{},{}>",absyn,extype)?;
write!(fd,"\n{{\n")?;
// write code to pop stack, assign labels to variables.
write!(fd," let mut parser1:RuntimeParser<{},{}> = RuntimeParser::new({},{});\n",absyn,extype,self.Gmr.Rules.len(),self.States.len())?;
// generate rules and Ruleaction delegates, must pop values from runtime stack
write!(fd," let mut rule = RProduction::<{},{}>::new_skeleton(\"{}\");\n",absyn,extype,"start")?;
for i in 0..self.Gmr.Rules.len()
{
write!(fd," rule = RProduction::<{},{}>::new_skeleton(\"{}\");\n",absyn,extype,self.Gmr.Rules[i].lhs.sym)?;
write!(fd," rule.Ruleaction = |parser|{{ ")?;
let mut k = self.Gmr.Rules[i].rhs.len();
while k>0
{
let gsym = &self.Gmr.Rules[i].rhs[k-1];
if gsym.label.len()>0 && &gsym.rusttype[0..3]=="mut"
{ write!(fd," let mut {}:{}=",gsym.label,absyn)?; }
else if gsym.label.len()>0
{ write!(fd," let {}:{}=",gsym.label,absyn)?; }
write!(fd,"parser.stack.pop()")?;
if gsym.label.len()>0 { write!(fd,".unwrap().value; ")?;}
else {write!(fd,"; ")?;}
k -= 1;
} // for each symbol on right hand side of rule
let mut semaction = &self.Gmr.Rules[i].action; //this is a string
//if semaction.len()<1 {semaction = "}}";}
//if al>1 {semaction = semaction.substring(0,al-1);}
if semaction.len()>1 {write!(fd,"{};\n",semaction.trim_end())?;}
else {write!(fd," return {}::default();}};\n",absyn)?;}
write!(fd," parser1.Rules.push(rule);\n")?;
}// for each rule
// generate code to load RSM from TABLE
write!(fd,"\n for i in 0..{} {{\n",totalsize)?;
write!(fd," let symi = ((TABLE[i] & 0x0000ffff00000000) >> 32) as usize;\n")?;
write!(fd," let sti = ((TABLE[i] & 0xffff000000000000) >> 48) as usize;\n")?;
write!(fd," parser1.RSM[sti].insert(SYMBOLS[symi],decode_action(TABLE[i]));\n }}\n\n")?;
// write!(fd,"\n for i in 0..{} {{for k in 0..{} {{\n",rows,cols)?;
// write!(fd," parser1.RSM[i].insert(SYMBOLS[k],decode_action(TABLE[i*{}+k]));\n }}}}\n\n",cols)?;
write!(fd," return parser1;\n")?;
write!(fd,"}} //make_parser\n")?;
Ok(())
}//writeparser
///////////////// non-binary version //////////////////
pub fn write_verbose(&self, filename:&str)->Result<(),std::io::Error>
{
let mut fd = File::create(filename)?;
write!(fd,"//Parser generated by RustLr\n
#![allow(dead_code)]
#![allow(unused_variables)]
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]
#![allow(unused_parens)]
#![allow(unused_mut)]
#![allow(unused_assignments)]
extern crate RustLr;
use RustLr::{{RuntimeParser,RProduction,Stateaction}};\n")?;
write!(fd,"{}\n",&self.Gmr.Extras)?; // use clauses
let ref absyn = self.Gmr.Absyntype;
let ref extype = self.Gmr.Externtype;
write!(fd,"pub fn make_parser() -> RuntimeParser<{},{}>",absyn,extype)?;
write!(fd,"\n{{\n")?;
// write code to pop stack, assign labels to variables.
write!(fd," let mut parser1:RuntimeParser<{},{}> = RuntimeParser::new({},{});\n",absyn,extype,self.Gmr.Rules.len(),self.States.len())?;
// generate rules and Ruleaction delegates, must pop values from runtime stack
write!(fd," let mut rule = RProduction::<{},{}>::new_skeleton(\"{}\");\n",absyn,extype,"start")?;
for i in 0..self.Gmr.Rules.len()
{
write!(fd," rule = RProduction::<{},{}>::new_skeleton(\"{}\");\n",absyn,extype,self.Gmr.Rules[i].lhs.sym)?;
write!(fd," rule.Ruleaction = |parser|{{ ")?;
let mut k = self.Gmr.Rules[i].rhs.len();
while k>0
{
let gsym = &self.Gmr.Rules[i].rhs[k-1];
if gsym.label.len()>0 && &gsym.rusttype[0..3]=="mut"
{ write!(fd," let mut {}:{}=",gsym.label,absyn)?; }
else if gsym.label.len()>0
{ write!(fd," let {}:{}=",gsym.label,absyn)?; }
write!(fd,"parser.stack.pop()")?;
if gsym.label.len()>0 { write!(fd,".unwrap().value; ")?;}
else {write!(fd,"; ")?;}
k -= 1;
} // for each symbol on right hand side of rule
let mut semaction = &self.Gmr.Rules[i].action; //this is a string
//if semaction.len()<1 {semaction = "}}";}
//if al>1 {semaction = semaction.substring(0,al-1);}
if semaction.len()>1 {write!(fd,"{};\n",semaction.trim_end())?;}
else {write!(fd," return {}::default();}};\n",absyn)?;}
write!(fd," parser1.Rules.push(rule);\n")?;
}// for each rule
for i in 0..self.FSM.len()
{
let row = &self.FSM[i];
for key in row.keys()
{
write!(fd," parser1.RSM[{}].insert(\"{}\",Stateaction::{:?});\n",i,key,row.get(key).unwrap())?;
} //for each string key in row
}//for each state index i
write!(fd," return parser1;\n")?;
write!(fd,"}} //make_parser\n")?;
Ok(())
}//write_verbose
} // impl Statemachine