#![allow(dead_code)]
#![allow(unused_variables)]
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]
#![allow(unused_parens)]
#![allow(unused_mut)]
#![allow(unused_assignments)]
#![allow(unused_doc_comments)]
#![allow(unused_imports)]
use std::fmt::Display;
use std::default::Default;
use std::collections::{HashMap,HashSet,BTreeSet};
use std::io::{self,Read,Write,BufReader,BufRead};
use std::rc::Rc;
use std::cell::{RefCell,Ref,RefMut};
use std::hash::{Hash,Hasher};
use std::any::Any;
use std::fs::File;
use std::io::prelude::*;
use std::path::Path;
use std::mem;
use crate::{Stateaction,iserror,TerminalToken,Tokenizer};
use crate::{LBox,LRc,LC};
use crate::Stateaction::*;
use crate::{lbup,lbdown,lbget};
use crate::{StandardReporter,StackedItem};
#[cfg(feature = "generator")]
use crate::{Statemachine};
/// One grammar production as kept in the parser's rule table (`BaseParser::Rules`).
///
/// A production pairs the name of its left-hand-side symbol with the semantic
/// action that is run when the rule is reduced.
#[derive(Clone)]
pub struct BaseProduction<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>>
{
// `lhs`: left-hand-side symbol; after a reduce the parser looks up the goto
// transition for this name in the state that is then on top of the stack.
// `Ruleaction`: semantic action called with the parser itself on a reduce;
// the value it returns is pushed onto the parse stack.
pub lhs: &'static str, pub Ruleaction : fn(&mut BaseParser<'t,AT,ET,TT>) -> AT, }
impl<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>> BaseProduction<'t,AT,ET,TT>
{
pub fn new_skeleton(lh:&'static str) -> BaseProduction<'t,AT,ET,TT>
{
BaseProduction {
lhs : lh,
Ruleaction : |p|{ <AT>::default() },
}
}
}
/// Runtime state of a table-driven parser.
///
/// `AT` is the type of the semantic values carried on the parse stack, `ET`
/// the type of the user-defined external state, and `TT` the tokenizer that
/// supplies the input tokens.
pub struct BaseParser<'ilt,AT:Default,ET:Default,TT:Tokenizer<'ilt,AT>>
{
// `exstate`: user-defined external state, owned by the parser.
// `shared_state`: a second, reference-counted instance of the external state.
pub exstate : ET, pub shared_state : Rc<RefCell<ET>>,
// `RSM`: the state machine, one symbol -> action table per state index.
// `Rules`: the productions, indexed by rule number.
// `stopparsing`: set to end the main parse loop.
pub RSM : Vec<HashMap<&'static str,Stateaction>>, pub Rules : Vec<BaseProduction<'ilt,AT,ET,TT>>, stopparsing : bool,
// `stack`: the parse stack.
// `resynch`: symbols that error recovery may skip ahead to.
pub stack : Vec<StackedItem<AT>>, pub resynch : HashSet<&'static str>,
// Name of the error-recovery symbol; the empty string disables that recovery mode.
pub Errsym : &'static str,
// True once any error has been reported during the current parse.
err_occurred : bool,
// Line and column the parser currently associates with the top of the stack.
pub linenum : usize,
pub column : usize,
// Tokenizer positions recorded at the latest and at the previous token advance.
pub position : usize, pub prev_position : usize,
// Initialised to 0 by `new`; not otherwise used in this file.
pub src_id : usize,
// Line of the most recent error report; lets `report_error` group messages for one line.
report_line : usize,
// Set of symbols consulted by the error reporter to classify the lookahead
// (NOTE(review): presumably all grammar symbols -- it is filled outside this file).
pub Symset : HashSet<&'static str>,
// Source of input tokens.
pub tokenizer: TT,
// (line, column) of every item popped since the current reduction started.
popped : Vec<(usize,usize)>,
// `gindex`: counter handing out unique ids for `LBox`/`LC` values.
// `err_report`: when `Some`, error messages are appended here instead of being printed to stderr.
gindex : RefCell<u32>, err_report : Option<String>, }
impl<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>> BaseParser<'t,AT,ET,TT>
{
pub fn new(rlen:usize, slen:usize, tk: TT) -> Self
{ let mut p = BaseParser {
RSM : Vec::with_capacity(slen),
Rules : Vec::with_capacity(rlen),
stopparsing : false,
exstate : ET::default(),
shared_state: Rc::new(RefCell::new(ET::default())),
stack : Vec::with_capacity(1024),
Errsym : "",
err_occurred : false,
linenum : 0,
column : 0,
position : 0,
prev_position: 0,
src_id : 0,
report_line : 0,
resynch : HashSet::new(),
Symset : HashSet::with_capacity(64),
tokenizer:tk,
popped: Vec::with_capacity(8),
gindex: RefCell::new(0),
err_report : None,
};
for _ in 0..slen {
p.RSM.push(HashMap::with_capacity(16));
}
return p;
}
/// Returns a mutable reference to the tokenizer the parser reads from.
pub fn get_tokenizer(&mut self) -> &mut TT {
&mut self.tokenizer
}
/// Installs `newtk` as the parser's tokenizer and returns the tokenizer
/// that was in use before the call.
pub fn swap_tokenizer(&mut self, newtk:TT) -> TT {
    std::mem::replace(&mut self.tokenizer, newtk)
}
/// Line number the parser currently associates with its position.
pub fn current_line(&self)->usize {self.linenum}
/// Column number the parser currently associates with its position.
pub fn current_column(&self)->usize {self.column}
/// Tokenizer position recorded at the most recent token advance.
pub fn current_position(&self)->usize {self.position}
/// Tokenizer position recorded at the token advance before the most recent one.
pub fn previous_position(&self)->usize {self.prev_position}
/// Reports `msg` as a fatal problem and stops the parse.
///
/// The message is appended to the error-report buffer when one is active
/// and printed to stderr otherwise.  The parser is flagged as having
/// encountered an error and the main loop is told to terminate.
pub fn abort(&mut self, msg:&str)
{
    match self.err_report.as_mut() {
        Some(buffer) => buffer.push_str(&format!("\n!!!Parsing Aborted: {}\n",msg)),
        None => eprintln!("\n!!!Parsing Aborted: {}",msg),
    }
    self.err_occurred = true;
    self.stopparsing = true;
}
/// Asks the main parse loop to terminate; unlike `abort`, no message is
/// emitted and the error flag is left untouched.
pub fn stop(&mut self) {
self.stopparsing = true;
}
/// Reports `errmsg` without line/column information (`report_error` with `showlc == false`).
pub fn report(&mut self, errmsg:&str) {self.report_error(errmsg,false)}
pub fn report_error(&mut self, errmsg:&str, showlc: bool)
{
if (self.report_line != self.linenum || self.linenum==0) {
if showlc {
self.err_report.as_mut().map_or_else(
||eprintln!("ERROR on line {}, column {}: {}",self.linenum,self.column,errmsg),
|x|x.push_str(&format!("ERROR on line {}, column {}: {}\n",self.linenum,self.column,errmsg)));
}
else {
self.err_report.as_mut().map_or_else(
||eprintln!("PARSER ERROR: {}",errmsg),
|x|x.push_str(&format!("PARSER ERROR: {}\n",errmsg)));
}
self.report_line = self.linenum;
}
else {
if showlc {
self.err_report.as_mut().map_or_else(
||eprint!(" ({},{}): {}",self.linenum,self.column,errmsg),
|x|x.push_str(&format!(" ({},{}): {}",self.linenum,self.column,errmsg)));
}
else {
self.err_report.as_mut().map_or_else(
||eprint!(" {}",errmsg),
|x|{x.push(' '); x.push_str(errmsg)});
}
}
self.err_occurred = true;
}
/// Reports that `pattern` could not be matched against the values taken
/// from the parse stack and returns `AT::default()` as a stand-in value.
pub fn bad_pattern(&mut self,pattern:&str) -> AT
{
    self.report(&format!("pattern {} failed to bind to stacked values\n",pattern));
    AT::default()
}
/// Tries to shift the symbol `sym` without consuming input.
///
/// If the state on top of the stack has a shift action for `sym`, the
/// target state is pushed with a default value at the current line and
/// column and `true` is returned; otherwise nothing changes and the result
/// is `false`.
fn errshift(&mut self, sym:&str) -> bool
{
    let top_state = self.stack[self.stack.len()-1].si;
    match self.RSM[top_state].get(sym) {
        Some(Shift(target)) => {
            self.stack.push(StackedItem::new(*target,AT::default(),self.linenum,self.column));
            true
        }
        _ => false,
    }
}
/// Performs a shift: pushes `nextstate` together with the value and
/// location of `lookahead`, updates the parser's line/column and position
/// bookkeeping, and returns the next token from the tokenizer.
fn shift(&mut self, nextstate:usize, lookahead:TerminalToken<'t,AT>) -> TerminalToken<'t, AT>
{
    let (tok_line, tok_column) = (lookahead.line, lookahead.column);
    self.linenum = tok_line;
    self.column = tok_column;
    // Remember where the previous token ended before recording the new position.
    self.prev_position = self.position;
    self.position = self.tokenizer.position();
    let entry = StackedItem::new(nextstate,lookahead.value,tok_line,tok_column);
    self.stack.push(entry);
    self.tokenizer.next_tt()
}
/// Pops the top item off the parse stack and returns it.
///
/// The parser's current line and column are set to those of the popped
/// item, and the location is also remembered in the list of popped
/// locations used by `lbx`, `lc` and `lrcn`.
///
/// # Panics
/// Panics if the parse stack is empty.
pub fn popstack(&mut self) -> StackedItem<AT>
{
    let top = self.stack.pop().expect("PARSER STATE MACHINE/STACK CORRUPTED");
    let location = (top.line, top.column);
    self.linenum = location.0;
    self.column = location.1;
    self.popped.push(location);
    top
}
/// Pops the top item off the parse stack (with the same bookkeeping as
/// `popstack`) and returns its value wrapped in an `LBox` that carries the
/// item's line and column plus a freshly allocated unique id.
///
/// # Panics
/// Panics if the parse stack is empty.
pub fn popstack_as_lbox(&mut self) -> LBox<AT>
{
    let top = self.popstack();
    // Take the current id and advance the counter for the next box.
    let uid = {
        let mut counter = self.gindex.borrow_mut();
        let current = *counter;
        *counter += 1;
        current
    };
    LBox::make(top.value,top.line,top.column,uid)
}
/// Reduces by rule number `ri`.
///
/// Clears the list of popped locations, runs the rule's semantic action
/// (which is expected to pop the right-hand side off the stack), then looks
/// up the goto transition for the rule's left-hand side in the state now on
/// top and pushes the target state with the action's value.  If the table
/// entry is not a goto, an error is reported and parsing stops.
///
/// # Panics
/// Panics if the state on top has no entry at all for the left-hand side.
fn reduce(&mut self, ri:&usize)
{
    self.popped.clear();
    let production = &self.Rules[*ri];
    let lhs = production.lhs;
    let semantic_action = production.Ruleaction;
    let val = semantic_action(self);
    let exposed_state = self.stack[self.stack.len()-1].si;
    let transition = *self.RSM[exposed_state].get(lhs).expect("PARSER STATEMACHINE CORRUPTED");
    match transition {
        Stateaction::Gotonext(nsi) => {
            self.stack.push(StackedItem::new(nsi,val,self.linenum,self.column));
        }
        _ => {
            self.report("state transition table corrupted: no suitable action after reduce");
            self.stopparsing = true;
        }
    }
}
/// Returns `true` if an error has been reported since the error flag was last cleared.
pub fn error_occurred(&self) -> bool {self.err_occurred}
/// Wraps `e` in an `LBox` tagged with the parser's current line and column
/// and a freshly allocated unique id.
pub fn lb<T>(&self,e:T) -> LBox<T> {
    let uid = {
        let mut counter = self.gindex.borrow_mut();
        let current = *counter;
        *counter += 1;
        current
    };
    LBox::make(e,self.linenum,self.column,uid)
}
/// Like `lb`, but the resulting box is upcast to `LBox<dyn Any>`.
pub fn lba<T:'static>(&self,e:T) -> LBox<dyn Any> {
    let uid = {
        let mut counter = self.gindex.borrow_mut();
        let current = *counter;
        *counter += 1;
        current
    };
    let boxed = LBox::make(e,self.linenum,self.column,uid);
    LBox::upcast(boxed)
}
/// Wraps `e` in an `LRc` tagged with the parser's current line and column.
pub fn lrc<T>(&self,e:T) -> LRc<T> { LRc::new(e,self.linenum,self.column ) }
pub fn lrca<T:'static>(&'t self,e:T) -> LRc<dyn Any> { LRc::upcast(LRc::new(e,self.linenum,self.column )) }
/// Wraps `e` in an `LBox` whose location is taken from a popped item.
///
/// `i` counts backwards through the locations popped since the current
/// reduction started (`0` is the most recently popped item).  If fewer than
/// `i+1` items have been popped, the parser's current line and column are
/// used instead.  The box receives a freshly allocated unique id.
pub fn lbx<T>(&self,i:usize,e:T) -> LBox<T>
{
    let (ln, cl) = self.popped.iter().rev().nth(i).copied()
        .unwrap_or((self.linenum,self.column));
    let uid = {
        let mut counter = self.gindex.borrow_mut();
        let current = *counter;
        *counter += 1;
        current
    };
    LBox::make(e,ln,cl,uid)
}
/// Alias for `lbx`: boxes `e` with the location of the `i`-th most recently popped item.
pub fn lbox<T>(&self,i:usize,e:T) -> LBox<T> { self.lbx(i,e) }
/// Wraps `e` in an `LC` whose location is taken from a popped item.
///
/// `i` counts backwards through the locations popped since the current
/// reduction started (`0` is the most recently popped item); when `i` is
/// out of range the parser's current line and column are used.  The result
/// receives a freshly allocated unique id.
pub fn lc<T>(&self,i:usize,e:T) -> LC<T>
{
    let (ln, cl) = self.popped.iter().rev().nth(i).copied()
        .unwrap_or((self.linenum,self.column));
    let uid = {
        let mut counter = self.gindex.borrow_mut();
        let current = *counter;
        *counter += 1;
        current
    };
    LC::make(e,ln,cl,uid)
}
/// Wraps `e` in an `LRc` whose location is taken from a popped item.
///
/// `i` counts backwards through the locations popped since the current
/// reduction started (`0` is the most recently popped item); when `i` is
/// out of range the parser's current line and column are used.
pub fn lrcn<T>(&self,i:usize,e:T) -> LRc<T>
{
    let (ln, cl) = self.popped.iter().rev().nth(i).copied()
        .unwrap_or((self.linenum,self.column));
    LRc::new(e,ln,cl)
}
}
impl<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>> BaseParser<'t,AT,ET,TT>
{
/// Attempts to bring the parser back into a state from which parsing can
/// continue after a syntax error.
///
/// Three strategies are tried in turn, each only if the previous one left
/// no usable action (as judged by `iserror`):
///
/// 1. Error-symbol recovery (only when `Errsym` is non-empty): unwind the
///    stack to a state with an action on `Errsym`, perform any reductions
///    that action calls for, shift the error symbol, then skip input until
///    the new state has an action on the lookahead (or EOF is reached).
/// 2. Resynchronisation (only when `resynch` is non-empty): skip input up
///    to and including the next resynch symbol, then unwind the stack to a
///    state that has an action on the token that follows.
/// 3. Last resort: keep discarding tokens until the state on top of the
///    stack has an action on the lookahead.
///
/// `lookahead` is updated in place as tokens are skipped.  Returns the
/// action to resume with, or `None` if EOF was reached in the last phase
/// or no action could be found.
pub fn error_recover(&mut self, lookahead:&mut TerminalToken<'t,AT>) -> Option<Stateaction>
{
let mut erraction = None;
// ---- Strategy 1: recovery via the designated error symbol ----
if self.Errsym.len()>0 {
let errsym = self.Errsym;
// Scan from the top of the stack downwards; `spos` is set to `k` (ending
// the loop) once a state with an action on the error symbol is found.
let mut k = self.stack.len(); let mut spos = k+1;
while k>0 && spos>k
{
let ksi = self.stack[k-1].si;
erraction = self.RSM[ksi].get(errsym);
if let None = erraction {k-=1;} else {spos=k;}
// Found: drop everything above that state.  Then, as long as the action on
// the error symbol is a reduce, carry it out the same way `reduce` does.
} if spos==k { self.stack.truncate(k); } while let Some(Reduce(ri)) = erraction {
self.popped.clear();
let rulei = &self.Rules[*ri];
let ruleilhs = rulei.lhs; let val = (rulei.Ruleaction)(self);
let newtop = self.stack[self.stack.len()-1].si;
let gotonopt = self.RSM[newtop].get(ruleilhs);
match gotonopt {
Some(Gotonext(nsi)) => {
self.stack.push(StackedItem::new(*nsi,val,self.linenum,self.column));
// NOTE(review): after `abort` the loop is not left explicitly; the error
// action is simply looked up again below -- confirm this cannot repeat.
}, _ => {self.abort("recovery failed"); },
}
let tos=self.stack[self.stack.len()-1].si;
erraction = self.RSM[tos].get(self.Errsym).clone();
// Shift the error symbol (default value, located at the lookahead), then
// discard input until the new state can act on the lookahead or EOF is hit.
} if let Some(Shift(i)) = erraction { self.stack.push(StackedItem::new(*i,AT::default(),lookahead.line,lookahead.column));
while let None = self.RSM[*i].get(lookahead.sym) {
if lookahead.sym=="EOF" {break;}
*lookahead = self.tokenizer.next_tt();
} erraction = self.RSM[*i].get(lookahead.sym);
} }
// ---- Strategy 2: skip ahead to a resynchronisation symbol ----
if iserror(&erraction) && self.resynch.len()>0 {
while lookahead.sym!="EOF" &&
!self.resynch.contains(lookahead.sym) {
self.linenum = lookahead.line; self.column = lookahead.column; self.prev_position=self.position; self.position = self.tokenizer.position();
*lookahead = self.tokenizer.next_tt();
// If a resynch symbol (rather than EOF) stopped the scan, consume it too.
} if lookahead.sym!="EOF" {
self.linenum = lookahead.line; self.column = lookahead.column; self.prev_position=self.position; self.position=self.tokenizer.position();
*lookahead = self.tokenizer.next_tt();
}
// Unwind: the scan starts one below the top of the stack (`k` is len-1 and
// the state inspected is `stack[k-1]`); `position` is unused.
let mut k = self.stack.len()-1; let mut position = 0;
while k>0 && erraction==None
{
let ksi = self.stack[k-1].si;
erraction = self.RSM[ksi].get(lookahead.sym);
if let None=erraction {k-=1;}
} match erraction {
None => {}, _ => { self.stack.truncate(k);}, } }
// ---- Strategy 3: discard tokens until the top state accepts one ----
// `eofcx` counts EOF tokens seen; the loop gives up at the first one.
let mut eofcx = 0;
while iserror(&erraction) && eofcx<1 { self.linenum = lookahead.line; self.column = lookahead.column; self.prev_position=self.position; self.position=self.tokenizer.position();
*lookahead = self.tokenizer.next_tt();
if lookahead.sym=="EOF" {eofcx+=1;}
let csi =self.stack[self.stack.len()-1].si;
erraction = self.RSM[csi].get(lookahead.sym);
// Hand back a copy of the action unless EOF was reached while discarding.
} match erraction {
Some(act) if eofcx<1 => Some(*act),
_ => None,
} }
/// Resets the parser for another run: empties the parse stack, clears the
/// error flag and replaces both the owned and the shared external state
/// with `ET::default()`.
///
/// # Panics
/// Panics if the shared external state is currently borrowed elsewhere.
pub fn reset(&mut self) {
    self.stack.clear();
    self.err_occurred = false;
    self.exstate = ET::default();
    *self.shared_state.borrow_mut() = ET::default();
}
/// Returns the error messages collected so far in the error-report buffer,
/// or the empty string when buffering is turned off.
pub fn get_err_report(&self) -> &str {
self.err_report.as_deref().unwrap_or("")
}
/// Turns buffering of error messages on or off.
///
/// With `onof == true` a fresh, empty buffer is installed (discarding any
/// previously collected text) and subsequent messages are appended to it;
/// with `false` the buffer is dropped and messages go to stderr.
pub fn set_err_report(&mut self, onof:bool) {
    self.err_report = if onof { Some(String::new()) } else { None };
}
}
/// Hook through which the parser's main loop reports syntax errors.
///
/// The main loop is generic over an implementation of this trait, so the
/// way errors are presented can be exchanged without touching the parser.
pub trait ErrReportMaker<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>> {
/// Called when the parser has no valid action for `lookahead`.
/// `erropt` is a copy of the table entry found for the lookahead, if any.
fn err_reporter(&mut self, parser:&mut BaseParser<'t,AT,ET,TT>, lookahead:&TerminalToken<AT>, erropt:&Option<Stateaction>);
/// Reports `msg` through the parser; the default simply calls `parser.report(msg)`.
fn report_err(&self, parser:&mut BaseParser<'t,AT,ET,TT>, msg:&str) { parser.report(msg) }
}
impl<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>> ErrReportMaker<'t,AT,ET,TT> for StandardReporter
{
fn err_reporter(&mut self, parser:&mut BaseParser<'t,AT,ET,TT>, lookahead:&TerminalToken<AT>, erropt:&Option<Stateaction>)
{
let mut wresult:std::io::Result<()> = Err(std::io::Error::new(std::io::ErrorKind::Other,"")); let cstate = parser.stack[parser.stack.len()-1].si; let mut actionopt = if let Some(act)=erropt {Some(act)} else {None};
let lksym = &lookahead.sym[..];
if parser.Symset.contains(lksym) {
if let None=actionopt {
actionopt = parser.RSM[cstate].get("ANY_ERROR");
}
} else {
actionopt = parser.RSM[cstate].get("ANY_ERROR");
} let mut errmsg = if let Some(Error(em)) = &actionopt {
format!("unexpected symbol '{}' on line {}, column {}: ** {} ** ..",lksym,lookahead.line,lookahead.column,em.trim())
} else {format!("unexpected symbol '{}' on line {}, column {} .. ",lksym,lookahead.line,lookahead.column)};
let srcline = parser.tokenizer.current_line();
if (srcline.len()>0) {
errmsg.push_str("\n >>");
errmsg.push_str(srcline);
errmsg.push_str("\n");
let mut cln = lookahead.column+2;
while cln>0 { errmsg.push(' '); cln-=1; }
let mut tokenlen = lookahead.sym.len();
if is_alphanum(&lookahead.sym) {tokenlen = 3;}
while tokenlen>0 { errmsg.push('^'); tokenlen-=1; }
errmsg.push('\n');
}
parser.report(&errmsg);
if self.training { let csym = lookahead.sym.to_owned();
let mut inp = String::from("");
if let None=self.scriptinopt { if let Some(outfd1) = &self.scriptoutopt {
let mut outfd = outfd1;
print!("\n>>>TRAINER: if this message is not adequate (for state {}), enter a replacement (default no change): ",cstate);
let rrrflush = io::stdout().flush();
if let Ok(n) = io::stdin().read_line(&mut inp) {
if inp.len()>5 && parser.Symset.contains(lksym) {
print!(">>>TRAINER: should this message be given for all unexpected symbols in the current state? (default yes) ");
let rrrflush2 = io::stdout().flush();
let mut inp2 = String::new();
if let Ok(n) = io::stdin().read_line(&mut inp2) {
if inp2.trim()=="no" || inp2.trim()=="No" {
wresult = write!(outfd,"{}\t{}\t{} ::: {}\n",lookahead.line,lookahead.column,&csym,inp.trim());
self.trained.insert((cstate,csym),inp);
}
else { wresult = write!(outfd,"{}\t{}\t{} ::: {}\n",lookahead.line,lookahead.column,"ANY_ERROR",inp.trim());
self.trained.insert((cstate,String::from("ANY_ERROR")),inp);
}
} } else if inp.len()>5 && !parser.Symset.contains(lksym) {
wresult = write!(outfd,"{}\t{}\t{} ::: {}\n",lookahead.line,lookahead.column,"ANY_ERROR",inp.trim());
self.trained.insert((cstate,String::from("ANY_ERROR")),inp);
}
} }} else { if let Some(brfd) = &mut self.scriptinopt {
let mut scin = brfd;
let mut readn = 0;
while readn < 1
{
inp = String::new();
match scin.read_line(&mut inp) {
Ok(n) if n>1 && &inp[0..1]!="#" && inp.trim().len()>0 => {readn=n;},
Ok(n) if n>0 => { readn=0; }, _ => {readn = 1; } } if readn>1 { let inpsplit:Vec<&str> = inp.split_whitespace().collect();
if inpsplit.len()>4 && inpsplit[3].trim()==":::" {
let inline = inpsplit[0].trim().parse::<usize>().unwrap();
let incolumn = inpsplit[1].trim().parse::<usize>().unwrap();
let insym = inpsplit[2].trim();
if parser.linenum==inline && parser.column==incolumn {
if &csym==insym || insym=="ANY_ERROR" {
let posc = inp.find(":::").unwrap()+4;
println!("\n>>>Found matching entry from training script for {}, error message: {}",insym,&inp[posc..]);
self.trained.insert((cstate,String::from(insym)),String::from(&inp[posc..]));
} } } } } }} }
}}
impl<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>> BaseParser<'t,AT,ET,TT>
{
/// The parser's main loop, generic over the error reporter `err_handler`.
///
/// Starts from a fresh stack holding only state 0 and repeatedly looks up
/// the action for the current state and lookahead: shift, reduce, or
/// accept.  When the lookahead has no entry (and is not EOF) the state's
/// `"_WILDCARD_TOKEN_"` entry is tried and the tokenizer is asked to
/// transform the token.  On an error the reporter is invoked and
/// `error_recover` decides how to continue; if recovery fails the loop
/// ends.  Returns the value on top of the stack at acceptance, or
/// `AT::default()` if the input was never accepted.
fn parse_core<R:ErrReportMaker<'t,AT,ET,TT>>(&mut self, err_handler:&mut R) -> AT
{
    self.stack.clear();
    self.err_occurred = false;
    let mut result = AT::default();
    self.stack.push(StackedItem::new(0,AT::default(),0,0));
    self.stopparsing = false;
    // An exhausted tokenizer is treated as an immediate end of input.
    let eof_token = TerminalToken::new("EOF",AT::default(),0,0);
    let mut lookahead = self.tokenizer.nextsym().unwrap_or(eof_token);

    while !self.stopparsing {
        let top = self.stack.len()-1;
        self.linenum = self.stack[top].line;
        self.column = self.stack[top].column;
        let currentstate = self.stack[top].si;

        let mut actionopt = self.RSM[currentstate].get(lookahead.sym);
        if actionopt.is_none() && lookahead.sym!="EOF" {
            actionopt = self.RSM[currentstate].get("_WILDCARD_TOKEN_");
            lookahead = self.tokenizer.transform_wildcard(lookahead);
        }
        // Copy the entry so the table is no longer borrowed while the parser mutates.
        let actclone: Option<Stateaction> = actionopt.copied();

        let action = if iserror(&actionopt) {
            self.err_occurred = true;
            err_handler.err_reporter(self,&lookahead,&actclone);
            match self.error_recover(&mut lookahead) {
                Some(act) => act,
                None => { self.stopparsing = true; break; }
            }
        } else {
            actclone.unwrap()
        };

        match action {
            Shift(nextstate) => {
                lookahead = self.shift(nextstate,lookahead);
            }
            Reduce(rulei) => {
                self.reduce(&rulei);
            }
            Accept => {
                self.stopparsing = true;
                match self.stack.pop() {
                    Some(top_item) => { result = top_item.value; }
                    None => { self.err_occurred = true; }
                }
            }
            _ => {}
        }
    }
    result
}
/// Parses the tokenizer's input, reporting errors through a default
/// `StandardReporter`, and returns the resulting semantic value.
pub fn parse(&mut self) -> AT
{
    let mut reporter = StandardReporter::new();
    let outcome = self.parse_core(&mut reporter);
    outcome
}
/// Parses in interactive training mode.
///
/// Errors are reported through a `StandardReporter` created for
/// interactive training on `parserfile`; after the parse the reporter is
/// asked to augment `parserfile` with what was learned.  A failure to do
/// so is printed to stderr and does not affect the returned value.
pub fn parse_train(&mut self, parserfile:&str) -> AT
{
    let mut trainer = StandardReporter::new_interactive_training(parserfile);
    let outcome = self.parse_core(&mut trainer);
    match trainer.augment_training(parserfile) {
        Err(m) => eprintln!("Error in augmenting parser: {:?}",m),
        Ok(_) => {}
    }
    outcome
}
/// Parses in script-driven training mode.
///
/// Errors are reported through a `StandardReporter` created for training
/// `parserfile` from `scriptfile`; after the parse the reporter is asked to
/// augment `parserfile`.  The parse result is discarded.  A failure to
/// augment is printed to stderr, and a note is printed to stdout when the
/// parse finished without errors.
pub fn train_from_script(&mut self, parserfile:&str, scriptfile:&str)
{
    let mut trainer = StandardReporter::new_script_training(parserfile,scriptfile);
    let _outcome = self.parse_core(&mut trainer);
    match trainer.augment_training(parserfile) {
        Err(m) => eprintln!("Error in augmenting parser: {:?}",m),
        Ok(_) => {}
    }
    if !self.err_occurred {
        println!("no errors encountered during parsing");
    }
}
}#[cfg(feature = "generator")]
/// Strips one pair of enclosing square brackets from `s`.
///
/// If `s` starts with `[` and ends with `]`, the text between them is
/// returned with surrounding whitespace trimmed; any other string is
/// returned unchanged.
fn checkboxlabel(s:&str) -> &str
{
    s.strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))
        .map(str::trim)
        .unwrap_or(s)
}
/// Tells whether `x` looks like an identifier: non-empty, starting with an
/// underscore or an alphabetic character, and continuing with underscores
/// or alphanumeric characters only.
fn is_alphanum(x:&str) -> bool
{
    let mut rest = x.chars();
    match rest.next() {
        Some(head) if head == '_' || head.is_alphabetic() => {
            rest.all(|c| c == '_' || c.is_alphanumeric())
        }
        // Empty string, or a first character that cannot start an identifier.
        _ => false,
    }
}