// module for generating the LR finite state machine
#![allow(dead_code)]
#![allow(unused_variables)]
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]
#![allow(unused_parens)]
#![allow(unused_mut)]
#![allow(unused_assignments)]
#![allow(unused_doc_comments)]
#![allow(unused_imports)]
use std::collections::{HashMap,HashSet,BTreeSet};
use std::cell::{RefCell,Ref,RefMut};
//use std::rc::Rc;
use std::hash::{Hash,Hasher};
use std::mem;
use crate::grammar_processor::*;
use crate::lr_statemachine::{Stateaction,Statemachine,printrulela,add_action,sr_resolve};
use crate::lr_statemachine::Stateaction::*;
#[derive(Copy,Clone,PartialEq,Eq,Hash,Debug)]
pub struct LALRitem(usize,usize); // rule index, position of the dot
// use Grammar.Symbols.len()+1 for the hash mark
//propagation stores for each item if lookaheads should be propagated to
//(symindex,item). symindex identifies nextstate: FSM[si][symindex] lookup.
// don't know index yet because it's not pre-generated.
//Only kernel items will have entries (?)
pub struct LALRState
{
index: usize, // index into FSM vector
items: HashSet<LALRitem>,
lookaheads: HashMap<LALRitem,RefCell<HashSet<usize>>>, //las for each item
propagation: HashMap<LALRitem,HashSet<(usize,LALRitem)>>,
lhss: BTreeSet<usize>, // set of lhs of rules in the state
}
impl LALRState
{
pub fn new(hint:usize) -> Self
{
LALRState {
index:0,
items:HashSet::with_capacity(hint),
lookaheads:HashMap::with_capacity(hint),
propagation:HashMap::new(),
lhss: BTreeSet::new(),
}
}
fn insert(&mut self,item:LALRitem,/*las:&HashSet<usize>,*/ lhs:usize) -> bool
{
let inserted = self.items.insert(item);
if inserted {
self.lhss.insert(lhs);
if self.lookaheads.get(&item).is_none()
{self.lookaheads.insert(item,RefCell::new(HashSet::new()));}
if self.propagation.get(&item).is_none()
{self.propagation.insert(item,HashSet::new());}
}
// let lahs = self.lookaheads.get_mut(&item).unwrap();
// let mut lainserted = false;
// for la in las.iter() { lainserted = lahs.insert(*la) || lainserted; }
// lainserted || inserted
inserted
}
fn additem(&mut self,item:LALRitem, lhs:usize) -> bool
{
let inserted = self.items.insert(item);
self.lhss.insert(lhs);
inserted
}
pub fn hashval_lalr(&mut self) -> usize // note: NOT UNIQUE
{
let mut key=self.items.len() + self.lhss.len()*1000000;
let limit = usize::MAX/1000 -1;
let mut cx = 8;
for s in &self.lhss {key+=1000*s; cx-=1; if cx==0 || key>=limit {break;}}
key
}
pub fn state_eq(&self, state2:&LALRState) -> bool
{
// ignore index - set later
if self.items.len() != state2.items.len() {return false;}
for item in self.items.iter() {
if !state2.items.contains(item) {return false;}
/* ignore lookaheads - there can only be one state with same core
let las2 = state2.lookaheads.get(item).borrow();
let las1 = self.lookaheads.get(item).borrow();
if las1.is_some() && las2.is_some() {
let la2set = las2.unwrap();
for la in las1.unwrap() {
if !la2set.contains(la) {return false;}
}
} else if las1.is_some() || las2.is_some() {return false;}
*/
}// for each item
true
}// state_eq
// no need for core_eq? when adding state, propagate immediately.
// but how do we know what to propagate?
}//impl LALRState
impl PartialEq for LALRState
{
fn eq(&self, other:&LALRState) -> bool
{ self.state_eq(&other) }
fn ne(&self, other:&LALRState) ->bool
{ !self.state_eq(&other) }
}
impl Eq for LALRState {}
impl Grammar // Grammar additions
{
// First set of a sequence of symbols given set of lookaheads
pub fn Firstseqla(&self, Gs:&[Gsym], las:&HashSet<usize>) -> HashSet<usize>
{
let mut Fseq = HashSet::with_capacity(2);
let mut i = 0;
let mut nullable = true;
while nullable && i<Gs.len()
{
if (Gs[i].terminal) {Fseq.insert(Gs[i].index); nullable=false; }
else // Gs[i] is non-terminal
{
//println!("symbol {}, index {}", &Gs[i].sym, Gs[i].index);
let firstgsym = self.First.get(&Gs[i].index).unwrap();
for s in firstgsym { Fseq.insert(*s); }
if !self.Nullable.contains(&Gs[i].sym) {nullable=false;}
}
i += 1;
}//while
if nullable {
for la in las {Fseq.insert(*la);}
}
Fseq
}//FirstSeqb
//determine if a sequence of symbols is nullable
pub fn Nullableseq(&self, Gs:&[Gsym]) -> bool
{
for g in Gs {
if g.terminal || !self.Nullable.contains(&g.sym) {return false;}
}
return true;
}
}//impl Grammar
pub struct LALRMachine
{
pub Gmr: Grammar,
pub States: Vec<LALRState>,
pub Statelookup: HashMap<usize,BTreeSet<usize>>,
pub FSM: Vec<HashMap<usize,Stateaction>>,
sr_conflicts:HashMap<(usize,usize),(bool,bool)>,
}
impl LALRMachine
{
pub fn new(gram:Grammar) -> Self
{
LALRMachine {
Gmr: gram,
States: Vec::with_capacity(1024), // reserve 1K states
Statelookup: HashMap::with_capacity(1024),
FSM: Vec::with_capacity(8*1024),
sr_conflicts:HashMap::new(),
}
}//new
pub fn to_statemachine(self) -> Statemachine // transfer before writeparser
{
Statemachine {
Gmr: self.Gmr,
States: Vec::new(),
Statelookup:HashMap::new(),
FSM: self.FSM,
lalr:true,
Open:Vec::new(),
sr_conflicts:HashMap::new(),
}
}//to_statemachine
// generate the GOTO sets of a state with index si, creates new states.
// this verions does not add reduce actions since lookaheads needs to be
// propagated first.
fn makegotos(&mut self, si:usize)
{
let ref state = self.States[si];
// key to following hashmap is the next symbol's index after pi (the dot)
// the values of the map are the "kernels" of the next state to generate
let mut newstates:HashMap<usize,LALRState> = HashMap::with_capacity(128);
let mut keyvec:Vec<usize> = Vec::new(); //keys of newstates
for item in &state.items
{
let (itemri,itempi) = (item.0,item.1);
let rule = self.Gmr.Rules.get(itemri).unwrap(); // rule ref
if itempi<rule.rhs.len() { // can goto (dot before end of rule)
let nextsymi = rule.rhs[itempi].index;
if let None = newstates.get(&nextsymi) {
newstates.insert(nextsymi,LALRState::new(self.Gmr.Symbols.len()));
keyvec.push(nextsymi);
}
let symstate = newstates.get_mut(&nextsymi).unwrap();
// add new item to states associated with nextsymi
let newitem = LALRitem(itemri,itempi+1); //kernel item in new state
let lhssymi = self.Gmr.Rules[itemri].lhs.index;
symstate.insert(newitem,lhssymi);
// handled during propagation phase
//let las = state.lookaheads.get(&item).unwrap().borrow();
//symstate.lookaheads.insert(newitem,RefCell::new(las.clone())); //clone by alg
// SHIFT/GOTONEXT actions added by addstate function
}//can goto
/* NO REDUCE/ACCEPT UNTIL LATER
*/
}// for each item
// form closures for all new states and add to self.States list
for key in keyvec //keyvec must be separate to avoid borrow error
{
let mut kernel = newstates.remove(&key).unwrap();
lalrclosure(&mut kernel,&self.Gmr);
self.addstate(kernel,si,key); //only place addstate called
}
}//makegotos
// addstate ***
// psi is previous state index, nextsym is next symbol (may do lalr)
// state already closed by makegotos
fn addstate(&mut self, mut state:LALRState, psi:usize, nextsymi:usize)
{ let nextsym = &self.Gmr.Symbols[nextsymi].sym;
let newstateindex = self.States.len(); // index of new state
state.index = newstateindex;
let lookupkey = state.hashval_lalr();
if let None=self.Statelookup.get(&lookupkey) {
self.Statelookup.insert(lookupkey,BTreeSet::new());
}
let indices = self.Statelookup.get_mut(&lookupkey).unwrap();
let mut toadd = newstateindex; // defaut is add new state (will push)
for i in indices.iter() //lalr version only compares core items
{
if &state==&self.States[*i] {toadd=*i; break; } // state i exists
}
if self.Gmr.tracelev>3 {println!("Transition to state {} from state {}, symbol {}..",toadd,psi,nextsym);}
if toadd==newstateindex { // add new state
indices.insert(newstateindex); // add to StateLookup index hashset
self.States.push(state);
self.FSM.push(HashMap::with_capacity(128)); // always add row to fsm at same time
}// add new state
// add to- or change FSM TABLE ... only Shift or Gotnext added here.
let gsymbol = &self.Gmr.Symbols[nextsymi]; //self.Gmr.getsym(nextsym).
let newaction = if gsymbol.terminal {Stateaction::Shift(toadd)}
else {Stateaction::Gotonext(toadd)};
add_action(&mut self.FSM, &self.Gmr, newaction,psi,nextsymi,&mut self.sr_conflicts);
// reduce rules are only added with . at end, nextsymbol terminal,
// so a "reduce-gotonext" conflict is not possible
} //addstate
// called after all states created, propagations marked.
fn propagate_lookaheads(&mut self)
{
let mut changed = true;
while changed
{
changed = false;
for si in 0..self.States.len()
{
/*let mut items = Vec::with_capacity(self.States[si].items.len());
for item in &self.States[si].items {
items.push(*item);
}*/
for item in &self.States[si].items { //-borrow checker!
let (ri,pi) = (item.0, item.1);
if pi>0 || ri == self.Gmr.Rules.len()-1 { // kernel item
let itemlas = self.States[si].lookaheads.get(&item).unwrap().borrow();
let props = self.States[si].propagation.get(&item).unwrap();
for (nextsymi,nextitem) in props.iter() {
let mut nextstate = self.States.len();
if nextsymi>&self.Gmr.Symbols.len() { nextstate= si; }//dummy
else {
let nextaction = self.FSM[si].get(nextsymi);
match nextaction {
Some(Shift(nsi))|Some(Gotonext(nsi)) => {nextstate=*nsi;},
_ => {panic!("THIS SHOULD NOT HAPPEN");},
}//match
}
//println!("{}: found nextitem ({},{}) in expected state {}!",self.States[nextstate].items.contains(nextitem),nextitem.0,nextitem.1,nextstate);
let nextlasopt = self.States[nextstate].lookaheads.get(nextitem);
if let Some(rfcell)=nextlasopt {
if nextstate!=si || nextitem!=item { //runtime borrow check
let mut nextlas = rfcell.borrow_mut();
for la in itemlas.iter() {
//println!("propagating lookahead {} to state {} from state {}",&self.Gmr.Symbols[*la].sym, nextstate, si);
changed =nextlas.insert(*la)||changed;
}
}
}
else {println!("NO lookahead ENTRY!");}
} // each propagation mark
}//kernel item
}//for each item in state
}//for each state
} // which changed
}//propagate_lookaheads
// set reduce actions and check for conflicts
fn set_reduce(&mut self)
{
for si in 0..self.States.len()
{
for item in &self.States[si].items
{
let (ri,pi) = (item.0,item.1);
if pi==self.Gmr.Rules[ri].rhs.len() { //dot at end of rhs
let itemlas = self.States[si].lookaheads.get(item).unwrap().borrow();
for la in itemlas.iter() { // for each lookahead
//println!("adding reduce/accept rule {}, la {}",ri,&self.Gmr.Symbols[*la].sym);
let isaccept = (ri == self.Gmr.Rules.len()-1 && la==&(self.Gmr.Symbols.len()-1));
if isaccept {
add_action(&mut self.FSM,&self.Gmr,Accept,si,*la,&mut self.sr_conflicts);
}
else {
add_action(&mut self.FSM,&self.Gmr,Reduce(ri),si,*la,&mut self.sr_conflicts);
}
} // for each la
}//if reduce situation
} // for each item
} // for each state
}//set_reduce
pub fn generatefsm(&mut self)
{
// create initial state, closure from initial item:
// START --> .topsym EOF
let mut startstate=LALRState::new(self.Gmr.Rules.len());
let EOFi = self.Gmr.Symbols.len()-1;
let STARTi = self.Gmr.Symbols.len()-2;
let startrule = self.Gmr.Rules.len()-1;
let startitem = LALRitem(startrule,0);
startstate.insert(startitem,STARTi);
startstate.lookaheads.get(&startitem).unwrap().borrow_mut().insert(EOFi);
lalrclosure(&mut startstate,&self.Gmr);
self.States.push(startstate); // add start state, first state
self.FSM.push(HashMap::with_capacity(128)); // row for state
// now generate closure for state machine (not individual states)
let mut closed:usize = 0;
while closed<self.States.len()
{
self.makegotos(closed);
closed += 1;
}//while not closed
self.propagate_lookaheads();
self.set_reduce();
}//generatefsm
}//impl LALRMachine
//AXIOM: if item exists, an entry must exist for it in lookaheads
// generate closure from single kernel item, merge into existing state,
// sets propagation table
fn closure1(kernel:LALRitem, state: &mut LALRState,Gmr:&Grammar)
{// assuming kernel is a kernel item and not? in state
//use a vector as well as state.items
let (kri,kpi) = (kernel.0,kernel.1);
let ruleki = &Gmr.Rules[kri];
let lhski = ruleki.lhs.index;
//state.insert(kernel,lhski); // insert kernel into state? (below)
let mut propagate = false;
let lowerlimit = if kpi<ruleki.rhs.len() {kpi+1} else {ruleki.rhs.len()};
let dummy = Gmr.Symbols.len()+1; // distinguish from real symbols (#)
let mut kernlookaheads =Gmr.Firstseq(&ruleki.rhs[lowerlimit..],dummy);
let propagate = kernlookaheads.remove(&dummy);
let mut closure = vec![kernel];
let mut closed = 0;
while closed < closure.len()
{
let item = closure[closed];
let (ri,pi) = (item.0,item.1);
let rulei = &Gmr.Rules[ri];
let lhsi = rulei.lhs.index;
state.insert(item,lhsi); // insert into state here
let kpropagation = state.propagation.get_mut(&kernel).unwrap();
closed += 1;
if pi<rulei.rhs.len() {// find .X
let Xsym = &rulei.rhs[pi];
if propagate {
kpropagation.insert((Xsym.index,LALRitem(ri,pi+1)));
}// insert into propagation table
if !Xsym.terminal { // X is non-terminal, add closure items
for rulent in Gmr.Rulesfor.get(&Xsym.sym).unwrap() {
let newitem = LALRitem(*rulent,0);
closure.push(newitem); // add to "frontier"
}// for each rule of this non-terminal X
}//X nonterminal
}// pi not at right end
}//while !closed
}//closure1
/*
fn closure1(kernelitem:LALRitem, state: &mut LALRState,Gmr:&Grammar)
{// assuming kernelitem is a kernel item and not? in state
//use a vector as well as state.items
let (kri,kpi) = (kernelitem.0,kernelitem.1);
let ruleki = &Gmr.Rules[kri];
let lhski = ruleki.lhs.index;
state.insert(kernelitem,lhski); // insert kernel into state? (below)
let lowerlimit = if kpi<ruleki.rhs.len() {kpi+1} else {ruleki.rhs.len()};
let dummy = Gmr.Symbols.len()+1; // distinguish from real symbols (#)
let mut klookaheads = state.lookaheads.get(&kernelitem).unwrap().borrow_mut();
klookaheads.insert(dummy);
// assume kernel item already inserted into state by makegotos
let mut closure = vec![kernelitem];
let mut closed = 0;
while closed < closure.len()
{
let item = closure[closed];
let (ri,pi) = (item.0,item.1);
let rulei = &Gmr.Rules[ri];
let lhsi = rulei.lhs.index;
if closed!=0 {state.insert(item,lhsi);} // insert into state here
closed += 1;
if pi<rulei.rhs.len() {// find .X
let Xsym = &rulei.rhs[pi]; // symbol X of dragon book
let propagate = state.lookaheads.get(&item).unwrap().borrow().contains(&dummy);
if propagate {
let kpropagation = state.propagation.get_mut(&kernelitem).unwrap();
kpropagation.insert((Xsym.index,LALRitem(ri,pi+1)));
}// insert into propagation table
if !Xsym.terminal { // X is non-terminal, add closure items
let itemlas = state.lookaheads.get(&item).unwrap().borrow();
let mut Xlookaheads = Gmr.Firstseq(&rulei.rhs[pi+1..],dummy);
if Xlookaheads.remove(&dummy) {
for ila in itemlas.iter() {Xlookaheads.insert(*ila);}
}
for rulent in Gmr.Rulesfor.get(&Xsym.sym).unwrap() {
let newitem = LALRitem(*rulent,0);
closure.push(newitem); // add to "frontier"
// state.insert(newitem,Gmr.Rules[*rulent].lhs.index);
// if state.lookaheads.get(&newitem).is_none() {
// state.lookaheads.insert(newitem,RefCell::new(HashSet::new()));
// }
let mut slas = state.lookaheads.get(&newitem).unwrap().borrow_mut();
for xla in Xlookaheads.iter() {slas.insert(*xla);}
}// for each rule of this non-terminal X
}//X nonterminal
}// pi not at right end
}//while !closed
}//closure1
*/
// calls closure on all kernel elements.
fn lalrclosure(kernel:&mut LALRState, Gmr:&Grammar)
{
let mut kitems = Vec::new();
for kitem in kernel.items.iter() { kitems.push(*kitem); } // copies
for kitem in kitems { closure1(kitem,kernel,Gmr); }
}//lalrclosure generation
/*
// reform lookaheads for non-kernel items.
fn reclose(state:&LALRState, Gmr:&Grammar)
{
for item in state.items {
if (pi>0) || ri==Gmr.Rules.len()-1 { // kernel item
}
}//for each item
}
*/