rustlr 0.3.0

LR/LALR parser generator that can automatically create abstract syntax trees
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
// module for generating the LR finite state machine
#![allow(dead_code)]
#![allow(unused_variables)]
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]
#![allow(unused_parens)]
#![allow(unused_mut)]
#![allow(unused_assignments)]
#![allow(unused_doc_comments)]
#![allow(unused_imports)]
use std::collections::{HashMap,HashSet,BTreeSet};
use std::cell::{RefCell,Ref,RefMut};
//use std::rc::Rc;
use std::hash::{Hash,Hasher};
use std::mem;
use crate::grammar_processor::*;
use crate::lr_statemachine::{Stateaction,Statemachine,printrulela,add_action,sr_resolve};
use crate::lr_statemachine::Stateaction::*;

#[derive(Copy,Clone,PartialEq,Eq,Hash,Debug)]
pub struct LALRitem(usize,usize);   // rule index, position of the dot

// use Grammar.Symbols.len()+1 for the hash mark

//propagation stores for each item if lookaheads should be propagated to
//(symindex,item). symindex identifies nextstate: FSM[si][symindex] lookup.
// don't know index yet because it's not pre-generated.
//Only kernel items will have entries (?)
pub struct LALRState
{
  index: usize, // index into FSM vector
  items: HashSet<LALRitem>,
  lookaheads: HashMap<LALRitem,RefCell<HashSet<usize>>>, //las for each item
  propagation: HashMap<LALRitem,HashSet<(usize,LALRitem)>>,
  lhss: BTreeSet<usize>, // set of lhs of rules in the state
}
impl LALRState
{
  pub fn new(hint:usize) -> Self
  {
    LALRState {
      index:0,
      items:HashSet::with_capacity(hint),
      lookaheads:HashMap::with_capacity(hint),
      propagation:HashMap::new(),
      lhss: BTreeSet::new(),
    }
  }

  fn insert(&mut self,item:LALRitem,/*las:&HashSet<usize>,*/ lhs:usize) -> bool
  {
     let inserted = self.items.insert(item);
     if inserted {
       self.lhss.insert(lhs);     
       if self.lookaheads.get(&item).is_none()
        {self.lookaheads.insert(item,RefCell::new(HashSet::new()));}
       if self.propagation.get(&item).is_none()
        {self.propagation.insert(item,HashSet::new());}
     }
//     let lahs = self.lookaheads.get_mut(&item).unwrap();
//     let mut lainserted = false;
//     for la in las.iter() { lainserted = lahs.insert(*la) || lainserted; }
//     lainserted || inserted
    inserted
  }

  fn additem(&mut self,item:LALRitem, lhs:usize) -> bool
  {
     let inserted = self.items.insert(item);
     self.lhss.insert(lhs);
     inserted
  }  

  pub fn hashval_lalr(&mut self) -> usize  // note: NOT UNIQUE
  {
    let mut key=self.items.len() + self.lhss.len()*1000000;    
    let limit = usize::MAX/1000 -1;
    let mut cx = 8;
    for s in &self.lhss {key+=1000*s; cx-=1; if cx==0 || key>=limit {break;}}
    key
  }

  pub fn state_eq(&self, state2:&LALRState) -> bool
  {
     // ignore index - set later
     if self.items.len() != state2.items.len() {return false;}
     for item in self.items.iter() {
       if !state2.items.contains(item) {return false;}
       /*  ignore lookaheads - there can only be one state with same core
       let las2 = state2.lookaheads.get(item).borrow();
       let las1 = self.lookaheads.get(item).borrow();
       if las1.is_some() && las2.is_some() {
         let la2set = las2.unwrap();
         for la in las1.unwrap() {
           if !la2set.contains(la) {return false;}
         }
       } else if las1.is_some() || las2.is_some() {return false;}
       */
     }// for each item
     true
  }// state_eq
  // no need for core_eq?  when adding state, propagate immediately.
  // but how do we know what to propagate?
}//impl LALRState
impl PartialEq for LALRState
{
   fn eq(&self, other:&LALRState) -> bool
   {  self.state_eq(&other) }
   fn ne(&self, other:&LALRState) ->bool
   {  !self.state_eq(&other) }
}
impl Eq for LALRState {}

impl Grammar  // Grammar additions
{
  // First set of a sequence of symbols given set of lookaheads
  pub fn Firstseqla(&self, Gs:&[Gsym], las:&HashSet<usize>) -> HashSet<usize>
  {
     let mut Fseq = HashSet::with_capacity(2);
     let mut i = 0;
     let mut nullable = true;
     while nullable && i<Gs.len() 
     {
         if (Gs[i].terminal) {Fseq.insert(Gs[i].index); nullable=false; }
	 else  // Gs[i] is non-terminal
         {
            //println!("symbol {}, index {}", &Gs[i].sym, Gs[i].index);
            let firstgsym = self.First.get(&Gs[i].index).unwrap();
	    for s in firstgsym { Fseq.insert(*s); }
	    if !self.Nullable.contains(&Gs[i].sym) {nullable=false;}
         }
	 i += 1;
     }//while
     if nullable {
      for la in las {Fseq.insert(*la);}
     }
     Fseq
  }//FirstSeqb
  //determine if a sequence of symbols is nullable
  
  pub fn Nullableseq(&self, Gs:&[Gsym]) -> bool
  {
     for g in Gs {
       if g.terminal || !self.Nullable.contains(&g.sym) {return false;}
     }
     return true;
  }
}//impl Grammar

pub struct LALRMachine
{
   pub Gmr: Grammar,
   pub States: Vec<LALRState>, 
   pub Statelookup: HashMap<usize,BTreeSet<usize>>,
   pub FSM: Vec<HashMap<usize,Stateaction>>,
   sr_conflicts:HashMap<(usize,usize),(bool,bool)>,
}
impl LALRMachine
{
  pub fn new(gram:Grammar) -> Self
  { 
       LALRMachine {
          Gmr: gram,
          States: Vec::with_capacity(1024), // reserve 1K states
          Statelookup: HashMap::with_capacity(1024),
          FSM: Vec::with_capacity(8*1024),
          sr_conflicts:HashMap::new(),
       }
  }//new
  pub fn to_statemachine(self) -> Statemachine // transfer before writeparser
  {
     Statemachine {
       Gmr: self.Gmr,
       States: Vec::new(),
       Statelookup:HashMap::new(),
       FSM: self.FSM,
       lalr:true,
       Open:Vec::new(),
       sr_conflicts:HashMap::new(),
     }
  }//to_statemachine
  
// generate the GOTO sets of a state with index si, creates new states.
// this verions does not add reduce actions since lookaheads needs to be
// propagated first.
  fn makegotos(&mut self, si:usize)
  {
     let ref state = self.States[si];
     // key to following hashmap is the next symbol's index after pi (the dot)
     // the values of the map are the "kernels" of the next state to generate
     let mut newstates:HashMap<usize,LALRState> = HashMap::with_capacity(128);
     let mut keyvec:Vec<usize> = Vec::new(); //keys of newstates
     for item in &state.items
     {
       let (itemri,itempi) = (item.0,item.1);
       let rule = self.Gmr.Rules.get(itemri).unwrap(); // rule ref
       if itempi<rule.rhs.len() { // can goto (dot before end of rule)
          let nextsymi = rule.rhs[itempi].index;
          if let None = newstates.get(&nextsymi) {
             newstates.insert(nextsymi,LALRState::new(self.Gmr.Symbols.len()));
             keyvec.push(nextsymi);
          }
          let symstate = newstates.get_mut(&nextsymi).unwrap();
          // add new item to states associated with nextsymi
          let newitem = LALRitem(itemri,itempi+1); //kernel item in new state
          
          let lhssymi = self.Gmr.Rules[itemri].lhs.index;
          symstate.insert(newitem,lhssymi); // won't overwrite
          // handled during propagation phase?
          /*
          let las = state.lookaheads.get(&item).unwrap().borrow().clone();
          let mut slas = symstate.lookaheads.get(&newitem).unwrap().borrow_mut();
          for la in las {
            if la<self.Gmr.Symbols.len() { slas.insert(la); }
          }
          */
          //symstate.lookaheads.insert(newitem,RefCell::new(las.clone())); //clone by alg
          // SHIFT/GOTONEXT actions added by addstate function
       }//can goto
       /*  NO REDUCE/ACCEPT UNTIL LATER
       */
     }// for each item 
     // form closures for all new states and add to self.States list
     for key in keyvec //keyvec must be separate to avoid borrow error
     {
        let mut kernel = newstates.remove(&key).unwrap();
        stateclosure(&mut kernel,&self.Gmr);
        self.addstate(kernel,si,key); //only place addstate called
     }
  }//makegotos

  // addstate ***
  // psi is previous state index, nextsym is next symbol (may do lalr)
  // state already closed by makegotos
  fn addstate(&mut self, mut state:LALRState, psi:usize, nextsymi:usize)
  {  let nextsym = &self.Gmr.Symbols[nextsymi].sym;
     let newstateindex = self.States.len(); // index of new state
     state.index = newstateindex;
     let lookupkey = state.hashval_lalr();
     if let None=self.Statelookup.get(&lookupkey) {
        self.Statelookup.insert(lookupkey,BTreeSet::new());
     }
     let indices = self.Statelookup.get_mut(&lookupkey).unwrap();
     let mut toadd = newstateindex; // defaut is add new state (will push)
     for i in indices.iter() //lalr version only compares core items
     {
         if &state==&self.States[*i] {toadd=*i; break; } // state i exists
     }
     if self.Gmr.tracelev>3 {println!("Transition to state {} from state {}, symbol {}..",toadd,psi,nextsym);}
     if toadd==newstateindex {  // add new state
       indices.insert(newstateindex); // add to StateLookup index hashset
       self.States.push(state);
       self.FSM.push(HashMap::with_capacity(128)); // always add row to fsm at same time
     }// add new state

     // add to- or change FSM TABLE ...  only Shift or Gotnext added here.
     let gsymbol = &self.Gmr.Symbols[nextsymi]; //self.Gmr.getsym(nextsym).
     let newaction = if gsymbol.terminal {Stateaction::Shift(toadd)}
        else {Stateaction::Gotonext(toadd)};
     add_action(&mut self.FSM, &self.Gmr, newaction,psi,nextsymi,&mut self.sr_conflicts);
     // reduce rules are only added with . at end, nextsymbol terminal,
     // so a "reduce-gotonext" conflict is not possible
  }  //addstate

 // called after all states created, propagations marked.
 fn propagate_lookaheads(&mut self)
 {
   let mut changed = true;
   while changed
   {
      changed = false;
      for si in 0..self.States.len()
      {
         /*let mut items = Vec::with_capacity(self.States[si].items.len());
         for item in &self.States[si].items {
           items.push(*item);
         }*/
         for item in &self.States[si].items { //-borrow checker!
           let (ri,pi) = (item.0, item.1);
           if pi>0 || ri == self.Gmr.Rules.len()-1 {  // kernel item
             let itemlas = self.States[si].lookaheads.get(&item).unwrap().borrow();
             let props = self.States[si].propagation.get(&item).unwrap();
             for (nextsymi,nextitem) in props.iter() {
                let mut nextstate = self.States.len();
                if nextsymi>&self.Gmr.Symbols.len() { nextstate= si; }//dummy
                else {
                  let nextaction = self.FSM[si].get(nextsymi);
                  match nextaction {
                    Some(Shift(nsi))|Some(Gotonext(nsi)) => {nextstate=*nsi;},
                    _ => {panic!("THIS SHOULD NOT HAPPEN");},
                  }//match
                }
//println!("{}: found nextitem ({},{}) in expected state {}!",self.States[nextstate].items.contains(nextitem),nextitem.0,nextitem.1,nextstate);
                let nextlasopt = self.States[nextstate].lookaheads.get(nextitem);
                if let Some(rfcell)=nextlasopt {
                  if nextstate!=si || nextitem!=item { //runtime borrow check
                    let mut nextlas = rfcell.borrow_mut();
                    for la in itemlas.iter() {
                      if *la>=self.Gmr.Symbols.len() {continue;} //no #
println!("propagating lookahead {} to state {} from state {}",&self.Gmr.Symbols[*la].sym, nextstate, si);
                      changed =nextlas.insert(*la)||changed;
                    }
                  }
                }
                //else {println!("NO lookahead ENTRY!");}
             } // each propagation mark
           }//kernel item
         }//for each item in state
      }//for each state
   } // which changed
 }//propagate_lookaheads

 // set reduce actions and check for conflicts
 fn set_reduce(&mut self)
 {
    for si in 0..self.States.len()
    {
       for item in &self.States[si].items
       {
         let (ri,pi) = (item.0,item.1);
         if pi==self.Gmr.Rules[ri].rhs.len() { //dot at end of rhs
           let itemlas = self.States[si].lookaheads.get(item).unwrap().borrow();
           for la in itemlas.iter() { // for each lookahead
             if *la>=self.Gmr.Symbols.len() {continue;} // skip the dummy
             //println!("adding reduce/accept rule {}, la {}",ri,&self.Gmr.Symbols[*la].sym);
             
             let isaccept = (ri == self.Gmr.Rules.len()-1 && la==&(self.Gmr.Symbols.len()-1));
             if isaccept {
               add_action(&mut self.FSM,&self.Gmr,Accept,si,*la,&mut self.sr_conflicts);
             }
             else {
               add_action(&mut self.FSM,&self.Gmr,Reduce(ri),si,*la,&mut self.sr_conflicts);
             }
           } // for each la
         }//if reduce situation
       } // for each item
   } // for each state
 }//set_reduce

fn reclose(&mut self) // set remaining lookaheads
{
  for state in self.States.iter_mut() {
     stateclosure(state, &self.Gmr);
  }
}//reclose

pub fn generatefsm(&mut self)
  { 
    // create initial state, closure from initial item: 
    // START --> .topsym EOF
    let mut startstate=LALRState::new(self.Gmr.Rules.len());
    let EOFi = self.Gmr.Symbols.len()-1;
    let STARTi = self.Gmr.Symbols.len()-2;
    let startrule = self.Gmr.Rules.len()-1;
    let startitem = LALRitem(startrule,0);
    startstate.insert(startitem,STARTi);
    startstate.lookaheads.get(&startitem).unwrap().borrow_mut().insert(EOFi);
    stateclosure(&mut startstate,&self.Gmr);
    self.States.push(startstate); // add start state, first state
    self.FSM.push(HashMap::with_capacity(128)); // row for state
    // now generate closure for state machine (not individual states)
    let mut closed:usize = 0;
    //println!("before makegotos");    
    while closed<self.States.len()
    {
       self.makegotos(closed);
       closed += 1;
    }//while not closed
    //println!("after makegotos");
    self.propagate_lookaheads();
    //println!("after propagate_lookaheads");    
    self.reclose(); // set lookaheads for non-kernel items
    //println!("after reclose");        
    self.set_reduce();
  }//generatefsm
  
}//impl LALRMachine



//AXIOM: if item exists, an entry must exist for it in lookaheads

// generate closure from single kernel item, merge into existing state,
// sets propagation table
/*
fn closure1(kernel:LALRitem, state: &mut LALRState,Gmr:&Grammar)
{// assuming kernel is a kernel item and not? in state
   //use a vector as well as state.items
   let (kri,kpi) = (kernel.0,kernel.1);
   let ruleki = &Gmr.Rules[kri];
   let lhski = ruleki.lhs.index;
   //state.insert(kernel,lhski);   // insert kernel into state? (below)
   let mut propagate = false;
   let lowerlimit = if kpi<ruleki.rhs.len() {kpi+1} else {ruleki.rhs.len()};
   let dummy = Gmr.Symbols.len()+1; // distinguish from real symbols (#)   
   let mut kernlookaheads =Gmr.Firstseq(&ruleki.rhs[lowerlimit..],dummy);
   let propagate = kernlookaheads.remove(&dummy);

   let mut closure = vec![kernel];
   let mut closed = 0;
   while closed < closure.len()
   {
     let item = closure[closed];
     let (ri,pi) = (item.0,item.1);
     let rulei = &Gmr.Rules[ri];
     let lhsi = rulei.lhs.index;
     state.insert(item,lhsi); // insert into state here
     let kpropagation = state.propagation.get_mut(&kernel).unwrap();
     closed += 1;
     if pi<rulei.rhs.len() {// find .X
       let Xsym = &rulei.rhs[pi];
       if propagate {
          kpropagation.insert((Xsym.index,LALRitem(ri,pi+1)));
       }// insert into propagation table
       if !Xsym.terminal {  // X is non-terminal, add closure items
         for rulent in Gmr.Rulesfor.get(&Xsym.sym).unwrap() {
           let newitem = LALRitem(*rulent,0);
           closure.push(newitem); // add to "frontier"
         }// for each rule of this non-terminal X
       }//X nonterminal
     }// pi not at right end
   }//while !closed
}//closure1
*/

// don't add any lookaheads except # JUST PROPAGATE BETWEEN KERNELS!
fn closure1(kernelitem:LALRitem, state: &mut LALRState,Gmr:&Grammar)
{// assuming kernelitem is a kernel item and already in state
   //use a vector as well as state.items
   let (kri,kpi) = (kernelitem.0,kernelitem.1);
   if kpi==0 && kri!=Gmr.Rules.len()-1 { return; } // if not kernel item
//println!("calling closure 1 on item ({},{})",kri,kpi);
   
   let ruleki = &Gmr.Rules[kri];
   let lhski = ruleki.lhs.index;
   //state.insert(kernelitem,lhski);   // insert kernel into state? (before)
   let lowerlimit = if kpi<ruleki.rhs.len() {kpi+1} else {ruleki.rhs.len()};
   let dummy = Gmr.Symbols.len()+1; // distinguish from real symbols (#)
   state.lookaheads.get(&kernelitem).unwrap().borrow_mut().insert(dummy);

   // assume kernel item already inserted into state by makegotos

   let mut closure = vec![kernelitem];
   let mut closed = 0;
   while closed < closure.len()
   {
     let item = closure[closed];
     let (ri,pi) = (item.0,item.1);
     let rulei = &Gmr.Rules[ri];
     let lhsi = rulei.lhs.index;
     if closed!=0 {state.insert(item,lhsi);} // insert into state here
     closed += 1;
     if pi<rulei.rhs.len() {// find .X
       let Xsym = &rulei.rhs[pi]; // symbol X of dragon book
       let propagate = state.lookaheads.get(&item).unwrap().borrow().contains(&dummy);
       if propagate {
          let kpropagation = state.propagation.get_mut(&kernelitem).unwrap();
          kpropagation.insert((Xsym.index,LALRitem(ri,pi+1)));
       }// insert into propagation table
       if !Xsym.terminal {  // X is non-terminal, add closure items
         // don't add any lookaheads at this stage, these are not kernels
         let mut Xlookaheads = Gmr.Firstseq(&rulei.rhs[pi+1..],dummy);
         if Xlookaheads.remove(&dummy) {  // local use of dummy
           let itemlas = state.lookaheads.get(&item).unwrap().borrow();
           // this could be the kernel item with dummy
           for ila in itemlas.iter() {Xlookaheads.insert(*ila);}
         }
         //let propagate =  Xlookaheads.remove(&dummy);
         //let kpropagation = state.propagation.get_mut(&kernelitem).unwrap();

         // for rules X --> alpha . beta, 
         for rulent in Gmr.Rulesfor.get(&Xsym.sym).unwrap() {
           let newitem = LALRitem(*rulent,0);
           if !state.items.contains(&newitem) {
             closure.push(newitem); // add to "frontier"
             state.lookaheads.insert(newitem,RefCell::new(HashSet::new()));
           }
           let mut slas = state.lookaheads.get(&newitem).unwrap().borrow_mut();
           for xla in Xlookaheads.iter() {slas.insert(*xla);}
         }// for each rule of this non-terminal X
       }//X nonterminal
     }// pi not at right end
   }//while !closed
}//closure1


// calls closure on all kernel elements. still need to propagate la's
fn stateclosure(kernel:&mut LALRState, Gmr:&Grammar)
{
   let mut kitems = Vec::new();
   for kitem in kernel.items.iter() { kitems.push(*kitem); } // copies
   for kitem in kitems {
     closure1(kitem,kernel,Gmr);
   }
}//stateclosure generation, first stage