rustlr 0.3.0

LR/LALR parser generator that can automatically create abstract syntax trees
Documentation
/////////////////////ENUM VERSION//////////////////////////////////////
   ///// semantic action fn is _semaction_for_{rule index}_
////////////////////////////////////////////////
impl Statemachine
{
  /// Writes the generated parser source for this state machine to `filename`.
  ///
  /// If the grammar's abstract syntax type is not recognised by `is_lba`,
  /// generation is delegated to `writezcparser`.  Otherwise the output file
  /// receives, in this order:
  ///
  /// 1. a header of `#![allow(..)]` attributes and `use` clauses, followed by
  ///    the grammar's `Extras` text,
  /// 2. the static `SYMBOLS` array (one entry per grammar symbol, in the same
  ///    order as `self.Gmr.Symbols`),
  /// 3. the static `TABLE` array holding the encoded state-machine entries,
  /// 4. one `_semaction_for_{rule index}_` function per grammar rule,
  /// 5. `make_parser`, which builds the rules and loads the table,
  /// 6. the enum definitions (unless `sametype` is set) and the lexer
  ///    (if `genlex` is set),
  /// 7. an empty `load_extras` function.
  ///
  /// # Errors
  /// Returns any `std::io::Error` raised while creating or writing the file,
  /// or propagated from `writezcparser`, `gen_enum` or `genlexer`.
  ///
  /// # Panics
  /// Panics if a grammar symbol used in a rule or in the state machine is
  /// missing from `self.Gmr.Symhash`.
  pub fn writeenumparser(&self, filename:&str)->Result<(),std::io::Error>
  {
    let absyn = &self.Gmr.Absyntype;

    if !is_lba(absyn) /*absyn!="LBox<dyn Any>" && absyn!="LBox<Any>"*/ {
       return self.writezcparser(filename);
    }

    let extype = &self.Gmr.Externtype;
    let lifetime = &self.Gmr.lifetime;
    let has_lt = !lifetime.is_empty() && (absyn.contains(lifetime) || extype.contains(lifetime));
    let ltopt = if has_lt {format!("<{}>",lifetime)} else {String::new()};

    // Generate the source text of one semantic-action fn per rule from the
    // action strings stored in the gen-time grammar.  This is done before
    // the output file is created.
    let mut actions:Vec<String> = Vec::with_capacity(self.Gmr.Rules.len());
    for (ri,rule) in self.Gmr.Rules.iter().enumerate()
    {
      let lhsi = *self.Gmr.Symhash.get(&rule.lhs.sym).expect("GRAMMAR REPRESENTATION CORRUPTED");
      let rettype = &self.Gmr.Symbols[lhsi].rusttype; // return type
      let ltoptr = if has_lt || (!lifetime.is_empty() && rettype.contains(lifetime))
        {format!("<{}>",lifetime)} else {String::new()};
      let mut fndef = format!("fn _semaction_for_{}_{}(parser:&mut ZCParser<{},{}>) -> {} {{\n",ri,&ltoptr,absyn,extype,rettype);

      // Values and patterns for an optional `if let (patterns)=(labels)`,
      // accumulated while walking the right-hand side.
      let mut labels = String::from("(");
      let mut patterns = String::from("(");
      // rhs symbols are visited right to left: one popstack() per symbol
      for (idx,gsym) in rule.rhs.iter().enumerate().rev()
      {
        // same sanity check the lookup-and-unwrap used to provide
        assert!(self.Gmr.Symhash.contains_key(&gsym.sym),"GRAMMAR REPRESENTATION CORRUPTED");
        let findat = gsym.label.find('@');
        // Variable name for the popped value: the label itself, the part of
        // the label before '@', or a positional default.
        let plab = match findat {
          None if !gsym.label.is_empty() => gsym.label.to_string(),
          Some(ati) if ati>0 => gsym.label[..ati].to_string(),
          _ => format!("_item{}_",idx),
        };
        let symtype = &gsym.rusttype;
        // no need for lbdown! if the type is already LBA (or unspecified)
        let stat = if symtype.len()<2 || symtype=="LBox<dyn Any>" || symtype=="LBox<Any>" {
          format!("let mut {} = parser.popstack().value; ",plab)
        } else {
          format!("let mut {} = lbdown!(parser.popstack().value,{}); ",plab,symtype)
        };
        fndef.push_str(&stat);
        // plab is now bound to the popped value in the generated code
        if let Some(atindex) = findat {
          if gsym.label.len()>1 { // if-let pattern, with or without label
            labels.push_str("&mut *");
            labels.push_str(&plab);
            labels.push(',');
            // closing @ trimmed in grammar_processor.rs
            patterns.push_str(&gsym.label[atindex+1..]);
            patterns.push(',');
          }
        }
      }// for each symbol on right hand side of rule

      // A rule without an action gets one returning the default value.
      let defaultaction = format!("<{}>::default()}}",rettype);
      let semaction = if rule.action.len()<=1 {&defaultaction} else {&rule.action}; // string that ends w/ rbr
      if labels.len()<2 { // no patterns were collected
        fndef.push_str(semaction.trim_end());
        fndef.push_str("\n");
      }
      else { // wrap the action in an if-let over the collected patterns
        labels.push(')');
        patterns.push(')');
        // NOTE(review): the pattern text is embedded verbatim in a generated
        // string literal; a pattern containing '"' would presumably break
        // the generated code -- confirm against grammar_processor.rs.
        let pat2 = format!("\n  if let {}={} {{ {}  else {{parser.report(\"{}\"); <{}>::default()}} }}\n",&patterns,&labels,semaction.trim_end(),&patterns,rettype);
        fndef.push_str(&pat2);
      }
      actions.push(fndef);
    }// generate action function for each rule

    ////// write to file

    // Buffered: the table alone is emitted one small entry at a time.
    let mut fd = std::io::BufWriter::new(File::create(filename)?);
    write!(fd,"//Parser generated by rustlr for grammar {}",&self.Gmr.name)?;
    write!(fd,"\n    \n\
#![allow(unused_variables)]
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]
#![allow(unused_parens)]
#![allow(unused_mut)]
#![allow(unused_imports)]
#![allow(unused_assignments)]
#![allow(dead_code)]
#![allow(irrefutable_let_patterns)]
use std::any::Any;
extern crate rustlr;
use rustlr::{{Tokenizer,TerminalToken,ZCParser,ZCRProduction,Stateaction,decode_action,LBox,lbdown,lbup,lbget,unbox}};\n")?;
    if self.Gmr.genlex {
      write!(fd,"use rustlr::{{StrTokenizer,RawToken,LexSource}};\nuse std::collections::{{HashMap,HashSet}};\n")?;
    }

    write!(fd,"{}\n",&self.Gmr.Extras)?; // use clauses and such

    // Static array of symbols; positions must be in line with
    // self.Gmr.Symhash.  `join` also copes with an empty symbol list.
    let symlist = self.Gmr.Symbols.iter()
      .map(|s| format!("\"{}\"",&s.sym))
      .collect::<Vec<String>>()
      .join(",");
    write!(fd,"static SYMBOLS:[&'static str;{}] = [{}];\n\n",self.Gmr.Symbols.len(),symlist)?;

    // Record the state-machine entries in a static array.
    let totalsize:usize = self.FSM.iter().map(|row| row.len()).sum();
    write!(fd,"static TABLE:[u64;{}] = [",totalsize)?;
    // Entry layout (see function decode_action for the opposite
    // translation): state index in bits 48..64, symbol index in bits
    // 32..48, target state or rule index shifted left by 16, and the kind
    // of action in the lowest bits (0 shift, 1 goto, 2 reduce, 3 accept,
    // 4 error).
    for (i,row) in self.FSM.iter().enumerate() // for each state index i
    {
      for (key,action) in row.iter()
      {
        let k = *self.Gmr.Symhash.get(key).unwrap(); // index of symbol
        let base = ((i as u64) << 48) + ((k as u64) << 32);
        let encode:u64 = base + match action {
          Shift(statei) => (*statei as u64) << 16,
          Gotonext(statei) => ((*statei as u64) << 16)+1,
          Reduce(rulei) => ((*rulei as u64) << 16)+2,
          Accept => 3,
          _ => 4,  // 4 indicates Error
        };
        write!(fd,"{},",encode)?;
      } //for each symbol with an entry in this state
    }//for each state index i
    write!(fd,"];\n\n")?;

    // write action functions
    for deffn in &actions { write!(fd,"{}",deffn)?; }

    // must know what absyn type is when generating code.
    write!(fd,"\npub fn make_parser{}() -> ZCParser<{},{}>",&ltopt,absyn,extype)?;
    write!(fd,"\n{{\n")?;
    write!(fd," let mut parser1:ZCParser<{},{}> = ZCParser::new({},{});\n",absyn,extype,self.Gmr.Rules.len(),self.States.len())?;
    // generate rules and Ruleaction delegates to call action fns
    write!(fd," let mut rule = ZCRProduction::<{},{}>::new_skeleton(\"{}\");\n",absyn,extype,"start")?; // dummy for init
    for (i,rule) in self.Gmr.Rules.iter().enumerate()
    {
      write!(fd," rule = ZCRProduction::<{},{}>::new_skeleton(\"{}\");\n",absyn,extype,rule.lhs.sym)?;
      write!(fd," rule.Ruleaction = |parser|{{ ")?;

      // call the action function; wrap its result with lbup!/LBox unless
      // the rule's type is already LBA
      let lhsi = *self.Gmr.Symhash.get(&rule.lhs.sym).expect("GRAMMAR REPRESENTATION CORRUPTED");
      let fnname = format!("_semaction_for_{}_",i);
      let typei = &self.Gmr.Symbols[lhsi].rusttype;
      if is_lba(typei) {
        write!(fd," {}(parser) }};\n",&fnname)?;
      }
      else {
        write!(fd," lbup!( LBox::new({}(parser),parser.linenum,parser.column)) }};\n",&fnname)?;
      }
      write!(fd," parser1.Rules.push(rule);\n")?;
    }// write each rule action

    write!(fd," parser1.Errsym = \"{}\";\n",&self.Gmr.Errsym)?;
    // resynch vector
    for s in &self.Gmr.Resynch { write!(fd," parser1.resynch.insert(\"{}\");\n",s)?; }

    // generate code to load RSM from TABLE
    write!(fd,"\n for i in 0..{} {{\n",totalsize)?;
    write!(fd,"   let symi = ((TABLE[i] & 0x0000ffff00000000) >> 32) as usize;\n")?;
    write!(fd,"   let sti = ((TABLE[i] & 0xffff000000000000) >> 48) as usize;\n")?;
    write!(fd,"   parser1.RSM[sti].insert(SYMBOLS[symi],decode_action(TABLE[i]));\n }}\n\n")?;
    write!(fd," for s in SYMBOLS {{ parser1.Symset.insert(s); }}\n\n")?;

    write!(fd," load_extras(&mut parser1);\n")?;
    write!(fd," return parser1;\n")?;
    write!(fd,"}} //make_parser\n\n")?;

    // gen_enum and genlexer write to the underlying file directly, so the
    // buffer must be flushed first to keep the output in order.

    ////// WRITE ENUM (test)
    if !self.Gmr.sametype {
      fd.flush()?;
      self.Gmr.gen_enum(fd.get_mut())?;
    }

    ////// WRITE LEXER
    if self.Gmr.genlex {
      fd.flush()?;
      self.Gmr.genlexer(fd.get_mut(),"raw_to_lba")?;
    }

    ////// Augment!
    write!(fd,"fn load_extras{}(parser:&mut ZCParser<{},{}>)\n{{\n",&ltopt,absyn,extype)?;
    write!(fd,"}}//end of load_extras: don't change this line as it affects augmentation\n")?;
    // flush explicitly: dropping a BufWriter swallows write errors
    fd.flush()?;
    Ok(())
  }//writeenumparser

}//impl statemachine