rustlr/
zc_parser.rs

//! This module implements a zero-copy version of the runtime parser that
//! uses the LR state machine generated by rustlr.  It will (for now) live
//! alongside the original parser implemented as [crate::RuntimeParser].
//! Since Version 0.2.3, this module can now generate a basic lexical
//! scanner based on [crate::RawToken] and [crate::StrTokenizer].
6//!
//! This module implements the parsing routines that use the state machine
//! generated by rustlr.  **The main structure here is [ZCParser]**.
//! All parsing functions are organized around the [ZCParser::parse_core]
//! function, which implements the basic LR parsing algorithm.  This function
//! expects dynamic [Tokenizer] and [ErrReporter] trait-objects.
//! This module provides generic
//! parsing and parser-training routines that use stdio for interface, but
//! the [ErrReporter] trait allows custom user interfaces to be built separately.
15
16#![allow(dead_code)]
17#![allow(unused_variables)]
18#![allow(non_snake_case)]
19#![allow(non_camel_case_types)]
20#![allow(unused_parens)]
21#![allow(unused_mut)]
22#![allow(unused_assignments)]
23#![allow(unused_doc_comments)]
24#![allow(unused_imports)]
25use std::fmt::Display;
26use std::default::Default;
27use std::collections::{HashMap,HashSet,BTreeSet};
28use std::io::{self,Read,Write,BufReader,BufRead};
29use std::rc::Rc;
30use std::cell::{RefCell,Ref,RefMut};
31use std::hash::{Hash,Hasher};
32use std::any::Any;
33use std::fs::File;
34use std::io::prelude::*;
35use std::path::Path;
36use std::mem;
37//use crate::{Stateaction,Statemachine,TerminalToken,Tokenizer};
38use crate::{Stateaction,iserror,TerminalToken,Tokenizer};
39use crate::{LBox,LRc,LC};
40use crate::Stateaction::*;
41use crate::{lbup,lbdown,lbget};
42use crate::{StandardReporter,StackedItem};
43#[cfg(feature = "generator")]
44use crate::{Statemachine};
45
46//extern crate termion;
47//use termion::{color,style};
48
49
50/// this structure is only exported because it is required by the generated parsers.
51/// There is no reason to use it in other programs.  Replaces [crate::RProduction] for new parsers since version 0.2.0
52#[derive(Clone)]
53pub struct ZCRProduction<AT:Default,ET:Default>  // runtime rep of grammar rule
54{
55  pub lhs: &'static str, // left-hand side nonterminal of rule
56  pub Ruleaction : fn(&mut ZCParser<AT,ET>) -> AT, //parser as arg
57}
58impl<AT:Default,ET:Default> ZCRProduction<AT,ET>
59{
60  pub fn new_skeleton(lh:&'static str) -> ZCRProduction<AT,ET>
61  {
62     ZCRProduction {
63       lhs : lh,
64       Ruleaction : |p|{ <AT>::default() },
65     }
66  }
67}//impl ZCRProduction
68
69/* moved to runtime_parser module for uniformity
70/// These structures are what's on the parse stack.
71pub struct StackedItem<AT:Default>   // replaces Stackelement
72{
73   si : usize, // state index
74   pub value : AT, // semantic value (don't clone grammar symbols)
75   pub line: usize,  // line and column
76   pub column: usize, 
77}
78impl<AT:Default> StackedItem<AT>
79{
80  pub fn new(si:usize,value:AT,line:usize,column:usize) -> StackedItem<AT>
81  { StackedItem{si,value,line,column} }
82  /// converts the information in a stacked item to an [LBox] enclosing
83  /// the abstract syntax value along with starting line and column numbers
84  pub fn lbox(self) -> LBox<AT>  // no longer used
85  {  LBox::new(self.value,self.line,self.column) }
86}
87*/
88
89/// this is the structure created by the generated parser.  The generated parser
90/// program will contain a make_parser function that returns this structure.
91/// Most of the pub items are, however, only exported to support the operation
92/// of the parser, and should not be accessed directly.  Only the functions
93/// [ZCParser::parse], [ZCParser::report], [ZCParser::abort]
94/// and [ZCParser::error_occurred] should be called directly 
95/// from user programs.  Only the field [ZCParser::exstate] should be accessed
96/// by user programs.
97pub struct ZCParser<AT:Default,ET:Default>  
98{
99  /// this is the "external state" structure, with type ET defined by the grammar.
100  /// The semantic actions associated with each grammar rule, which are written
101  /// in the grammar, have ref mut access to the ZCParser structure, which
102  /// allows them to read and change the external state object.  This gives
103  /// the parsers greater flexibility and capability, including the ability to
104  /// parse some non-context free languages.  See 
105  /// [this sample grammar](<https://cs.hofstra.edu/~cscccl/rustlr_project/ncf.grammar>).
106  /// The exstate is initialized to ET::default().
107  pub exstate : ET,  // external state structure, usage optional
108  /// External state that can be shared
109  pub shared_state : Rc<RefCell<ET>>,
110  /// used only by generated parser: do not reference
111  pub RSM : Vec<HashMap<&'static str,Stateaction>>,  // runtime state machine
112  // do not reference
113  //pub Expected : Vec<Vec<&'static str>>,
114  /// do not reference
115  pub Rules : Vec<ZCRProduction<AT,ET>>, //rules with just lhs and delegate function
116  ////// this value should be set through abort or report
117  stopparsing : bool,
118  /// do not reference  
119  pub stack :  Vec<StackedItem<AT>>, // parse stack
120//  pub recover : HashSet<&'static str>, // for error recovery
121  pub resynch : HashSet<&'static str>,
122  pub Errsym : &'static str,
123  err_occurred : bool,
124  /// axiom: linenum and column represents the starting position of the
125  /// topmost StackedItem.
126  pub linenum : usize,
127  pub column : usize,
128  pub position : usize, // absolute byte position of input
129  pub prev_position : usize,
130  pub src_id : usize,
131  report_line : usize,
132  /// Hashset containing all grammar symbols (terminal and non-terminal). This is used for error reporting and training.
133  pub Symset : HashSet<&'static str>,
134  //pub tokenizer:&'t mut dyn Tokenizer<'t,AT>,
135  popped : Vec<(usize,usize)>,
136  gindex : RefCell<u32>,  // global index for uid
137  err_report : Option<String>, // optional err report with logging reporter
138}//struct ZCParser
139
140
141impl<AT:Default,ET:Default> ZCParser<AT,ET>
142{
143    /// this is only called by the make_parser function in the machine-generated
144    /// parser program.  *Do not call this function in other places* as it
145    /// only generates a skeleton.
146    pub fn new(rlen:usize, slen:usize/*,tk:&'t mut dyn Tokenizer<'t,AT>*/) -> ZCParser<AT,ET>
147    {  // given number of rules and number states
148       let mut p = ZCParser {
149         RSM : Vec::with_capacity(slen),
150         //Expected : Vec::with_capacity(slen),
151         Rules : Vec::with_capacity(rlen),
152         stopparsing : false,
153         exstate : ET::default(),
154         shared_state: Rc::new(RefCell::new(ET::default())),
155         stack : Vec::with_capacity(1024),
156         Errsym : "",
157         err_occurred : false,
158         linenum : 0,
159         column : 0,
160         position : 0,
161         prev_position: 0,
162         src_id : 0,
163         report_line : 0,
164         resynch : HashSet::new(),
165         //added for training
166         //training : false,
167         //trained : HashMap::new(),
168         Symset : HashSet::with_capacity(64),
169         //tokenizer:tk,
170         popped: Vec::with_capacity(8),
171         gindex: RefCell::new(0),
172         err_report : None,
173       };
174       for _ in 0..slen {
175         p.RSM.push(HashMap::with_capacity(16));
176         //p.Expected.push(Vec::new());
177       }
178       return p;
179    }//new
180
181    /// returns the current line number
182    pub fn current_line(&self)->usize {self.linenum}
183    /// returns the current column number
184    pub fn current_column(&self)->usize {self.column}
185    /// returns the current absolute byte position according to tokenizer
186    pub fn current_position(&self)->usize {self.position}
187    /// returns the previous position (before shift) according to tokenizer
188    pub fn previous_position(&self)->usize {self.prev_position}
189
190    /// this function can be called from within the semantic actions attached
191    /// to grammar production rules that are executed for each
192    /// "reduce" action of the parser.
193    pub fn abort(&mut self, msg:&str)
194    {
195       self.err_report.as_mut().map_or_else(
196         ||eprintln!("\n!!!Parsing Aborted: {}",msg),
197         |x|x.push_str(&format!("\n!!!Parsing Aborted: {}\n",msg)));
198
199       self.err_occurred = true;
200       self.stopparsing=true;
201    }
202
203
204    /// this function can be called from within the "semantic" actions attached
205    /// to production rules to terminate parsing.
206    pub fn stop(&mut self) {
207      self.stopparsing = true;
208    }
209
210    /// may be called from grammar semantic actions to report error.
211    /// this report function will print to stdout.
212    pub fn report(&mut self, errmsg:&str)  {self.report_error(errmsg,false)}
213    /// same as [ZCParser::report] but with option to display line/column
214    pub fn report_error(&mut self, errmsg:&str, showlc: bool)  
215    {  
216       //eprint!("{}",color::Fg(color::Yellow));
217       if (self.report_line != self.linenum || self.linenum==0)  {
218         if showlc {
219           self.err_report.as_mut().map_or_else(
220             ||eprintln!("ERROR on line {}, column {}: {}",self.linenum,self.column,errmsg),
221             |x|x.push_str(&format!("ERROR on line {}, column {}: {}\n",self.linenum,self.column,errmsg)));
222         }
223         else {
224           self.err_report.as_mut().map_or_else(
225             ||eprintln!("PARSER ERROR: {}",errmsg),
226             |x|x.push_str(&format!("PARSER ERROR: {}\n",errmsg)));
227         }
228         self.report_line = self.linenum;
229       }
230       else {
231         if showlc {
232           self.err_report.as_mut().map_or_else(
233             ||eprint!(" ({},{}): {}",self.linenum,self.column,errmsg),
234             |x|x.push_str(&format!(" ({},{}): {}",self.linenum,self.column,errmsg)));
235         }
236         else {
237           self.err_report.as_mut().map_or_else(
238             ||eprint!(" {}",errmsg),
239             |x|{x.push(' '); x.push_str(errmsg)});
240         }
241       }
242       //eprint!("{}",color::Fg(color::Reset));       
243       self.err_occurred = true;
244    }// report
245
246    /// this function is only exported to support the generated code
247    pub fn bad_pattern(&mut self,pattern:&str) -> AT
248    {
249       let msg = format!("pattern {} failed to bind to stacked values\n",pattern);
250       self.report(&msg);
251       //println!("FROM BAD PATTERN:");
252       AT::default()
253    }
254
255/*
256    /// sets an index that index source information, such as the source file
257    /// when compiling multiple sources. This information must be maintained externally.
258    /// The source id will also be passed on to the [LBox] and [LRc] smartpointers by
259    /// the [ZCParser::lb] function.
260    pub fn set_src_id(&mut self, id:usize)
261    { self.src_id =id; }
262*/
263
264    //called to simulate a shift
265    fn errshift(&mut self, sym:&str) -> bool
266    {
267       let csi = self.stack[self.stack.len()-1].si; // current state
268       let actionopt = self.RSM[csi].get(sym);
269       if let Some(Shift(ni)) = actionopt {
270         self.stack.push(StackedItem::new(*ni,AT::default(),self.linenum,self.column)); true
271       }
272       else {false}
273    }
274
275  // this is the LR parser shift action: push the next state, along with the
276  // value of the current lookahead token onto the parse stack, returns the
277  // next token
278  fn shift<'t>(&mut self, nextstate:usize, lookahead:TerminalToken<'t,AT>, tokenizer:&mut dyn Tokenizer<'t,AT>) -> TerminalToken<'t, AT>
279  {
280     self.linenum = lookahead.line;  self.column=lookahead.column;
281     self.prev_position = self.position; self.position = tokenizer.position();
282     self.stack.push(StackedItem::new(nextstate,lookahead.value,lookahead.line,lookahead.column));
283     //self.nexttoken()
284     tokenizer.next_tt()
285  }
286
287    /// this function is called from the generated semantic actions and should
288    /// most definitely not be called from elsewhere as it would corrupt
289    /// the base parser.
290    pub fn popstack(&mut self) -> StackedItem<AT>
291    {
292       let item = self.stack.pop().expect("PARSER STATE MACHINE/STACK CORRUPTED");
293       self.linenum = item.line; self.column=item.column;
294       self.popped.push((item.line,item.column));
295       item
296    }//popstack
297
298    pub fn popstack_as_lbox(&mut self) -> LBox<AT>
299    {
300       let item = self.stack.pop().expect("PARSER STATE MACHINE/STACK CORRUPTED");
301       self.linenum = item.line; self.column=item.column;
302       self.popped.push((item.line,item.column));
303       let newuid = *self.gindex.borrow();
304       *self.gindex.borrow_mut() += 1;           
305       LBox::make(item.value,item.line,item.column,newuid)
306    }//popstack_as_lbox
307
308    fn reduce(&mut self, ri:&usize)
309    {
310       self.popped.clear();
311       let rulei = &self.Rules[*ri];
312       let ruleilhs = rulei.lhs; // &'static : Copy
313       //let mut dummy = RuntimeParser::new(1,1);
314       let val = (rulei.Ruleaction)(self); // should be self
315       let newtop = self.stack[self.stack.len()-1].si; 
316       let goton = self.RSM[newtop].get(ruleilhs).expect("PARSER STATEMACHINE CORRUPTED");
317       if let Stateaction::Gotonext(nsi) = goton {
318/*
319the line/column must be the last thing that was popped, but how is this communicated from the semantic actions?
320Solution: When the semantic action pops, it changes self.linenum,self.column,
321instead of pop, there should be a function self.popstack() that returns value.
322This is correct because linenum/column will again reflect start of tos item
323*/
324       self.stack.push(StackedItem::new(*nsi,val,self.linenum,self.column)); 
325                //self.stack.push(Stackelement{si:*nsi,value:val});
326       }// goto next state after reduce
327              else {
328                self.report("state transition table corrupted: no suitable action after reduce");
329                self.stopparsing=true;
330              }
331    }//reduce
332
333    /// can be called to determine if an error occurred during parsing.  The parser
334    /// will not panic.
335    pub fn error_occurred(&self) -> bool {self.err_occurred}
336
337    // there may need to be other lb functions, perhaps from terminalToken
338    // or stackedItem (at least for transfer)
339
340    /// creates a [LBox] smart pointer that includes line/column information;
341    /// should be called from the semantic actions of a grammar rule, e.g.
342    ///```ignore
343    ///   E --> E:a + E:b {PlusExpr(parser.lb(a),parser.lb(b))}
344    ///```
345    pub fn lb<T>(&self,e:T) -> LBox<T> {
346      let newuid = *self.gindex.borrow();
347      *self.gindex.borrow_mut() += 1;    
348      LBox::make(e,self.linenum,self.column,newuid)
349    }
350    /// creates a `LBox<dyn Any>`, which allows attributes of different types to
351    /// be associated with grammar symbols.  Use in conjuction with [LBox::downcast], [LBox::upcast] and the [lbdown], [lbup] macros.
352    pub fn lba<T:'static>(&self,e:T) -> LBox<dyn Any> {
353      let newuid = *self.gindex.borrow();
354      *self.gindex.borrow_mut() += 1;        
355      LBox::upcast(LBox::make(e,self.linenum,self.column,newuid))
356    }
357    /// similar to [ZCParser::lb], but creates a [LRc] instead of [LBox]
358    pub fn lrc<T>(&self,e:T) -> LRc<T> { LRc::new(e,self.linenum,self.column /*,self.src_id*/) }
359    /// similar to [ZCParser::lba] but creates a [LRc]
360    pub fn lrca<T:'static>(&self,e:T) -> LRc<dyn Any> { LRc::upcast(LRc::new(e,self.linenum,self.column /*,self.src_id*/)) }
361
362    /// creates LBox enclosing e using line/column information associated
363    /// with right-hand side symbols, numbered left-to-right starting at 0
364    pub fn lbx<T>(&self,i:usize,e:T) -> LBox<T>
365    {
366       let (mut ln,mut cl) = (self.linenum,self.column);
367       if i<self.popped.len() {
368         let index = self.popped.len() - 1 - i;
369         let lc = self.popped[index];
370         ln = lc.0; cl=lc.1;
371       }
372       let newuid = *self.gindex.borrow();
373       *self.gindex.borrow_mut() += 1;
374       LBox::make(e,ln,cl,newuid)
375    }//lbx
376
377    /// alias for [Self::lbx]
378    pub fn lbox<T>(&self,i:usize,e:T) -> LBox<T> { self.lbx(i,e) }
379
380    /// creates [LC] enclosing e using line/column information associated
381    /// with right-hand side symbols, numbered left-to-right starting at 0
382    pub fn lc<T>(&self,i:usize,e:T) -> LC<T>
383    {
384       let (mut ln,mut cl) = (self.linenum,self.column);
385       if i<self.popped.len() {
386         let index = self.popped.len() - 1 - i;
387         let lc = self.popped[index];
388         ln = lc.0; cl=lc.1;
389       }
390       let uid = *self.gindex.borrow();
391       *self.gindex.borrow_mut() += 1;
392       LC::make(e,ln,cl,uid)
393    }//lbx
394
395    /// Like lbx but creates an LRc
396    pub fn lrcn<T>(&self,i:usize,e:T) -> LRc<T>
397    {
398       let (mut ln,mut cl) = (self.linenum,self.column);
399       if i<self.popped.len() {
400         let index = self.popped.len() - 1 - i;
401         let lc = self.popped[index];
402         ln = lc.0; cl=lc.1;
403       }
404       LRc::new(e,ln,cl)
405    }//lbx
406}// impl ZCParser
407
408
409//////////////////////////////////////////////////////////////////
410//////////////////////////////////////////////////////////////////
411//// new version of write_fsm: (include calls to genlexer)
412#[cfg(feature = "generator")]
413impl Statemachine
414{  /////// zc version
415  pub fn writezcparser(&self, filename:&str)->Result<(),std::io::Error>
416  {
417    let ref absyn = self.Gmr.Absyntype;
418    let ref extype = self.Gmr.Externtype;
419    let ref lifetime = self.Gmr.lifetime;
420    let has_lt = lifetime.len()>0 && (absyn.contains(lifetime) || extype.contains(lifetime));
421    let ltopt = if has_lt {format!("<{}>",lifetime)} else {String::from("")};
422
423    let mut fd = File::create(filename)?;
424    write!(fd,"//Parser generated by rustlr for grammar {}",&self.Gmr.name)?;
425    write!(fd,"\n
426#![allow(unused_variables)]
427#![allow(non_snake_case)]
428#![allow(non_camel_case_types)]
429#![allow(unused_parens)]
430#![allow(unused_mut)]
431#![allow(unused_imports)]
432#![allow(unused_assignments)]
433#![allow(dead_code)]
434#![allow(irrefutable_let_patterns)]
435#![allow(unreachable_patterns)]
436use std::rc::Rc;
437use std::cell::RefCell;
438extern crate rustlr;
439use rustlr::{{Tokenizer,TerminalToken,ZCParser,ZCRProduction,Stateaction,decode_action}};\n")?;
440    if self.Gmr.genlex {
441      write!(fd,"use rustlr::{{StrTokenizer,RawToken,LexSource}};
442use std::collections::{{HashMap,HashSet}};\n")?;
443    }
444
445    write!(fd,"{}\n",&self.Gmr.Extras)?; // use clauses and such
446
447    // write static array of symbols
448    write!(fd,"static SYMBOLS:[&'static str;{}] = [",self.Gmr.Symbols.len())?;
449    for i in 0..self.Gmr.Symbols.len()-1
450    {
451      write!(fd,"\"{}\",",&self.Gmr.Symbols[i].sym)?;
452    }
453    write!(fd,"\"{}\"];\n\n",&self.Gmr.Symbols[self.Gmr.Symbols.len()-1].sym)?;
454    // position of symbols must be inline with self.Gmr.Symhash
455
456    // record table entries in a static array
457    let mut totalsize = 0;
458    for i in 0..self.FSM.len() { totalsize+=self.FSM[i].len(); }
459    write!(fd,"static TABLE:[u64;{}] = [",totalsize)?;
460    // generate table to represent FSM
461    let mut encode:u64 = 0;
462    for i in 0..self.FSM.len() // for each state index i
463    {
464      let row = &self.FSM[i]; // this is a hashmap<usize,stateaction>
465      for key in row.keys()
466      { // see function decode for opposite translation
467        let k = *key; //*self.Gmr.Symhash.get(key).unwrap(); // index of symbol
468        encode = ((i as u64) << 48) + ((k as u64) << 32);
469        match row.get(key) {
470          Some(Shift(statei)) => { encode += (*statei as u64) << 16; },
471          Some(Gotonext(statei)) => { encode += ((*statei as u64) << 16)+1; },
472          Some(Reduce(rulei)) => { encode += ((*rulei as u64) << 16)+2; },
473          Some(Accept) => {encode += 3; },
474          _ => {encode += 4; },  // 4 indicates Error
475        }//match
476        write!(fd,"{},",encode)?;
477      } //for symbol index k
478    }//for each state index i
479    write!(fd,"];\n\n")?;
480
481    // must know what absyn type is when generating code.
482    write!(fd,"pub fn make_parser{}() -> ZCParser<{},{}>",&ltopt,absyn,extype)?; 
483    write!(fd,"\n{{\n")?;
484    // write code to pop stack, assign labels to variables.
485    write!(fd," let mut parser1:ZCParser<{},{}> = ZCParser::new({},{});\n",absyn,extype,self.Gmr.Rules.len(),self.FSM.len())?;
486    // generate rules and Ruleaction delegates, must pop values from runtime stack
487    write!(fd," let mut rule = ZCRProduction::<{},{}>::new_skeleton(\"{}\");\n",absyn,extype,"start")?;
488    for i in 0..self.Gmr.Rules.len() 
489    {
490      write!(fd," rule = ZCRProduction::<{},{}>::new_skeleton(\"{}\");\n",absyn,extype,self.Gmr.Rules[i].lhs.sym)?;      
491      write!(fd," rule.Ruleaction = |parser|{{ ")?;
492      let mut k = self.Gmr.Rules[i].rhs.len();
493
494      //form if-let labels and patterns as we go...
495      let mut labels = String::from("(");
496      let mut patterns = String::from("(");
497      while k>0 // k is length of right-hand side
498      {
499        let mut boxedlabel = false;  // see if named label is of form [x]
500        let gsym = &self.Gmr.Rules[i].rhs[k-1];
501        let findat = gsym.label.find('@');
502        let mut plab = format!("_item{}_",k-1);
503        match &findat {
504          None if gsym.label.len()>0 && !gsym.label.contains('(') => {
505            let rawlabel = gsym.label.trim();
506            let truelabel = checkboxlabel(rawlabel);
507            boxedlabel = truelabel != rawlabel;
508            plab = String::from(truelabel);             
509            // plab=format!("{}",gsym.label.trim());
510          },
511          Some(ati) if *ati>0 => {
512            let rawlabel = gsym.label[0..*ati].trim();
513            let truelabel = checkboxlabel(rawlabel);
514            boxedlabel = truelabel != rawlabel;
515            plab = String::from(truelabel);            
516          },
517          _ => {},
518        }//match
519        let poppedlab = plab.as_str();
520        if !boxedlabel {
521           write!(fd,"let mut {} = parser.popstack(); ",poppedlab)?;
522        } else {
523           write!(fd,"let mut {} = parser.popstack_as_lbox(); ",poppedlab)?;     
524        }
525        
526	if gsym.label.len()>1 && findat.is_some() { // if-let pattern
527	  let atindex = findat.unwrap();
528          if atindex>0 { // label like es:@Exp(..)@
529            //let varlab = &gsym.label[0..atindex];   //es before @: es:@..@
530            labels.push_str("&mut "); // for if-let
531            if boxedlabel {labels.push('*');}
532            labels.push_str(poppedlab); labels.push_str(".value,");
533            //write!(fd," let mut {}={}.value; ",varlab,poppedlab)?;
534          }
535          else { // non-labeled pattern: E:@..@
536            labels.push_str(poppedlab); labels.push_str(".value,");
537          }
538	  patterns.push_str(&gsym.label[atindex+1..]); patterns.push(',');
539	} // @@ pattern exists, with or without label
540	else if gsym.label.len()>0 && gsym.label.contains('(') // simple label like E:(a,b)
541        { // label exists but only simple pattern
542          labels.push_str(poppedlab); labels.push_str(".value,");
543          patterns.push_str(&gsym.label[..]); // non-mutable
544          patterns.push(',')
545        }// simple label
546        // else simple label is not a pattern, so do nothing
547        k -= 1;      
548      }// for each symbol on right hand side of rule
549      // form if let pattern=labels ...
550      let defaultaction = format!("<{}>::default()}}",absyn);
551      let mut semaction = &self.Gmr.Rules[i].action; //string that ends with }
552      if semaction.len()<=1 {semaction = &defaultaction;}
553      if labels.len()<2 { write!(fd,"{};\n",semaction.trim_end())?; } //empty pattern
554      else { // write an if-let
555        labels.push(')');  patterns.push(')');
556	write!(fd,"\n  if let {}={} {{ {}  else {{parser.bad_pattern(\"{}\")}} }};\n",&patterns,&labels,semaction.trim_end(),&patterns)?;
557      }// if-let semantic action
558
559      write!(fd," parser1.Rules.push(rule);\n")?;
560    }// for each rule
561    write!(fd," parser1.Errsym = \"{}\";\n",&self.Gmr.Errsym)?;
562    // resynch vector
563    for s in &self.Gmr.Resynch {write!(fd," parser1.resynch.insert(\"{}\");\n",s)?;}
564
565    // generate code to load RSM from TABLE
566    write!(fd,"\n for i in 0..{} {{\n",totalsize)?;
567    write!(fd,"   let symi = ((TABLE[i] & 0x0000ffff00000000) >> 32) as usize;\n")?;
568    write!(fd,"   let sti = ((TABLE[i] & 0xffff000000000000) >> 48) as usize;\n")?;
569    write!(fd,"   parser1.RSM[sti].insert(SYMBOLS[symi],decode_action(TABLE[i]));\n }}\n\n")?;
570//    write!(fd,"\n for i in 0..{} {{for k in 0..{} {{\n",rows,cols)?;
571//    write!(fd,"   parser1.RSM[i].insert(SYMBOLS[k],decode_action(TABLE[i*{}+k]));\n }}}}\n",cols)?;
572    write!(fd," for s in SYMBOLS {{ parser1.Symset.insert(s); }}\n\n")?;
573
574    write!(fd," load_extras(&mut parser1);\n")?;
575    write!(fd," return parser1;\n")?;
576    write!(fd,"}} //make_parser\n\n")?;
577
578      ////// WRITE parse_with and parse_train_with
579      let lexerlt = if has_lt {&ltopt} else {"<'t>"};
580      let traitlt = if has_lt {&self.Gmr.lifetime} else {"'t"};
581      let lexername = format!("{}lexer{}",&self.Gmr.name,lexerlt);
582      let abindex = *self.Gmr.enumhash.get(absyn).unwrap();
583      write!(fd,"pub fn parse_with{}(parser:&mut ZCParser<{},{}>, lexer:&mut dyn Tokenizer<{},{}>) -> Result<{},{}>\n{{\n",lexerlt,absyn,extype,traitlt,absyn,absyn,absyn)?;
584      write!(fd,"  let _xres_ = parser.parse(lexer); ")?;
585      write!(fd," if !parser.error_occurred() {{Ok(_xres_)}} else {{Err(_xres_)}}\n}}//parse_with public function\n")?;
586      // training version
587      write!(fd,"\npub fn parse_train_with{}(parser:&mut ZCParser<{},{}>, lexer:&mut dyn Tokenizer<{},{}>, parserpath:&str) -> Result<{},{}>\n{{\n",lexerlt,absyn,extype,traitlt,absyn,absyn,absyn)?;
588      write!(fd,"  let _xres_ = parser.parse_train(lexer,parserpath); ")?;
589      write!(fd," if !parser.error_occurred() {{Ok(_xres_)}} else {{Err(_xres_)}}\n}}//parse_train_with public function\n")?;
590
591
592    ////// WRITE LEXER
593    if self.Gmr.genlex { self.Gmr.genlexer(&mut fd,"from_raw")?; }
594
595    ////// AUGMENT!
596    write!(fd,"fn load_extras{}(parser:&mut ZCParser<{},{}>)\n{{\n",&ltopt,absyn,extype)?;
597    write!(fd,"}}//end of load_extras: don't change this line as it affects augmentation\n")?;
598    Ok(())
599  }//writezcparser
600
601
602
603/////////////////////LBA VERSION//////////////////////////////////////
   ///// semantic action fn is _semaction_for_{rule index}
605////////////////////////////////////////////////
606  //////////////////////////// write parser for LBox<dyn Any>
607  pub fn writelbaparser(&self, filename:&str)->Result<(),std::io::Error>
608  {
609    let ref absyn = self.Gmr.Absyntype;
610
611    if !is_lba(absyn) /*absyn!="LBox<dyn Any>" && absyn!="LBox<Any>"*/ {
612       return self.writezcparser(filename);
613    }
614    
615    let ref extype = self.Gmr.Externtype;
616    let ref lifetime = self.Gmr.lifetime;
617    let has_lt = lifetime.len()>0 && (absyn.contains(lifetime) || extype.contains(lifetime));
618    let ltopt = if has_lt {format!("<{}>",lifetime)} else {String::from("")};
619
620    let rlen = self.Gmr.Rules.len();
621    // generate action fn's from strings stored in gen-time grammar
622    let mut actions:Vec<String> = Vec::with_capacity(rlen);    
623    for ri in 0..rlen
624    {
625      let lhs = &self.Gmr.Rules[ri].lhs.sym;
626      let lhsi = &self.Gmr.Rules[ri].lhs.index;
627      //self.Gmr.Symhash.get(lhs).expect("GRAMMAR REPRESENTATION CORRUPTED");
628      let rettype = &self.Gmr.Symbols[*lhsi].rusttype; // return type
629      let ltoptr = if has_lt || (lifetime.len()>0 && rettype.contains(lifetime))
630        {format!("<{}>",lifetime)} else {String::from("")};
631      let mut fndef = format!("fn _semaction_for_{}_{}(parser:&mut ZCParser<{},{}>) -> {} {{\n",ri,&ltoptr,absyn,extype,rettype);
632
633      let mut k = self.Gmr.Rules[ri].rhs.len();
634      //form if-let labels and patterns as we go...
635      let mut labels = String::from("(");
636      let mut patterns = String::from("(");
637      while k>0 // k is length of right-hand side
638      {
639        let gsym = &self.Gmr.Rules[ri].rhs[k-1]; // rhs symbols right to left...
640        let gsymi = gsym.index; //*self.Gmr.Symhash.get(&gsym.sym).unwrap();
641        let findat = gsym.label.find('@');
642        let mut plab = format!("_item{}_",k-1);
643        match &findat {
644          None if gsym.label.len()>0 => {plab = format!("{}",&gsym.label);},
645          Some(ati) if *ati>0 => {plab=format!("{}",&gsym.label[0..*ati]);},
646          _ => {},
647        }//match
648        let poppedlab = plab.as_str();
649        let ref symtype = self.Gmr.Symbols[gsymi].rusttype; //gsym.rusttype;
650        let mut stat = format!("let mut {} = lbdown!(parser.popstack().value,{}); ",poppedlab,symtype);  // no longer stackitem but lbdown!
651        if symtype.len()<2 || symtype=="LBox<dyn Any>" || symtype=="LBox<Any>" {
652           stat = format!("let mut {} = parser.popstack().value; ",poppedlab);
653           // no need for lbdown if type is already LBA
654        }           
655        fndef.push_str(&stat);
656        // poppedlab now bound to lbdown!
657	if gsym.label.len()>1 && findat.is_some() { // if-let pattern
658          labels.push_str("&mut *"); // for if-let  // *box.exp gets value
659          labels.push_str(poppedlab); /*labels.push_str(".exp");*/ labels.push(',');
660          // closing @ trimed in grammar_processor.rs
661          let atindex = findat.unwrap();
662	  patterns.push_str(&gsym.label[atindex+1..]); patterns.push(',');
663	} // @@ pattern exists, with or without label
664        k -= 1;      
665      }// for each symbol on right hand side of rule (while k)
666      // form if let pattern=labels ...
667      let defaultaction = format!("<{}>::default()}}",rettype);
668      let mut semaction = &self.Gmr.Rules[ri].action; //string that ends w/ rbr
669      if semaction.len()<=1 {semaction = &defaultaction;}
670      if labels.len()<2 {
671        fndef.push_str(semaction.trim_end()); fndef.push_str("\n");
672      } //empty pattern
673      else { // write an if-let
674        labels.push(')');  patterns.push(')');
675	let pat2= format!("\n  if let {}={} {{ {}  else {{parser.report(\"{}\"); <{}>::default()}} }}\n",&patterns,&labels,semaction.trim_end(),&patterns,rettype);
676        fndef.push_str(&pat2);
677      }// if-let semantic action
678      actions.push(fndef);
679    }// generate action function for each rule  (for ri..
680
681    ////// write to file
682
683    let mut fd = File::create(filename)?;
684    write!(fd,"//Parser generated by rustlr for grammar {}",&self.Gmr.name)?;
685    write!(fd,"\n    
686#![allow(unused_variables)]
687#![allow(non_snake_case)]
688#![allow(non_camel_case_types)]
689#![allow(unused_parens)]
690#![allow(unused_mut)]
691#![allow(unused_imports)]
692#![allow(unused_assignments)]
693#![allow(dead_code)]
694#![allow(irrefutable_let_patterns)]
695use std::any::Any;
696use std::rc::Rc;
697use std::cell::RefCell;
698extern crate rustlr;
699use rustlr::{{Tokenizer,TerminalToken,ZCParser,ZCRProduction,Stateaction,decode_action,LBox,lbdown,lbup,lbget,unbox}};\n")?;
700    if self.Gmr.genlex {
701      write!(fd,"use rustlr::{{StrTokenizer,RawToken,LexSource}};
702use std::collections::{{HashMap,HashSet}};\n")?;
703    }
704
705    write!(fd,"{}\n",&self.Gmr.Extras)?; // use clauses and such
706
707    // write static array of symbols
708    write!(fd,"static SYMBOLS:[&'static str;{}] = [",self.Gmr.Symbols.len())?;
709    for i in 0..self.Gmr.Symbols.len()-1
710    {
711      write!(fd,"\"{}\",",&self.Gmr.Symbols[i].sym)?;
712    }
713    write!(fd,"\"{}\"];\n\n",&self.Gmr.Symbols[self.Gmr.Symbols.len()-1].sym)?;
714    // position of symbols must be inline with self.Gmr.Symhash
715
716    // record table entries in a static array
717    let mut totalsize = 0;
718    for i in 0..self.FSM.len() { totalsize+=self.FSM[i].len(); }
719    write!(fd,"static TABLE:[u64;{}] = [",totalsize)?;
720    // generate table to represent FSM
721    let mut encode:u64 = 0;
722    for i in 0..self.FSM.len() // for each state index i
723    {
724      let row = &self.FSM[i];                          ////////LBA VERSION
725      for key in row.keys()
726      { // see function decode for opposite translation
727        let k = *key; //*self.Gmr.Symhash.get(key).unwrap(); // index of symbol
728        encode = ((i as u64) << 48) + ((k as u64) << 32);
729        match row.get(key) {
730          Some(Shift(statei)) => { encode += (*statei as u64) << 16; },
731          Some(Gotonext(statei)) => { encode += ((*statei as u64) << 16)+1; },
732          Some(Reduce(rulei)) => { encode += ((*rulei as u64) << 16)+2; },
733          Some(Accept) => {encode += 3; },
734          _ => {encode += 4; },  // 4 indicates Error
735        }//match
736        write!(fd,"{},",encode)?;
737      } //for symbol index k
738    }//for each state index i
739    write!(fd,"];\n\n")?;
740
741    // write action functions
742    for deffn in &actions { write!(fd,"{}",deffn)?; }
743
744    // must know what absyn type is when generating code.
745    write!(fd,"\npub fn make_parser{}() -> ZCParser<{},{}>",&ltopt,absyn,extype)?; 
746    write!(fd,"\n{{\n")?;
747    // write code to pop stack, assign labels to variables.
748    write!(fd," let mut parser1:ZCParser<{},{}> = ZCParser::new({},{});\n",absyn,extype,self.Gmr.Rules.len(),self.FSM.len())?;
749    // generate rules and Ruleaction delegates to call action fns
750     write!(fd," let mut rule = ZCRProduction::<{},{}>::new_skeleton(\"{}\");\n",absyn,extype,"start")?; // dummy for init
751    for i in 0..self.Gmr.Rules.len() 
752    {
753      write!(fd," rule = ZCRProduction::<{},{}>::new_skeleton(\"{}\");\n",absyn,extype,self.Gmr.Rules[i].lhs.sym)?;
754      write!(fd," rule.Ruleaction = |parser|{{ ")?;
755
756    // write code to call action function, then enclose in lba
757      let lhsi = self.Gmr.Symhash.get(&self.Gmr.Rules[i].lhs.sym).expect("GRAMMAR REPRESENTATION CORRUPTED");
758      let fnname = format!("_semaction_for_{}_",i);
759      let typei = &self.Gmr.Symbols[*lhsi].rusttype;
760      if is_lba(typei) {
761        write!(fd," {}(parser) }};\n",&fnname)?;
762      }
763      else {
764        write!(fd," lbup!( LBox::new({}(parser),parser.linenum,parser.column)) }};\n",&fnname)?;
765      }
766      write!(fd," parser1.Rules.push(rule);\n")?;
767    }// write each rule action
768    
769    
770    write!(fd," parser1.Errsym = \"{}\";\n",&self.Gmr.Errsym)?;
771    // resynch vector
772    for s in &self.Gmr.Resynch {write!(fd," parser1.resynch.insert(\"{}\");\n",s)?;}
773
774    // generate code to load RSM from TABLE
775    write!(fd,"\n for i in 0..{} {{\n",totalsize)?;
776    write!(fd,"   let symi = ((TABLE[i] & 0x0000ffff00000000) >> 32) as usize;\n")?;
777    write!(fd,"   let sti = ((TABLE[i] & 0xffff000000000000) >> 48) as usize;\n")?;
778    write!(fd,"   parser1.RSM[sti].insert(SYMBOLS[symi],decode_action(TABLE[i]));\n }}\n\n")?;
779//    write!(fd,"\n for i in 0..{} {{for k in 0..{} {{\n",rows,cols)?;
780//    write!(fd,"   parser1.RSM[i].insert(SYMBOLS[k],decode_action(TABLE[i*{}+k]));\n }}}}\n",cols)?;
781    write!(fd," for s in SYMBOLS {{ parser1.Symset.insert(s); }}\n\n")?;
782
783    write!(fd," load_extras(&mut parser1);\n")?;
784    write!(fd," return parser1;\n")?;
785    write!(fd,"}} //make_parser\n\n")?;
786
787    ////// WRITE ENUM (test)
788    if !self.Gmr.sametype { self.Gmr.gen_enum(&mut fd)?; }
789    
790    ////// WRITE LEXER
791    if self.Gmr.genlex { self.Gmr.genlexer(&mut fd,"raw_to_lba")?; }
792
793    ////// Augment!
794    write!(fd,"fn load_extras{}(parser:&mut ZCParser<{},{}>)\n{{\n",&ltopt,absyn,extype)?;
795    write!(fd,"}}//end of load_extras: don't change this line as it affects augmentation\n")?;
796    Ok(())
797  }//writelbaparser
798
799
800//write-verbose no longer supported
801} // impl Statemachine
802/*
803////// independent function
804    fn iserror(actionopt:&Option<&Stateaction>) -> bool
805    {
806       match actionopt {
807           None => true,
808           Some(Error(_)) => true,
809           _ => false,
810         }
811    }//iserror
812*/
813////// independent function
814  #[cfg(feature = "generator")]
/// Reports whether the type name `t` looks like an `LBox` of `Any`, such as
/// `LBox<dyn Any>`.  The decision is made purely by substring tests, so the
/// spacing inside the angle brackets does not matter.
fn is_lba(t: &str) -> bool {
    // Only the prefix test ignores surrounding whitespace; the remaining
    // fragments may occur anywhere in the text.
    if !t.trim().starts_with("LBox") {
        return false;
    }
    ["Any", "<", ">"].iter().all(|fragment| t.contains(*fragment))
} // is_lba
822
823
824///////////////////////////////////////////////////////////////////////////
825////// reimplementing the parsing algorithm more modularly, with aim of
826////// allowing custom parsers
827//////////// errors should compile a report
828impl<AT:Default,ET:Default> ZCParser<AT,ET>
829{
830  /// Error recovery routine of rustlr, separate from error_reporter.
831  /// This function will modify the parser and lookahead symbol and return
832  /// either the next action the parser should take (if recovery succeeded)
833  /// or None if recovery failed.
834  pub fn error_recover<'t>(&mut self, lookahead:&mut TerminalToken<'t,AT>, tokenizer:&mut dyn Tokenizer<'t,AT>) -> Option<Stateaction>
835  {
836    let mut erraction = None;
837    ///// prefer to ue Errsym method
838    if self.Errsym.len()>0 {
839      let errsym = self.Errsym;
840      // lookdown stack for state with transition on Errsym
841      // but that could be current state too (start at top)
842      let mut k = self.stack.len(); // offset by 1 because of usize
843      let mut spos = k+1;
844      while k>0 && spos>k
845      {
846        let ksi = self.stack[k-1].si;
847        erraction = self.RSM[ksi].get(errsym);
848        if let None = erraction {k-=1;} else {spos=k;}
849      }//while k>0
850      if spos==k { self.stack.truncate(k); } // new current state revealed
851      // run all reduce actions that are valid before the Errsym:
852      while let Some(Reduce(ri)) = erraction // keep reducing
853      {
854       //self.reduce(ri); // borrow error- only need mut self.stack
855              self.popped.clear();
856              let rulei = &self.Rules[*ri];
857              let ruleilhs = rulei.lhs; // &'static : Copy
858              //let mut dummy = RuntimeParser::new(1,1);
859              let val = (rulei.Ruleaction)(self); 
860              let newtop = self.stack[self.stack.len()-1].si; 
861              let gotonopt = self.RSM[newtop].get(ruleilhs);
862              match gotonopt {
863                Some(Gotonext(nsi)) => { 
864                  //self.stack.push(Stackelement{si:*nsi,value:val});
865                  self.stack.push(StackedItem::new(*nsi,val,self.linenum,self.column)); 
866                },// goto next state after reduce
867                _ => {self.abort("recovery failed"); },
868              }//match
869              // end reduce
870       
871              let tos=self.stack[self.stack.len()-1].si;
872              erraction = self.RSM[tos].get(self.Errsym).clone();
873      } // while let erraction is reduce
874      // remaining defined action on Errsym must be shift
875      if let Some(Shift(i)) = erraction { // simulate shift errsym 
876          self.stack.push(StackedItem::new(*i,AT::default(),lookahead.line,lookahead.column));
877          // keep lookahead until action is found that transitions from
878          // current state (i). but skipping ahead without reducing
879          // the error production is not a good idea.  This implementation
880	  // does NOT assume that everything following the ERROR symbol is
881	  // terminal.
882          while let None = self.RSM[*i].get(lookahead.sym) {
883            if lookahead.sym=="EOF" {break;}
884            *lookahead = tokenizer.next_tt();
885          }//while let
886          // either at end of input or found action on next symbol
887          erraction = self.RSM[*i].get(lookahead.sym);
888      } // if shift action found down under stack
889    }//errsym exists
890
891    // at this point, if erraction is None, then Errsym failed to recover,
892    // try the resynch symbol method next ...
893    if iserror(&erraction) && self.resynch.len()>0 {
894      while lookahead.sym!="EOF" &&
895        !self.resynch.contains(lookahead.sym) {
896        self.linenum = lookahead.line; self.column = lookahead.column; self.prev_position=self.position; self.position = tokenizer.position();
897        *lookahead = tokenizer.next_tt();
898      }//while
899      if lookahead.sym!="EOF" {
900        // look for state on stack that has action defined on next symbol
901        self.linenum = lookahead.line; self.column = lookahead.column; self.prev_position=self.position; self.position=tokenizer.position();
902        *lookahead = tokenizer.next_tt();
903      }
904      let mut k = self.stack.len()-1; // offset by 1 because of usize
905      let mut position = 0;
906      while k>0 && erraction==None
907      {
908         let ksi = self.stack[k-1].si;
909         erraction = self.RSM[ksi].get(lookahead.sym);
910         if let None=erraction {k-=1;}
911      }//while k>0 && erraction==None
912      match erraction {
913        None => {}, // do nothing, whill shift next symbol
914        _ => { self.stack.truncate(k);},//pop stack
915      }//match
916   }// there are resync symbols
917
918   // at this point, if erraction is None, then resynch recovery failed too.
919   // only action left is to skip ahead...
920   let mut eofcx = 0;
921   while iserror(&erraction) && eofcx<1 { //skip input
922      self.linenum = lookahead.line; self.column = lookahead.column; self.prev_position=self.position; self.position=tokenizer.position();
923      *lookahead = tokenizer.next_tt();
924      //*lookahead = self.nexttoken();
925      if lookahead.sym=="EOF" {eofcx+=1;}
926      let csi =self.stack[self.stack.len()-1].si;
927      erraction = self.RSM[csi].get(lookahead.sym);
928   }// skip ahead
929   match erraction {
930     Some(act) if eofcx<1 => Some(*act),
931     _ => None,
932   }//return match
933  }//error_recover function
934
935  /// resets parser, including external state
936  pub fn reset(&mut self) {
937    self.stack.clear();
938    self.err_occurred = false;
939    let mut result = AT::default();
940    self.exstate = ET::default();
941  }//reset
942
943  /// Retrieves recorded error report.  This function will return an empty string
944  /// if [ZCParser::set_err_report] is not called.  It will also return an
945  /// empty string if there was no error
946  pub fn get_err_report(&self) -> &str {
947    self.err_report.as_deref().unwrap_or("")
948  }
949
950  /// When given true as argument, this option will disable the output of
951  /// parser errors to stderr, and instead log them internally until retrieved
952  /// with [ZCParser::get_err_report].  Each call to this function will
953  /// clear the previous report and begin a new one.
954  /// If the bool argument is false, it will erase and turn off error logging
955  /// and print all parser errors to stderr.  This function does not affect
956  /// interactive training, which uses stdio.
957  pub fn set_err_report(&mut self, onof:bool) {
958    if onof {self.err_report = Some(String::new());}
959    else {self.err_report = None;}
960  }
961
962
963}//impl ZCParser 2
964
965
966
967/////////////////////////////////////////////////////////////////////////
968/////////////// new approach using more flexible trait object
969
970/// A trait object that implements ErrReporter is expected by the [ZCParser::parse_core]
971/// function, which implements the basic LR parsing algorithm using the
972/// generated state machine.  The struct [StandardReporter] is provided as
973/// the default ErrReporter that uses standard I/O as interface and has the
974/// ability to train the parser.  But other implementations of the trait
975/// can be created that use different interfaces, such as a graphical IDE.
976///
977/// This trait replaces [crate::ErrHandler] in the [crate::runtime_parser] module.
978pub trait ErrReporter<AT:Default,ET:Default> // not same as error recovery
979{
980  fn err_reporter(&mut self, parser:&mut ZCParser<AT,ET>, lookahead:&TerminalToken<AT>, erropt:&Option<Stateaction>, tokenizer:& dyn Tokenizer<'_,AT>);
981  fn report_err(&self, parser:&mut ZCParser<AT,ET>, msg:&str) { parser.report(msg) }
982//  fn training_mode(&self, parser:&ZCParser<AT,ET>) -> bool {false}
983//  fn interactive_mode(&self, parser:&ZCParser<AT,ET>) -> bool {false}
984}// ErrReporter trait  // not same as RuntimeParser::ErrHandler
985
986/*
987The structure here is a bit strange.  The script file is written to in
988interactive training mode and read from in script-training mode.  However,
989the actual modification of the parser file is done after the training, by
990the augmenter module.  Thus there's another wrapper function that's needed
991besides the creation of the right kind of StandardReporter.
992*/
993
994impl<AT:Default,ET:Default> ErrReporter<AT,ET> for StandardReporter
995{
996  // this function will be able to write training script to file
997  fn err_reporter(&mut self, parser:&mut ZCParser<AT,ET>, lookahead:&TerminalToken<AT>, erropt:&Option<Stateaction>, tokenizer:& dyn Tokenizer<'_,AT>)
998 { 
999  let mut wresult:std::io::Result<()> = Err(std::io::Error::new(std::io::ErrorKind::Other,"")); // dummy
1000  // known that actionop is None or Some(Error(_))
1001  let cstate = parser.stack[parser.stack.len()-1].si; // current state
1002  let mut actionopt = if let Some(act)=erropt {Some(act)} else {None};
1003  let lksym = &lookahead.sym[..];
1004  // is lookahead recognized as a grammar symbol?
1005  // if actionopt is NONE, check entry for ANY_ERROR            
1006  if parser.Symset.contains(lksym) {
1007     if let None=actionopt {
1008        actionopt = parser.RSM[cstate].get("ANY_ERROR");
1009     }
1010  }// lookahead is recognized grammar sym
1011  else {
1012     actionopt = parser.RSM[cstate].get("ANY_ERROR");
1013  }// lookahead is not a grammar sym
1014  let mut errmsg = if let Some(Error(em)) = &actionopt {
1015    format!("unexpected symbol '{}' on line {}, column {}: ** {} ** ..",lksym,lookahead.line,lookahead.column,em.trim())
1016  } else {format!("unexpected symbol '{}' on line {}, column {} .. ",lksym,lookahead.line,lookahead.column)};
1017
1018  ////// augment errmsg with current line (version 0.2.6)
1019  let srcline = tokenizer.current_line();
1020  if (srcline.len()>0) {
1021    errmsg.push_str("\n >>");
1022    errmsg.push_str(srcline);
1023    errmsg.push_str("\n");
1024    let mut cln = lookahead.column+2;
1025    while cln>0 { errmsg.push(' '); cln-=1; }
1026    //let mut tokenlen = srcline[cln-2..].find(char::is_whitespace).unwrap_or(1);
1027    let mut tokenlen = lookahead.sym.len();
1028    if is_alphanum(&lookahead.sym) {tokenlen = 3;}
1029    while tokenlen>0 { errmsg.push('^'); tokenlen-=1; }
1030    errmsg.push('\n');
1031  }// augment errmsg with current line
1032  
1033  parser.report(&errmsg);
1034
1035  if self.training {          ////// Training mode
1036    let csym = lookahead.sym.to_owned();
1037    let mut inp = String::from("");    
1038   if let None=self.scriptinopt {  // interactive mode
1039   if let Some(outfd1) = &self.scriptoutopt {
1040    let mut outfd = outfd1;
1041    print!("\n>>>TRAINER: if this message is not adequate (for state {}), enter a replacement (default no change): ",cstate);
1042    let rrrflush = io::stdout().flush();
1043    if let Ok(n) = io::stdin().read_line(&mut inp) {
1044       if inp.len()>5 && parser.Symset.contains(lksym) {
1045         print!(">>>TRAINER: should this message be given for all unexpected symbols in the current state? (default yes) ");
1046        let rrrflush2 = io::stdout().flush();
1047        let mut inp2 = String::new();
1048        if let Ok(n) = io::stdin().read_line(&mut inp2) {
1049            if inp2.trim()=="no" || inp2.trim()=="No" {
1050               wresult = write!(outfd,"{}\t{}\t{} ::: {}\n",lookahead.line,lookahead.column,&csym,inp.trim());
1051               self.trained.insert((cstate,csym),inp);
1052            }
1053            else  {// insert for any error
1054               wresult = write!(outfd,"{}\t{}\t{} ::: {}\n",lookahead.line,lookahead.column,"ANY_ERROR",inp.trim());
1055               self.trained.insert((cstate,String::from("ANY_ERROR")),inp);
1056            }
1057        }// read ok
1058       }// unexpected symbol is grammar sym
1059       else if inp.len()>5 && !parser.Symset.contains(lksym) {
1060         wresult = write!(outfd,"{}\t{}\t{} ::: {}\n",lookahead.line,lookahead.column,"ANY_ERROR",inp.trim());
1061         self.trained.insert((cstate,String::from("ANY_ERROR")),inp);
1062       }
1063    }// process user response
1064   }}// interactive mode
1065   else { // training from script mode (non-interactive)
1066    if let Some(brfd) = &mut self.scriptinopt {
1067     let mut scin = brfd;
1068     let mut readn = 0;
1069     while readn < 1
1070     {
1071       inp = String::new();
1072       match scin.read_line(&mut inp) {
1073         Ok(n) if n>1 && &inp[0..1]!="#" && inp.trim().len()>0 => {readn=n;},
1074         Ok(n) if n>0 => { readn=0; }, // keep reading
1075         _ => {readn = 1; } // stop - this means End of Stream
1076       }//match
1077       if readn>1 { // read something
1078         let inpsplit:Vec<&str> = inp.split_whitespace().collect();
1079         if inpsplit.len()>4 && inpsplit[3].trim()==":::" {
1080           let inline = inpsplit[0].trim().parse::<usize>().unwrap();
1081           let incolumn = inpsplit[1].trim().parse::<usize>().unwrap();
1082           let insym = inpsplit[2].trim();
1083           if parser.linenum==inline && parser.column==incolumn {
1084             if &csym==insym || insym=="ANY_ERROR" {
1085               let posc = inp.find(":::").unwrap()+4;
1086               println!("\n>>>Found matching entry from training script for {}, error message: {}",insym,&inp[posc..]);
1087               self.trained.insert((cstate,String::from(insym)),String::from(&inp[posc..]));
1088             } // unexpected symbol match
1089           }// line/column match
1090         }//inpsplit check
1091       }// valid training line read
1092     }//while readn<2
1093   }}//training from script mode
1094  }//if training   //// END TRAINING MODE
1095  
1096 }// standardreporter function
1097}// impl ErrReporter for StandardReporter
1098
1099
1100/////////////////////////////////////////////////////////////
1101//////////////// parse_core replaced: now uses zc tokenizer
1102impl<AT:Default,ET:Default> ZCParser<AT,ET>
1103{
1104  /// This function provides a core parser that uses the LR state machine
1105  /// generated by rustlr.  It takes as trait objects a tokenizer and an
1106  /// [ErrReporter] object that handles the display of error messages.
1107  /// This function will reset the parse stack but it will not reset the
1108  /// Tokenizer or the *external state* of the parser.
1109  pub fn parse_core<'u,'t:'u>(&mut self, tokenizer:&'u mut dyn Tokenizer<'t,AT>, err_handler:&mut dyn ErrReporter<AT,ET>) -> AT
1110  {
1111    self.stack.clear();
1112    self.err_occurred = false;
1113    let mut result = AT::default();
1114    //self.exstate = ET::default();
1115    self.stack.push(StackedItem::new(0,AT::default(),0,0));
1116    self.stopparsing = false;
1117    let mut action = Stateaction::Error("");
1118    let mut lookahead = TerminalToken::new("EOF",AT::default(),0,0); //just init
1119    // nextsym() should only be called here
1120    if let Some(tok) = tokenizer.nextsym() {lookahead=tok;}
1121    //else {self.stopparsing=true;}
1122
1123    while !self.stopparsing
1124    {
1125      let tos = self.stack.len()-1;
1126      self.linenum = self.stack[tos].line;
1127      self.column=self.stack[tos].column;
1128      //self.prev_position = tokenizer.previous_position();
1129      //self.position = tokenizer.position();
1130      let currentstate = self.stack[tos].si;
1131      let mut actionopt = self.RSM[currentstate].get(lookahead.sym);
1132
1133      if actionopt.is_none() && lookahead.sym!="EOF" { // added in version 0.2.9
1134        actionopt = self.RSM[currentstate].get("_WILDCARD_TOKEN_");
1135        // added for 0.2.94:
1136        lookahead = tokenizer.transform_wildcard(lookahead);
1137      }
1138
1139      let actclone:Option<Stateaction> = match actionopt {
1140        Some(a) => Some(*a),
1141        None => None,
1142      };
1143      if iserror(&actionopt) {  // either None or Error
1144        if !self.err_occurred {self.err_occurred = true;}
1145        
1146        err_handler.err_reporter(self,&lookahead,&actclone, tokenizer);
1147        
1148        match self.error_recover(&mut lookahead,tokenizer) {
1149          None => { self.stopparsing=true; break; }
1150          Some(act) => {action = act;}, // lookahead=la;},
1151        }//match
1152      }// iserror
1153      else { action = actclone.unwrap(); }
1154      match &action {
1155        Shift(nextstate) => {
1156           lookahead = self.shift(*nextstate,lookahead,tokenizer);
1157        },
1158        Reduce(rulei) => { self.reduce(rulei); },
1159        Accept => {
1160          self.stopparsing=true;
1161          if self.stack.len()>0 {result = self.stack.pop().unwrap().value;}
1162          else {self.err_occurred=true;}
1163        },
1164        _ => {}, // continue
1165      }//match action
1166    }// main parse loop
1167    return result;
1168  }//parse_core
1169
1170  ///provided generic parsing function that reports errors on std::io. 
1171  pub fn parse<'t>(&mut self, tokenizer:&mut dyn Tokenizer<'t,AT>) -> AT
1172  {
1173    let mut stdeh = StandardReporter::new();
1174    self.parse_core(tokenizer,&mut stdeh) 
1175  }//parse_stdio
1176
1177  ///Parses in interactive training mode with provided path to parserfile.
1178  ///The parser file will be modified and a training script file will be
1179  ///created for future retraining after grammar is modified. 
1180  ///
1181  /// When an error occurs, the parser will
1182    /// ask the human trainer for an appropriate error message: it will
1183    /// then insert an entry into its state transition table to
1184    /// give the same error message on future errors of the same type.
1185    /// If the error is caused by an unexpected token that is recognized
1186    /// as a terminal symbol of the grammar, the trainer can select to
1187    /// enter the entry 
1188    /// under the reserved ANY_ERROR symbol. If the unexpected token is
1189    /// not recognized as a grammar symbol, then the entry will always
1190    /// be entered under ANY_ERROR.  ANY_ERROR entries for a state will match
1191    /// all future unexpected symbols for that state: however, entries for
1192    /// valid grammar symbols will still override the generic entry.
1193    ///
1194    /// Example: with the parser for this [toy grammar](https://cs.hofstra.edu/~cscccl/rustlr_project/cpm.grammar), parse_train can run as follows:
1195    ///```ignore
1196    ///  Write something in C+- : cout << x y ;   
1197    ///  ERROR on line 1, column 0: unexpected symbol y ..
1198    ///  >>>TRAINER: is this error message adequate? If not, enter a better one: need another <<                   
1199    ///  >>>TRAINER: should this message be given for all unexpected symbols in the current state? (default yes) yes
1200    ///```
1201    /// (ignore the column number as the lexer for this toy language does not implement it)
1202    ///
1203    /// parse_train will then produced a [modified parser](https://cs.hofstra.edu/~cscccl/rustlr_project/cpmparser.rs) as specified
1204    /// by the filename (path) argument.  When the augmented parser is used, it will
1205    /// give a more helpful error message:
1206    ///```
1207    /// Write something in C+- : cout << x endl
1208    /// ERROR on line 1, column 0: unexpected symbol endl, ** need another << ** ..
1209    ///```
1210    ///
1211    /// parse_stdio_train calls parse_stdio, which uses stdin/stdout for user interface.
1212    /// Parsing in interactive training mode also produces a [training script file](http://cs.hofstra.edu/~cscccl/rustlr_project/cpmparser.rs_script.txt) which can
1213    /// be used to re-train a parser using [ZCParser::train_from_script]. 
1214    /// This is useful after a grammar is modified with extensions to a language.
1215  pub fn parse_train<'t>(&mut self, tokenizer:&mut dyn Tokenizer<'t,AT>, parserfile:&str) -> AT
1216    {
1217      let mut stdtrainer = StandardReporter::new_interactive_training(parserfile);
1218      let result = self.parse_core(tokenizer,&mut stdtrainer);
1219      if let Err(m) = stdtrainer.augment_training(parserfile) {
1220        eprintln!("Error in augmenting parser: {:?}",m)
1221      }
1222
1223      return result;
1224    }//parse_stdio_train
1225
1226  /// trains parser from a [training script](https://cs.hofstra.edu/~cscccl/rustlr_project/cpmparser.rs_script.txt)
1227  /// created by interactive training.  This
1228  /// is intended to be used after a grammar has been modified and the parser
1229  /// is regenerated with different state numbers.  It is the user's
1230  /// responsibility to keep consistent the parser file, script file, and sample
1231  /// input that was used when the script was created.  The script contains
1232  /// the line and column numbers of each error encountered, along with either
1233  /// the unexpected symbol that caused the error, or the reserved ANY_ERROR
1234  /// symbol if the error message is to be applied to all unexpected symbols.
1235  /// These entries must match, in sequence, the errors encountered during
1236  /// retraining - it is therefore recommended that the same tokenizer be used
1237  /// during retraining so that the same line/column information are given.
1238  /// The trainer will augment the parser (parserfile) with new Error
1239  /// entries, overriding any previous ones.  It is also recommended that the
1240  /// user examines the "load_extras" function that appears at the end of
1241  /// the [augmented parser](https://cs.hofstra.edu/~cscccl/rustlr_project/cpmparser.rs).
1242  /// The train_from_script function does not return
1243  /// a value, unlike [ZCParser::parse] and [ZCParser::parse_train].
1244  pub fn train_from_script<'t>(&mut self, tokenizer:&mut dyn Tokenizer<'t,AT>,parserfile:&str, scriptfile:&str)
1245  {
1246      let mut stdtrainer = StandardReporter::new_script_training(parserfile,scriptfile);
1247      let result = self.parse_core(tokenizer,&mut stdtrainer);
1248      if let Err(m) = stdtrainer.augment_training(parserfile) {
1249        eprintln!("Error in augmenting parser: {:?}",m)
1250      }
1251      if !self.err_occurred {println!("no errors encountered during parsing");}
1252  }//train_from_script
1253
1254}// 3rd impl ZCParser
1255#[cfg(feature = "generator")]
/// If the label `s` has the form `[x]`, returns `x` with surrounding
/// whitespace trimmed; any other string is returned unchanged.
fn checkboxlabel(s: &str) -> &str {
    s.strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))
        .map_or(s, str::trim)
} // checkboxlabel
1260
/// Reports whether `x` is shaped like an identifier: a first character that
/// is `_` or alphabetic, followed only by `_` or alphanumeric characters.
/// The empty string is rejected.
fn is_alphanum(x: &str) -> bool {
    let mut rest = x.chars();
    match rest.next() {
        Some(first) if first == '_' || first.is_alphabetic() => {
            rest.all(|c| c == '_' || c.is_alphanumeric())
        }
        _ => false,
    }
} // is_alphanum