rustlr/
base_parser.rs

//! This module implements a new version of the runtime parser that
//! uses the LR state machine generated by rustlr.  It will (for now) live
//! alongside the original parser implemented as [crate::RuntimeParser].
//! Since version 0.2.3, this module can also generate a basic lexical
//! scanner based on [crate::RawToken] and [crate::StrTokenizer].
//!
//! This module implements the parsing routines that use the state machine
//! generated by rustlr.  **The main structure here is [BaseParser]**.
//! All parsing functions are organized around the [BaseParser::parse_core]
//! function, which implements the basic LR parsing algorithm.  This function
//! expects dynamic [Tokenizer] and [ErrReportMaker] trait-objects.
//! This module provides generic
//! parsing and parser-training routines that use stdio for interface, but
//! the [ErrReportMaker] trait allows custom user interfaces to be built separately.
15
16#![allow(dead_code)]
17#![allow(unused_variables)]
18#![allow(non_snake_case)]
19#![allow(non_camel_case_types)]
20#![allow(unused_parens)]
21#![allow(unused_mut)]
22#![allow(unused_assignments)]
23#![allow(unused_doc_comments)]
24#![allow(unused_imports)]
25use std::fmt::Display;
26use std::default::Default;
27use std::collections::{HashMap,HashSet,BTreeSet};
28use std::io::{self,Read,Write,BufReader,BufRead};
29use std::rc::Rc;
30use std::cell::{RefCell,Ref,RefMut};
31use std::hash::{Hash,Hasher};
32use std::any::Any;
33use std::fs::File;
34use std::io::prelude::*;
35use std::path::Path;
36use std::mem;
37//use crate::{Stateaction,Statemachine,TerminalToken,Tokenizer};
38use crate::{Stateaction,iserror,TerminalToken,Tokenizer};
39use crate::{LBox,LRc,LC};
40use crate::Stateaction::*;
41use crate::{lbup,lbdown,lbget};
42use crate::{StandardReporter,StackedItem};
43#[cfg(feature = "generator")]
44use crate::{Statemachine};
45
46//extern crate termion;
47//use termion::{color,style};
48
49
50/// this structure is only exported because it is required by the generated parsers.
51/// There is no reason to use it in other programs.  Replaces [crate::RProduction] for new parsers since version 0.2.0
52#[derive(Clone)]
53pub struct BaseProduction<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>>  
54{
55  pub lhs: &'static str, // left-hand side nonterminal of rule
56  pub Ruleaction : fn(&mut BaseParser<'t,AT,ET,TT>) -> AT, //parser as arg
57}
58impl<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>> BaseProduction<'t,AT,ET,TT>
59{
60  pub fn new_skeleton(lh:&'static str) -> BaseProduction<'t,AT,ET,TT>
61  {
62     BaseProduction {
63       lhs : lh,
64       Ruleaction : |p|{ <AT>::default() },
65     }
66  }
67}//impl BaseProduction
68
69/* imported from runtime_parser module
70/// These structures are what's on the parse stack.
71pub struct StackedItem<AT:Default>   // replaces Stackelement
72{
73   si : usize, // state index
74   pub value : AT, // semantic value (don't clone grammar symbols)
75   pub line: usize,  // line and column
76   pub column: usize, 
77}
78impl<AT:Default> StackedItem<AT>
79{
80  pub fn new(si:usize,value:AT,line:usize,column:usize) -> StackedItem<AT>
81  { StackedItem{si,value,line,column} }
82  /// converts the information in a stacked item to an [LBox] enclosing
83  /// the abstract syntax value along with starting line and column numbers
84  pub fn lbox(self) -> LBox<AT>  // no longer used
85  {  LBox::new(self.value,self.line,self.column) }
86}
87*/
88
89/// This is the structure created by the generated parser.  The generated parser
90/// program will contain a make_parser function that returns this structure.
91/// Most of the pub items are, however, only exported to support the operation
92/// of the parser, and should not be accessed directly.  Only the functions
93/// [BaseParser::parse], [BaseParser::report], [BaseParser::abort],
94/// [BaseParser::error_occurred], [BaseParser::get_tokenizer] and
95/// [BaseParser::swap_tokenizer] should be called directly 
96/// from user programs.  Only the field [BaseParser::exstate] should be accessed
97/// by user programs.
98pub struct BaseParser<'ilt,AT:Default,ET:Default,TT:Tokenizer<'ilt,AT>>  
99{
100  /// this is the "external state" structure, with type ET defined by the grammar.
101  /// The semantic actions associated with each grammar rule, which are written
102  /// in the grammar, have ref mut access to the BaseParser structure, which
103  /// allows them to read and change the external state object.  This gives
104  /// the parsers greater flexibility and capability, including the ability to
105  /// parse some non-context free languages.  See 
106  /// [this sample grammar](<https://cs.hofstra.edu/~cscccl/rustlr_project/ncf.grammar>).
107  /// The exstate is initialized to ET::default().
108  pub exstate : ET,  // external state structure, usage optional
109  /// External state that can be shared
110  pub shared_state : Rc<RefCell<ET>>,
111  /// used only by generated parser: do not reference
112  pub RSM : Vec<HashMap<&'static str,Stateaction>>,  // runtime state machine
113  // do not reference
114  //pub Expected : Vec<Vec<&'static str>>,
115  /// do not reference
116  pub Rules : Vec<BaseProduction<'ilt,AT,ET,TT>>, //rules with just lhs and delegate function
117  ////// this value should be set through abort or report
118  stopparsing : bool,
119  /// do not reference  
120  pub stack :  Vec<StackedItem<AT>>, // parse stack
121//  pub recover : HashSet<&'static str>, // for error recovery
122  pub resynch : HashSet<&'static str>,
123  pub Errsym : &'static str,
124  err_occurred : bool,
125  /// axiom: linenum and column represents the starting position of the
126  /// topmost StackedItem.
127  pub linenum : usize,
128  pub column : usize,
129  pub position : usize, // absolute byte position of input
130  pub prev_position : usize,
131  pub src_id : usize,
132  report_line : usize,
133  /// Hashset containing all grammar symbols (terminal and non-terminal). This is used for error reporting and training.
134  pub Symset : HashSet<&'static str>,
135  pub tokenizer: TT,
136  popped : Vec<(usize,usize)>,
137  gindex : RefCell<u32>,  // global index for uid
138  err_report : Option<String>, // optional err report with logging reporter
139}//struct BaseParser
140
141// 't is input lifetime
142impl<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>> BaseParser<'t,AT,ET,TT>
143{
144    /// this is only called by the make_parser function in the machine-generated
145    /// parser program.  *Do not call this function in other places* as it
146    /// only generates a skeleton.
147    pub fn new(rlen:usize, slen:usize, tk: TT) -> Self
148    {  // given number of rules and number states
149       let mut p = BaseParser {
150         RSM : Vec::with_capacity(slen),
151         //Expected : Vec::with_capacity(slen),
152         Rules : Vec::with_capacity(rlen),
153         stopparsing : false,
154         exstate : ET::default(),
155         shared_state: Rc::new(RefCell::new(ET::default())),
156         stack : Vec::with_capacity(1024),
157         Errsym : "",
158         err_occurred : false,
159         linenum : 0,
160         column : 0,
161         position : 0,
162         prev_position: 0,
163         src_id : 0,
164         report_line : 0,
165         resynch : HashSet::new(),
166         Symset : HashSet::with_capacity(64),
167         tokenizer:tk,
168         popped: Vec::with_capacity(8),
169         gindex: RefCell::new(0),
170         err_report : None,
171       };
172       for _ in 0..slen {
173         p.RSM.push(HashMap::with_capacity(16));
174         //p.Expected.push(Vec::new());
175       }
176       return p;
177    }//new
178
179    /// returns a mutatble borrow of the parser's tokenizer
180    pub fn get_tokenizer(&mut self) -> &mut TT {
181      &mut self.tokenizer
182    }
183
184    /// replaces the parser's tokenizer with a new tokenizer, and
185    /// returns the previous tokenizer
186    pub fn swap_tokenizer(&mut self, mut newtk:TT) -> TT {
187      std::mem::swap(&mut self.tokenizer, &mut newtk);
188      newtk
189    }
190
191    /// returns the current line number
192    pub fn current_line(&self)->usize {self.linenum}
193    /// returns the current column number
194    pub fn current_column(&self)->usize {self.column}
195    /// returns the current absolute byte position according to tokenizer
196    pub fn current_position(&self)->usize {self.position}
197    /// returns the previous position (before shift) according to tokenizer
198    pub fn previous_position(&self)->usize {self.prev_position}
199
200    /// this function can be called from within the semantic actions attached
201    /// to grammar production rules that are executed for each
202    /// "reduce" action of the parser.
203    pub fn abort(&mut self, msg:&str)
204    {
205       self.err_report.as_mut().map_or_else(
206         ||eprintln!("\n!!!Parsing Aborted: {}",msg),
207         |x|x.push_str(&format!("\n!!!Parsing Aborted: {}\n",msg)));
208
209       self.err_occurred = true;
210       self.stopparsing=true;
211    }
212
213
214    /// this function can be called from within the "semantic" actions attached
215    /// to production rules to terminate parsing.
216    pub fn stop(&mut self) {
217      self.stopparsing = true;
218    }
219
220    /// may be called from grammar semantic actions to report error.
221    /// this report function will print to stdout.
222    pub fn report(&mut self, errmsg:&str)  {self.report_error(errmsg,false)}
223    /// same as [BaseParser::report] but with option to display line/column
224    pub fn report_error(&mut self, errmsg:&str, showlc: bool)  
225    {  
226       //eprint!("{}",color::Fg(color::Yellow));
227       if (self.report_line != self.linenum || self.linenum==0)  {
228         if showlc {
229           self.err_report.as_mut().map_or_else(
230             ||eprintln!("ERROR on line {}, column {}: {}",self.linenum,self.column,errmsg),
231             |x|x.push_str(&format!("ERROR on line {}, column {}: {}\n",self.linenum,self.column,errmsg)));
232         }
233         else {
234           self.err_report.as_mut().map_or_else(
235             ||eprintln!("PARSER ERROR: {}",errmsg),
236             |x|x.push_str(&format!("PARSER ERROR: {}\n",errmsg)));
237         }
238         self.report_line = self.linenum;
239       }
240       else {
241         if showlc {
242           self.err_report.as_mut().map_or_else(
243             ||eprint!(" ({},{}): {}",self.linenum,self.column,errmsg),
244             |x|x.push_str(&format!(" ({},{}): {}",self.linenum,self.column,errmsg)));
245         }
246         else {
247           self.err_report.as_mut().map_or_else(
248             ||eprint!(" {}",errmsg),
249             |x|{x.push(' '); x.push_str(errmsg)});
250         }
251       }
252       //eprint!("{}",color::Fg(color::Reset));       
253       self.err_occurred = true;
254    }// report
255
256    /// this function is only exported to support the generated code
257    pub fn bad_pattern(&mut self,pattern:&str) -> AT
258    {
259       let msg = format!("pattern {} failed to bind to stacked values\n",pattern);
260       self.report(&msg);
261       //println!("FROM BAD PATTERN:");
262       AT::default()
263    }
264
265    //called to simulate a shift
266    fn errshift(&mut self, sym:&str) -> bool
267    {
268       let csi = self.stack[self.stack.len()-1].si; // current state
269       let actionopt = self.RSM[csi].get(sym);
270       if let Some(Shift(ni)) = actionopt {
271         self.stack.push(StackedItem::new(*ni,AT::default(),self.linenum,self.column)); true
272       }
273       else {false}
274    }
275
276  // this is the LR parser shift action: push the next state, along with the
277  // value of the current lookahead token onto the parse stack, returns the
278  // next token
279  fn shift(&mut self, nextstate:usize, lookahead:TerminalToken<'t,AT>) -> TerminalToken<'t, AT>
280  {
281     self.linenum = lookahead.line;  self.column=lookahead.column;
282     self.prev_position = self.position;
283     self.position = self.tokenizer.position();
284     self.stack.push(StackedItem::new(nextstate,lookahead.value,lookahead.line,lookahead.column));
285     self.tokenizer.next_tt()
286  }
287
288    /// this function is called from the generated semantic actions and should
289    /// most definitely not be called from elsewhere as it would corrupt
290    /// the base parser.
291    pub fn popstack(&mut self) -> StackedItem<AT>
292    {
293       let item = self.stack.pop().expect("PARSER STATE MACHINE/STACK CORRUPTED");
294       self.linenum = item.line; self.column=item.column;
295       self.popped.push((item.line,item.column));
296       item
297    }//popstack
298
299    pub fn popstack_as_lbox(&mut self) -> LBox<AT>
300    {
301       let item = self.stack.pop().expect("PARSER STATE MACHINE/STACK CORRUPTED");
302       self.linenum = item.line; self.column=item.column;
303       self.popped.push((item.line,item.column));
304       let newuid = *self.gindex.borrow();
305       *self.gindex.borrow_mut() += 1;           
306       LBox::make(item.value,item.line,item.column,newuid)
307    }//popstack_as_lbox
308
309    fn reduce(&mut self, ri:&usize)
310    {
311       self.popped.clear();
312       let rulei = &self.Rules[*ri];
313       let ruleilhs = rulei.lhs; // &'static : Copy
314       let val = (rulei.Ruleaction)(self); // should be self
315       let newtop = self.stack[self.stack.len()-1].si; 
316       let goton = self.RSM[newtop].get(ruleilhs).expect("PARSER STATEMACHINE CORRUPTED");
317       if let Stateaction::Gotonext(nsi) = goton {
318       self.stack.push(StackedItem::new(*nsi,val,self.linenum,self.column)); 
319                //self.stack.push(Stackelement{si:*nsi,value:val});
320       }// goto next state after reduce
321              else {
322                self.report("state transition table corrupted: no suitable action after reduce");
323                self.stopparsing=true;
324              }
325    }//reduce
326
327    /// can be called to determine if an error occurred during parsing.  The parser
328    /// will not panic.
329    pub fn error_occurred(&self) -> bool {self.err_occurred}
330
331    // there may need to be other lb functions, perhaps from terminalToken
332    // or stackedItem (at least for transfer)
333
334    /// creates a [LBox] smart pointer that includes line/column information;
335    /// should be called from the semantic actions of a grammar rule, e.g.
336    ///```ignore
337    ///   E --> E:a + E:b {PlusExpr(parser.lb(a),parser.lb(b))}
338    ///```
339    pub fn lb<T>(&self,e:T) -> LBox<T> {
340      let newuid = *self.gindex.borrow();
341      *self.gindex.borrow_mut() += 1;    
342      LBox::make(e,self.linenum,self.column,newuid)
343    }
344    /// creates a `LBox<dyn Any>`, which allows attributes of different types to
345    /// be associated with grammar symbols.  Use in conjuction with [LBox::downcast], [LBox::upcast] and the [lbdown], [lbup] macros.
346    pub fn lba<T:'static>(&self,e:T) -> LBox<dyn Any> {
347      let newuid = *self.gindex.borrow();
348      *self.gindex.borrow_mut() += 1;        
349      LBox::upcast(LBox::make(e,self.linenum,self.column,newuid))
350    }
351    /// similar to [BaseParser::lb], but creates a [LRc] instead of [LBox]
352    pub fn lrc<T>(&self,e:T) -> LRc<T> { LRc::new(e,self.linenum,self.column /*,self.src_id*/) }
353    /// similar to [BaseParser::lba] but creates a [LRc]
354    pub fn lrca<T:'static>(&'t self,e:T) -> LRc<dyn Any> { LRc::upcast(LRc::new(e,self.linenum,self.column /*,self.src_id*/)) }
355
356    /// creates LBox enclosing e using line/column information associated
357    /// with right-hand side symbols, numbered left-to-right starting at 0
358    pub fn lbx<T>(&self,i:usize,e:T) -> LBox<T>
359    {
360       let (mut ln,mut cl) = (self.linenum,self.column);
361       if i<self.popped.len() {
362         let index = self.popped.len() - 1 - i;
363         let lc = self.popped[index];
364         ln = lc.0; cl=lc.1;
365       }
366       let newuid = *self.gindex.borrow();
367       *self.gindex.borrow_mut() += 1;
368       LBox::make(e,ln,cl,newuid)
369    }//lbx
370
371    /// alias for [Self::lbx]
372    pub fn lbox<T>(&self,i:usize,e:T) -> LBox<T> { self.lbx(i,e) }
373
374    /// creates [LC] enclosing e using line/column information associated
375    /// with right-hand side symbols, numbered left-to-right starting at 0
376    pub fn lc<T>(&self,i:usize,e:T) -> LC<T>
377    {
378       let (mut ln,mut cl) = (self.linenum,self.column);
379       if i<self.popped.len() {
380         let index = self.popped.len() - 1 - i;
381         let lc = self.popped[index];
382         ln = lc.0; cl=lc.1;
383       }
384       let uid = *self.gindex.borrow();
385       *self.gindex.borrow_mut() += 1;
386       LC::make(e,ln,cl,uid)
387    }//lbx
388
389    /// Like lbx but creates an LRc
390    pub fn lrcn<T>(&self,i:usize,e:T) -> LRc<T>
391    {
392       let (mut ln,mut cl) = (self.linenum,self.column);
393       if i<self.popped.len() {
394         let index = self.popped.len() - 1 - i;
395         let lc = self.popped[index];
396         ln = lc.0; cl=lc.1;
397       }
398       LRc::new(e,ln,cl)
399    }//lbx
400}// impl BaseParser
401
402
403///////////////////////////////////////////////////////////////////////////
404impl<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>> BaseParser<'t,AT,ET,TT>
405{
406  /// Error recovery routine of rustlr, separate from error_reporter.
407  /// This function will modify the parser and lookahead symbol and return
408  /// either the next action the parser should take (if recovery succeeded)
409  /// or None if recovery failed.
410  pub fn error_recover(&mut self, lookahead:&mut TerminalToken<'t,AT>) -> Option<Stateaction>
411  {
412    let mut erraction = None;
413    ///// prefer to ue Errsym method
414    if self.Errsym.len()>0 {
415      let errsym = self.Errsym;
416      // lookdown stack for state with transition on Errsym
417      // but that could be current state too (start at top)
418      let mut k = self.stack.len(); // offset by 1 because of usize
419      let mut spos = k+1;
420      while k>0 && spos>k
421      {
422        let ksi = self.stack[k-1].si;
423        erraction = self.RSM[ksi].get(errsym);
424        if let None = erraction {k-=1;} else {spos=k;}
425      }//while k>0
426      if spos==k { self.stack.truncate(k); } // new current state revealed
427      // run all reduce actions that are valid before the Errsym:
428      while let Some(Reduce(ri)) = erraction // keep reducing
429      {
430       //self.reduce(ri); // borrow error- only need mut self.stack
431              self.popped.clear();
432              let rulei = &self.Rules[*ri];
433              let ruleilhs = rulei.lhs; // &'static : Copy
434              //let mut dummy = RuntimeParser::new(1,1);
435              let val = (rulei.Ruleaction)(self); 
436              let newtop = self.stack[self.stack.len()-1].si; 
437              let gotonopt = self.RSM[newtop].get(ruleilhs);
438              match gotonopt {
439                Some(Gotonext(nsi)) => { 
440                  //self.stack.push(Stackelement{si:*nsi,value:val});
441                  self.stack.push(StackedItem::new(*nsi,val,self.linenum,self.column)); 
442                },// goto next state after reduce
443                _ => {self.abort("recovery failed"); },
444              }//match
445              // end reduce
446       
447              let tos=self.stack[self.stack.len()-1].si;
448              erraction = self.RSM[tos].get(self.Errsym).clone();
449      } // while let erraction is reduce
450      // remaining defined action on Errsym must be shift
451      if let Some(Shift(i)) = erraction { // simulate shift errsym 
452          self.stack.push(StackedItem::new(*i,AT::default(),lookahead.line,lookahead.column));
453          // keep lookahead until action is found that transitions from
454          // current state (i). but skipping ahead without reducing
455          // the error production is not a good idea.  This implementation
456	  // does NOT assume that everything following the ERROR symbol is
457	  // terminal.
458          while let None = self.RSM[*i].get(lookahead.sym) {
459            if lookahead.sym=="EOF" {break;}
460            *lookahead = self.tokenizer.next_tt();
461          }//while let
462          // either at end of input or found action on next symbol
463          erraction = self.RSM[*i].get(lookahead.sym);
464      } // if shift action found down under stack
465    }//errsym exists
466
467    // at this point, if erraction is None, then Errsym failed to recover,
468    // try the resynch symbol method next ...
469    if iserror(&erraction) && self.resynch.len()>0 {
470      while lookahead.sym!="EOF" &&
471        !self.resynch.contains(lookahead.sym) {
472        self.linenum = lookahead.line; self.column = lookahead.column; self.prev_position=self.position; self.position = self.tokenizer.position();
473        *lookahead = self.tokenizer.next_tt();
474      }//while
475      if lookahead.sym!="EOF" {
476        // look for state on stack that has action defined on next symbol
477        self.linenum = lookahead.line; self.column = lookahead.column; self.prev_position=self.position; self.position=self.tokenizer.position();
478        *lookahead = self.tokenizer.next_tt();
479      }
480      let mut k = self.stack.len()-1; // offset by 1 because of usize
481      let mut position = 0;
482      while k>0 && erraction==None
483      {
484         let ksi = self.stack[k-1].si;
485         erraction = self.RSM[ksi].get(lookahead.sym);
486         if let None=erraction {k-=1;}
487      }//while k>0 && erraction==None
488      match erraction {
489        None => {}, // do nothing, whill shift next symbol
490        _ => { self.stack.truncate(k);},//pop stack
491      }//match
492   }// there are resync symbols
493
494   // at this point, if erraction is None, then resynch recovery failed too.
495   // only action left is to skip ahead...
496   let mut eofcx = 0;
497   while iserror(&erraction) && eofcx<1 { //skip input
498      self.linenum = lookahead.line; self.column = lookahead.column; self.prev_position=self.position; self.position=self.tokenizer.position();
499      *lookahead = self.tokenizer.next_tt();
500      if lookahead.sym=="EOF" {eofcx+=1;}
501      let csi =self.stack[self.stack.len()-1].si;
502      erraction = self.RSM[csi].get(lookahead.sym);
503   }// skip ahead
504   match erraction {
505     Some(act) if eofcx<1 => Some(*act),
506     _ => None,
507   }//return match
508  }//error_recover function
509
510  /// resets parser, including external state
511  pub fn reset(&mut self) {
512    self.stack.clear();
513    self.err_occurred = false;
514    let mut result = AT::default();
515    self.exstate = ET::default();
516    *self.shared_state.borrow_mut() = ET::default();
517  }//reset
518
519  /// Retrieves recorded error report.  This function will return an empty string
520  /// if [BaseParser::set_err_report] is not called.  It will also return an
521  /// empty string if there was no error
522  pub fn get_err_report(&self) -> &str {
523    self.err_report.as_deref().unwrap_or("")
524  }
525
526  /// When given true as argument, this option will disable the output of
527  /// parser errors to stderr, and instead log them internally until retrieved
528  /// with [BaseParser::get_err_report].  Each call to this function will
529  /// clear the previous report and begin a new one.
530  /// If the bool argument is false, it will erase and turn off error logging
531  /// and print all parser errors to stderr.  This function does not affect
532  /// interactive training, which uses stdio.
533  pub fn set_err_report(&mut self, onof:bool) {
534    if onof {self.err_report = Some(String::new());}
535    else {self.err_report = None;}
536  }
537
538
539}//impl BaseParser 2
540
541
542
543/////////////////////////////////////////////////////////////////////////
544/////////////// new approach using more flexible trait object
545
546/// A trait object that implements ErrReportMaker is expected by the [BaseParser::parse_core]
547/// function, which implements the basic LR parsing algorithm using the
548/// generated state machine.  The struct [StandardReporter] is provided as
549/// the default ErrReportMaker that uses standard I/O as interface and has the
550/// ability to train the parser.  But other implementations of the trait
551/// can be created that use different interfaces, such as a graphical IDE.
552///
553/// This trait replaces [crate::ErrHandler] in the [crate::runtime_parser] module.
554pub trait ErrReportMaker<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>> // not same as error recovery
555{
556  fn err_reporter(&mut self, parser:&mut BaseParser<'t,AT,ET,TT>, lookahead:&TerminalToken<AT>, erropt:&Option<Stateaction>);
557  fn report_err(&self, parser:&mut BaseParser<'t,AT,ET,TT>, msg:&str) { parser.report(msg) }
558}// ErrReportMaker trait  // not same as RuntimeParser::ErrHandler
559
560/*
561The structure here is a bit strange.  The script file is written to in
562interactive training mode and read from in script-training mode.  However,
563the actual modification of the parser file is done after the training, by
564the augmenter module.  Thus there's another wrapper function that's needed
565besides the creation of the right kind of StandardReporter.
566*/
567
568impl<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>> ErrReportMaker<'t,AT,ET,TT> for StandardReporter
569{
570  // this function will be able to write training script to file
571  fn err_reporter(&mut self, parser:&mut BaseParser<'t,AT,ET,TT>, lookahead:&TerminalToken<AT>, erropt:&Option<Stateaction>)
572 { 
573  let mut wresult:std::io::Result<()> = Err(std::io::Error::new(std::io::ErrorKind::Other,"")); // dummy
574  // known that actionop is None or Some(Error(_))
575  let cstate = parser.stack[parser.stack.len()-1].si; // current state
576  let mut actionopt = if let Some(act)=erropt {Some(act)} else {None};
577  let lksym = &lookahead.sym[..];
578  // is lookahead recognized as a grammar symbol?
579  // if actionopt is NONE, check entry for ANY_ERROR            
580  if parser.Symset.contains(lksym) {
581     if let None=actionopt {
582        actionopt = parser.RSM[cstate].get("ANY_ERROR");
583     }
584  }// lookahead is recognized grammar sym
585  else {
586     actionopt = parser.RSM[cstate].get("ANY_ERROR");
587  }// lookahead is not a grammar sym
588  let mut errmsg = if let Some(Error(em)) = &actionopt {
589    format!("unexpected symbol '{}' on line {}, column {}: ** {} ** ..",lksym,lookahead.line,lookahead.column,em.trim())
590  } else {format!("unexpected symbol '{}' on line {}, column {} .. ",lksym,lookahead.line,lookahead.column)};
591
592  ////// augment errmsg with current line (version 0.2.6)
593  let srcline = parser.tokenizer.current_line();
594  if (srcline.len()>0) {
595    errmsg.push_str("\n >>");
596    errmsg.push_str(srcline);
597    errmsg.push_str("\n");
598    let mut cln = lookahead.column+2;
599    while cln>0 { errmsg.push(' '); cln-=1; }
600    //let mut tokenlen = srcline[cln-2..].find(char::is_whitespace).unwrap_or(1);
601    let mut tokenlen = lookahead.sym.len();
602    if is_alphanum(&lookahead.sym) {tokenlen = 3;}
603    while tokenlen>0 { errmsg.push('^'); tokenlen-=1; }
604    errmsg.push('\n');
605  }// augment errmsg with current line
606  
607  parser.report(&errmsg);
608
609  if self.training {          ////// Training mode
610    let csym = lookahead.sym.to_owned();
611    let mut inp = String::from("");    
612   if let None=self.scriptinopt {  // interactive mode
613   if let Some(outfd1) = &self.scriptoutopt {
614    let mut outfd = outfd1;
615    print!("\n>>>TRAINER: if this message is not adequate (for state {}), enter a replacement (default no change): ",cstate);
616    let rrrflush = io::stdout().flush();
617    if let Ok(n) = io::stdin().read_line(&mut inp) {
618       if inp.len()>5 && parser.Symset.contains(lksym) {
619         print!(">>>TRAINER: should this message be given for all unexpected symbols in the current state? (default yes) ");
620        let rrrflush2 = io::stdout().flush();
621        let mut inp2 = String::new();
622        if let Ok(n) = io::stdin().read_line(&mut inp2) {
623            if inp2.trim()=="no" || inp2.trim()=="No" {
624               wresult = write!(outfd,"{}\t{}\t{} ::: {}\n",lookahead.line,lookahead.column,&csym,inp.trim());
625               self.trained.insert((cstate,csym),inp);
626            }
627            else  {// insert for any error
628               wresult = write!(outfd,"{}\t{}\t{} ::: {}\n",lookahead.line,lookahead.column,"ANY_ERROR",inp.trim());
629               self.trained.insert((cstate,String::from("ANY_ERROR")),inp);
630            }
631        }// read ok
632       }// unexpected symbol is grammar sym
633       else if inp.len()>5 && !parser.Symset.contains(lksym) {
634         wresult = write!(outfd,"{}\t{}\t{} ::: {}\n",lookahead.line,lookahead.column,"ANY_ERROR",inp.trim());
635         self.trained.insert((cstate,String::from("ANY_ERROR")),inp);
636       }
637    }// process user response
638   }}// interactive mode
639   else { // training from script mode (non-interactive)
640    if let Some(brfd) = &mut self.scriptinopt {
641     let mut scin = brfd;
642     let mut readn = 0;
643     while readn < 1
644     {
645       inp = String::new();
646       match scin.read_line(&mut inp) {
647         Ok(n) if n>1 && &inp[0..1]!="#" && inp.trim().len()>0 => {readn=n;},
648         Ok(n) if n>0 => { readn=0; }, // keep reading
649         _ => {readn = 1; } // stop - this means End of Stream
650       }//match
651       if readn>1 { // read something
652         let inpsplit:Vec<&str> = inp.split_whitespace().collect();
653         if inpsplit.len()>4 && inpsplit[3].trim()==":::" {
654           let inline = inpsplit[0].trim().parse::<usize>().unwrap();
655           let incolumn = inpsplit[1].trim().parse::<usize>().unwrap();
656           let insym = inpsplit[2].trim();
657           if parser.linenum==inline && parser.column==incolumn {
658             if &csym==insym || insym=="ANY_ERROR" {
659               let posc = inp.find(":::").unwrap()+4;
660               println!("\n>>>Found matching entry from training script for {}, error message: {}",insym,&inp[posc..]);
661               self.trained.insert((cstate,String::from(insym)),String::from(&inp[posc..]));
662             } // unexpected symbol match
663           }// line/column match
664         }//inpsplit check
665       }// valid training line read
666     }//while readn<2
667   }}//training from script mode
668  }//if training   //// END TRAINING MODE
669  
670 }// standardreporter function
671}// impl ErrReportMaker for StandardReporter
672
673
674/////////////////////////////////////////////////////////////
675//////////////// parse_core replaced: now uses zc tokenizer
676impl<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>> BaseParser<'t,AT,ET,TT>
677{
678  /// This function is exported because it's required at runtime.
679  /// Under normal circumstances, this function should ***not*** be called
680  /// directly. In the recommended auto mode, which generates the abstract
681  /// syntax and lexical tokenizer, call instead the `parse_with` function
682  /// that is specific to each grammar.
683  fn parse_core<R:ErrReportMaker<'t,AT,ET,TT>>(&mut self, err_handler:&mut R) -> AT
684  {
685    self.stack.clear();
686    self.err_occurred = false;
687    let mut result = AT::default();
688    //self.exstate = ET::default();
689    self.stack.push(StackedItem::new(0,AT::default(),0,0));
690    self.stopparsing = false;
691    let mut action = Stateaction::Error("");
692    let mut lookahead = TerminalToken::new("EOF",AT::default(),0,0); //just init
693    // nextsym() should only be called here
694    if let Some(tok) = self.tokenizer.nextsym() {lookahead=tok;}
695    //else {self.stopparsing=true;}
696
697    while !self.stopparsing
698    {
699      let tos = self.stack.len()-1;
700      self.linenum = self.stack[tos].line;
701      self.column=self.stack[tos].column;
702      let currentstate = self.stack[tos].si;
703      let mut actionopt = self.RSM[currentstate].get(lookahead.sym);
704
705      if actionopt.is_none() && lookahead.sym!="EOF" { // added in version 0.2.9
706        actionopt = self.RSM[currentstate].get("_WILDCARD_TOKEN_");
707        // added for 0.2.94:
708        lookahead = self.tokenizer.transform_wildcard(lookahead);
709      }
710
711      let actclone:Option<Stateaction> = match actionopt {
712        Some(a) => Some(*a),
713        None => None,
714      };
715      if iserror(&actionopt) {  // either None or Error
716        if !self.err_occurred {self.err_occurred = true;}
717        
718        err_handler.err_reporter(self,&lookahead,&actclone);
719        
720        match self.error_recover(&mut lookahead) {
721          None => { self.stopparsing=true; break; }
722          Some(act) => {action = act;}, // lookahead=la;},
723        }//match
724      }// iserror
725      else { action = actclone.unwrap(); }
726      match &action {
727        Shift(nextstate) => {
728           lookahead = self.shift(*nextstate,lookahead);
729        },
730        Reduce(rulei) => { self.reduce(rulei); },
731        Accept => {
732          self.stopparsing=true;
733          if self.stack.len()>0 {result = self.stack.pop().unwrap().value;}
734          else {self.err_occurred=true;}
735        },
736        _ => {}, // continue
737      }//match action
738    }// main parse loop
739    return result;
740  }//parse_core
741
742  /// This function is exported because it's required at runtime.
743  /// Under normal circumstances, this function should ***not*** be called
744  /// directly. In the recommended auto mode, which generates the abstract
745  /// syntax and lexical tokenizer, call instead the `parse_with` function
746  /// that is specific to each grammar.
747  pub fn parse(&mut self) -> AT
748  {
749    let mut stdeh = StandardReporter::new();
750    self.parse_core(&mut stdeh) 
751  }//parse_stdio
752
753  /// This function is exported because it's required at runtime.
754  /// Under normal circumstances, this function should ***not*** be called
755  /// directly. In the recommended auto mode, which generates the abstract
756  /// syntax and lexical tokenizer, call instead the `parse_train_with` function
757  /// that is specific to each grammar.
758  pub fn parse_train(&mut self, parserfile:&str) -> AT
759    {
760      let mut stdtrainer = StandardReporter::new_interactive_training(parserfile);
761      let result = self.parse_core(&mut stdtrainer);
762      if let Err(m) = stdtrainer.augment_training(parserfile) {
763        eprintln!("Error in augmenting parser: {:?}",m)
764      }
765
766      return result;
767    }//parse_stdio_train
768
769  /// trains parser from a [training script](https://cs.hofstra.edu/~cscccl/rustlr_project/cpmparser.rs_script.txt)
770  /// created by interactive training.  This
771  /// is intended to be used after a grammar has been modified and the parser
772  /// is regenerated with different state numbers.  It is the user's
773  /// responsibility to keep consistent the parser file, script file, and sample
774  /// input that was used when the script was created.  The script contains
775  /// the line and column numbers of each error encountered, along with either
776  /// the unexpected symbol that caused the error, or the reserved ANY_ERROR
777  /// symbol if the error message is to be applied to all unexpected symbols.
778  /// These entries must match, in sequence, the errors encountered during
779  /// retraining - it is therefore recommended that the same tokenizer be used
780  /// during retraining so that the same line/column information are given.
781  /// The trainer will augment the parser (parserfile) with new Error
782  /// entries, overriding any previous ones.  It is also recommended that the
783  /// user examines the "load_extras" function that appears at the end of
784  /// the [augmented parser](https://cs.hofstra.edu/~cscccl/rustlr_project/cpmparser.rs).
785  /// The train_from_script function does not return
786  /// a value, unlike [BaseParser::parse] and [BaseParser::parse_train].
787  pub fn train_from_script(&mut self, parserfile:&str, scriptfile:&str)
788  {
789      let mut stdtrainer = StandardReporter::new_script_training(parserfile,scriptfile);
790      let result = self.parse_core(&mut stdtrainer);
791      if let Err(m) = stdtrainer.augment_training(parserfile) {
792        eprintln!("Error in augmenting parser: {:?}",m)
793      }
794      if !self.err_occurred {println!("no errors encountered during parsing");}
795  }//train_from_script
796
797}// 3rd impl BaseParser
798#[cfg(feature = "generator")]
/// Unwraps a label of the form `[x]`: returns the text between the
/// brackets with surrounding whitespace trimmed, or `s` itself when it
/// is not enclosed in `[` and `]`.
fn checkboxlabel(s:&str) -> &str
{
    match s.strip_prefix('[').and_then(|rest| rest.strip_suffix(']')) {
        Some(inner) => inner.trim(),
        None => s,
    }
}// checkboxlabel
803
/// Used by genlex routines.  Returns true when `x` is non-empty, its first
/// character is `_` or alphabetic, and every following character is `_`
/// or alphanumeric (as judged by `char::is_alphabetic` and
/// `char::is_alphanumeric`).
fn is_alphanum(x:&str) -> bool
{
  let mut rest = x.chars();
  match rest.next() {
    // a valid leading character: the remainder decides the answer
    Some(head) if head=='_' || head.is_alphabetic() =>
      rest.all(|c| c=='_' || c.is_alphanumeric()),
    // empty string, or an invalid leading character
    _ => false,
  }
}//is_alphanum
rustlr/base_parser.rs

rustlr/
base_parser.rs