rustlr/base_parser.rs
//! This module implements a new version of the runtime parser that
//! uses the LR state machine generated by rustlr. It will (for now) live
//! alongside the original parser implemented as [crate::RuntimeParser].
//! Since version 0.2.3, this module can also generate a basic lexical
//! scanner based on [crate::RawToken] and [crate::StrTokenizer].
6//!
//! This module implements the parsing routines that use the state machine
//! generated by rustlr. **The main structure here is [BaseParser]**.
//! All parsing functions are organized around the [BaseParser::parse_core]
//! function, which implements the basic LR parsing algorithm. This function
//! expects a [Tokenizer] and an [ErrReportMaker] implementation.
//! This module provides generic
//! parsing and parser-training routines that use stdio for their interface, but
//! the [ErrReportMaker] trait allows custom user interfaces to be built separately.
15
16#![allow(dead_code)]
17#![allow(unused_variables)]
18#![allow(non_snake_case)]
19#![allow(non_camel_case_types)]
20#![allow(unused_parens)]
21#![allow(unused_mut)]
22#![allow(unused_assignments)]
23#![allow(unused_doc_comments)]
24#![allow(unused_imports)]
25use std::fmt::Display;
26use std::default::Default;
27use std::collections::{HashMap,HashSet,BTreeSet};
28use std::io::{self,Read,Write,BufReader,BufRead};
29use std::rc::Rc;
30use std::cell::{RefCell,Ref,RefMut};
31use std::hash::{Hash,Hasher};
32use std::any::Any;
33use std::fs::File;
34use std::io::prelude::*;
35use std::path::Path;
36use std::mem;
37//use crate::{Stateaction,Statemachine,TerminalToken,Tokenizer};
38use crate::{Stateaction,iserror,TerminalToken,Tokenizer};
39use crate::{LBox,LRc,LC};
40use crate::Stateaction::*;
41use crate::{lbup,lbdown,lbget};
42use crate::{StandardReporter,StackedItem};
43#[cfg(feature = "generator")]
44use crate::{Statemachine};
45
46//extern crate termion;
47//use termion::{color,style};
48
49
50/// this structure is only exported because it is required by the generated parsers.
51/// There is no reason to use it in other programs. Replaces [crate::RProduction] for new parsers since version 0.2.0
52#[derive(Clone)]
53pub struct BaseProduction<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>>
54{
55 pub lhs: &'static str, // left-hand side nonterminal of rule
56 pub Ruleaction : fn(&mut BaseParser<'t,AT,ET,TT>) -> AT, //parser as arg
57}
58impl<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>> BaseProduction<'t,AT,ET,TT>
59{
60 pub fn new_skeleton(lh:&'static str) -> BaseProduction<'t,AT,ET,TT>
61 {
62 BaseProduction {
63 lhs : lh,
64 Ruleaction : |p|{ <AT>::default() },
65 }
66 }
67}//impl BaseProduction
68
69/* imported from runtime_parser module
70/// These structures are what's on the parse stack.
71pub struct StackedItem<AT:Default> // replaces Stackelement
72{
73 si : usize, // state index
74 pub value : AT, // semantic value (don't clone grammar symbols)
75 pub line: usize, // line and column
76 pub column: usize,
77}
78impl<AT:Default> StackedItem<AT>
79{
80 pub fn new(si:usize,value:AT,line:usize,column:usize) -> StackedItem<AT>
81 { StackedItem{si,value,line,column} }
82 /// converts the information in a stacked item to an [LBox] enclosing
83 /// the abstract syntax value along with starting line and column numbers
84 pub fn lbox(self) -> LBox<AT> // no longer used
85 { LBox::new(self.value,self.line,self.column) }
86}
87*/
88
89/// This is the structure created by the generated parser. The generated parser
90/// program will contain a make_parser function that returns this structure.
91/// Most of the pub items are, however, only exported to support the operation
92/// of the parser, and should not be accessed directly. Only the functions
93/// [BaseParser::parse], [BaseParser::report], [BaseParser::abort],
94/// [BaseParser::error_occurred], [BaseParser::get_tokenizer] and
95/// [BaseParser::swap_tokenizer] should be called directly
96/// from user programs. Only the field [BaseParser::exstate] should be accessed
97/// by user programs.
98pub struct BaseParser<'ilt,AT:Default,ET:Default,TT:Tokenizer<'ilt,AT>>
99{
100 /// this is the "external state" structure, with type ET defined by the grammar.
101 /// The semantic actions associated with each grammar rule, which are written
102 /// in the grammar, have ref mut access to the BaseParser structure, which
103 /// allows them to read and change the external state object. This gives
104 /// the parsers greater flexibility and capability, including the ability to
105 /// parse some non-context free languages. See
106 /// [this sample grammar](<https://cs.hofstra.edu/~cscccl/rustlr_project/ncf.grammar>).
107 /// The exstate is initialized to ET::default().
108 pub exstate : ET, // external state structure, usage optional
109 /// External state that can be shared
110 pub shared_state : Rc<RefCell<ET>>,
111 /// used only by generated parser: do not reference
112 pub RSM : Vec<HashMap<&'static str,Stateaction>>, // runtime state machine
113 // do not reference
114 //pub Expected : Vec<Vec<&'static str>>,
115 /// do not reference
116 pub Rules : Vec<BaseProduction<'ilt,AT,ET,TT>>, //rules with just lhs and delegate function
117 ////// this value should be set through abort or report
118 stopparsing : bool,
119 /// do not reference
120 pub stack : Vec<StackedItem<AT>>, // parse stack
121// pub recover : HashSet<&'static str>, // for error recovery
122 pub resynch : HashSet<&'static str>,
123 pub Errsym : &'static str,
124 err_occurred : bool,
125 /// axiom: linenum and column represents the starting position of the
126 /// topmost StackedItem.
127 pub linenum : usize,
128 pub column : usize,
129 pub position : usize, // absolute byte position of input
130 pub prev_position : usize,
131 pub src_id : usize,
132 report_line : usize,
133 /// Hashset containing all grammar symbols (terminal and non-terminal). This is used for error reporting and training.
134 pub Symset : HashSet<&'static str>,
135 pub tokenizer: TT,
136 popped : Vec<(usize,usize)>,
137 gindex : RefCell<u32>, // global index for uid
138 err_report : Option<String>, // optional err report with logging reporter
139}//struct BaseParser
140
141// 't is input lifetime
142impl<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>> BaseParser<'t,AT,ET,TT>
143{
144 /// this is only called by the make_parser function in the machine-generated
145 /// parser program. *Do not call this function in other places* as it
146 /// only generates a skeleton.
147 pub fn new(rlen:usize, slen:usize, tk: TT) -> Self
148 { // given number of rules and number states
149 let mut p = BaseParser {
150 RSM : Vec::with_capacity(slen),
151 //Expected : Vec::with_capacity(slen),
152 Rules : Vec::with_capacity(rlen),
153 stopparsing : false,
154 exstate : ET::default(),
155 shared_state: Rc::new(RefCell::new(ET::default())),
156 stack : Vec::with_capacity(1024),
157 Errsym : "",
158 err_occurred : false,
159 linenum : 0,
160 column : 0,
161 position : 0,
162 prev_position: 0,
163 src_id : 0,
164 report_line : 0,
165 resynch : HashSet::new(),
166 Symset : HashSet::with_capacity(64),
167 tokenizer:tk,
168 popped: Vec::with_capacity(8),
169 gindex: RefCell::new(0),
170 err_report : None,
171 };
172 for _ in 0..slen {
173 p.RSM.push(HashMap::with_capacity(16));
174 //p.Expected.push(Vec::new());
175 }
176 return p;
177 }//new
178
179 /// returns a mutatble borrow of the parser's tokenizer
180 pub fn get_tokenizer(&mut self) -> &mut TT {
181 &mut self.tokenizer
182 }
183
184 /// replaces the parser's tokenizer with a new tokenizer, and
185 /// returns the previous tokenizer
186 pub fn swap_tokenizer(&mut self, mut newtk:TT) -> TT {
187 std::mem::swap(&mut self.tokenizer, &mut newtk);
188 newtk
189 }
190
191 /// returns the current line number
192 pub fn current_line(&self)->usize {self.linenum}
193 /// returns the current column number
194 pub fn current_column(&self)->usize {self.column}
195 /// returns the current absolute byte position according to tokenizer
196 pub fn current_position(&self)->usize {self.position}
197 /// returns the previous position (before shift) according to tokenizer
198 pub fn previous_position(&self)->usize {self.prev_position}
199
200 /// this function can be called from within the semantic actions attached
201 /// to grammar production rules that are executed for each
202 /// "reduce" action of the parser.
203 pub fn abort(&mut self, msg:&str)
204 {
205 self.err_report.as_mut().map_or_else(
206 ||eprintln!("\n!!!Parsing Aborted: {}",msg),
207 |x|x.push_str(&format!("\n!!!Parsing Aborted: {}\n",msg)));
208
209 self.err_occurred = true;
210 self.stopparsing=true;
211 }
212
213
214 /// this function can be called from within the "semantic" actions attached
215 /// to production rules to terminate parsing.
216 pub fn stop(&mut self) {
217 self.stopparsing = true;
218 }
219
220 /// may be called from grammar semantic actions to report error.
221 /// this report function will print to stdout.
222 pub fn report(&mut self, errmsg:&str) {self.report_error(errmsg,false)}
223 /// same as [BaseParser::report] but with option to display line/column
224 pub fn report_error(&mut self, errmsg:&str, showlc: bool)
225 {
226 //eprint!("{}",color::Fg(color::Yellow));
227 if (self.report_line != self.linenum || self.linenum==0) {
228 if showlc {
229 self.err_report.as_mut().map_or_else(
230 ||eprintln!("ERROR on line {}, column {}: {}",self.linenum,self.column,errmsg),
231 |x|x.push_str(&format!("ERROR on line {}, column {}: {}\n",self.linenum,self.column,errmsg)));
232 }
233 else {
234 self.err_report.as_mut().map_or_else(
235 ||eprintln!("PARSER ERROR: {}",errmsg),
236 |x|x.push_str(&format!("PARSER ERROR: {}\n",errmsg)));
237 }
238 self.report_line = self.linenum;
239 }
240 else {
241 if showlc {
242 self.err_report.as_mut().map_or_else(
243 ||eprint!(" ({},{}): {}",self.linenum,self.column,errmsg),
244 |x|x.push_str(&format!(" ({},{}): {}",self.linenum,self.column,errmsg)));
245 }
246 else {
247 self.err_report.as_mut().map_or_else(
248 ||eprint!(" {}",errmsg),
249 |x|{x.push(' '); x.push_str(errmsg)});
250 }
251 }
252 //eprint!("{}",color::Fg(color::Reset));
253 self.err_occurred = true;
254 }// report
255
256 /// this function is only exported to support the generated code
257 pub fn bad_pattern(&mut self,pattern:&str) -> AT
258 {
259 let msg = format!("pattern {} failed to bind to stacked values\n",pattern);
260 self.report(&msg);
261 //println!("FROM BAD PATTERN:");
262 AT::default()
263 }
264
265 //called to simulate a shift
266 fn errshift(&mut self, sym:&str) -> bool
267 {
268 let csi = self.stack[self.stack.len()-1].si; // current state
269 let actionopt = self.RSM[csi].get(sym);
270 if let Some(Shift(ni)) = actionopt {
271 self.stack.push(StackedItem::new(*ni,AT::default(),self.linenum,self.column)); true
272 }
273 else {false}
274 }
275
276 // this is the LR parser shift action: push the next state, along with the
277 // value of the current lookahead token onto the parse stack, returns the
278 // next token
279 fn shift(&mut self, nextstate:usize, lookahead:TerminalToken<'t,AT>) -> TerminalToken<'t, AT>
280 {
281 self.linenum = lookahead.line; self.column=lookahead.column;
282 self.prev_position = self.position;
283 self.position = self.tokenizer.position();
284 self.stack.push(StackedItem::new(nextstate,lookahead.value,lookahead.line,lookahead.column));
285 self.tokenizer.next_tt()
286 }
287
288 /// this function is called from the generated semantic actions and should
289 /// most definitely not be called from elsewhere as it would corrupt
290 /// the base parser.
291 pub fn popstack(&mut self) -> StackedItem<AT>
292 {
293 let item = self.stack.pop().expect("PARSER STATE MACHINE/STACK CORRUPTED");
294 self.linenum = item.line; self.column=item.column;
295 self.popped.push((item.line,item.column));
296 item
297 }//popstack
298
299 pub fn popstack_as_lbox(&mut self) -> LBox<AT>
300 {
301 let item = self.stack.pop().expect("PARSER STATE MACHINE/STACK CORRUPTED");
302 self.linenum = item.line; self.column=item.column;
303 self.popped.push((item.line,item.column));
304 let newuid = *self.gindex.borrow();
305 *self.gindex.borrow_mut() += 1;
306 LBox::make(item.value,item.line,item.column,newuid)
307 }//popstack_as_lbox
308
309 fn reduce(&mut self, ri:&usize)
310 {
311 self.popped.clear();
312 let rulei = &self.Rules[*ri];
313 let ruleilhs = rulei.lhs; // &'static : Copy
314 let val = (rulei.Ruleaction)(self); // should be self
315 let newtop = self.stack[self.stack.len()-1].si;
316 let goton = self.RSM[newtop].get(ruleilhs).expect("PARSER STATEMACHINE CORRUPTED");
317 if let Stateaction::Gotonext(nsi) = goton {
318 self.stack.push(StackedItem::new(*nsi,val,self.linenum,self.column));
319 //self.stack.push(Stackelement{si:*nsi,value:val});
320 }// goto next state after reduce
321 else {
322 self.report("state transition table corrupted: no suitable action after reduce");
323 self.stopparsing=true;
324 }
325 }//reduce
326
327 /// can be called to determine if an error occurred during parsing. The parser
328 /// will not panic.
329 pub fn error_occurred(&self) -> bool {self.err_occurred}
330
331 // there may need to be other lb functions, perhaps from terminalToken
332 // or stackedItem (at least for transfer)
333
334 /// creates a [LBox] smart pointer that includes line/column information;
335 /// should be called from the semantic actions of a grammar rule, e.g.
336 ///```ignore
337 /// E --> E:a + E:b {PlusExpr(parser.lb(a),parser.lb(b))}
338 ///```
339 pub fn lb<T>(&self,e:T) -> LBox<T> {
340 let newuid = *self.gindex.borrow();
341 *self.gindex.borrow_mut() += 1;
342 LBox::make(e,self.linenum,self.column,newuid)
343 }
344 /// creates a `LBox<dyn Any>`, which allows attributes of different types to
345 /// be associated with grammar symbols. Use in conjuction with [LBox::downcast], [LBox::upcast] and the [lbdown], [lbup] macros.
346 pub fn lba<T:'static>(&self,e:T) -> LBox<dyn Any> {
347 let newuid = *self.gindex.borrow();
348 *self.gindex.borrow_mut() += 1;
349 LBox::upcast(LBox::make(e,self.linenum,self.column,newuid))
350 }
351 /// similar to [BaseParser::lb], but creates a [LRc] instead of [LBox]
352 pub fn lrc<T>(&self,e:T) -> LRc<T> { LRc::new(e,self.linenum,self.column /*,self.src_id*/) }
353 /// similar to [BaseParser::lba] but creates a [LRc]
354 pub fn lrca<T:'static>(&'t self,e:T) -> LRc<dyn Any> { LRc::upcast(LRc::new(e,self.linenum,self.column /*,self.src_id*/)) }
355
356 /// creates LBox enclosing e using line/column information associated
357 /// with right-hand side symbols, numbered left-to-right starting at 0
358 pub fn lbx<T>(&self,i:usize,e:T) -> LBox<T>
359 {
360 let (mut ln,mut cl) = (self.linenum,self.column);
361 if i<self.popped.len() {
362 let index = self.popped.len() - 1 - i;
363 let lc = self.popped[index];
364 ln = lc.0; cl=lc.1;
365 }
366 let newuid = *self.gindex.borrow();
367 *self.gindex.borrow_mut() += 1;
368 LBox::make(e,ln,cl,newuid)
369 }//lbx
370
371 /// alias for [Self::lbx]
372 pub fn lbox<T>(&self,i:usize,e:T) -> LBox<T> { self.lbx(i,e) }
373
374 /// creates [LC] enclosing e using line/column information associated
375 /// with right-hand side symbols, numbered left-to-right starting at 0
376 pub fn lc<T>(&self,i:usize,e:T) -> LC<T>
377 {
378 let (mut ln,mut cl) = (self.linenum,self.column);
379 if i<self.popped.len() {
380 let index = self.popped.len() - 1 - i;
381 let lc = self.popped[index];
382 ln = lc.0; cl=lc.1;
383 }
384 let uid = *self.gindex.borrow();
385 *self.gindex.borrow_mut() += 1;
386 LC::make(e,ln,cl,uid)
387 }//lbx
388
389 /// Like lbx but creates an LRc
390 pub fn lrcn<T>(&self,i:usize,e:T) -> LRc<T>
391 {
392 let (mut ln,mut cl) = (self.linenum,self.column);
393 if i<self.popped.len() {
394 let index = self.popped.len() - 1 - i;
395 let lc = self.popped[index];
396 ln = lc.0; cl=lc.1;
397 }
398 LRc::new(e,ln,cl)
399 }//lbx
400}// impl BaseParser
401
402
403///////////////////////////////////////////////////////////////////////////
404impl<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>> BaseParser<'t,AT,ET,TT>
405{
406 /// Error recovery routine of rustlr, separate from error_reporter.
407 /// This function will modify the parser and lookahead symbol and return
408 /// either the next action the parser should take (if recovery succeeded)
409 /// or None if recovery failed.
410 pub fn error_recover(&mut self, lookahead:&mut TerminalToken<'t,AT>) -> Option<Stateaction>
411 {
412 let mut erraction = None;
413 ///// prefer to ue Errsym method
414 if self.Errsym.len()>0 {
415 let errsym = self.Errsym;
416 // lookdown stack for state with transition on Errsym
417 // but that could be current state too (start at top)
418 let mut k = self.stack.len(); // offset by 1 because of usize
419 let mut spos = k+1;
420 while k>0 && spos>k
421 {
422 let ksi = self.stack[k-1].si;
423 erraction = self.RSM[ksi].get(errsym);
424 if let None = erraction {k-=1;} else {spos=k;}
425 }//while k>0
426 if spos==k { self.stack.truncate(k); } // new current state revealed
427 // run all reduce actions that are valid before the Errsym:
428 while let Some(Reduce(ri)) = erraction // keep reducing
429 {
430 //self.reduce(ri); // borrow error- only need mut self.stack
431 self.popped.clear();
432 let rulei = &self.Rules[*ri];
433 let ruleilhs = rulei.lhs; // &'static : Copy
434 //let mut dummy = RuntimeParser::new(1,1);
435 let val = (rulei.Ruleaction)(self);
436 let newtop = self.stack[self.stack.len()-1].si;
437 let gotonopt = self.RSM[newtop].get(ruleilhs);
438 match gotonopt {
439 Some(Gotonext(nsi)) => {
440 //self.stack.push(Stackelement{si:*nsi,value:val});
441 self.stack.push(StackedItem::new(*nsi,val,self.linenum,self.column));
442 },// goto next state after reduce
443 _ => {self.abort("recovery failed"); },
444 }//match
445 // end reduce
446
447 let tos=self.stack[self.stack.len()-1].si;
448 erraction = self.RSM[tos].get(self.Errsym).clone();
449 } // while let erraction is reduce
450 // remaining defined action on Errsym must be shift
451 if let Some(Shift(i)) = erraction { // simulate shift errsym
452 self.stack.push(StackedItem::new(*i,AT::default(),lookahead.line,lookahead.column));
453 // keep lookahead until action is found that transitions from
454 // current state (i). but skipping ahead without reducing
455 // the error production is not a good idea. This implementation
456 // does NOT assume that everything following the ERROR symbol is
457 // terminal.
458 while let None = self.RSM[*i].get(lookahead.sym) {
459 if lookahead.sym=="EOF" {break;}
460 *lookahead = self.tokenizer.next_tt();
461 }//while let
462 // either at end of input or found action on next symbol
463 erraction = self.RSM[*i].get(lookahead.sym);
464 } // if shift action found down under stack
465 }//errsym exists
466
467 // at this point, if erraction is None, then Errsym failed to recover,
468 // try the resynch symbol method next ...
469 if iserror(&erraction) && self.resynch.len()>0 {
470 while lookahead.sym!="EOF" &&
471 !self.resynch.contains(lookahead.sym) {
472 self.linenum = lookahead.line; self.column = lookahead.column; self.prev_position=self.position; self.position = self.tokenizer.position();
473 *lookahead = self.tokenizer.next_tt();
474 }//while
475 if lookahead.sym!="EOF" {
476 // look for state on stack that has action defined on next symbol
477 self.linenum = lookahead.line; self.column = lookahead.column; self.prev_position=self.position; self.position=self.tokenizer.position();
478 *lookahead = self.tokenizer.next_tt();
479 }
480 let mut k = self.stack.len()-1; // offset by 1 because of usize
481 let mut position = 0;
482 while k>0 && erraction==None
483 {
484 let ksi = self.stack[k-1].si;
485 erraction = self.RSM[ksi].get(lookahead.sym);
486 if let None=erraction {k-=1;}
487 }//while k>0 && erraction==None
488 match erraction {
489 None => {}, // do nothing, whill shift next symbol
490 _ => { self.stack.truncate(k);},//pop stack
491 }//match
492 }// there are resync symbols
493
494 // at this point, if erraction is None, then resynch recovery failed too.
495 // only action left is to skip ahead...
496 let mut eofcx = 0;
497 while iserror(&erraction) && eofcx<1 { //skip input
498 self.linenum = lookahead.line; self.column = lookahead.column; self.prev_position=self.position; self.position=self.tokenizer.position();
499 *lookahead = self.tokenizer.next_tt();
500 if lookahead.sym=="EOF" {eofcx+=1;}
501 let csi =self.stack[self.stack.len()-1].si;
502 erraction = self.RSM[csi].get(lookahead.sym);
503 }// skip ahead
504 match erraction {
505 Some(act) if eofcx<1 => Some(*act),
506 _ => None,
507 }//return match
508 }//error_recover function
509
510 /// resets parser, including external state
511 pub fn reset(&mut self) {
512 self.stack.clear();
513 self.err_occurred = false;
514 let mut result = AT::default();
515 self.exstate = ET::default();
516 *self.shared_state.borrow_mut() = ET::default();
517 }//reset
518
519 /// Retrieves recorded error report. This function will return an empty string
520 /// if [BaseParser::set_err_report] is not called. It will also return an
521 /// empty string if there was no error
522 pub fn get_err_report(&self) -> &str {
523 self.err_report.as_deref().unwrap_or("")
524 }
525
526 /// When given true as argument, this option will disable the output of
527 /// parser errors to stderr, and instead log them internally until retrieved
528 /// with [BaseParser::get_err_report]. Each call to this function will
529 /// clear the previous report and begin a new one.
530 /// If the bool argument is false, it will erase and turn off error logging
531 /// and print all parser errors to stderr. This function does not affect
532 /// interactive training, which uses stdio.
533 pub fn set_err_report(&mut self, onof:bool) {
534 if onof {self.err_report = Some(String::new());}
535 else {self.err_report = None;}
536 }
537
538
539}//impl BaseParser 2
540
541
542
543/////////////////////////////////////////////////////////////////////////
544/////////////// new approach using more flexible trait object
545
546/// A trait object that implements ErrReportMaker is expected by the [BaseParser::parse_core]
547/// function, which implements the basic LR parsing algorithm using the
548/// generated state machine. The struct [StandardReporter] is provided as
549/// the default ErrReportMaker that uses standard I/O as interface and has the
550/// ability to train the parser. But other implementations of the trait
551/// can be created that use different interfaces, such as a graphical IDE.
552///
553/// This trait replaces [crate::ErrHandler] in the [crate::runtime_parser] module.
554pub trait ErrReportMaker<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>> // not same as error recovery
555{
556 fn err_reporter(&mut self, parser:&mut BaseParser<'t,AT,ET,TT>, lookahead:&TerminalToken<AT>, erropt:&Option<Stateaction>);
557 fn report_err(&self, parser:&mut BaseParser<'t,AT,ET,TT>, msg:&str) { parser.report(msg) }
558}// ErrReportMaker trait // not same as RuntimeParser::ErrHandler
559
560/*
561The structure here is a bit strange. The script file is written to in
562interactive training mode and read from in script-training mode. However,
563the actual modification of the parser file is done after the training, by
564the augmenter module. Thus there's another wrapper function that's needed
565besides the creation of the right kind of StandardReporter.
566*/
567
568impl<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>> ErrReportMaker<'t,AT,ET,TT> for StandardReporter
569{
570 // this function will be able to write training script to file
571 fn err_reporter(&mut self, parser:&mut BaseParser<'t,AT,ET,TT>, lookahead:&TerminalToken<AT>, erropt:&Option<Stateaction>)
572 {
573 let mut wresult:std::io::Result<()> = Err(std::io::Error::new(std::io::ErrorKind::Other,"")); // dummy
574 // known that actionop is None or Some(Error(_))
575 let cstate = parser.stack[parser.stack.len()-1].si; // current state
576 let mut actionopt = if let Some(act)=erropt {Some(act)} else {None};
577 let lksym = &lookahead.sym[..];
578 // is lookahead recognized as a grammar symbol?
579 // if actionopt is NONE, check entry for ANY_ERROR
580 if parser.Symset.contains(lksym) {
581 if let None=actionopt {
582 actionopt = parser.RSM[cstate].get("ANY_ERROR");
583 }
584 }// lookahead is recognized grammar sym
585 else {
586 actionopt = parser.RSM[cstate].get("ANY_ERROR");
587 }// lookahead is not a grammar sym
588 let mut errmsg = if let Some(Error(em)) = &actionopt {
589 format!("unexpected symbol '{}' on line {}, column {}: ** {} ** ..",lksym,lookahead.line,lookahead.column,em.trim())
590 } else {format!("unexpected symbol '{}' on line {}, column {} .. ",lksym,lookahead.line,lookahead.column)};
591
592 ////// augment errmsg with current line (version 0.2.6)
593 let srcline = parser.tokenizer.current_line();
594 if (srcline.len()>0) {
595 errmsg.push_str("\n >>");
596 errmsg.push_str(srcline);
597 errmsg.push_str("\n");
598 let mut cln = lookahead.column+2;
599 while cln>0 { errmsg.push(' '); cln-=1; }
600 //let mut tokenlen = srcline[cln-2..].find(char::is_whitespace).unwrap_or(1);
601 let mut tokenlen = lookahead.sym.len();
602 if is_alphanum(&lookahead.sym) {tokenlen = 3;}
603 while tokenlen>0 { errmsg.push('^'); tokenlen-=1; }
604 errmsg.push('\n');
605 }// augment errmsg with current line
606
607 parser.report(&errmsg);
608
609 if self.training { ////// Training mode
610 let csym = lookahead.sym.to_owned();
611 let mut inp = String::from("");
612 if let None=self.scriptinopt { // interactive mode
613 if let Some(outfd1) = &self.scriptoutopt {
614 let mut outfd = outfd1;
615 print!("\n>>>TRAINER: if this message is not adequate (for state {}), enter a replacement (default no change): ",cstate);
616 let rrrflush = io::stdout().flush();
617 if let Ok(n) = io::stdin().read_line(&mut inp) {
618 if inp.len()>5 && parser.Symset.contains(lksym) {
619 print!(">>>TRAINER: should this message be given for all unexpected symbols in the current state? (default yes) ");
620 let rrrflush2 = io::stdout().flush();
621 let mut inp2 = String::new();
622 if let Ok(n) = io::stdin().read_line(&mut inp2) {
623 if inp2.trim()=="no" || inp2.trim()=="No" {
624 wresult = write!(outfd,"{}\t{}\t{} ::: {}\n",lookahead.line,lookahead.column,&csym,inp.trim());
625 self.trained.insert((cstate,csym),inp);
626 }
627 else {// insert for any error
628 wresult = write!(outfd,"{}\t{}\t{} ::: {}\n",lookahead.line,lookahead.column,"ANY_ERROR",inp.trim());
629 self.trained.insert((cstate,String::from("ANY_ERROR")),inp);
630 }
631 }// read ok
632 }// unexpected symbol is grammar sym
633 else if inp.len()>5 && !parser.Symset.contains(lksym) {
634 wresult = write!(outfd,"{}\t{}\t{} ::: {}\n",lookahead.line,lookahead.column,"ANY_ERROR",inp.trim());
635 self.trained.insert((cstate,String::from("ANY_ERROR")),inp);
636 }
637 }// process user response
638 }}// interactive mode
639 else { // training from script mode (non-interactive)
640 if let Some(brfd) = &mut self.scriptinopt {
641 let mut scin = brfd;
642 let mut readn = 0;
643 while readn < 1
644 {
645 inp = String::new();
646 match scin.read_line(&mut inp) {
647 Ok(n) if n>1 && &inp[0..1]!="#" && inp.trim().len()>0 => {readn=n;},
648 Ok(n) if n>0 => { readn=0; }, // keep reading
649 _ => {readn = 1; } // stop - this means End of Stream
650 }//match
651 if readn>1 { // read something
652 let inpsplit:Vec<&str> = inp.split_whitespace().collect();
653 if inpsplit.len()>4 && inpsplit[3].trim()==":::" {
654 let inline = inpsplit[0].trim().parse::<usize>().unwrap();
655 let incolumn = inpsplit[1].trim().parse::<usize>().unwrap();
656 let insym = inpsplit[2].trim();
657 if parser.linenum==inline && parser.column==incolumn {
658 if &csym==insym || insym=="ANY_ERROR" {
659 let posc = inp.find(":::").unwrap()+4;
660 println!("\n>>>Found matching entry from training script for {}, error message: {}",insym,&inp[posc..]);
661 self.trained.insert((cstate,String::from(insym)),String::from(&inp[posc..]));
662 } // unexpected symbol match
663 }// line/column match
664 }//inpsplit check
665 }// valid training line read
666 }//while readn<2
667 }}//training from script mode
668 }//if training //// END TRAINING MODE
669
670 }// standardreporter function
671}// impl ErrReportMaker for StandardReporter
672
673
674/////////////////////////////////////////////////////////////
675//////////////// parse_core replaced: now uses zc tokenizer
676impl<'t,AT:Default,ET:Default,TT:Tokenizer<'t,AT>> BaseParser<'t,AT,ET,TT>
677{
678 /// This function is exported because it's required at runtime.
679 /// Under normal circumstances, this function should ***not*** be called
680 /// directly. In the recommended auto mode, which generates the abstract
681 /// syntax and lexical tokenizer, call instead the `parse_with` function
682 /// that is specific to each grammar.
683 fn parse_core<R:ErrReportMaker<'t,AT,ET,TT>>(&mut self, err_handler:&mut R) -> AT
684 {
685 self.stack.clear();
686 self.err_occurred = false;
687 let mut result = AT::default();
688 //self.exstate = ET::default();
689 self.stack.push(StackedItem::new(0,AT::default(),0,0));
690 self.stopparsing = false;
691 let mut action = Stateaction::Error("");
692 let mut lookahead = TerminalToken::new("EOF",AT::default(),0,0); //just init
693 // nextsym() should only be called here
694 if let Some(tok) = self.tokenizer.nextsym() {lookahead=tok;}
695 //else {self.stopparsing=true;}
696
697 while !self.stopparsing
698 {
699 let tos = self.stack.len()-1;
700 self.linenum = self.stack[tos].line;
701 self.column=self.stack[tos].column;
702 let currentstate = self.stack[tos].si;
703 let mut actionopt = self.RSM[currentstate].get(lookahead.sym);
704
705 if actionopt.is_none() && lookahead.sym!="EOF" { // added in version 0.2.9
706 actionopt = self.RSM[currentstate].get("_WILDCARD_TOKEN_");
707 // added for 0.2.94:
708 lookahead = self.tokenizer.transform_wildcard(lookahead);
709 }
710
711 let actclone:Option<Stateaction> = match actionopt {
712 Some(a) => Some(*a),
713 None => None,
714 };
715 if iserror(&actionopt) { // either None or Error
716 if !self.err_occurred {self.err_occurred = true;}
717
718 err_handler.err_reporter(self,&lookahead,&actclone);
719
720 match self.error_recover(&mut lookahead) {
721 None => { self.stopparsing=true; break; }
722 Some(act) => {action = act;}, // lookahead=la;},
723 }//match
724 }// iserror
725 else { action = actclone.unwrap(); }
726 match &action {
727 Shift(nextstate) => {
728 lookahead = self.shift(*nextstate,lookahead);
729 },
730 Reduce(rulei) => { self.reduce(rulei); },
731 Accept => {
732 self.stopparsing=true;
733 if self.stack.len()>0 {result = self.stack.pop().unwrap().value;}
734 else {self.err_occurred=true;}
735 },
736 _ => {}, // continue
737 }//match action
738 }// main parse loop
739 return result;
740 }//parse_core
741
742 /// This function is exported because it's required at runtime.
743 /// Under normal circumstances, this function should ***not*** be called
744 /// directly. In the recommended auto mode, which generates the abstract
745 /// syntax and lexical tokenizer, call instead the `parse_with` function
746 /// that is specific to each grammar.
747 pub fn parse(&mut self) -> AT
748 {
749 let mut stdeh = StandardReporter::new();
750 self.parse_core(&mut stdeh)
751 }//parse_stdio
752
753 /// This function is exported because it's required at runtime.
754 /// Under normal circumstances, this function should ***not*** be called
755 /// directly. In the recommended auto mode, which generates the abstract
756 /// syntax and lexical tokenizer, call instead the `parse_train_with` function
757 /// that is specific to each grammar.
758 pub fn parse_train(&mut self, parserfile:&str) -> AT
759 {
760 let mut stdtrainer = StandardReporter::new_interactive_training(parserfile);
761 let result = self.parse_core(&mut stdtrainer);
762 if let Err(m) = stdtrainer.augment_training(parserfile) {
763 eprintln!("Error in augmenting parser: {:?}",m)
764 }
765
766 return result;
767 }//parse_stdio_train
768
769 /// trains parser from a [training script](https://cs.hofstra.edu/~cscccl/rustlr_project/cpmparser.rs_script.txt)
770 /// created by interactive training. This
771 /// is intended to be used after a grammar has been modified and the parser
772 /// is regenerated with different state numbers. It is the user's
773 /// responsibility to keep consistent the parser file, script file, and sample
774 /// input that was used when the script was created. The script contains
775 /// the line and column numbers of each error encountered, along with either
776 /// the unexpected symbol that caused the error, or the reserved ANY_ERROR
777 /// symbol if the error message is to be applied to all unexpected symbols.
778 /// These entries must match, in sequence, the errors encountered during
779 /// retraining - it is therefore recommended that the same tokenizer be used
780 /// during retraining so that the same line/column information are given.
781 /// The trainer will augment the parser (parserfile) with new Error
782 /// entries, overriding any previous ones. It is also recommended that the
783 /// user examines the "load_extras" function that appears at the end of
784 /// the [augmented parser](https://cs.hofstra.edu/~cscccl/rustlr_project/cpmparser.rs).
785 /// The train_from_script function does not return
786 /// a value, unlike [BaseParser::parse] and [BaseParser::parse_train].
787 pub fn train_from_script(&mut self, parserfile:&str, scriptfile:&str)
788 {
789 let mut stdtrainer = StandardReporter::new_script_training(parserfile,scriptfile);
790 let result = self.parse_core(&mut stdtrainer);
791 if let Err(m) = stdtrainer.augment_training(parserfile) {
792 eprintln!("Error in augmenting parser: {:?}",m)
793 }
794 if !self.err_occurred {println!("no errors encountered during parsing");}
795 }//train_from_script
796
797}// 3rd impl BaseParser
// Checks if a label is of the form [x]: returns x with surrounding
// whitespace trimmed, or s itself if it is not of this form.
#[cfg(feature = "generator")]
fn checkboxlabel(s:&str) -> &str
{
    s.strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))
        .map_or(s, |inner| inner.trim())
}
803
// Used by genlex routines. Tells whether x has the shape of an
// identifier: non-empty, first character an underscore or alphabetic,
// every further character an underscore or alphanumeric.
fn is_alphanum(x:&str) -> bool
{
    let mut rest = x.chars();
    match rest.next() {
        Some(head) if head=='_' || head.is_alphabetic() =>
            rest.all(|c| c=='_' || c.is_alphanumeric()),
        _ => false, // empty string or invalid first character
    }
}//is_alphanum