rustlr/
lib.rs

1//! Rustlr is an LR-style parser generator for Rust.  Advanced features
2//! include:
3//!  1. Option to automatically generate the AST datatypes and semantic actions, with manual overrides possible.  Rustlr's grammar format contains a sublanguage
4//!   that controls how ASTs are created, so that the generated types do
5//!   not necessarily reflect the format of the grammar.
6//!  2. Option to use [bumpalo](https://docs.rs/bumpalo/latest/bumpalo/index.html) to create
7//!  AST types that enable *nested* pattern matching against recursive types.
8//! 
9//!  3. Recognizes regex-style operators `*`, `+` and `?`, which simplify
10//!  the writing of grammars and allow better ASTs to be created.
11//!  4. An experimental feature that recognizes *Selective Marcus-Leermakers*
12//!  grammars.  This is a class of unambiguous grammars that's 
13//!  larger than traditional LR grammars.  They are especially helpful
14//!  in avoiding conflicts when new production rules are added to a grammar.
15//!  5. The ability to train the parser interactively for better error reporting
16//!  6. Also generates parsers for F# and other .Net languages
17//!
18//! A **[TUTORIAL](<https://chuckcscccl.github.io/rustlr_project/>)**
19//! is separately available that will explain the
20//! format of grammars and how to generate and deploy parsers for several 
21//! examples.  The documentation found here should be used as a technical
22//! reference.
23//!
24//! **INSTALLING RUSTLR**
25//!
26//! Rustlr consists of two main components: the parser generation routines and
27//! the runtime parser routines that interpret the generated parsing tables.
28//! The default installation will install both.  However, the runtime parser
29//! can be installed independently.
30//!
31//! 
32//! Rustlr should first be installed as a command-line application:
33//! **`cargo install rustlr`**.  This will install both the generator and
34//! runtime parser.
35//!
36//! Parser generation can also be invoked from within a rust
37//! program with the [generate] function of the rustlr crate.
38//!
39//! Once a parser has been generated and included in another crate, rustlr
40//! should be installed with only the runtime parsing routines with
41//! **`cargo add rustlr --no-default-features`**.  Alternatively, add
42//! the following to your Cargo.toml:
43//! ```
44//!   [dependencies]
45//!   rustlr = { version = "0.6.2", default-features = false }
46//! ```
47//!
48//! **Compatibility Notice:**
49//!
50//! There is another optional feature, `legacy-parser`, that can be enabled
51//! with or without the parser generation routines, that is required for
52//! grammars and parsers for very old versions of rustlr (prior to version 0.2).
53//! This feature is *not* included by default and must be installed with
54//! the `cargo install/add --features legacy-parser` option.
55//!
56//! Many of the items exported are only required by the parsers
57//! that are generated, and are not intended to be used in other programs.
58//! However, rustlr uses traits and trait objects to loosely couple the 
59//! various components of the runtime parser so that custom interfaces, such as
60//! those for graphical IDEs, can be built around a basic [ZCParser::parse_core]
61//! function.
62//!
63
64#![allow(dead_code)]
65#![allow(unused_variables)]
66#![allow(non_snake_case)]
67#![allow(non_camel_case_types)]
68#![allow(unused_parens)]
69#![allow(unused_assignments)]
70#![allow(unused_doc_comments)]
71#![allow(unused_imports)]
72
73mod shared_defs;
74pub use shared_defs::*;
75#[cfg(feature = "generator")]
76mod grammar_processor;
77#[cfg(feature = "generator")]
78use grammar_processor::*;
79#[cfg(feature = "generator")]
80mod lr_statemachine;
81#[cfg(feature = "generator")]
82use lr_statemachine::*;
83pub mod lexer_interface;
84pub use lexer_interface::*;
85pub mod runtime_parser;
86pub use runtime_parser::*;
87mod augmenter;
88use augmenter::*;
89pub mod generic_absyn;
90pub use generic_absyn::*;
91pub mod zc_parser;
92#[cfg(feature = "generator")]
93mod parser_writer;
94#[cfg(feature = "generator")]
95mod sd_parserwriter;
96#[cfg(feature = "generator")]
97mod fs_parserwriter;
98#[cfg(feature = "generator")]
99mod ast_writer;
100#[cfg(feature = "generator")]
101mod fs_astwriter;
102#[cfg(feature = "generator")]
103mod bumpast_writer;
104#[cfg(feature = "generator")]
105mod lalr_statemachine;
106#[cfg(feature = "generator")]
107mod selmlk; // experimental
108
109pub mod base_parser; // experimental
110pub use base_parser::{BaseParser,BaseProduction};
111
112//mod logos_lexer;
113
114#[cfg(feature = "generator")]
115mod yacc_ast;
116#[cfg(feature = "generator")]
117mod yaccparser;
118#[cfg(feature = "generator")]
119use lalr_statemachine::LALRMachine;
120#[cfg(feature = "generator")]
121use selmlk::{MLStatemachine};
122pub use zc_parser::{ZCParser,ZCRProduction};
123#[cfg(feature = "legacy-parser")]
124pub use runtime_parser::{RuntimeParser,RProduction,StackedItem};
125
/// Version string constant for rustlr.
///
/// NOTE(review): the crate-level documentation suggests `version = "0.6.2"`
/// for Cargo.toml; confirm that this constant is in sync with the release.
pub const RUSTLRVERSION: &str = "0.6.1";
127
/// This function can be called from within Rust to generate a parser/lexer.
/// It takes the same arguments as the rustlr command-line application,
/// written as a single whitespace-separated string *without* the program
/// name.  Furthermore, if given the `-trace 0` option, no output will be
/// sent to stdout or stderr.  Instead, a log of events is recorded and
/// is returned.  An `Ok(_)` result indicates that some parser was created
/// and an `Err(_)` result indicates failure.
///
/// Arguments are split on whitespace only, so paths containing spaces
/// cannot be expressed through this function.
///
/// Example:
/// ```ignore
///   let report = rustlr::generate("simplecalc.grammar -o src/main.rs -trace 0");
/// ```
#[cfg(feature = "generator")]
pub fn generate(argv:&str) -> Result<String,String> {
  // The argument processor skips position 0 (the program name when invoked
  // from main), so a placeholder is supplied here; otherwise the first real
  // argument -- typically the grammar path -- would be silently dropped.
  // A caller that already wrote "rustlr ..." is unaffected: the extra token
  // is not a recognized option and is ignored by the argument processor.
  let asplit:Vec<&str> = std::iter::once("rustlr")
    .chain(argv.split_whitespace())
    .collect();
  rustle1(&asplit)
}
143
144
/// Retained for backwards compatibility; calling [generate] is recommended
/// instead.
///
/// `args` is expected in command-line form: the element at index 0 is
/// treated as the program name and is not interpreted as an option.  Each
/// string is borrowed as a `&str` and the resulting slice is handed to the
/// internal argument processor, whose result is returned unchanged.
#[cfg(feature = "generator")]
pub fn rustle(args:&Vec<String>) -> Result<String,String> // called from main
{
  let borrowed:Vec<&str> = args.iter().map(String::as_str).collect();
  rustle1(&borrowed)
}
/// Shared driver behind the public entry points: interprets command-line
/// style arguments, processes the grammar, builds a state machine and writes
/// the parser (and, when requested, the AST definitions) to disk.
///
/// `args[0]` is treated as the program name and skipped; at least one further
/// argument is required.  Recognized arguments:
///
/// * a path ending in `.grammar` -- the grammar to process
/// * a path ending in `.y` -- convert a yacc grammar; argument processing
///   stops at this point and no parser is generated
/// * `lr1`/`LR1`/`-lr1`, `lalr`/`LALR`/`-lalr`, `lalr1`/`LALR1`/`-lalr1`,
///   `oldlalr`/`-oldlalr`/`-selML` -- choice of state machine construction
/// * `-lrsd [k]` / `lrsd [k]` -- experimental selective-delay construction,
///   optionally followed by a maximum delay
/// * `-regenerate`, `-fsharp`, `-table`, `verbose`/`-verbose`,
///   `binary`/`-binary`, `-zc`/`zero_copy`, `-nozc`, `-newbase`/`-base`,
///   `genlex`/`-genlex`, `-genabsyn`/`-ast`/`-auto`
/// * `-trace n` -- trace level; the following argument is consumed only when
///   it parses as an unsigned integer
/// * `-o path` -- output file, or output directory when it ends in `/` or `\`
///
/// Unrecognized arguments are ignored.
///
/// Returns `Ok(log)` on success, where `log` is the recorded event log when
/// the trace level is 0 and an empty string otherwise.  Returns `Err(msg)`
/// describing the failure otherwise.
#[cfg(feature = "generator")]
fn rustle1(args:&[&str]) -> Result<String,String> // called from main
{
  let argc = args.len();
  if argc<2 {
    return Err("Must give path of .grammar file".to_owned());
  }
  let mut filepath = "";
  let mut parserfile = String::new();  // -o target
  let mut lalr = false;  // true selects the older LALR construction
  let mut newlalr = true; // default construction (LALRMachine)
  let mut tracelev:usize = 1; // trace-level
  let mut verbose = false;
  let mut zc = false;
  let mut newbase = true;
  let mut genlex = false;
  let mut genabsyn = false;
  let mut lrsd = false;
  let mut lrsdmaxk:usize = selmlk::MAXK;
  let mut regenerate = false;
  let mut mode = 0; // 0 for rust, 1 for fsharp
  let mut conv_yacc = false;
  let mut inlinetable = true;
  let mut argi = 1; // next argument position; args[0] is the program name
  while argi<argc
  {
     match args[argi] {
       filen if filen.ends_with(".grammar") => { filepath = filen; },
       filen if filen.ends_with(".y") => {
          // yacc conversion ignores all remaining arguments
          filepath = filen;
          conv_yacc = true;
          break;
       },
       "lr1" | "LR1" | "-lr1" => { lalr=false; newlalr=false; },
       "lalr" | "LALR" | "-lalr" => { newlalr=true; },
       "lalr1" | "LALR1" | "-lalr1" => { newlalr=true; },
       "oldlalr" | "-oldlalr" | "-selML" => { newlalr=false; lalr=true; },
       "-lrsd" | "lrsd" => {
         newlalr=false; lalr=false; lrsd=true;
         // the maximum delay is optional: consume the next argument only
         // if it is a number
         if let Some(Ok(mk)) = args.get(argi+1).map(|a| a.parse::<usize>()) {
           lrsdmaxk = mk;
           argi += 1;
         }
       },
       "-regenerate" => { regenerate=true; },
       "-fsharp" => { mode=1; },
       "-trace" => {
         // As with -lrsd, only consume the next argument when it really is
         // a number, so that e.g. `-trace foo.grammar` does not swallow the
         // grammar path.
         if let Some(Ok(lv)) = args.get(argi+1).map(|a| a.parse::<usize>()) {
           tracelev = lv;
           argi += 1;
           if tracelev>0 {println!("trace-level set to {}",tracelev);}
         }
       },
       "-table" => { inlinetable = false; },
       "verbose" | "-verbose" => { verbose=true; },
       "-zc" | "zero_copy" => { zc=true; newbase=false; },
       "-newbase" | "-base" => { newbase = true; zc=false; genabsyn=true; genlex=true; },
       "genlex" | "-genlex" => { genlex=true; },
       "-genabsyn" | "-ast" | "-auto" => { genabsyn = true; },
       "-nozc" => { zc=false; },
       "binary" | "-binary" => { verbose=false; },
       "-o" => {
          argi+=1;
          if argi<argc {parserfile = String::from(args[argi]);}
       },
       _ => {},
     }//match directive
     argi+=1;
  }//while there are command-line args

  if filepath.is_empty() {
    return Err("Must give path of .grammar file or .y file to convert from".to_owned());
  }
  if conv_yacc {
    yaccparser::convert_from_yacc(filepath);
    return Ok(String::new());
  }

  if zc && verbose {
     return Err("verbose mode not compatible with -zc option".to_owned());
  }
  if tracelev>0 && verbose {println!("verbose parsers should be used for diagnositic purposes and cannot be trained/augmented");}
  if tracelev>1 {println!("parsing grammar from {}",&filepath);}
  let mut grammar1 = Grammar::new();
  grammar1.genlex = genlex;
  grammar1.genabsyn = genabsyn;
  grammar1.tracelev = tracelev;
  grammar1.mode = mode; // 0 for rust, 1 for fsharp
  let parsedok = grammar1.parse_grammar(filepath);  //  ***
  if !parsedok {
    return Err(format!("\nFailed to process grammar at {}",filepath));
  }
  // Check grammar integrity: now done inside parse
  if grammar1.name.len()<2  { // derive grammar name from filepath
     // name = text between the last path separator and the last '.';
     // '/' is preferred, '\\' is only consulted when no '/' is present
     let doti = filepath.rfind('.').unwrap_or(filepath.len());
     let slashi = filepath.rfind('/')
       .or_else(|| filepath.rfind('\\'))
       .map_or(0, |p| p+1);
     grammar1.name = filepath[slashi..doti].to_string();
  }// derive grammar name
  let gramname = grammar1.name.clone();

  let pfsuffix = if mode==1 {"fs"} else {"rs"};

  // Directory part of the -o target; the AST file and the table file are
  // placed next to the parser.  Computed before a default file name is
  // appended to parserfile further below.
  let slashpos = parserfile.rfind('/').or(parserfile.rfind('\\'));
  if grammar1.genabsyn {
     let mut astpath = format!("{}_ast.{}",&gramname,pfsuffix);
     if let Some(pos) = slashpos { astpath=format!("{}{}",&parserfile[..pos+1],&astpath); }
     let wres =
       if mode==1 { grammar1.write_fsast(&astpath) }
       else if !grammar1.bumpast { grammar1.writeabsyn(&astpath) }
       else { grammar1.write_bumpast(&astpath) };
     if wres.is_err() {
       return Err("Failed to generate abstract syntax".to_owned());
     }
  }
  if !inlinetable {
     let mut fsmpath = format!("{}_table.fsm",&gramname);
     if let Some(pos) = slashpos { fsmpath=format!("{}{}",&parserfile[..pos+1],&fsmpath); }
     grammar1.tablefile = fsmpath;
  }

  grammar1.delay_transform(); // static delayed reduction markers

  if tracelev>2 {println!("computing Nullable set");}
  grammar1.compute_NullableRf();
  if tracelev>2 {println!("computing First sets");}
  grammar1.compute_First();

  // Each branch below takes ownership of grammar1; afterwards the grammar
  // is reachable through fsm0.Gmr.
  let mut fsm0;
  if lrsd {
    grammar1.logprint(&format!("Generating Experimental LR-Selective Delay State Machine with Max Delay = {}",lrsdmaxk));
    let mut lrsdfsm = MLStatemachine::new(grammar1);
    lrsdfsm.regenerate = regenerate;
    lrsdfsm.selml(lrsdmaxk);
    if lrsdfsm.failed {
      return Err("LR SELECTIVE DELAY FAILURE. NO PARSER GENERATED".to_owned());
    }
    if lrsdfsm.regenerate {
      lrsdfsm.Gmr.logprint("Re-Generating LR(1) machine for transformed grammar...");
      // from here on the result is handled like an ordinary LR(1) machine
      lrsd = false;
      fsm0 = Statemachine::new(lrsdfsm.Gmr);
      fsm0.lalr = false;
      fsm0.generatefsm(); //GENERATE THE FSM
    } else {
      fsm0 = lrsdfsm.to_statemachine();
    }
    // but of course there will be more conflicts since there will be
    // more rules.  The original rules that caused conflicts for LR are
    // still there??
  }
  else if newlalr { // newlalr takes precedence over other flags
     grammar1.logprint("Generating LALR(1) state machine");
     let mut lalrfsm = LALRMachine::new(grammar1);
     lalrfsm.generatefsm();
     fsm0 = lalrfsm.to_statemachine();
  }
  else {
    grammar1.logprint(&format!("Generating {} state machine for grammar {}...",if lalr {"older LALR"} else {"LR1"},&gramname));
    fsm0 = Statemachine::new(grammar1);
    fsm0.lalr = lalr;
    if lalr {fsm0.Open = Vec::with_capacity(1024); } // important
    fsm0.generatefsm(); //GENERATE THE FSM
  } // old code

  // states are only printed for the LR1 / older LALR constructions
  if !newlalr && !lrsd {
    if tracelev>2 { for state in &fsm0.States {printstate(state,&fsm0.Gmr);} }
    else if tracelev>1 { printstate(&fsm0.States[0],&fsm0.Gmr); }
  }//print states

  // no -o target, or -o names a directory: use the default file name
  if parserfile.len()<1 || parserfile.ends_with('/') || parserfile.ends_with('\\') {parserfile.push_str(&format!("{}parser.{}",&gramname,pfsuffix));}
  if fsm0.States.len()>65536  {
    return Err(format!("too many states: {} execeeds limit of 65536",fsm0.States.len()));
  }
  let write_result =
    if mode==1 { fsm0.writefsparser(&parserfile) }
    else if newbase && !lrsd {
      fsm0.writebaseenumparser(&parserfile)
    }
    else if newbase && lrsd {
      fsm0.writelrsdbaseparser(&parserfile)
    }
    else if zc {  // write zero-copy parser
      if !lrsd {fsm0.writeenumparser(&parserfile)}
      else {fsm0.writelrsdparser(&parserfile)}
    }
    else {  // non-zc, original before version 0.2.0
      if verbose {fsm0.write_verbose(&parserfile)}
      else {fsm0.writeparser(&parserfile)}
    }; // write_result =
  fsm0.Gmr.logprint(&format!("{} total states",fsm0.FSM.len()));
  match write_result {
    Ok(_) => {
      fsm0.Gmr.logprint(&format!("Parser saved in {}",&parserfile));
    },
    Err(err) => {
      return Err(format!("failed to write parser, likely due to invalid -o destination\n{:?}",err));
    },
  }
  // with trace level 0 the recorded log is handed back to the caller
  let mut savedlog = String::new();
  if tracelev==0 {fsm0.Gmr.swap_log(&mut savedlog);}
  Ok(savedlog)
}//rustle1