rustlr/
lib.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
//! Rustlr is an LR-style parser generator for Rust.  Advanced features
//! include:
//!  1. Option to automatically generate the AST datatypes and semantic actions, with manual overrides possible.  Rustlr's grammar format contains a sublanguage
//!   that controls how ASTs are created, so that the generated types do
//!   not necessarily reflect the format of the grammar.
//!  2. Option to use [bumpalo](https://docs.rs/bumpalo/latest/bumpalo/index.html) to create
//!  AST types that enable *nested* pattern matching against recursive types.
//! 
//!  3. Recognizes regex-style operators `*`, `+` and `?`, which simplify
//!  the writing of grammars and allow better ASTs to be created.
//!  4. An experimental feature that recognizes *Selective Marcus-Leermakers*
//!  grammars.  This is a class of unambiguous grammars that's 
//!  larger than traditional LR grammars.  They are especially helpful
//!  in avoiding conflicts when new production rules are added to a grammar.
//!  5. The ability to train the parser interactively for better error reporting.
//!  6. Also generates parsers for F# and other .NET languages.
//!
//! A **[TUTORIAL](<https://chuckcscccl.github.io/rustlr_project/>)**
//! is separately available that will explain the
//! format of grammars and how to generate and deploy parsers for several 
//! examples.  The documentation found here should be used as a technical
//! reference.
//!
//! **INSTALLING RUSTLR**
//!
//! Rustlr consists of two main components: the parser generation routines and
//! the runtime parser routines that interpret the generated parsing tables.
//! The default installation will install both.  However, the runtime parser
//! can be installed independently.
//!
//! 
//! Rustlr should first be installed as a command-line application:
//! **`cargo install rustlr`**.  This will install both the generator and
//! runtime parser.
//!
//! Parser generation can also be invoked from within a Rust
//! program with the [generate] function of the rustlr crate.
//!
//! Once a parser has been generated and included in another crate, rustlr
//! should be installed with only the runtime parsing routines with
//! **`cargo add rustlr --no-default-features`**.  Alternatively, add the
//! following to your Cargo.toml:
//! ```toml
//!   [dependencies]
//!   rustlr = { version = "0.6.2", default-features = false }
//! ```
//!
//! **Compatibility Notice:**
//!
//! There is another optional feature, `legacy-parser`, that can be enabled
//! with or without the parser generation routines, that is required for
//! grammars and parsers for very old versions of rustlr (prior to version 0.2).
//! This feature is *not* included by default and must be installed with
//! the `cargo install/add --features legacy-parser` option.
//!
//! Many of the items exported are only required by the parsers
//! that are generated, and are not intended to be used in other programs.
//! However, rustlr uses traits and trait objects to loosely couple the 
//! various components of the runtime parser so that custom interfaces, such as
//! those for graphical IDEs, can be built around a basic [ZCParser::parse_core]
//! function.
//!

#![allow(dead_code)]
#![allow(unused_variables)]
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]
#![allow(unused_parens)]
#![allow(unused_assignments)]
#![allow(unused_doc_comments)]
#![allow(unused_imports)]

mod shared_defs;
pub use shared_defs::*;
#[cfg(feature = "generator")]
mod grammar_processor;
#[cfg(feature = "generator")]
use grammar_processor::*;
#[cfg(feature = "generator")]
mod lr_statemachine;
#[cfg(feature = "generator")]
use lr_statemachine::*;
pub mod lexer_interface;
pub use lexer_interface::*;
pub mod runtime_parser;
pub use runtime_parser::*;
mod augmenter;
use augmenter::*;
pub mod generic_absyn;
pub use generic_absyn::*;
pub mod zc_parser;
#[cfg(feature = "generator")]
mod parser_writer;
#[cfg(feature = "generator")]
mod sd_parserwriter;
#[cfg(feature = "generator")]
mod fs_parserwriter;
#[cfg(feature = "generator")]
mod ast_writer;
#[cfg(feature = "generator")]
mod fs_astwriter;
#[cfg(feature = "generator")]
mod bumpast_writer;
#[cfg(feature = "generator")]
mod lalr_statemachine;
#[cfg(feature = "generator")]
mod selmlk; // experimental

pub mod base_parser; // experimental
pub use base_parser::{BaseParser,BaseProduction};

//mod logos_lexer;

#[cfg(feature = "generator")]
mod yacc_ast;
#[cfg(feature = "generator")]
mod yaccparser;
#[cfg(feature = "generator")]
use lalr_statemachine::LALRMachine;
#[cfg(feature = "generator")]
use selmlk::{MLStatemachine};
pub use zc_parser::{ZCParser,ZCRProduction};
#[cfg(feature = "legacy-parser")]
pub use runtime_parser::{RuntimeParser,RProduction,StackedItem};

/// Version string identifying this release of rustlr.
// NOTE(review): the Cargo.toml example in the crate-level docs names 0.6.2 -
// confirm which of the two version numbers is current.
pub const RUSTLRVERSION:&str = "0.6.1";

/// Generates a parser/lexer from within a Rust program.
///
/// `argv` holds, separated by whitespace, the arguments that would follow the
/// program name on the rustlr command line (grammar path, `-o`, `-trace`, ...).
/// If given the `-trace 0` option, no output will be sent to stdout or
/// stderr.  Instead, a log of events is recorded and is returned.
///
/// # Errors
///
/// An `Err(_)` result carries a message describing the failure, for example
/// when no `.grammar`/`.y` path is present among the arguments, when the
/// grammar cannot be processed, or when the parser file cannot be written.
/// An `Ok(_)` result indicates that processing ran to completion.
///
/// # Examples
///
/// ```ignore
///   let report = rustlr::generate("simplecalc.grammar -o src/main.rs -trace 0");
/// ```
#[cfg(feature = "generator")]
pub fn generate(argv:&str) -> Result<String,String> {
  // rustle1 follows the command-line convention: index 0 is the program name
  // and is never examined (scanning starts at index 1, and fewer than two
  // entries is an error).  Supply a placeholder for that slot so the first
  // real token of `argv` - normally the grammar path - is not skipped.
  // A caller that already prefixed a dummy program name is unaffected as
  // long as that name is not itself an option: unrecognized arguments are
  // ignored by the option scanner.
  let args:Vec<&str> = std::iter::once("rustlr")
    .chain(argv.split_whitespace())
    .collect();
  rustle1(&args)
}


/// Legacy entry point, kept for backwards compatibility.  New code should
/// call [generate] instead.
///
/// `args` is forwarded unchanged (as borrowed string slices) to the internal
/// driver, which begins reading options at index 1; index 0 is therefore
/// expected to be the program name, as it is when `args` comes from `main`.
#[cfg(feature = "generator")]
pub fn rustle(args:&Vec<String>) -> Result<String,String> // called from main
{
  let borrowed:Vec<&str> = args.iter().map(String::as_str).collect();
  rustle1(&borrowed)
}
/// Core driver behind [generate] and [rustle]: scans the argument list,
/// processes the grammar, builds the selected state machine and writes the
/// parser file (plus the AST file when AST generation is enabled).
///
/// `args` follows the command-line convention: `args[0]` is never examined
/// (option scanning starts at index 1), so fewer than two entries is an error.
///
/// Returns `Ok(log)` after the parser has been written, or right after a `.y`
/// file has been handed to the yacc converter.  `log` is an empty string
/// unless the trace level is 0, in which case it receives whatever
/// `swap_log` exchanges into it (presumably the recorded event log).
/// Every failure path returns `Err(message)`.
#[cfg(feature = "generator")]
fn rustle1(args:&[&str]) -> Result<String,String> // called by generate and rustle
{
  let argc = args.len();
  if argc<2 {
    //eprintln!("Must give path of .grammar file"); return;
    return Err("Must give path of .grammar file".to_owned());
  }
  // Option state, initialised to the defaults used when a flag is absent.
  let mut filepath = "";
  let mut parserfile = String::from("");  // -o target
  let mut lalr = false;  // changed from false in version 0.2.0
  let mut newlalr = true;
  let mut tracelev:usize = 1; // trace-level
  let mut verbose = false;
  let mut zc = false;
  let mut newbase = true;
  let mut genlex = false;
  let mut genabsyn = false;
  let mut lrsd = false;
  let mut lrsdmaxk:usize = selmlk::MAXK;
  let mut regenerate = false;
  let mut mode = 0;
  let mut conv_yacc = false;
  let mut inlinetable = true;
  let mut argi = 1; // next argument position
  while argi<argc
  {
     match args[argi] {
       filen if filen.ends_with(".grammar") => {filepath = args[argi];},
       // A .y file requests conversion from yacc; scanning stops here, so
       // any arguments after the .y path are not examined.
       filen if filen.ends_with(".y") => {
          filepath=args[argi];
	  conv_yacc=true;
	  break;
       },
       "lr1" | "LR1" | "-lr1" => { lalr=false; newlalr=false; },
       "lalr" | "LALR" | "-lalr" => {newlalr=true; },
       "lalr1" | "LALR1" | "-lalr1" => {newlalr=true; },
       "oldlalr" | "-oldlalr" | "-selML" => {newlalr=false; lalr=true;}
       // -lrsd may be followed by a number that replaces the default
       // maximum delay; that number is consumed only if it parses as usize.
       "-lrsd" | "lrsd" => {
         newlalr=false; lalr=false; lrsd=true;
         if argi+1<argc {
           if let Ok(mk)=args[argi+1].parse::<usize>() {
             lrsdmaxk=mk; argi+=1;
           } // next arg is number
         }//if next arg exists
       },
       "-regenerate" => { regenerate=true; },
       "-fsharp" => {mode=1;},
       // -trace always consumes the following argument; if it does not
       // parse as usize the trace level keeps its previous value.
       "-trace" => {
          argi+=1;
          if argi<argc {
            if let Ok(lv) = args[argi].parse::<usize>() {tracelev=lv; }
          if tracelev>0 {println!("trace-level set to {}",tracelev);}
          }
       },
       "-table" => { inlinetable = false; },
       "verbose" | "-verbose" => { verbose=true; },
       "-zc" | "zero_copy" => {zc=true; newbase=false;},
       "-newbase" | "-base" => {newbase = true; zc=false; genabsyn=true; genlex=true;},
       "genlex" | "-genlex" => {genlex=true; },
       "-genabsyn" | "-ast" | "-auto" => {genabsyn = true; },
       "-nozc" => {zc=false;},
       "binary" | "-binary" => { verbose=false; },       
       // -o consumes the following argument as the parser destination.
       "-o" => {
          argi+=1;
          if argi<argc {parserfile = String::from(args[argi]);}
       },
       // Anything unrecognized is silently ignored.
       _ => {},    
     }//match directive
     argi+=1;
  }//while there are command-line args

  if filepath.len()==0 {
    //eprintln!("Must give path of .grammar file or .y file to convert from");
    return Err("Must give path of .grammar file or .y file to convert from".to_owned());
  }
  // Yacc conversion is a separate mode: no parser is generated and an
  // empty log is returned.
  if conv_yacc {
    yaccparser::convert_from_yacc(filepath);
    return Ok(String::new());
    //return Ok(".y grammar converted to .grammar\n".to_owned());
  }

  if zc && verbose {
     //eprintln!("verbose mode not compatible with -zc option");
     return Err("verbose mode not compatible with -zc option".to_owned());
  }
  if tracelev>0 && verbose {println!("verbose parsers should be used for diagnositic purposes and cannot be trained/augmented");}
  if tracelev>1 {println!("parsing grammar from {}",&filepath);}
  // Copy the relevant options onto the grammar before it is parsed.
  let mut grammar1 = Grammar::new();
  grammar1.genlex = genlex;
  grammar1.genabsyn = genabsyn;
  grammar1.tracelev = tracelev;
  //grammar1.tablefile = tablefile;
  grammar1.mode = mode; // 0 for rust, 1 for fsharp
  let parsedok = grammar1.parse_grammar(filepath);  //  ***
  if !parsedok {
    //println!("\nFailed to process grammar");
    return Err(format!("\nFailed to process grammar at {}",filepath));
  }
  // Check grammar integrity: now done inside parse
  // A name shorter than two characters is treated as unset: fall back to the
  // part of filepath between the last path separator ('/', else '\\') and
  // the last '.'.
  if grammar1.name.len()<2  { // derive grammar name from filepath
     let doti = if let Some(p)= filepath.rfind('.') {p} else {filepath.len()};
     let mut slashi = if let Some(p) = filepath.rfind('/') {p+1} else {0};
     if slashi==0 {
       slashi = if let Some(p) = filepath.rfind('\\') {p+1} else {0};
     }
     grammar1.name = filepath[slashi..doti].to_string();
  }// derive grammar name
  let gramname = grammar1.name.clone();

  // File extension of generated sources: F# when mode is 1, Rust otherwise.
  let pfsuffix = if mode==1 {"fs"} else {"rs"};

  // Position of the last separator in the -o target.  The prefix up to and
  // including it is reused as the directory for the AST and table files.
  let slashpos = parserfile.rfind('/').or(parserfile.rfind('\\'));
  //  if let None = slashpos {slashpos = parserfile.rfind('\\');}
  // grammar1.genabsyn is re-read here rather than the local flag
  // (NOTE(review): presumably parse_grammar may change it - confirm).
  if grammar1.genabsyn {
     let mut astpath = format!("{}_ast.{}",&gramname,pfsuffix);
     if let Some(pos) = slashpos { astpath=format!("{}{}",&parserfile[..pos+1],&astpath); }
     // The writer is selected by `mode` and the grammar's `bumpast` flag.
     let wres;
     if mode==1 {wres = grammar1.write_fsast(&astpath); }
     else if !grammar1.bumpast { wres = grammar1.writeabsyn(&astpath); }
     else {wres = grammar1.write_bumpast(&astpath); }
     if !wres.is_ok() {
       //eprintln!("Failed to generate abstract syntax");
       return Err("Failed to generate abstract syntax".to_owned());
     }
  }
  // With -table, record the path `<grammar>_table.fsm` on the grammar.
  if !inlinetable {
     let mut fsmpath = format!("{}_table.fsm",&gramname);
     if let Some(pos) = slashpos { fsmpath=format!("{}{}",&parserfile[..pos+1],&fsmpath); }
     grammar1.tablefile = fsmpath;
  }

 grammar1.delay_transform(); // static delayed reduction markers


  if tracelev>2 {println!("computing Nullable set");}
  grammar1.compute_NullableRf();
  if tracelev>2 {println!("computing First sets");}
  //grammar1.compute_FirstIM();
  grammar1.compute_First();

  // Build the state machine.  Exactly one of the three branches below runs,
  // and each one takes ownership of grammar1; afterwards the grammar is
  // reached through fsm0.Gmr.
  let mut fsm0;
  if lrsd {
    grammar1.logprint(&format!("Generating Experimental LR-Selective Delay State Machine with Max Delay = {}",lrsdmaxk));
    let mut lrsdfsm = MLStatemachine::new(grammar1);
    lrsdfsm.regenerate = regenerate;
    lrsdfsm.selml(lrsdmaxk);
    //fsm0 = lrsdfsm.to_statemachine();
    if lrsdfsm.failed {
      //println!("NO PARSER GENERATED"); return;
      return Err("LR SELECTIVE DELAY FAILURE. NO PARSER GENERATED".to_owned());
    }
    if !lrsdfsm.failed && lrsdfsm.regenerate {
      lrsdfsm.Gmr.logprint("Re-Generating LR(1) machine for transformed grammar...");
      // Cleared so that the state printing and writer selection further
      // down take their non-lrsd paths for the regenerated machine.
      lrsd = false;
      fsm0 = Statemachine::new(lrsdfsm.Gmr);
      fsm0.lalr = false;
      fsm0.generatefsm(); //GENERATE THE FSM
    } else {     fsm0 = lrsdfsm.to_statemachine(); }
    // but of course there will be more conflicts since there will be
    // more rules.  The original rules that caused conflicts for LR are
    // still there??

  } else  // not lrsd
  if newlalr { // newlalr takes precedence over other flags
     grammar1.logprint("Generating LALR(1) state machine");
     let mut lalrfsm = LALRMachine::new(grammar1);
     lalrfsm.generatefsm();
     fsm0 = lalrfsm.to_statemachine();
  }
  else {
    grammar1.logprint(&format!("Generating {} state machine for grammar {}...",if lalr {"older LALR"} else {"LR1"},&gramname));
    fsm0 = Statemachine::new(grammar1);
    fsm0.lalr = lalr;
    if lalr {fsm0.Open = Vec::with_capacity(1024); } // important
    fsm0.generatefsm(); //GENERATE THE FSM
  } // old code
  // State printing applies only to machines built by the plain Statemachine
  // path: every state above trace level 2, only state 0 at trace level 2.
  if tracelev>2 && !newlalr && !lrsd { for state in &fsm0.States {printstate(state,&fsm0.Gmr);} }
  else if tracelev>1 && !newlalr && !lrsd {   printstate(&fsm0.States[0],&fsm0.Gmr); }//print states
  // No -o target, or one that ends in a path separator: append the default
  // file name `<grammar>parser.<suffix>`.
  if parserfile.len()<1 || parserfile.ends_with('/') || parserfile.ends_with('\\') {parserfile.push_str(&format!("{}parser.{}",&gramname,pfsuffix));}
  if fsm0.States.len()>65536  {
    return Err(format!("too many states: {} execeeds limit of 65536",fsm0.States.len()));
  }
  // Select the parser writer.  F# mode wins, then the base-parser writers
  // (newbase is true unless -zc was given), then the zero-copy writers,
  // and last the original pre-0.2.0 writers.
  let write_result =
    if mode==1 { fsm0.writefsparser(&parserfile) }
    else if newbase && !lrsd {
      fsm0.writebaseenumparser(&parserfile)
    }
    else if newbase && lrsd {
      fsm0.writelrsdbaseparser(&parserfile)    
    }
    else if zc {  // write zero-copy parser
      //fsm0.writezcparser(&parserfile)
      //fsm0.writelbaparser(&parserfile)
      if !lrsd {fsm0.writeenumparser(&parserfile)}
      else {fsm0.writelrsdparser(&parserfile)}
    }
    else {  // non-zc, original before version 0.2.0
      if verbose /*fsm0.States.len()<=16*/ {fsm0.write_verbose(&parserfile)}
      else {fsm0.writeparser(&parserfile)}
    }; // write_result =
  //if tracelev>0 && !lrsd {eprintln!("{} total states",fsm0.FSM.len());}
  fsm0.Gmr.logprint(&format!("{} total states",fsm0.FSM.len()));
  if let Ok(_) = write_result {
     fsm0.Gmr.logprint(&format!("Parser saved in {}",&parserfile));
  }
  else if let Err(err) = write_result {
     return Err(format!("failed to write parser, likely due to invalid -o destination\n{:?}",err));    
  }
  // Only at trace level 0 is the grammar's log handed back to the caller;
  // otherwise the returned string stays empty.
  let mut savedlog = String::new();
  if tracelev==0 {fsm0.Gmr.swap_log(&mut savedlog);}
  Ok(savedlog)
}//rustle1