rustlr 0.5.0

Bottom-Up Parser Generator with Advanced Options
Documentation
//! Rustlr is an LR-style parser generator for Rust.  Advanced features
//! include:
//!  1. Option to automatically generate the AST datatypes and semantic actions, with manual overrides possible.  Rustlr's grammar format contains a sublanguage
//!   that controls how ASTS are created, so that the generated types do
//!   not necessarily reflect the format of the grammar.
//!  2. Option to use [bumpalo](https://docs.rs/bumpalo/latest/bumpalo/index.html) to create
//!  ASTS types that enable *nested* pattern atching against recursive types.
//! 
//!  3. Recognizes regex-style operators `*`, `+` and `?`, which simplify
//!  the writing of grammars and allow better ASTs to be created.
//!  4. An experimental feature that recognizes *Selective Marcus-Leermakers*
//!  grammars.  This is a class of unambiguous grammars that's 
//!  larger than traditional LR grammars.  They are especially helpful
//!  in avoiding conflicts when new production rules are added to a grammar.
//!  5. The ability to train the parser interactively for better error reporting
//!  6. Also generates parsers for F# and other .Net languages
//!
//! A **[tutorial](<https://chuckcscccl.github.io/rustlr_project/>)**
//! is separately available that will explain the
//! format of grammars and how to generate and deploy parsers for several 
//! examples.  The documentation found here should be used as a technical
//! reference.
//!
//! Rustlr should be installed as an executable (**cargo install rustlr**),
//! although parser generation can also be invoked with the [rustle] function.
//! Many of the items exported by this crate are only required by the parsers
//! that are generated, and are not intended to be used in other programs.
//! However, rustlr uses traits and trait objects to loosely couple the 
//! various components of the runtime parser so that custom interfaces, such as
//! those for graphical IDEs, can built around a basic [ZCParser::parse_core]
//! function.
//!

#![allow(dead_code)]
#![allow(unused_variables)]
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]
#![allow(unused_parens)]
#![allow(unused_assignments)]
#![allow(unused_doc_comments)]
#![allow(unused_imports)]

#[cfg(feature = "generator")]
mod generator_lib;
#[cfg(feature = "generator")]
pub use generator_lib::*;

pub mod zc_parser;
use zc_parser::*;

mod grammar_processor;
use grammar_processor::*;
mod lr_statemachine;
use lr_statemachine::*;
pub mod lexer_interface;
pub use lexer_interface::*;
pub mod runtime_parser;
use runtime_parser::*;
mod augmenter;
use augmenter::*;
pub mod generic_absyn;
pub use generic_absyn::*;
pub mod zc_parser;
use zc_parser::*;
mod parser_writer;
mod sd_parserwriter;
mod fs_parserwriter;
mod ast_writer;
mod fs_astwriter;

mod bumpast_writer;

mod lalr_statemachine;
mod selmlk; // experimental

//mod logos_lexer;

mod yacc_ast;
mod yaccparser;

use lalr_statemachine::LALRMachine;
use selmlk::{MLStatemachine};

pub use lr_statemachine::{Stateaction,decode_action};
pub use runtime_parser::{RuntimeParser,RProduction};
pub use zc_parser::{ZCParser,ZCRProduction};
//pub use enhancements::{ParseValue,ParseResult,Enhanced_Lexer};

pub const VERSION:&'static str = "0.4.13";

/// This function can be called from within Rust to generate a parser/lexer.
/// It takes the same arguments as the rustlr command-line application.
/// Furthermore, if given the `-trace 0` option, no output will be
/// sent to stdout or stderr.  Instead, a log of events is recorded and
/// is returned.  An `Ok(_)` result indicates that some parser was created
/// and an `Err(_)` result indicates failure.
/// Example:
/// ```ignore
///   let report = rustlr::generate("simplecalc.grammar -o src/main.rs -trace 0");
/// ```  
pub fn generate(argv:&str) -> Result<String,String> {
  let asplit:Vec<_> = argv.split_whitespace().collect();
  rustle1(&asplit)
}


/// This function is retained for backwards compatiblity.  It is recommended
/// to call [generate] instead.
pub fn rustle(args:&Vec<String>) -> Result<String,String> // called from main
{
  let mut args2 = Vec::new();
  for s in args { args2.push(&s[..]); }
  rustle1(&args2[..])
}

fn rustle1(args:&[&str]) -> Result<String,String> // called from main
{
  let argc = args.len();
  if argc<2 {
    //eprintln!("Must give path of .grammar file"); return;
    return Err("Must give path of .grammar file".to_owned());
  }
  let mut filepath = "";
  let mut parserfile = String::from("");  // -o target
  let mut lalr = false;  // changed from false in version 0.2.0
  let mut newlalr = true;
  let mut tracelev:usize = 1; // trace-level
  let mut verbose = false;
  let mut zc = true;
  let mut genlex = false;
  let mut genabsyn = false;
  let mut lrsd = false;
  let mut lrsdmaxk:usize = selmlk::MAXK;
  let mut regenerate = false;
  let mut mode = 0;
  let mut conv_yacc = false;
  let mut argi = 1; // next argument position
  while argi<argc
  {
     match args[argi] {
       filen if filen.ends_with(".grammar") => {filepath = args[argi];},
       filen if filen.ends_with(".y") => {
          filepath=args[argi];
	  conv_yacc=true;
	  break;
       },
       "lr1" | "LR1" | "-lr1" => { lalr=false; newlalr=false; },
       "lalr" | "LALR" | "-lalr" => {newlalr=true; },
       "lalr1" | "LALR1" | "-lalr1" => {newlalr=true; },
       "oldlalr" | "-oldlalr" | "-selML" => {newlalr=false; lalr=true;}
       "-lrsd" | "lrsd" => {
         newlalr=false; lalr=false; lrsd=true;
         if argi+1<argc {
           if let Ok(mk)=args[argi+1].parse::<usize>() {
             lrsdmaxk=mk; argi+=1;
           } // next arg is number
         }//if next arg exists
       },
       "-regenerate" => { regenerate=true; },
       "-fsharp" => {mode=1;},
       "-trace" => {
          argi+=1;
          if argi<argc {
            if let Ok(lv) = args[argi].parse::<usize>() {tracelev=lv; }
          if tracelev>0 {println!("trace-level set to {}",tracelev);}
          }
       },
       "verbose" | "-verbose" => { verbose=true; },
       "-zc" | "zero_copy" => {zc=true;},
       "genlex" | "-genlex" => {genlex=true; },
       "-genabsyn" | "-ast" | "-auto" => {genabsyn = true; },
       "-nozc" => {zc=false;},
       "binary" | "-binary" => { verbose=false; },       
       "-o" => {
          argi+=1;
          if argi<argc {parserfile = String::from(args[argi]);}
       },
       _ => {},    
     }//match directive
     argi+=1;
  }//while there are command-line args

  if filepath.len()==0 {
    //eprintln!("Must give path of .grammar file or .y file to convert from");
    return Err("Must give path of .grammar file or .y file to convert from".to_owned());
  }
  if conv_yacc {
    yaccparser::convert_from_yacc(filepath);
    return Ok(String::new());
    //return Ok(".y grammar converted to .grammar\n".to_owned());
  }

  if zc && verbose {
     //eprintln!("verbose mode not compatible with -zc option");
     return Err("verbose mode not compatible with -zc option".to_owned());
  }
  if tracelev>0 && verbose {println!("verbose parsers should be used for diagnositic purposes and cannot be trained/augmented");}
  if tracelev>1 {println!("parsing grammar from {}",&filepath);}
  let mut grammar1 = Grammar::new();
  grammar1.genlex = genlex;
  grammar1.genabsyn = genabsyn;
  grammar1.tracelev = tracelev;
  grammar1.mode = mode; // 0 for rust, 1 for fsharp
  let parsedok = grammar1.parse_grammar(filepath);  //  ***
  if !parsedok {
    //println!("\nFailed to process grammar");
    return Err(format!("\nFailed to process grammar at {}",filepath));
  }
  // Check grammar integrity: now done inside parse
  if grammar1.name.len()<2  { // derive grammar name from filepath
     let doti = if let Some(p)= filepath.rfind('.') {p} else {filepath.len()};
     let mut slashi = if let Some(p) = filepath.rfind('/') {p+1} else {0};
     if slashi==0 {
       slashi = if let Some(p) = filepath.rfind('\\') {p+1} else {0};
     }
     grammar1.name = filepath[slashi..doti].to_string();
  }// derive grammar name
  let gramname = grammar1.name.clone();

  let pfsuffix = if mode==1 {"fs"} else {"rs"};

  if grammar1.genabsyn {
     let mut slashpos = parserfile.rfind('/');
     if let None = slashpos {slashpos = parserfile.rfind('\\');}
     let mut astpath = format!("{}_ast.{}",&gramname,pfsuffix);
     if let Some(pos) = slashpos { astpath=format!("{}{}",&parserfile[..pos+1],&astpath); }
     let wres;
     if mode==1 {wres = grammar1.write_fsast(&astpath); }
     else if !grammar1.bumpast { wres = grammar1.writeabsyn(&astpath); }
     else {wres = grammar1.write_bumpast(&astpath); }
     if !wres.is_ok() {
       //eprintln!("Failed to generate abstract syntax");
       return Err("Failed to generate abstract syntax".to_owned());
     }
  }

 grammar1.delay_transform(); // static delayed reduction markers


  if tracelev>2 {println!("computing Nullable set");}
  grammar1.compute_NullableRf();
  if tracelev>2 {println!("computing First sets");}
  //grammar1.compute_FirstIM();
  grammar1.compute_First();

  let mut fsm0;
  if lrsd {
    grammar1.logprint(&format!("Generating Experimental LR-Selective Delay State Machine with Max Delay = {}",lrsdmaxk));
    let mut lrsdfsm = MLStatemachine::new(grammar1);
    lrsdfsm.regenerate = regenerate;
    lrsdfsm.selml(lrsdmaxk);
    //fsm0 = lrsdfsm.to_statemachine();
    if lrsdfsm.failed {
      //println!("NO PARSER GENERATED"); return;
      return Err("LR SELECTIVE DELAY FAILURE. NO PARSER GENERATED".to_owned());
    }
    if !lrsdfsm.failed && lrsdfsm.regenerate {
      lrsdfsm.Gmr.logprint("Re-Generating LR(1) machine for transformed grammar...");
      lrsd = false;
      fsm0 = Statemachine::new(lrsdfsm.Gmr);
      fsm0.lalr = false;
      fsm0.generatefsm(); //GENERATE THE FSM
    } else {     fsm0 = lrsdfsm.to_statemachine(); }
    // but of course there will be more conflicts since there will be
    // more rules.  The original rules that caused conflicts for LR are
    // still there??

  } else  // not lrsd
  if newlalr { // newlalr takes precedence over other flags
     grammar1.logprint("Generating LALR(1) state machine");
     let mut lalrfsm = LALRMachine::new(grammar1);
     lalrfsm.generatefsm();
     fsm0 = lalrfsm.to_statemachine();
  }
  else {
    grammar1.logprint(&format!("Generating {} state machine for grammar {}...",if lalr {"older LALR"} else {"LR1"},&gramname));
    fsm0 = Statemachine::new(grammar1);
    fsm0.lalr = lalr;
    if lalr {fsm0.Open = Vec::with_capacity(1024); } // important
    fsm0.generatefsm(); //GENERATE THE FSM
  } // old code
  if tracelev>2 && !newlalr && !lrsd { for state in &fsm0.States {printstate(state,&fsm0.Gmr);} }
  else if tracelev>1 && !newlalr && !lrsd {   printstate(&fsm0.States[0],&fsm0.Gmr); }//print states
  if parserfile.len()<1 || parserfile.ends_with('/') || parserfile.ends_with('\\') {parserfile.push_str(&format!("{}parser.{}",&gramname,pfsuffix));}
  if fsm0.States.len()>65536  {
    return Err(format!("too many states: {} execeeds limit of 65536",fsm0.States.len()));
  }
  let write_result =
    if mode==1 { fsm0.writefsparser(&parserfile) }
    else
    if zc {  // write zero-copy parser
      //fsm0.writezcparser(&parserfile)
      //fsm0.writelbaparser(&parserfile)
      if !lrsd {fsm0.writeenumparser(&parserfile)}
      else {fsm0.writelrsdparser(&parserfile)}
    }
    else {  // non-zc, original before version 0.2.0
      if verbose /*fsm0.States.len()<=16*/ {fsm0.write_verbose(&parserfile)}
      else {fsm0.writeparser(&parserfile)}
    }; // write_result =
  //if tracelev>0 && !lrsd {eprintln!("{} total states",fsm0.FSM.len());}
  fsm0.Gmr.logprint(&format!("{} total states",fsm0.FSM.len()));
  if let Ok(_) = write_result {
     fsm0.Gmr.logprint(&format!("Parser saved in {}",&parserfile));
  }
  else if let Err(err) = write_result {
     return Err(format!("failed to write parser, likely due to invalid -o destination\n{:?}",err));    
  }
  let mut savedlog = String::new();
  if tracelev==0 {fsm0.Gmr.swap_log(&mut savedlog);}
  Ok(savedlog)
}//rustle