rustlr 0.3.0

LR/LALR parser generator that can automatically create abstract syntax trees
Documentation
open System;
open System.Collections.Generic;
open Option;

type Stateaction = Shift of int | Reduce of int | Gotonext of int | Accept | ParseError of string;;

// SYMBOLS OF GRAMMAR
let SYMBOLS = [| "_WILDCARD_TOKEN_";"E";"ES";"+";"-";"*";"/";
"(";")";"=";";";"let";"in";"int";"var";"START";"EOF" |];

let TABLE:uint64 [] = [|17180065792UL; 55835033600UL; 47244967936UL; 30065164288UL; 4295229441UL; 60129673216UL; 8590000129UL; 281522221678592UL; 281543696187395UL; 281505041874944UL; 281535106383872UL; 281492156776448UL; 281479272202241UL; 281530811744256UL; 562975723290626UL; 563001493094402UL; 562967133356034UL; 562962838388738UL; 562988608192514UL; 562984313225218UL; 562971428323330UL; 562992903159810UL; 844472175099904UL; 844454995296256UL; 844485059805184UL; 844480765165568UL; 844429225689089UL; 844442110197760UL; 1125917087563776UL; 1125942857170944UL; 1125912792530944UL; 1125925677563904UL; 1125921382400000UL; 1407404948717568UL; 1407392063619072UL; 1407435013226496UL; 1407430718586880UL; 1407379179503617UL; 1407422128521216UL; 1688879925428224UL; 1688854156279809UL; 1688897105231872UL; 1688867040329728UL; 1688909989937152UL; 1688905695297536UL; 1970363491680258UL; 1970350606778370UL; 1970346311811074UL; 1970342016843778UL; 1970376376582146UL; 1970359196712962UL; 1970367786647554UL; 1970337721876482UL; 2251821289242624UL; 2251842764472320UL; 2251812699373568UL; 2251816994406400UL; 2251825584406528UL; 2533291970658306UL; 2533287675691010UL; 2533313445494786UL; 2533326330396674UL; 2533317740462082UL; 2533309150527490UL; 2533300561117184UL; 2533296265953280UL; 2814797012336642UL; 2814766947565570UL; 2814809897238530UL; 2814779832467458UL; 2814805602271234UL; 2814818487173122UL; 3096254808981504UL; 3096271988785152UL; 3096229039964161UL; 3096280578850816UL; 3096284873490432UL; 3096241923883008UL; 3377716900593664UL; 3377759850201088UL; 3377755555561472UL; 3377746965495808UL; 3377729785692160UL; 3377704016740353UL; 3659204762402816UL; 3659191877304320UL; 3659234826911744UL; 3659221942206464UL; 3659178993516545UL; 3659230532272128UL; 3940709803622400UL; 3940696918917120UL; 3940653970292737UL; 3940679739113472UL; 3940666854014976UL; 3940705508982784UL; 4222150421381120UL; 4222146126217216UL; 4222137536348160UL; 4222141831380992UL; 4222163306807296UL; 4503625398091776UL; 4503612513058816UL; 4503616808091648UL; 4503633988616192UL; 4503621102927872UL; 4785134734278658UL; 4785130439311362UL; 4785091784605698UL; 4785104669507586UL; 4785121849376770UL; 4785143324213250UL; 5066575350923266UL; 5066588235825154UL; 5066592530792450UL; 5066566760988674UL; 5066583940857858UL; 5066601120727042UL; 5066562466021378UL; 5066571055955970UL; 5348046033059840UL; 5348076097241090UL; 5348067507306498UL; 5348063212339202UL; 5348058917371906UL; 5348037442535426UL; 5348041737502722UL; 5348050328223744UL; 5629525304934400UL; 5629521009770496UL; 5629533894148098UL; 5629516714278914UL; 5629542484082690UL; 5629538189115394UL; 5629551074017282UL; 5629512419311618UL; 5911008870924290UL; 5910991691055106UL; 5911017460858882UL; 5911000280989698UL; 5910987396087810UL; 5911026050793474UL; 5910995986022402UL; 5911013165891586UL; 6192453784174593UL; 6192509617307648UL; 6192466667700224UL; 6192496732602368UL; 6192505322668032UL; 6192479552798720UL; 6473963119509506UL; 6473967414476802UL; 6473937349705730UL; 6473950234607618UL; 6473941644673026UL; 6473958824542210UL; 6473976004411394UL; 6473945939640322UL; 6755412326744064UL; 6755420916613120UL; 6755425211777024UL; 6755416621776896UL; 6755450982301696UL; 7036878714437633UL; 7036891597832192UL; 7036904482930688UL; 7036921662734336UL; 7036934547439616UL; 7036930252800000UL; 7318383754739714UL; 7318392344674306UL; 7318362280165376UL; 7318388049707010UL; 7318375165198336UL; 7318400934608898UL; 7318366575198208UL; 7318370870034432UL; |];


// LR Runtime Statemachine: info must be loaded into FSM.
let states = 27;
let mutable GRSM:(Dictionary<string,Stateaction>) [] = Array.zeroCreate states;
for i in 1..states do
  GRSM.[i-1] <- Dictionary<string,Stateaction>();

// type of binary formatted table is uint64 []
//prectable.["+"] <- 200;

let decode_action (ncode:uint64) =
  let symi = int((ncode &&& 0x0000ffff00000000UL) >>> 32)  // symbol index
  let sti =  int((ncode &&& 0xffff000000000000UL) >>> 48)  // state index
  let satype = int(ncode &&& 0x000000000000ffffUL)  // stateaction type
  let savalue = int((ncode &&& 0x00000000ffff0000UL) >>> 16) //action value
  let action =
    match (satype,savalue) with
      | (0,si) -> Shift(si)
      | (1,si) -> Gotonext(si)
      | (2,ri) -> Reduce(ri)
      | (3,_) -> Accept
      | _ -> ParseError("parse table corrupted")
  (sti,symi,action);;
//  GRSM.[sti].[SYMBOLS.[symi]] <- action;;


for i in 0 .. TABLE.Length-1 do
  let (sti,symi,action) = decode_action (TABLE.[i])
  GRSM.[sti].[SYMBOLS.[symi]] <- action

//Console.WriteLine("done");

////// Runtime parser - using the parameterized type suggests either a 
////// single absyntype or a generated enum.

type StackedItem<'AT> =
  {  mutable statei: int;
     mutable svalue: 'AT;
     mutable line : int;
     mutable column : int;
  };;

// this is the only token type the abs parser needs to know about
type TerminalToken<'AT> =
  {
    mutable sym: string;
    mutable svalue: 'AT;
    mutable line: int;
    mutable column: int;
  }
  member this.set(a,b,c,d) =
    this.sym<-a; this.svalue<-b; this.line <- c; this.column <- d;;

// runtime parser and runtime production:
type RTProduction<'AT,'ET> =
  {
    lhs : string;  // left-hand side nonterminal name
    action : (RTParser<'AT,'ET>) -> 'AT; 
  }
and RTParser<'AT,'ET> =
  {
    mutable exstate: 'ET;
    mutable RSM: (Dictionary<string,Stateaction>) [];
    mutable stack: ResizeArray<StackedItem<'AT>>;
    resynch: HashSet<string>;
    mutable err_occurred: bool;
    mutable stopparsing: bool;
    mutable line: int;
    mutable column: int;
    mutable src_id: int;
    Symset: HashSet<string>;
    Rules: RTProduction<'AT,'ET> [];
    mutable NextToken: unit -> TerminalToken<'AT> option;
  }
  member this.UpdateState (x:'ET) = this.exstate <- x;
  member this.State() = this.exstate;
  member this.Pop() =
    let lasti = this.stack.Count-1
    let item = this.stack.[lasti]
    this.stack.RemoveAt(lasti)
    this.line <- item.line; this.column <- item.column;
    item;
  member this.abort (msg:string) =
    Console.Error.WriteLine(msg);
    this.err_occurred <- true;
    this.stopparsing <- true;
  member this.report_error(msg:string, showlc:bool) =
    if showlc then Console.Error.Write("Line "+string(this.line)+", Col "+string(this.column)+":");
    Console.Error.WriteLine(msg);
    this.err_occurred <- true;
    
  member this.shift(nextstate:int, lookahead:TerminalToken<'AT>) =
    this.line <- lookahead.line; this.column <- lookahead.column;
    this.stack.Add({StackedItem.statei=nextstate; svalue=lookahead.svalue; line=lookahead.line; column=lookahead.column})
    let nextsym = this.NextToken();
    if (isNone nextsym) then {TerminalToken.sym="EOF"; svalue=Unchecked.defaultof<'AT>; line=this.line; column=this.column}
    else nextsym.Value
    
  member this.reduce(ruleindex:int) =
    let rulei = this.Rules.[ruleindex]
    let semval = rulei.action(this)
    let sti = this.stack.Count-1;
    let newstate = this.stack.[sti].statei;
    let goton = this.RSM.[newstate].[rulei.lhs]
    match goton with
      | Gotonext nsi ->
          this.stack.Add {StackedItem.statei=nsi; svalue=semval; line=this.line; column=this.column}
      | _ ->
          Console.Error.WriteLine("LR state transition table corrupted; no suitable action after reduce");
          exit(1);

  ////////////// core parse function
  member this.parse_core() =  // no error handlers yet
    this.stack.Clear()
    this.err_occurred <- false
    this.stopparsing <- false
    this.exstate <- Unchecked.defaultof<'ET>
    let mutable result = Unchecked.defaultof<'AT>
    let mutable tosi = 0;
    this.stack.Add {statei=0;svalue=result;line=0;column=0;}
    let mutable action = ParseError(""); // dummy
    let eoftoken = {TerminalToken.sym="EOF";svalue=Unchecked.defaultof<'AT>; line=0; column=0;}
    let mutable lookahead = eoftoken;
    match this.NextToken() with
      | Some(tok) -> lookahead <-tok
      | None -> this.stopparsing <- true
    while not(this.stopparsing) do
      tosi <- this.stack.Count-1
      let tos = this.stack.[tosi]
      this.line <- tos.line; this.column <- tos.column;
      let currentstate = tos.statei
      let lexinfo = ", line "+string(this.line)+", column "+string(this.column);
      let mutable actionopt = try (Some(this.RSM.[currentstate].[lookahead.sym])) with | _ -> None;
      match actionopt with
        | Some(Shift(nextstate)) -> lookahead <- this.shift(nextstate,lookahead)
        | Some(Reduce(ri)) -> this.reduce(ri)
        | Some(Accept) ->
           this.stopparsing <- true
           if this.stack.Count<1 then this.err_occurred<-true
           else result <- this.Pop().svalue
        | Some(ParseError(msg)) -> this.abort(msg+lexinfo)
        | _ -> this.abort("Unexpected Token "+string(lookahead.sym)+lexinfo)
        //| Some(Gotonext _) -> this.abort("LR parse table corrupted"+lexinfo);
    // while not(stopparsing)
    result;;
    
let err_reporter1(self:RTParser<'AT,'ET>, lookahead:TerminalToken<'AT>, actionopt:Stateaction option) =
  self.report_error("Unexpected Token"+string(lookahead.sym),true);

let err_recover1(self:RTParser<'AT,'ET>, lookahead:TerminalToken<'AT>) =
  None;

(*
        | _ ->
          err_reporter(this,lookahead,actionopt);
          actionopt = err_recover(this,lookahead)
          if (isSome actionopt)
      let action = match actionopt with
                 | Some(ParseError(_)) | None -> 
                   err_reporter(this,lookahead,actionopt)
                   let opt2 = err_recover(this,lookahead)
                   if (isSome opt2) then opt2.Value
                   else
                     this.stopparsing <- true
                 | _ -> actionopt.Value
*)

///// Tokenizer must ultimately produce terminal tokens


///testing active pattern idea:
// define active pattern for TerminalToken, ignore line,column:
let (|TToken|) (t:TerminalToken<'AT>) =
//  let sym = t.sym
//  let svalue = t.svalue
  TToken(t.sym,t.svalue);;

let tok1 = {TerminalToken.sym="abc"; svalue=1; line=1; column=1;}

match tok1 with
  | TToken("abc",1) -> printfn "ok"
  | _ -> printfn "not ok";;


// requires absLexer.dll:

let convert_lt<'T> (lt:LexToken) =
  {TerminalToken.sym = lt.token_type; svalue=(lt.token_value :?> 'T); line=lt.line; column=lt.column;}

(*
let lt1 = LexToken("int",3);

//let tt1:TerminalToken<int> = convert_lt lt1; // warning loss of static typing
let tt1 = convert_lt<int> lt1; // warning loss of static typing

printfn "lexical token: %A" lt1;
printfn "terminal token: %A" tt1;
*)