// parol 4.5.0
// LL(k) and LALR(1) parser generator for Rust
// Documentation
// Declaration section: start symbol, descriptive metadata, grammar type,
// comment delimiters and user type mappings for selected non-terminals.
%start Parol
%title "Parol grammar"
%comment "Parol's own grammar"
// The type of the grammar (see the '%grammar_type' alternative of Declaration).
%grammar_type 'll(k)'
// Line comment introducer and block comment start/end delimiters.
%line_comment '//'
%block_comment '/*' '*/'
// Each %nt_type assigns a user type to the named non-terminal
// (see the "%nt_type" alternative of Declaration).
%nt_type UserTypeName = crate::parser::parol_grammar::UserDefinedTypeName
%nt_type ScannerState = crate::parser::parol_grammar::ScannerConfig
%nt_type ScannerStateDirectives = crate::parser::parol_grammar::ScannerStateSwitch

// End of the declaration section; the productions follow (see GrammarDefinition).
%%

// The start symbol (named by the %start declaration): a prolog of
// declarations followed by the grammar definition.
Parol
    : Prolog GrammarDefinition
    ;

// The prolog: one mandatory start declaration, then any number of
// declarations, then any number of scanner state definitions - in this order.
Prolog
    : StartDeclaration { Declaration } { ScannerState }
    ;

// Names the start symbol of the grammar. The '%start' keyword is cut (^)
// and therefore not propagated to the AST; only the identifier is kept.
StartDeclaration
    : '%start'^ Identifier
    ;

// A single declaration of the prolog. All keyword tokens and the '=' are cut
// (^), so only the operands reach the AST. The @-suffixes give the operands
// of "%nt_type" and "%t_type" explicit member names.
Declaration
    : '%title'^ String
    | '%comment'^ String
    | '%user_type'^ Identifier '='^ UserTypeName // User type alias definition
    | "%nt_type"^ Identifier@nt_name '='^ UserTypeName@nt_type // User type for a non-terminal
    | "%t_type"^ UserTypeName@t_type // User type for all terminals, the last will win
    | '%grammar_type'^ RawString // The type of the grammar, e.g. LL(k), LALR(1)
    | ScannerDirectives
    ;

// Scanner related directives. They are accepted both as a Declaration in the
// prolog and inside the braces of a ScannerState definition.
// All directive keywords are cut (^) from the AST.
ScannerDirectives
    : '%line_comment'^ TokenLiteral // One literal: the line comment introducer
    | '%block_comment'^ TokenLiteral TokenLiteral // Two literals: start and end delimiter
    | '%auto_newline_off'^
    | '%auto_ws_off'^
    | '%skip'^ IdentifierList
    | '%on'^ IdentifierList ScannerStateDirectives // Identifiers followed by a state switch directive
    | '%allow_unmatched'^
    ;

// The scanner state switch directives that can follow a '%on' directive.
// NOTE(review): unlike '%enter' and '%push', the '%pop' token carries no cut
// operator (^), so it stays in the AST - confirm that this is intended.
ScannerStateDirectives
    : '%enter'^ Identifier // Scanner state enter directives
    | '%push'^ Identifier // Scanner state push directives
    | '%pop' // Scanner state pop directives
    ;

// The production section: the '%%' separator (cut from the AST) followed by
// one mandatory production and any number of further productions.
GrammarDefinition
    : '%%'^ Production { Production } // There must be at least one production - with the start symbol
    ;

// The '::' separator placed between the identifiers of a UserTypeName.
DoubleColon
    : '::'
    ;

// A single production: left-hand side identifier, ':', the alternations and
// a terminating ';'. Both punctuation tokens are cut (^) from the AST.
Production
    : Identifier ':'^ Alternations ';'^
    ;

// One or more alternations separated by '|'; the separator is cut (^).
Alternations
    : Alternation { '|'^ Alternation }
    ;

// A sequence of factors. The repetition may match zero factors, so an
// alternation can be empty.
Alternation
    : { Factor }
    ;

// One element of an alternation: a grouping '( )', a repetition '{ }',
// an optional '[ ]' or a plain symbol.
Factor
    : Group | Repeat | Optional | Symbol
    ;

// A grammar symbol: a non-terminal reference, a terminal, or a terminal
// prefixed with a '<...>' list of scanner states.
Symbol
    : NonTerminal // EBNF: Meta-identifier
    | SimpleToken
    | TokenWithStates
    ;

// EBNF: Terminal-string
// The three lexical forms in which a terminal can be written.
TokenLiteral
    : String // Treated as a regular expression
    | RawString // Regex meta characters need not be escaped by the user
    | Regex // Treated as a regular expression
    ;

// A terminal literal, optionally followed by a lookahead constraint.
TokenExpression
    : TokenLiteral [ LookAhead ]
    ;

// A terminal without explicit scanner states, optionally followed by an
// AST control suffix (cut operator, member name and/or user type).
SimpleToken
    : TokenExpression [ ASTControl ]
    ;

// A terminal associated with a list of scanner states, written as
// '<' state names '>' before the token expression. The angle brackets are
// cut (^); an AST control suffix may follow.
TokenWithStates
    : '<'^ IdentifierList '>'^ TokenExpression [ ASTControl ]
    ;

// A double-quoted string. Its body is any sequence of backslash escapes
// (a backslash followed by any character) and characters other than '"'.
String
    : /"(\\.|[^"])*"/
    ;

// A single-quoted string. Its body is any sequence of backslash escapes
// (a backslash followed by any character) and characters other than "'".
RawString
    : /'(\\.|[^'])*'/
    ;

// A regular expression literal delimited by '/'. Its body is any sequence of
// backslash escapes and characters other than '/'.
Regex
    : "/(\\.|[^\/])*/" // We use '/' as delimiter for regex and thus quotes for the Regex terminal
    ;

// A non-empty grouping: alternations enclosed in parentheses.
// NOTE(review): the syntax alone also accepts empty content, because an
// Alternation is '{ Factor }'; non-emptiness is presumably checked
// elsewhere - confirm.
Group
    : '(' Alternations ')'
    ;

// A non-empty optional expression: alternations enclosed in square brackets.
// NOTE(review): the syntax alone also accepts empty content, because an
// Alternation is '{ Factor }'; non-emptiness is presumably checked
// elsewhere - confirm.
Optional
    : '[' Alternations ']'
    ;

// A non-empty repetition: alternations enclosed in curly braces.
// NOTE(review): the syntax alone also accepts empty content, because an
// Alternation is '{ Factor }'; non-emptiness is presumably checked
// elsewhere - confirm.
Repeat
    : '{' Alternations '}'
    ;

// A reference to a non-terminal by name, optionally followed by an
// AST control suffix (cut operator, member name and/or user type).
NonTerminal
    : Identifier [ ASTControl ]
    ;

// An identifier: an ASCII letter or underscore followed by any number of
// ASCII letters, digits or underscores.
Identifier
    : /[a-zA-Z_][a-zA-Z0-9_]*/
    ;

// A complete scanner definition: '%scanner', the state's name (AST member
// 'state_name') and any number of scanner directives enclosed in braces.
// The keyword and both braces are cut (^) from the AST.
ScannerState
    : '%scanner'^ Identifier@state_name '{'^ { ScannerDirectives } '}'^
    ;

// The list of scanner states a terminal symbol is associated with:
// one or more identifiers separated by commas (the commas are cut).
// The same list form is also used by the '%skip' and '%on' directives.
IdentifierList
    : Identifier { ','^ Identifier }
    ;

// The suffix that controls how a symbol appears in the AST. It is exactly one
// of: a cut operator, a member name with an optional user type, or a user
// type alone.
ASTControl
    : CutOperator | MemberName [ UserTypeDeclaration ] | UserTypeDeclaration
    ;

// The name of the member in the AST as it will appear in the generated code.
// Written as '@' followed by an identifier; the '@' itself is cut (^).
MemberName
    : '@'^ Identifier
    ;

// Prevents the symbol from being propagated to the AST.
// The operator is the '^' character, and its own token is cut here as well.
CutOperator
    : '^'^
    ;

// Assigns the user type to a symbol.
// Written as ':' followed by the type name; the ':' is cut (^).
UserTypeDeclaration
    : ':'^ UserTypeName
    ;

// A valid Rust qualified name: one or more identifiers separated by '::'.
// The DoubleColon separators are cut (^), so only the identifiers reach the AST.
UserTypeName
    : Identifier { DoubleColon^ Identifier }
    ;

// The lookahead: a positive or negative lookahead operator followed by the
// terminal literal that is looked ahead for.
LookAhead
    : ( PositiveLookahead | NegativeLookahead ) TokenLiteral
    ;

// The positive lookahead operator '?='. Its token is cut (^) from the AST.
PositiveLookahead
    : '?='^
    ;

// The negative lookahead operator '?!'. Its token is cut (^) from the AST.
NegativeLookahead
    : '?!'^
    ;