// Prolog of Parol's own grammar: start symbol, descriptive metadata,
// grammar type, comment syntax and user types for selected non-terminals.
%start Parol
%title "Parol grammar"
%comment "Parol's own grammar"
%grammar_type 'll(k)'
// Comment delimiters recognized in grammar description files.
%line_comment '//'
%block_comment '/*' '*/'
// Each %nt_type assigns a user-defined (Rust) type to the named non-terminal.
// NOTE(review): the referenced types live in crate::parser::parol_grammar,
// which is outside this file - confirm they exist when renaming anything here.
%nt_type UserTypeName = crate::parser::parol_grammar::UserDefinedTypeName
%nt_type ScannerState = crate::parser::parol_grammar::ScannerConfig
%nt_type ScannerStateDirectives = crate::parser::parol_grammar::ScannerStateSwitch
%%
// The start symbol, as named by the %start declaration.
// A grammar description consists of a prolog followed by the grammar definition.
Parol
: Prolog GrammarDefinition
;
// The prolog: exactly one start declaration, then any number of declarations,
// then any number of scanner state definitions - in this order.
Prolog
: StartDeclaration { Declaration } { ScannerState }
;
// Names the start symbol of the described grammar.
// The '%start' keyword is followed by the cut operator `^`, so only the
// Identifier is propagated to the AST.
StartDeclaration
: '%start'^ Identifier
;
// A single prolog declaration: descriptive metadata, a user type mapping,
// the grammar type, or a scanner directive.
// All keywords and the '=' sign are clipped with `^`; only operands reach the AST.
// The `@name` suffixes set the AST member names of the respective symbols.
Declaration
: '%title'^ String
| '%comment'^ String
| '%user_type'^ Identifier '='^ UserTypeName // User type alias definition
| "%nt_type"^ Identifier@nt_name '='^ UserTypeName@nt_type // User type for a non-terminal
| "%t_type"^ UserTypeName@t_type // User type for all terminals, the last will win
| '%grammar_type'^ RawString // The type of the grammar, e.g. LL(k), LALR(1)
| ScannerDirectives
;
// Directives that configure a scanner. They can appear directly in the prolog
// (through Declaration) and inside a '%scanner' block (through ScannerState).
// All directive keywords are clipped with `^`.
//  - %line_comment takes one terminal literal, %block_comment takes two
//    (presumably the start and end delimiters - confirm).
//  - %auto_newline_off, %auto_ws_off and %allow_unmatched take no operands.
//    NOTE(review): their exact effect is not visible in this file.
//  - %skip takes a list of identifiers.
//  - %on takes a list of identifiers followed by a scanner state switch
//    (enter, push or pop).
ScannerDirectives
: '%line_comment'^ TokenLiteral
| '%block_comment'^ TokenLiteral TokenLiteral
| '%auto_newline_off'^
| '%auto_ws_off'^
| '%skip'^ IdentifierList
| '%on'^ IdentifierList ScannerStateDirectives
| '%allow_unmatched'^
;
// The scanner state switch that follows the identifier list of an '%on' directive.
// '%enter' and '%push' name a target state; '%pop' takes no operand.
// NOTE(review): '%pop' is the only keyword here without the cut operator `^`,
// so its token stays in the AST. Presumably intentional, since the alternative
// would otherwise carry no data - confirm before adding a `^`.
ScannerStateDirectives
: '%enter'^ Identifier // Scanner state enter directives
| '%push'^ Identifier // Scanner state push directives
| '%pop' // Scanner state pop directives
;
// The grammar body: the '%%' separator (clipped from the AST) followed by
// one or more productions.
GrammarDefinition
: '%%'^ Production { Production } // There must be at least one production - with the start symbol
;
// The '::' path separator, wrapped in its own non-terminal.
// It is used (and clipped) between the identifiers of a UserTypeName.
DoubleColon
: '::'
;
// A production: the left-hand side identifier, a ':' and the alternatives,
// terminated by ';'. Both punctuation tokens are clipped from the AST.
Production
: Identifier ':'^ Alternations ';'^
;
// One or more alternatives separated by '|'. The separator is clipped from the AST.
Alternations
: Alternation { '|'^ Alternation }
;
// A single alternative: a sequence of zero or more factors.
// Zero factors yields an empty alternative.
Alternation
: { Factor }
;
// A factor is a grouping, a repetition, an optional expression or a plain symbol.
Factor
: Group | Repeat | Optional | Symbol
;
// A grammar symbol: a non-terminal reference, a plain terminal, or a terminal
// restricted to a list of scanner states.
Symbol
: NonTerminal // EBNF: Meta-identifier
| SimpleToken
| TokenWithStates
;
// EBNF: Terminal-string
// The three ways to write a terminal: double-quoted string, single-quoted raw
// string, or a regex delimited by slashes.
TokenLiteral
: String // Treated as a regular expression
| RawString // Regex meta characters need not be escaped by the user
| Regex // Treated as a regular expression
;
// A terminal literal, optionally followed by a lookahead constraint.
TokenExpression
: TokenLiteral [ LookAhead ]
;
// A terminal without a scanner state list, optionally followed by an AST
// control (cut operator, member name and/or user type).
SimpleToken
: TokenExpression [ ASTControl ]
;
// A terminal prefixed by the list of scanner states it is associated with.
// The enclosing '<' and '>' are clipped from the AST. An AST control may follow.
TokenWithStates
: '<'^ IdentifierList '>'^ TokenExpression [ ASTControl ]
;
// A double-quoted string. The body consists of backslash escapes (a backslash
// followed by any character) or any character other than a double quote.
String
: /"(\\.|[^"])*"/
;
// A single-quoted raw string. The body consists of backslash escapes (a
// backslash followed by any character) or any character other than a single quote.
RawString
: /'(\\.|[^'])*'/
;
// A regex literal delimited by slashes. The body consists of backslash escapes
// (a backslash followed by any character) or any character other than a slash.
Regex
: "/(\\.|[^\/])*/" // We use '/' as delimiter for regex and thus quotes for the Regex terminal
;
// A non-empty grouping
// NOTE(review): Alternations can derive a single empty Alternation, so this
// rule alone also accepts `( )`. Non-emptiness is presumably enforced outside
// the grammar - confirm.
// The parentheses are not clipped, so both tokens are kept in the AST.
Group
: '(' Alternations ')'
;
// A non-empty optional expression
// NOTE(review): Alternations can derive a single empty Alternation, so this
// rule alone also accepts `[ ]`. Non-emptiness is presumably enforced outside
// the grammar - confirm.
// The brackets are not clipped, so both tokens are kept in the AST.
Optional
: '[' Alternations ']'
;
// A non-empty repetition
// NOTE(review): Alternations can derive a single empty Alternation, so this
// rule alone also accepts `{ }`. Non-emptiness is presumably enforced outside
// the grammar - confirm.
// The braces are not clipped, so both tokens are kept in the AST.
Repeat
: '{' Alternations '}'
;
// A reference to a non-terminal by name, optionally followed by an AST
// control (cut operator, member name and/or user type).
NonTerminal
: Identifier [ ASTControl ]
;
// An identifier: an ASCII letter or underscore, followed by any number of
// ASCII letters, digits or underscores.
Identifier
: /[a-zA-Z_][a-zA-Z0-9_]*/
;
// A complete scanner definition
// The state's Identifier gets the AST member name `state_name`. The '%scanner'
// keyword and both braces are clipped. The body holds any number of scanner
// directives, including none.
ScannerState
: '%scanner'^ Identifier@state_name '{'^ { ScannerDirectives } '}'^
;
// A non-empty, comma-separated list of identifiers (the commas are clipped).
// Used as the list of scanner states a terminal symbol is associated with
// (TokenWithStates) and as the operand of the '%skip' and '%on' directives.
IdentifierList
: Identifier { ','^ Identifier }
;
// Controls how a symbol is represented in the AST. Exactly one of:
//  - the cut operator,
//  - a member name, optionally followed by a user type declaration,
//  - a user type declaration alone.
ASTControl
: CutOperator | MemberName [ UserTypeDeclaration ] | UserTypeDeclaration
;
// The name of the member in the AST as it will appear in the generated code.
// Written as '@' followed by an identifier. The '@' itself is clipped.
MemberName
: '@'^ Identifier
;
// Prevents the symbol it follows from being propagated to the AST.
// The operator token '^' is itself clipped (the trailing `^` outside the quotes).
CutOperator
: '^'^
;
// Assigns a user type to a symbol: a ':' (clipped) followed by the type name.
UserTypeDeclaration
: ':'^ UserTypeName
;
// A valid Rust qualified name
// Written as one or more identifiers separated by '::'. The DoubleColon
// separators are clipped, so only the identifiers reach the AST.
UserTypeName
: Identifier { DoubleColon^ Identifier }
;
// The lookahead: a positive or negative lookahead operator followed by the
// terminal literal that forms the lookahead pattern.
LookAhead
: ( PositiveLookahead | NegativeLookahead ) TokenLiteral
;
// The positive lookahead operator '?='. The token itself is clipped from the AST.
PositiveLookahead
: '?='^
;
// The negative lookahead operator '?!'. The token itself is clipped from the AST.
NegativeLookahead
: '?!'^
;