# Tokay grammar / parser
# This program is used to generate Tokay's own parser, and is written in Tokay itself.
# Use the tooling in `gen_grammar/` for now.
# Whitespace & EOL
# Whitespace in the wider sense. Alternatives, in order: a run of tabs/blanks,
# a '#' comment up to (excluding) the line-break, or a backslash followed by a
# line-break (CR is optional), i.e. a line continuation.
# NOTE(review): no repetition is expressed here although callers place a single
# `_` between items - presumably the compiler treats `_` specially; confirm.
_ : @{ # true whitespace is made of comments and escaped line-breaks as well
Char<\t >+
'#' Char<^\n>*
'\\' '\r'? '\n'
}
# Any number (including zero) of T_EOL terminators, each followed by `_`.
# Used inside bracketed constructs that may span several lines.
___ : (T_EOL _)* # optional line-breaks followed by whitespace
# Placed directly after keyword matches ('if', 'true', 'peek', ...) in this grammar.
_standalone_ : @{
# helper parselet to ensure that identifiers stand alone
# fixme: When generic parselets are available, this can be replaced by a Standalone<K> invocation
# First alternative: look ahead that no letter or underscore follows, then skip whitespace
peek not Char<A-Z_a-z> _
# NOTE(review): bare `_` as second alternative; whether it can succeed without
# consuming anything depends on how `_` is compiled - confirm.
_
}
# End of a logical line. The first three alternatives consume a terminator
# (LF, CR with optional LF, or ';') plus trailing whitespace; the last two only
# look ahead and succeed in front of the end of input or a closing '}'.
T_EOL : @{
'\n' _
'\r' '\n'? _
';' _
peek EOF accept # accept is used to bypass bug #64 here
peek '}'
}
# Prime Tokens
# A single octal digit (0-7); used by the octal escape in T_EscapeSequence.
T_OctDigit : Char<0-7>
# A single hexadecimal digit in either case; used by the \x, \u and \U escapes.
T_HexDigit : Char<0-9A-Fa-f>
# Body of an escape sequence. The introducing backslash is matched by the
# callers (T_String, T_Touch, CclChar); each alternative here provides the
# character the escape stands for.
T_EscapeSequence : @{
# Named escape sequences
'a' "\x07"
'b' "\x08"
'f' "\x0c"
'n' "\n"
'r' "\r"
't' "\t"
'v' "\x0b"
# Encoded escape sequences
# fixme: This can be resolved better as soon as the Repeat generic builtin is ready
# Octal, exactly three digits; value is d1 * 64 + d2 * 8 + d3
T_OctDigit T_OctDigit T_OctDigit chr(int($1) * 64 + int($2) * 8 + int($3))
# Hexadecimal, exactly two digits (8-Bit); substr(1) drops the leading 'x'
'x' T_HexDigit T_HexDigit chr(int("0x" + $0.substr(1)))
# Unicode, exactly four hex digits (16-Bit)
'u' T_HexDigit T_HexDigit T_HexDigit T_HexDigit chr(int("0x" + $0.substr(1)))
# Unicode, exactly eight hex digits (32-Bit)
'U' T_HexDigit T_HexDigit T_HexDigit T_HexDigit \
T_HexDigit T_HexDigit T_HexDigit T_HexDigit \
chr(int("0x" + $0.substr(1)))
# fixme: When too few digits are provided, a syntax error shall occur.
# This is like in Python: "\x2" SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 0-2: truncated \xXX escape
# Fallback: any other single character following the backslash
Char
}
# An identifier, tagged "identifier".
# `Ident` is not defined in this file - presumably a built-in token; confirm.
T_Identifier : @{
ast("identifier", Ident)
}
# Name of a consumable: starts with an upper-case letter or an underscore,
# followed by letters, digits or underscores. Tagged "identifier" with $0 as value.
T_Consumable : @{
Char<A-Z_> Char<0-9A-Z_a-z>* ast("identifier", $0)
}
# An identifier-shaped name (letter or underscore first, then letters, digits
# or underscores) that is tagged "value_string" rather than "identifier".
# Used for alias keys, attribute names and capture aliases.
T_Alias : @{
Char<A-Z_a-z> Char<0-9A-Z_a-z>* ast("value_string", $0)
}
# Double-quoted string. The inner block repeats over escape sequences and
# ordinary characters (anything except backslash and '"'); reaching EOF inside
# the string raises an error. The closing quote is enforced with `expect`.
# $2 is the second item of the sequence, i.e. the repeated block - keep its
# position when editing this line.
T_String : @{
'"' {
'\\' T_EscapeSequence
Char<^\\\">
EOF error("Unclosed string, expecting '\"'")
}* str_join("", $2) expect '"'
}
# Single-quoted text, structured exactly like T_String but delimited by "'".
# The inner block repeats over escape sequences and ordinary characters
# (anything except backslash and "'"); EOF inside raises an error.
# $2 is the second item of the sequence, i.e. the repeated block.
T_Touch : @{
'\'' {
'\\' T_EscapeSequence
Char<^\\\'>
EOF error("Unclosed match, expecting '\''")
}* str_join("", $2) expect '\''
}
# Integer literal, tagged "value_integer".
# `Int` is not defined in this file - presumably a built-in token; confirm.
T_Integer : @{
ast("value_integer", Int)
}
# Floating point literal, tagged "value_float".
# `Float` is not defined in this file - presumably a built-in token; confirm.
T_Float : @{
ast("value_float", Float)
}
# Character-classes
# A single character inside a character-class `<...>`: either a backslash
# escape or any character other than the closing '>'. Reaching the end of
# input means the class was never closed, which is reported as an error.
CclChar : @{
'\\' T_EscapeSequence
Char<^\>>
# Character-classes are delimited by '<' and '>' (see TokenLiteral), so the
# missing delimiter to report is '>'
EOF error("Unclosed character-class, expecting '>'")
}
# One entry of a character-class: a range `a-b` (tagged "range", value is both
# end points concatenated) or a single character (tagged "char").
# The range alternative must stay first, otherwise its first character would
# already be taken by the single-character alternative.
CclRange : @{
CclChar '-' CclChar ast("range", $1 + $3)
CclChar ast("char")
}
# Content of a character-class: with a leading '^' it is tagged "ccl_neg",
# otherwise "ccl". Both forms allow zero entries.
Ccl : @{
'^' CclRange* ast("ccl_neg")
CclRange* ast("ccl")
}
# Statics, Variables, Loads
# Index access `[expression]`, tagged "item".
Subscript : @{
'[' _ Expression ']' _ ast("item")
}
# Attribute access: '.' followed by a name, tagged "attribute".
# The name is parsed by T_Alias and therefore arrives as a "value_string" node.
Attribute : @{
'.' _ T_Alias ast("attribute")
}
# Capture references introduced by '$': by alias (`$name`), by index (`$1`) or
# by a parenthesized expression (`$(...)`). A '$' followed by anything else
# raises an error.
Capture : @{
'$' T_Alias _ ast("capture_alias")
'$' T_Integer _ ast("capture_index")
'$' '(' _ ___ Expression ')' _ ast("capture_expr")
'$' error("'$...': Expecting identifier, integer or (expression)")
}
# Something that can be read or assigned: a plain identifier or a capture.
Variable : @{
T_Identifier
Capture
}
# Assignment target: a Variable followed by any number of subscripts,
# tagged "lvalue".
Lvalue : @{
Variable _ Subscript* ast("lvalue") # Lvalue currently doesn't allow attribute assignment!
}
# Reading access to a variable, including in-place increment and decrement.
# The postfix forms are tried first; for the prefix forms the operand is
# mandatory once the operator was matched. A plain Variable is the fallback.
Load : @{
Lvalue '++' ast("inplace_post_inc")
Lvalue '--' ast("inplace_post_dec")
'++' expect Lvalue ast("inplace_pre_inc")
'--' expect Lvalue ast("inplace_pre_dec")
Variable
}
# Parselet
# Parselet literal: '@', optional generics, optional arguments and a mandatory
# body Block; tagged "value_parselet".
Parselet : @{
'@' _ ParseletGenerics? _ ParseletArguments? expect Block ast("value_parselet")
}
## Parselet: Generics
# One generic parameter: a name, optionally followed by ':' and an Atomic
# (mandatory once the ':' was matched); tagged "gen".
ParseletGeneric : @{
T_Identifier _ (':' _ expect Atomic)? ast("gen")
}
# Generic parameter list in angle brackets. The list may be empty and the
# comma between entries is optional; the closing '>' is enforced.
ParseletGenerics : @{
'<' _ (ParseletGeneric (',' _)?)* _ expect '>' _
}
## Parselet: Arguments
# One parselet argument: a name, optionally followed by '=' and an expression
# (presumably the argument's default value); tagged "arg".
# Once '=' was matched the expression is mandatory. `expect` must apply to
# `Expression` itself - applied to an optional `Expression?` it could never
# fail, and a dangling '=' would be accepted silently.
ParseletArgument : @{
T_Identifier _ ('=' _ expect Expression)? ast("arg")
}
# One or more parselet arguments; the comma between them is optional.
ParseletArguments : @{
(ParseletArgument (',' _)?)+
}
## Parselet: Instance
# What can be instantiated or called as a parselet: the name of a consumable
# or an inline parselet literal.
StaticParseletInstance : T_Consumable | Parselet
# One generic argument of a parselet instance: named (`name: Atomic`, tagged
# "genarg_named") or positional (`Atomic`, tagged "genarg").
# The named form is tried first.
ParseletInstanceArgument : @{
T_Identifier _ ':' _ expect Atomic _ ast("genarg_named")
Atomic _ ast("genarg")
}
# A parselet reference, optionally with generic arguments in angle brackets.
# With at least one argument it is tagged "value_generic"; without a '<...>'
# part the StaticParseletInstance is passed through unchanged.
ParseletInstance : @{
StaticParseletInstance '<' _ (ParseletInstanceArgument (',' _)?)+ _ expect '>' _ ast("value_generic")
StaticParseletInstance
}
# Inline Blocks and Sequences
# One item of an inline sequence: `name => expression` or
# `expression => expression` (both tagged "alias"), or a plain expression.
# Unlike SequenceItem, the fallback is an Expression and not a Statement.
InlineSequenceItem : @{
T_Alias _ '=>' _ expect Expression ast("alias")
Expression '=>' _ expect Expression ast("alias")
Expression
}
# Content of a parenthesized inline block; line-breaks are allowed between items.
InlineSequence : @{
# Special case: Expression followed by "," is considered as a list with a single item (syntactic sugar)
Expression ___ ',' _ ___ peek ')' ast("list")
# A sequence is a list of items optionally separated by ","
(InlineSequenceItem ___ (',' _)? ___)+ ast("inline_sequence")
# An empty sequence generates an empty list
Void ast("list")
}
# Parenthesized block. With two or more '|'-separated inline sequences the
# result is tagged "block"; a single inline sequence is passed through as is.
# The closing ')' is enforced in both forms.
InlineBlock : @{
'(' _ ___ InlineSequence {___ '|' _ ___ InlineSequence}+ ___ expect ')' ast("block")
'(' _ ___ InlineSequence ___ expect ')'
}
# Call parameters (used by calls and rvalues)
# One call argument: named (`name = expression`, tagged "callarg_named") or
# positional (tagged "callarg"). The named form is tried first.
CallArgument : @{
T_Identifier _ '=' _ expect Expression ast("callarg_named")
Expression ast("callarg")
}
# One or more call arguments. The comma between them is optional and
# line-breaks are allowed after each argument.
CallArguments : @{
(CallArgument (',' _)? ___)+
}
# Token
# Literal tokens: ''text'' ("value_token_match"), 'text' ("value_token_touch"),
# and the Chars / Char tokens, each with or without a character-class in '<...>'.
# 'Chars' is listed before 'Char' because 'Char' is a prefix of it, and each
# '<...>' form is listed before its bare counterpart.
# NOTE(review): the bare 'Chars' / 'Char' alternatives carry no stand-alone
# check, so an identifier merely starting with "Char" looks like it would match
# here first - confirm.
TokenLiteral : @{
'\'' T_Touch '\'' ast("value_token_match")
T_Touch ast("value_token_touch")
'Chars' '<' Ccl '>' ast("value_token_ccls")
'Chars' ast("value_token_anys")
'Char' '<' Ccl '>' ast("value_token_ccl")
'Char' ast("value_token_any")
}
# Smallest consumable unit: a token literal, an inline block, an '@'-prefixed
# inline block (tagged "area"), a block, or a parselet instance - either called
# with arguments in parentheses (tagged "call") or referenced plainly.
# The call form must stay ahead of the plain reference.
TokenAtom : @{
TokenLiteral
InlineBlock
'@' _ InlineBlock ast("area")
Block
ParseletInstance '(' _ ___ CallArguments? ___ expect ')' ast("call")
ParseletInstance
}
# A TokenAtom with an optional modifier: postfix '+', '*' or '?', or one of the
# prefix keywords peek / not / expect. The prefix forms recurse into Token1, so
# they wrap an already postfix-modified token (e.g. `expect X?` is expect
# applied to `X?`).
Token1 : @{ # todo: Token1 can be renamed back to Token again when #31 is fixed and Self is used.
TokenAtom '+' ast("op_mod_pos")
TokenAtom '*' ast("op_mod_kle")
TokenAtom '?' ast("op_mod_opt")
TokenAtom
'peek' _standalone_ expect Token1 ast("op_mod_peek")
'not' _standalone_ expect Token1 ast("op_mod_not")
'expect' _standalone_ expect Token1 ast("op_mod_expect")
}
# Expression & Flow
## Literals
# Literal values: the keywords true / false / void / null (each checked with
# _standalone_), strings, floats and integers.
# T_Float is tried before T_Integer - presumably so that the integer part of a
# float is not matched on its own; confirm.
Literal : @{
'true' _standalone_ ast("value_true")
'false' _standalone_ ast("value_false")
'void' _standalone_ ast("value_void")
'null' _standalone_ ast("value_null")
T_String ast("value_string")
T_Float
T_Integer
}
## Atomic elements, including if and loops as they are atomic part of expressions
# Alternatives, in order: parenthesized expression, literal, token, `if` with
# optional `else` ("op_if"), `for ... in ...` ("op_for"), `loop` with a leading
# expression, `loop` with a block only (both "op_loop"), and finally a Load.
# The `loop` form with an expression is tried without `expect`, so that the
# block-only form can still match afterwards.
Atomic : @{
'(' _ ___ HoldExpression ___ ')'
Literal
Token1
'if' _standalone_ expect HoldExpression ___ expect Statement (___ 'else' _standalone_ ___ expect Statement)? ast("op_if")
'for' _standalone_ expect Lvalue _ expect 'in' _standalone_ expect Expression ___ expect Statement ast("op_for")
'loop' _standalone_ HoldExpression ___ Block ast("op_loop")
'loop' _standalone_ expect Block ast("op_loop")
Load
}
# Rvalue can be a function call or value attribute/subscript
# Left-recursive: an Rvalue followed by a parenthesized argument list is a
# "call"; an Rvalue followed by any number of attributes/subscripts is tagged
# "rvalue"; the recursion ends at Atomic.
Rvalue : @{
Rvalue '(' _ ___ CallArguments? expect ')' ast("call")
Rvalue (Attribute | Subscript)* ast("rvalue")
Atomic
}
# Unary minus and logical not, both right-recursive. `not '-'` keeps the minus
# alternative from matching the first character of '--'.
# The fallback alternative is where whitespace after an Rvalue is consumed.
Unary : @{
'-' not '-' _ Unary ast("op_unary_neg")
'!' _ Unary ast("op_unary_not")
Rvalue _
}
# Left-recursive multiplicative operators. '//' is listed before '/' so that
# the two-character operator is not matched as a single '/'.
# The right-hand operand is mandatory once an operator was matched.
MulDiv : @{
MulDiv '*' _ expect Unary ast("op_binary_mul")
MulDiv '//' _ expect Unary ast("op_binary_divi")
MulDiv '/' _ expect Unary ast("op_binary_div")
MulDiv '%' _ expect Unary ast("op_binary_mod")
Unary
}
# Left-recursive additive operators. `not '+'` / `not '-'` keep these
# alternatives from matching the first character of '++' / '--'.
AddSub : @{
AddSub '+' not '+' _ expect MulDiv ast("op_binary_add")
AddSub '-' not '-' _ expect MulDiv ast("op_binary_sub")
MulDiv
}
# A chain of one or more comparisons following a first operand, tagged
# "comparison" as a whole with one "cmp_*" node per operator/operand pair.
# Two-character operators are listed before '<' and '>' so they are not split.
# Without any comparison operator the AddSub is passed through unchanged.
Comparison : @{
AddSub {
'==' _ expect AddSub ast("cmp_eq")
'!=' _ expect AddSub ast("cmp_neq")
'<=' _ expect AddSub ast("cmp_lteq")
'>=' _ expect AddSub ast("cmp_gteq")
'<' _ expect AddSub ast("cmp_lt")
'>' _ expect AddSub ast("cmp_gt")
}+ ast("comparison")
AddSub
}
# Left-recursive logical AND ('&&'); the right-hand operand is mandatory.
LogicalAnd : @{
LogicalAnd '&&' _ expect Comparison ast("op_logical_and")
Comparison
}
# Left-recursive logical OR ('||'); the right-hand operand is mandatory.
LogicalOr : @{
LogicalOr '||' _ expect LogicalAnd ast("op_logical_or")
LogicalAnd
}
# Same structure as Expression, but every assignment is tagged with a "_hold"
# suffix. Used for right-hand sides of assignments, parenthesized expressions
# and the conditions of `if` and `loop`.
# `not ('>' | '=')` keeps plain '=' from matching the start of '=>' and '=='.
HoldExpression : @{
Lvalue _ '+=' _ expect HoldExpression ast("assign_add_hold")
Lvalue _ '-=' _ expect HoldExpression ast("assign_sub_hold")
Lvalue _ '*=' _ expect HoldExpression ast("assign_mul_hold")
Lvalue _ '/=' _ expect HoldExpression ast("assign_div_hold")
Lvalue _ '//=' _ expect HoldExpression ast("assign_divi_hold")
Lvalue _ '%=' _ expect HoldExpression ast("assign_mod_hold")
Lvalue _ '=' not ('>' | '=') _ expect HoldExpression ast("assign_hold")
LogicalOr
}
# Assignment (plain or compound) or, as fallback, a LogicalOr expression.
# The right-hand side of every assignment is a HoldExpression, so only the
# outermost assignment gets a tag without the "_hold" suffix.
# `not ('>' | '=')` keeps plain '=' from matching the start of '=>' and '=='.
Expression : @{
Lvalue _ '+=' _ expect HoldExpression ast("assign_add")
Lvalue _ '-=' _ expect HoldExpression ast("assign_sub")
Lvalue _ '*=' _ expect HoldExpression ast("assign_mul")
Lvalue _ '/=' _ expect HoldExpression ast("assign_div")
Lvalue _ '//=' _ expect HoldExpression ast("assign_divi")
Lvalue _ '%=' _ expect HoldExpression ast("assign_mod")
Lvalue _ '=' not ('>' | '=') _ expect HoldExpression ast("assign")
LogicalOr
}
# Statement and Assignment
# Control-flow keywords, each checked with _standalone_, or a plain Expression.
# 'next' and 'reject' take no expression; all others take an optional one.
# 'return' is tagged "op_accept", the same as 'accept'.
Statement : @{
'accept' _standalone_ Expression? ast("op_accept")
'break' _standalone_ Expression? ast("op_break")
'continue' _standalone_ Expression? ast("op_continue")
'exit' _standalone_ Expression? ast("op_exit")
'next' _standalone_ ast("op_next")
'push' _standalone_ Expression? ast("op_push")
'reject' _standalone_ ast("op_reject")
'repeat' _standalone_ Expression? ast("op_repeat")
'return' _standalone_ Expression? ast("op_accept")
Expression
}
# Blocks and Sequences
# Brace-delimited block. Braces containing only whitespace and line-breaks are
# tagged "value_void"; otherwise the contained instructions are tagged "block"
# and the closing '}' is enforced.
Block : @{
'{' _ ___ '}' ast("value_void")
'{' _ Instruction* _ expect '}' ast("block")
}
# One item of a sequence: `name => expression` or `expression => expression`
# (both tagged "alias"), or a Statement.
SequenceItem : @{
T_Alias _ '=>' _ expect Expression ast("alias")
Expression '=>' _ expect Expression ast("alias")
Statement
}
# One or more sequence items on a line, optionally separated by commas;
# tagged "sequence".
Sequence : @{
(SequenceItem (',' _)?)+ ast("sequence")
}
# Two or more sequences separated by '|' are tagged "block"; a single sequence
# is passed through unchanged.
Sequences : @{
Sequence {'|' _ Sequence}+ ast("block")
Sequence
}
# One line of a block or of the main program. Alternatives, in order:
# 'begin' / 'end' sequences, a constant definition `name : value`, a Statement,
# a group of Sequences, or a bare line terminator (empty line).
# In a constant definition, a Literal or Token1 is only taken as the value when
# a line terminator follows directly; otherwise the value is parsed as Sequences.
Instruction : @{
'begin' _standalone_ Sequences expect T_EOL ast("begin")
'end' _standalone_ Sequences expect T_EOL ast("end")
T_Identifier _ ':' _ {
Literal _ peek T_EOL
Token1 _ peek T_EOL
Sequences
} expect T_EOL ast("constant")
Statement T_EOL
Sequences expect T_EOL
T_EOL
}
# Matches nothing (Void) and tags it "op_nop".
# Not referenced by any other rule in the visible part of this grammar.
Nop : Void ast("op_nop")
# Main
# A program: one or more instructions. If no instruction matches, any single
# character triggers the generic parse error.
# NOTE(review): the meaning of the second argument `true` to error() cannot be
# told from this file - confirm against the builtin's documentation.
Tokay : @{
Instruction+
Char error("Parse error, unexpected token", true)
}
# Entry point: skip leading whitespace, parse an optional program and require
# that the whole input was consumed; the result is tagged "main".
_ Tokay? expect EOF ast("main")
# Disabled variant of the line above that additionally passes the AST to ast_print()
#_ Tokay? expect EOF ast_print(ast("main"))