// Try to match short sequences before longer ones
/*
=============================================================
Whitespace and Comments
=============================================================
*/
// Implicit whitespace: pest skips WHITESPACE between the elements of every
// non-atomic sequence and repetition, so comments are skipped the same way.
WHITESPACE = _{ SPACE | BLOCK_COMMENT | LINE_COMMENT | DOC_COMMENT }
SPACE = _{ " " | "\t" | NEWLINE }
// A line comment runs to the end of the line. The closing NEWLINE is optional
// so that a comment on the last line of a file that has no trailing newline
// still matches (previously such a file failed to parse).
LINE_COMMENT = _{ "//" ~ (!NEWLINE ~ ANY)* ~ NEWLINE? }
// TODO: can we avoid negative lookahead?
BLOCK_COMMENT = _{ "/*" ~ (!"*/" ~ ANY)* ~ "*/" }
// block_comment_content = { ( !"*" ~ ANY | "*" ~ !"/" ~ ANY )* }
// BLOCK_COMMENT = _{ "/*" ~ block_comment_content ~ "*/" }
// TODO: can we avoid negative lookahead?
// NOTE(review): WHITESPACE tries LINE_COMMENT before DOC_COMMENT, and "//" is
// a prefix of the doc-comment marker, so this rule looks unreachable as
// written. Moving it ahead of LINE_COMMENT would make doc-comment tokens
// appear in the parse tree - confirm consumers expect that before reordering.
DOC_COMMENT = @{ "///" ~ (!NEWLINE ~ ANY)* ~ NEWLINE? }
// "??" token; wrapped by op_none_coalesce.
NONE_COALESCE_OP = {
    "??"
}
// Single "?" token; used by unwrap_postfix.
QUESTION = {
    "?"
}
/*
=============================================================
Basic Identifiers and Tokens
=============================================================
*/
// Lower-case name: starts with a lower-case letter or "_", continues with
// lower-case letters, digits or "_", and must not run straight into an
// upper-case letter.
identifier = @{
    ( ASCII_ALPHA_LOWER | "_" )
  ~ ( ASCII_ALPHA_LOWER | ASCII_DIGIT | "_" )*
  ~ !ASCII_ALPHA_UPPER
}
// Type name: an upper-case letter followed by letters, digits or "_".
type_identifier = @{
    ASCII_ALPHA_UPPER
  ~ ( ASCII_ALPHANUMERIC | "_" )*
}
// Constant name: optional leading "_", then upper-case letters, digits or "_"
// only; it must not run straight into a lower-case letter.
constant_identifier = @{
    "_"?
  ~ ASCII_ALPHA_UPPER
  ~ ( ASCII_ALPHA_UPPER | ASCII_DIGIT | "_" )*
  ~ !ASCII_ALPHA_LOWER
}
// List separator.
// PEST BUG: need explicit spaces after comma
COMMA = _{
    "," ~ " "*
}
// Optional comma, placed where a list is allowed to end with one.
TRAILING_COMMA = _{
    ","?
}
// Path separator between module segments / type and member.
SCOPE_OPERATOR = _{
    "::"
}
/*
=============================================================
Literals
=============================================================
*/
// Literal forms. PEG choice is ordered, so float_lit is tried before int_lit
// (both can start with digits).
basic_literal = { tuple_lit | float_lit | int_lit | string_lit | bool_lit | none_lit }
// The keyword must end at a word boundary; otherwise a name such as
// `nonexistent` would be split into the literal `none` plus `xistent`.
// Atomic so that no implicit whitespace is skipped before the boundary check.
none_lit = @{ "none" ~ !(ASCII_ALPHANUMERIC | "_") }
// Integer with optional leading minus; hex is tried first so that "0x..." is
// not consumed as the decimal "0".
int_lit = @{
    "-"? ~ ( hex_int | dec_int )
}
// Same as int_lit but without a sign.
unsigned_int_lit = @{
    hex_int | dec_int
}
// Decimal digits; a single "_" may separate two digits.
dec_int = {
    ASCII_DIGIT ~ ( "_"? ~ ASCII_DIGIT )*
}
// "0x" followed by hex digits; a single "_" may separate two digits.
hex_int = {
    "0x" ~ ASCII_HEX_DIGIT ~ ( "_"? ~ ASCII_HEX_DIGIT )*
}
// Optional sign, digits, ".", digits. Digits are required on both sides of
// the dot.
float_lit = @{
    ( "+" | "-" )?
  ~ ASCII_DIGIT+
  ~ "."
  ~ ASCII_DIGIT+
}
// Double-quoted string. A backslash always consumes the character after it,
// so both \" and \\ are treated as escape pairs. Previously only \" was
// recognised, which made a string ending in an escaped backslash ("a\\")
// swallow its own closing quote.
// NOTE(review): this rule only delimits the literal; how escape pairs are
// decoded is up to the consumer of the parse tree.
string_lit = @{ "\"" ~ ("\\" ~ ANY | !"\"" ~ ANY)* ~ "\"" }
// The keyword must end at a word boundary; otherwise a name such as `trueish`
// or `false_positive` would be split into a boolean literal plus a trailing
// identifier.
bool_lit = @{ ("true" | "false") ~ !(ASCII_ALPHANUMERIC | "_") }
// Parenthesised, comma-separated expressions (at least one).
tuple_lit = {
    "(" ~ expression ~ ( COMMA ~ expression )* ~ TRAILING_COMMA ~ ")"
}
// Enum variant, optionally type-qualified, with an optional struct-style or
// tuple-style payload.
enum_literal = {
    ( qualified_type_identifier ~ SCOPE_OPERATOR )?
  ~ type_identifier
  ~ ( struct_literal_optional_field_list | tuple_fields )?
}
// Named struct literal: type followed by a braced (possibly empty) field list.
struct_literal = {
    qualified_type_identifier ~ struct_literal_optional_field_list
}
struct_literal_optional_field_list = {
    "{" ~ struct_field_list? ~ "}"
}
// Struct literal without a type name; the field list is required here.
anonymous_struct_literal = {
    "{" ~ struct_field_list ~ "}"
}
struct_field = {
    field_label ~ expression
}
tuple_fields = {
    "(" ~ expression ~ ( COMMA ~ expression )* ~ TRAILING_COMMA ~ ")"
}
// Either explicit fields (optionally followed by ".."), or ".." alone.
struct_field_list = {
    ( struct_field ~ ( COMMA ~ struct_field )* ~ TRAILING_COMMA ~ rest_fields? )
  | rest_fields
}
rest_fields = {
    ".."
}
// "[" expressions "]"; the element list may be empty.
initializer_list = {
    "[" ~ ( expression ~ ( COMMA ~ expression )* )? ~ TRAILING_COMMA ~ "]"
}
// The ":" in "[:]".
empty_map_marker = {
    ":"
}
// "[" key: value pairs "]", or the empty-map marker.
initializer_pair_list = {
    "["
  ~ ( empty_map_marker | ( map_entry ~ ( COMMA ~ map_entry )* )? )
  ~ TRAILING_COMMA
  ~ "]"
}
map_entry = {
    expression ~ ":" ~ expression
}
/*
=============================================================
Interpolated String Literals
=============================================================
*/
// Single-quoted string whose content mixes literal text with "{ ... }"
// interpolations. Compound-atomic ($): no implicit whitespace is skipped
// between the quotes and the content, while inner rules still produce tokens.
interpolated_string = ${
    "'" ~ interpolated_content ~ "'"
}
interpolated_content = _{
    ( interpolation | text )*
}
// Non-atomic (!): implicit whitespace skipping is switched back on inside the
// braces.
interpolation = !{
    "{"
  ~ expression
  ~ ( ":" ~ format_specifier )?
  ~ "}"
}
// Literal run: everything up to the next "'" or "{".
text = @{
    ( !"'" ~ !"{" ~ ANY )+
}
// One of the single-character specifiers, or ".." digits followed by "f"/"s".
format_specifier = @{
    "?" | "x" | "X" | "b" | "f"
  | ".." ~ ASCII_DIGIT+ ~ ( "f" | "s" )
}
/*
=============================================================
Operators
=============================================================
*/
// Operators and their precedence are defined from lowest to highest.
// Binary operator chain. Each rule is built from operands of the rule that
// follows it.
logical = {
    none_coalesce ~ ( ( op_and | op_or ) ~ none_coalesce )*
}
none_coalesce = {
    range ~ ( op_none_coalesce ~ range )*
}
// At most one range operator; "..=" is tried before its prefix "..".
range = {
    comparison ~ ( ( inclusive_range_op | exclusive_range_op ) ~ comparison )?
}
// Two-character comparison operators are tried before "<" and ">".
comparison = {
    addition
  ~ ( ( op_lte | op_gte | op_eq | op_neq | op_lt | op_gt ) ~ addition )*
}
addition = {
    multiplication ~ ( ( op_add | op_sub ) ~ multiplication )*
}
multiplication = {
    prefix ~ ( ( op_mul | op_div | op_mod ) ~ prefix )*
}
// Any number of prefix operators in front of a postfix expression.
prefix = {
    prefix_op* ~ postfix
}
prefix_op = {
    op_not | op_neg | op_borrow_mut_ref
}
exclusive_range_op = {
    ".."
}
inclusive_range_op = {
    "..="
}
// Logical
op_or             = @{ "||" }
op_and            = @{ "&&" }
// Comparison
op_eq             = @{ "==" }
op_neq            = @{ "!=" }
op_lt             = @{ "<" }
op_gt             = @{ ">" }
op_lte            = @{ "<=" }
op_gte            = @{ ">=" }
// Arithmetic
op_add            = @{ "+" }
op_sub            = @{ "-" }
op_mul            = @{ "*" }
op_div            = @{ "/" }
op_mod            = @{ "%" }
// None coalescing
op_none_coalesce  = @{ NONE_COALESCE_OP }
// Prefix op
op_neg            = @{ "-" }
op_not            = @{ "!" }
op_borrow_mut_ref = @{ "&" }
/*
=============================================================
Postfix Expressions
=============================================================
*/
// A term followed by any number of postfix operations.
postfix = {
    term ~ postfix_op*
}
// member_call_postfix is tried before member_access_postfix because it starts
// with a member access.
postfix_op = {
      member_call_postfix
    | function_call_postfix
    | unwrap_postfix
    | subscript_postfix
    | member_access_postfix
}
// ".name", optional generic arguments, then call arguments.
// NOTE(review): the original comments here said "No whitespace allowed before
// this rule", but these are normal rules, so pest's implicit WHITESPACE
// skipping still applies around them - confirm whether that is enforced
// elsewhere.
member_call_postfix = {
    member_access_postfix ~ generic_arguments? ~ function_call_args
}
function_call_postfix = {
    generic_arguments? ~ function_call_args
}
// Parenthesised argument list; may be empty.
function_call_args = {
    "("
  ~ ( arg_expression ~ ( COMMA ~ arg_expression )* ~ TRAILING_COMMA )?
  ~ ")"
}
// One index expression, or two separated by ",".
subscript_postfix = {
    "[" ~ expression ~ ( "," ~ expression )? ~ "]"
}
member_access_postfix = {
    dot_identifier
}
dot_identifier = {
    "." ~ identifier
}
// A single "?" that is not the first half of "??". Compound-atomic ($) so the
// lookahead inspects the character directly after the "?": as a normal rule,
// implicit whitespace was skipped first and `a? ?? b` was rejected. The inner
// QUESTION token is still produced.
unwrap_postfix = ${ QUESTION ~ !QUESTION }
// Type "::" member, e.g. a reference to a member through its type.
static_member_reference = {
    qualified_type_identifier
  ~ SCOPE_OPERATOR
  ~ identifier
}
/*
=============================================================
Types
=============================================================
*/
// A base type with an optional trailing "?".
type_name = {
    base_type ~ optional_marker?
}
optional_marker = {
    "?"
}
// Ordered choice: alternatives sharing an opening "[" or "(" rely on this
// order, with the more specific form listed first.
base_type = {
      qualified_type_identifier
    | function_type
    | fixed_capacity_map_type
    | dynamic_map_type
    | fixed_capacity_array_type
    | slice_view_type
    | tuple_type
    | struct_type
    | unit_type
}
// [T; N]
fixed_capacity_array_type = {
    "[" ~ type_name ~ ";" ~ unsigned_int_lit ~ "]"
}
// [T]
slice_view_type = {
    "[" ~ type_name ~ "]"
}
// [K: V; N]
fixed_capacity_map_type = {
    "[" ~ type_name ~ ":" ~ type_name ~ ";" ~ unsigned_int_lit ~ "]"
}
// [K: V]
dynamic_map_type = {
    "[" ~ type_name ~ ":" ~ type_name ~ "]"
}
// (T, U, ...) with at least one element.
tuple_type = {
    "(" ~ type_name ~ ( COMMA ~ type_name )* ~ TRAILING_COMMA ~ ")"
}
// "(" ~ type_name ~ (COMMA ~ type_name)* ~ ")"
// { name: T, ... }; the field list may be empty.
struct_type = {
    "{" ~ struct_type_fields ~ "}"
}
struct_type_fields = {
    ( struct_type_field ~ ( COMMA ~ struct_type_field )* ~ TRAILING_COMMA )?
}
struct_type_field = {
    field_label ~ type_name
}
// "{" ~ struct_type_field ~ (COMMA ~ struct_type_field)* ~ TRAILING_COMMA? ~ "}"
// struct_type_field = { field_label ~ type_name }
// Atomic: the two parentheses must be adjacent.
unit_type = @{
    "()"
}
// name ":"
field_label = {
    identifier ~ ":"
}
// (N, M) - a pair of unsigned integers used as a generic argument.
generic_argument_int_tuple = {
    "(" ~ unsigned_int_lit ~ COMMA ~ unsigned_int_lit ~ ")"
}
generic_argument_item = _{
    type_name | generic_argument_int_tuple | unsigned_int_lit
}
// Optional "," or ";" followed by spaces; the separator itself may be absent.
COMMA_HACK = _{
    ( "," | ";" )? ~ " "*
}
// "<" items ">" with at least one item.
generic_arguments = {
    "<"
  ~ generic_argument_item
  ~ ( COMMA_HACK ~ generic_argument_item )*
  ~ ">"
}
// (T, U) -> R; the parameter list may be empty.
function_type = {
    "(" ~ function_params? ~ ")"
  ~ "->"
  ~ type_name
}
// Comma-separated parameter types (no names).
function_params = {
    type_name ~ ( COMMA ~ type_name )*
}
/*
=============================================================
Qualified Identifiers
=============================================================
*/
// Optional module path, a lower-case name, optional generic arguments.
qualified_identifier = {
    module_segments?
  ~ identifier
  ~ generic_arguments?
}
// Optional module path, a type name, optional generic arguments.
qualified_type_identifier = {
    module_segments?
  ~ type_identifier
  ~ generic_arguments?
}
// One or more "name::" prefixes.
module_segments = {
    ( identifier ~ SCOPE_OPERATOR )+
}
/*
=============================================================
Program Structure
=============================================================
*/
// A definition with any number of attributes in front of it.
item = {
    attribute* ~ definition
}
// Whole input: items and expressions in any order; `item` is tried first.
program = {
    SOI
  ~ ( item | expression )*
  ~ EOI
}
definition = {
      use_def
    | mod_def
    | type_def
    | struct_def
    | enum_def
    | function_def
    | impl_def
    | const_def
    | trait_def
}
/*
=============================================================
Declarations and Definitions
=============================================================
*/
// trait Name { member functions }
trait_def = {
    "trait" ~ type_identifier
  ~ "{" ~ impl_item* ~ "}"
}
// A type name with optional "<T, U>" type variables.
type_identifier_optional_type_variables = {
    type_identifier ~ generic_type_variables?
}
generic_type_variable = {
    type_identifier
}
generic_type_variables = {
    "<"
  ~ generic_type_variable ~ ( COMMA ~ generic_type_variable )*
  ~ ">"
}
// impl Name<T> { member functions }
impl_def = {
    "impl" ~ type_identifier_optional_type_variables
  ~ "{" ~ impl_item* ~ "}"
}
impl_item = {
    normal_member_function | external_member_function
}
// struct Name<T> { fields }; the braced body is optional.
struct_def = {
    "struct" ~ type_identifier_optional_type_variables ~ struct_type?
}
// type Name = T
type_def = {
    "type" ~ type_identifier ~ "=" ~ type_name
}
// enum Name<T> { variants }; the variant list may be empty.
enum_def = {
    "enum" ~ type_identifier_optional_type_variables
  ~ "{" ~ enum_variants? ~ "}"
}
enum_variants = {
    enum_variant ~ ( COMMA ~ enum_variant )* ~ TRAILING_COMMA
}
// Ordered choice over the variant shapes.
// NOTE(review): direct_variant accepts any type_name, and type_name can itself
// be a tuple_type, so tuple_variant looks unreachable in this order - confirm
// before relying on tuple_variant appearing in the parse tree.
enum_variant = {
      struct_variant    // Struct variant must come first
    | direct_variant    // Direct payload variant (single element, no parens)
    | tuple_variant     // Then tuple variant
    | simple_variant    // Simple variant must come last
}
// Name only.
simple_variant = {
    type_identifier
}
// Name followed by a single payload type.
direct_variant = {
    type_identifier ~ type_name
} // NEW: Single direct payload
// Name followed by a parenthesised list of types.
tuple_variant = {
    type_identifier ~ tuple_type
}
// Name followed by a braced list of named fields.
struct_variant = {
    type_identifier ~ struct_type
}
function_def = {
    external_function | normal_function
}
// external <int> fn ... - a signature without a body.
external_function = {
    "external" ~ int_lit ~ function_signature
}
// fn ... { body }
normal_function = {
    function_signature ~ block
}
// Member-function counterpart of external_function.
external_member_function = {
    "external" ~ int_lit ~ member_signature
}
// Member function with a body; may carry its own attributes.
normal_member_function = {
    attribute* ~ member_signature ~ block
}
// const NAME: T = expression; the type annotation is optional.
const_def = {
    "const" ~ constant_identifier
  ~ type_coerce?
  ~ "=" ~ expression
}
all_imports = { "*" }
// Keyword guards: the keyword followed by a word boundary. They are only ever
// referenced inside a lookahead, where pest emits no tokens, so the children
// of use_def / mod_def are unchanged. They are atomic so that no implicit
// whitespace is skipped before the boundary check.
use_keyword = @{ "use" ~ !(ASCII_ALPHANUMERIC | "_") }
mod_keyword = @{ "mod" ~ !(ASCII_ALPHANUMERIC | "_") }
// use path, use path::*, or use path::{ items }.
// The leading lookahead stops a name that merely starts with the keyword
// (e.g. `user = 1`) from being consumed as a use definition.
use_def = {
&use_keyword ~ "use" ~ import_path ~ ("::" ~ (all_imports | "{" ~ import_list ~ "}"))?
}
// mod path, with the same optional import suffix as use_def.
// The leading lookahead stops names such as `model` or `mode` from being
// consumed as a mod definition.
mod_def = {
&mod_keyword ~ "mod" ~ import_path ~ ("::" ~ (all_imports | "{" ~ import_list ~ "}"))?
}
// name::name::...
import_path = {
    identifier ~ ( "::" ~ identifier )*
}
// Comma-separated imported names; a trailing comma is allowed.
import_list = {
    import_item
  ~ ( COMMA ~ import_item )*
  ~ TRAILING_COMMA
}
// Either a type name or a lower-case name.
import_item = {
    type_identifier | identifier
}
/*
=============================================================
Function Parameters and Signatures
=============================================================
*/
function_identifier = {
    identifier
}
// fn name<T>(params) -> R. Type variables, parameters and the return type are
// all optional; the parameter list may end with a comma.
function_signature = {
    "fn" ~ function_identifier ~ generic_type_variables?
  ~ "(" ~ ( parameter_list ~ TRAILING_COMMA )? ~ ")"
  ~ return_type?
}
// fn name<T>(self, params) -> R. The parameter list is either a self parameter
// optionally followed by ordinary parameters, or ordinary parameters only.
// A trailing comma is accepted after the last parameter, matching
// function_signature (TRAILING_COMMA is silent, so the parse tree is the same).
member_signature = {
"fn" ~ function_identifier ~ generic_type_variables? ~ "(" ~ ((self_parameter ~ (COMMA ~ parameter_list)? | parameter_list) ~ TRAILING_COMMA)? ~ ")" ~ return_type?
}
// The keyword must end at a word boundary; otherwise a parameter whose name
// merely starts with "self" (e.g. `selfish: Int`) is taken for the self
// parameter and the enclosing member_signature then fails.
self_identifier = @{ "self" ~ !(ASCII_ALPHANUMERIC | "_") }
// `self` or `mut self`.
self_parameter = {
mut_keyword? ~ self_identifier
}
// One or more comma-separated parameters.
parameter_list = { parameter ~ (COMMA ~ parameter)* }
// [mut] name: Type
parameter = { maybe_mut_identifier ~ ":" ~ type_name }
// The keyword must end at a word boundary; otherwise a name such as `mutex`
// is split into the keyword `mut` plus the identifier `ex`.
mut_keyword = @{ "mut" ~ !(ASCII_ALPHANUMERIC | "_") }
// -> Type
return_type = { "->" ~ type_name }
/*
=============================================================
Loop Constructs and Control Flow
=============================================================
*/
// for pattern in iterable body
for_loop = {
    "for" ~ for_pattern
  ~ "in" ~ arg_expression
  ~ expression
}
// Two names separated by a comma, or a single name.
for_pattern = {
    for_pair | maybe_mut_identifier
}
for_pair = {
    maybe_mut_identifier ~ COMMA ~ maybe_mut_identifier
}
// while condition body
while_loop = {
    "while" ~ expression ~ expression
}
/*
=============================================================
Variable Definitions and Assignments
=============================================================
*/
// [mut] name [: Type] = expression
variable_definition = {
    variable_item
  ~ type_coerce?
  ~ "=" ~ expression
}
// ": Type" annotation.
type_coerce = {
    ":" ~ type_name
}
assign_op = {
    normal_assign_op | compound_assign_op
}
normal_assign_op = @{ "=" }
compound_assign_op = {
      add_assign_op
    | sub_assign_op
    | mul_assign_op
    | div_assign_op
    | modulo_assign_op
}
add_assign_op    = @{ "+=" }
sub_assign_op    = @{ "-=" }
mul_assign_op    = @{ "*=" }
div_assign_op    = @{ "/=" }
modulo_assign_op = @{ "%=" }
/*
=============================================================
Expressions
=============================================================
*/
// Top-level expression. Ordered choice: keyword-led forms first, then the
// assignment-like forms, then a plain block. `assignment` also covers a bare
// `logical` expression because its assignment part is optional.
expression = {
      with_expr
    | if_expr
    | when_expr
    | guard_expr
    | match_expr
    | for_loop
    | while_loop
    // | initializer_list
    // | initializer_pair_list
    | destructuring_assignment  // TODO: move to statement
    | variable_definition       // TODO: move to statement
    | assignment                // TODO: move to assignment_statement
    | block
    // | logical // maybe lowest
}
//statement = {
// variable_definition |
// destructuring_assignment |
// assignment_statement |
// expression_statement
//for_loop |
//while_loop |
// }
// An operator expression, optionally followed by an assignment operator and a
// right-hand side.
assignment = {
    logical ~ ( assign_op ~ expression )?
}
// assignable_target = {
// (identifier ~ (member_access_postfix | subscript_postfix)*)
//}
// assignment_statement = { assignable_target ~ assign_op ~ expression }
// Operator expression only; used where a full `expression` is not accepted.
arg_expression = {
    logical
}
// a, b = expression (two or more targets).
destructuring_assignment = {
    at_least_two_variables_list ~ "=" ~ expression
}
parenthesized = {
    "(" ~ expression ~ ")"
}
// Primary expression. Ordered choice: the order of the alternatives is
// significant and must be kept.
term = {
      context_access
    | static_member_reference
    | parenthesized
    | basic_literal             // must be before identifier_reference
    | qualified_identifier
    | constant_reference
    | struct_literal
    | enum_literal
    | anonymous_struct_literal
    | initializer_list
    | initializer_pair_list
    | interpolated_string
    | lambda
}
// "@" is a temporary placeholder for context access.
// In a future update, this will be removed so you can write "." or ".calling(…)" directly.
context_access = @{
    "@"
}
// |a, b| expression; the variable list may be empty.
lambda = {
    "|" ~ optional_variable_list ~ "|"
  ~ expression
}
// variable or function
// NOTE(review): the comment above is kept from the original; it does not
// obviously describe constant_reference - confirm which rule it was meant for.
constant_reference = {
    module_segments? ~ constant_identifier
}
// if condition consequence [else (if ... | alternative)]
if_expr = {
    "if" ~ expression ~ expression
  ~ ( "else" ~ ( if_expr | expression ) )?
}
// Braced sequence of zero or more expressions.
block = {
    "{" ~ expression* ~ "}"
}
// block = { "{" ~ statement* ~ expression? ~ "}" }
// with bindings body
with_expr = {
    "with" ~ variable_binding_list ~ expression
}
// when bindings body [else alternative]
when_expr = {
    "when" ~ variable_binding_list ~ expression
  ~ ( "else" ~ expression )?
}
/*
=============================================================
Match Expressions
=============================================================
*/
// match subject { arms }
match_expr = {
    "match" ~ arg_expression
  ~ "{" ~ match_arms ~ "}"
}
// At least one arm.
match_arms = {
    match_arm+
}
// pattern -> body. After a block body the comma is optional; after any other
// expression body it is required.
match_arm = {
    match_pattern
  ~ "->"
  ~ ( block ~ TRAILING_COMMA | expression ~ COMMA )
}
/*
=============================================================
Patterns
=============================================================
*/
// "_" or a pattern with an optional guard.
match_pattern = {
    wildcard_pattern | normal_pattern
}
normal_pattern = {
    pattern ~ guard_clause?
}
pattern = {
      enum_pattern
    | basic_literal
}
destructuring_pattern = {
      struct_destruct
    | tuple_destruct
}
// { a, b, } - braced, comma-separated names; may be empty.
struct_destruct = {
    "{"
  ~ ( pattern_variable ~ ( "," ~ pattern_variable )* ~ ","? )?
  ~ "}"
}
// a, _, b - comma-separated names or wildcards without surrounding
// parentheses. The whole list is optional, so this rule can match nothing.
tuple_destruct = {
    ( pattern_variable_or_wildcard
    ~ ( "," ~ pattern_variable_or_wildcard )*
    ~ ","?
    )?
}
// Variant name with an optional destructuring of its payload.
enum_pattern = {
    type_identifier ~ destructuring_pattern?
}
pattern_variable = {
    maybe_mut_identifier
}
pattern_variable_or_wildcard = {
    maybe_mut_identifier | "_"
}
wildcard_pattern = {
    "_"
}
/*
=============================================================
Guard Clauses
=============================================================
*/
// "|" followed by a wildcard or a condition expression.
guard_clause = {
    "|" ~ ( wildcard_pattern | expression )
}
// guard -> result
guard_item = {
    guard_clause ~ "->" ~ expression
}
// One or more guard items in sequence.
guard_expr = {
    guard_item+
}
/*
=============================================================
Variable Lists
=============================================================
*/
// Zero or more comma-separated variables; a trailing comma is allowed.
optional_variable_list = {
    ( variable_item ~ ( COMMA ~ variable_item )* )?
  ~ TRAILING_COMMA
}
// Two or more comma-separated variables.
at_least_two_variables_list = {
    variable_item ~ ( COMMA ~ variable_item )+
}
// A name with an optional leading `mut`.
maybe_mut_identifier = {
    mut_keyword? ~ identifier
}
variable_item = {
    maybe_mut_identifier
}
// One or more bindings; a trailing comma is allowed.
variable_binding_list = {
    variable_binding
  ~ ( COMMA ~ variable_binding )*
  ~ TRAILING_COMMA
}
// name [= value]
variable_binding = {
    variable_item ~ ( "=" ~ arg_expression )?
}
/*
===============================
Attributes
===============================
*/
attribute = {
    outer_attribute | inner_attribute
}
// #[ item ]
outer_attribute = {
    "#" ~ "[" ~ meta_item ~ "]"
}
// #![ item ]
inner_attribute = {
    "#!" ~ "[" ~ meta_item ~ "]"
}
// outer_attribute = { "#" ~ "[" ~ meta_item_list ~ "]" } // be able to have multiple items on each group
// inner_attribute = { "#!" ~ "[" ~ meta_item_list ~ "]" } // again, more fun to have multiple items in each group
// Attribute content. Ordered choice: the key/value and list forms are tried
// before the bare path because all three start with a qualified_identifier.
meta_item = { meta_key_value | meta_list | meta_path }
// A bare name.
meta_path = { qualified_identifier }
// name = value
meta_key_value = { qualified_identifier ~ "=" ~ meta_value }
meta_list = { qualified_identifier ~ "(" ~ meta_item_list? ~ ")" } // Function-call-like
// TRAILING_COMMA is already optional (","?), so it is used without an extra
// "?" here, the same way as everywhere else in this grammar.
meta_item_list = { meta_item ~ (COMMA ~ meta_item)* ~ TRAILING_COMMA } // "arguments"
meta_value = { basic_literal | meta_path | meta_list } // argument item: literal, a name or a "function call"