// "lexer" items
// Keywords that cannot be used as variable names.
// PEG choice is ordered and commits to the first alternative that matches, so
// a keyword that has another keyword as a prefix must be listed first: "done"
// comes before "do", otherwise the "done" alternative could never be taken.
reserved = @{
    "let" | "in" | "if" | "else" | "for" | "fold" | "cast" | "ann" | "loop" | "is"
  | "done" | "do" | "set!" | "fail!" | "struct" | "require" | "provide"
  | "transmute" | "collect" | "with" | "type" | "extern" | "call" | "untagged"
}
// Natural-number literal: one or more ASCII digits.
nat_literal = @{ ASCII_DIGIT+ }
// Type name: an upper-case ASCII letter followed by letters, digits or "_".
type_name = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHANUMERIC | "_")* }
// Variable name: a lower-case ASCII letter followed by letters, digits or "_",
// as long as the whole identifier is not a reserved word.
// The lookahead rejects a keyword only when it ends at a word boundary (i.e. it
// is not followed by another identifier character). Identifiers that merely
// start with a keyword -- `letter`, `is_valid`, `in2`, `within` -- are therefore
// ordinary variable names.
var_name = @{
    !(reserved ~ !(ASCII_ALPHANUMERIC | "_"))
    ~ ASCII_ALPHA_LOWER ~ (ASCII_ALPHANUMERIC | "_")*
}
// "$"-prefixed name; used by const_expr and generic_params.
cgvar_name = @{ "$" ~ ASCII_ALPHA_LOWER ~ (ASCII_ALPHANUMERIC | "_")* }
// Operator tokens. Each rule matches exactly the literal shown and exists so
// that the operator appears as its own pair in the parse result.
// Several tokens are prefixes of others ("+"/"++", "*"/"**", "|"/"||",
// "&"/"&&", "<"/"<<"/"<=", ">"/">>"/">="); because PEG choice is ordered, any
// rule that offers both in one choice has to try the longer token first.

// arithmetic / sequence
add    = { "+" }
sub    = { "-" }
append = { "++" }
exp    = { "**" }
mul    = { "*" }
div    = { "/" }
modulo = { "%" }

// logical
lor  = { "||" }
land = { "&&" }
lnot = { "!" }

// shifts and bitwise
lshift = { "<<" }
rshift = { ">>" }
bor    = { "|" }
band   = { "&" }
bxor   = { "^" }
bnot   = { "~" }

// comparison
equal = { "==" }
gt    = { ">" }
ge    = { ">=" }
lt    = { "<" }
le    = { "<=" }
// Double-quoted string literal. Compound-atomic ($): no implicit
// whitespace/comment skipping happens between the quotes, and `inner` is
// reported as a child pair.
string_literal = ${ "\"" ~ inner ~ "\"" }

// The raw text between the quotes (escape sequences are matched, not decoded).
inner = @{ char* }

// One string character: either any character other than `"` and `\`, or a
// backslash escape -- a single-character escape or `\u` plus four hex digits.
char = {
    !("\"" | "\\") ~ ANY
  | "\\" ~ (
        "\"" | "\\" | "/" | "b" | "f" | "n" | "r" | "t"
      | "u" ~ ASCII_HEX_DIGIT{4}
    )
}
// Implicit whitespace, skipped between the tokens of every non-atomic rule.
// "\r" is included so that sources with CRLF line endings parse the same way
// as sources with LF line endings.
WHITESPACE = _{ " " | "\t" | "\r" | "\n" }
// Line comment: "#" up to the end of the line.
// The terminating newline is optional so that a comment on the very last line
// of a file that has no trailing newline is still accepted; the body can only
// stop short of a "\n" at end of input.
COMMENT = _{ "#" ~ (!"\n" ~ ANY)* ~ "\n"? }
// top-level program
// A complete input, anchored at both ends. Alternatives are tried in order:
//   1. one or more definitions, the `---` separator, then an expression;
//   2. one or more definitions and nothing else;
//   3. a single expression.
// The separator is spelled as three separate "-" tokens in a non-atomic rule,
// so implicit whitespace/comments are also accepted between the dashes.
program = {
    SOI ~ definition+ ~ "-" ~ "-" ~ "-" ~ expr ~ EOI
  | SOI ~ definition+ ~ EOI
  | SOI ~ expr ~ EOI
}
// definitions
// One top-level definition. `fun_def` is tried before `fun_def_gen`; both start
// with "def" and a name, and the generic form is reached when the plain form
// fails at the "<".
// NOTE(review): `struct_def_gen` is defined in this grammar but is not one of
// the alternatives here -- confirm whether that is intentional.
definition = {
    fun_def
  | fun_def_gen
  | struct_def
  | require
  | require_lib
  | provide
  | alias
}
// Function definition:
//   def name(args) = body
//   def name(args): ReturnType = body
fun_def = {
    "def" ~ var_name
    ~ "(" ~ fun_args ~ ")"
    ~ (":" ~ type_expr)?
    ~ "=" ~ expr
}

// Same as fun_def, with an angle-bracketed generic parameter list between the
// name and the argument list:
//   def name<params>(args): ReturnType = body
fun_def_gen = {
    "def" ~ var_name
    ~ "<" ~ generic_params ~ ">"
    ~ "(" ~ fun_args ~ ")"
    ~ (":" ~ type_expr)?
    ~ "=" ~ expr
}
// Non-empty, comma-separated generic parameter list with no trailing comma.
// Any `$`-names come first; the list then ends either with one more `$`-name
// or with one or more type names.
generic_params = {
    (cgvar_name ~ ",")* ~ (cgvar_name | (type_name ~ ",")* ~ type_name)
}
// Possibly empty list of `name: Type` entries separated by commas; a trailing
// comma after the last entry is accepted.
fun_args = {
    (var_name ~ ":" ~ type_expr ~ ",")*
    ~ (var_name ~ ":" ~ type_expr)?
}
// Struct definition: `struct Name { field: Type, ... }`.
// The field list has the same shape as a function argument list.
struct_def = {
    "struct" ~ type_name
    ~ "{" ~ fun_args ~ "}"
}
// Generic struct definition: `struct Name<params> { field: Type, ... }`.
// Mirrors fun_def_gen: the name comes first, then the angle-bracketed generic
// parameter list. The struct name is required, as in struct_def; without it a
// generic struct could not be referred to.
// NOTE(review): no rule visible in this grammar references struct_def_gen yet.
struct_def_gen = {
    "struct" ~ type_name
    ~ "<" ~ generic_params ~ ">"
    ~ "{" ~ fun_args ~ "}"
}
// `require "<string>"`
require = {
    "require" ~ string_literal
}

// `import a/b/c`: one or more variable-style names separated by "/".
require_lib = {
    "import" ~ (var_name ~ "/")* ~ var_name
}

// `provide name` or `provide TypeName`
provide = {
    "provide" ~ (var_name | type_name)
}

// `type Name = <type expression>`
alias = {
    "type" ~ type_name ~ "=" ~ type_expr
}
// types
// Entry point for type syntax. Silent: only the type_union pair is emitted.
type_expr = _{ type_union }

// One or more member types separated by "|". A type_union pair is produced
// even when there is only a single member.
type_union = {
    (type_qmark | type_terminal)
    ~ ("|" ~ (type_qmark | type_terminal))*
}

// A terminal type followed by "?". Tried before the bare terminal in
// type_union so that the "?" suffix is not left unconsumed.
type_qmark = {
    type_terminal ~ "?"
}
// The atomic building blocks of a type (silent: the matched alternative's own
// pair is emitted). The three "["-forms are tried in the order written, and a
// parenthesised type expression is the last resort.
type_terminal = _{
    type_vector
  | type_dynvectorof
  | type_vectorof
  | type_name
  | type_natrange
  | type_bytes
  | type_dynbytes
  | "(" ~ type_expr ~ ")"
}
// `[T1, T2, ...]` -- comma-separated element types; may be empty, and a
// trailing comma is accepted.
type_vector = {
    "[" ~ (type_expr ~ ",")* ~ type_expr? ~ "]"
}

// `[T; n]` -- an element type and a const-expression.
type_vectorof = {
    "[" ~ type_expr ~ ";" ~ const_expr ~ "]"
}

// `[T;]` -- an element type with nothing after the ";".
type_dynvectorof = {
    "[" ~ type_expr ~ ";" ~ "]"
}

// `%[n]` -- a const-expression inside `%[ ]`.
type_bytes = {
    "%[" ~ const_expr ~ "]"
}

// `%[]` -- empty `%[ ]`.
type_dynbytes = {
    "%[" ~ "]"
}

// `{a}` or `{a..b}` -- one const-expression, optionally followed by ".." and
// a second one.
type_natrange = {
    "{" ~ const_expr ~ (".." ~ const_expr)? ~ "}"
}
// const-expr. A precedence tower, similar to the bin-expr one.
// Entry point (silent); the loosest level is addition.
const_expr = _{ const_add_expr }

// term (+ term)*
const_add_expr = {
    const_mult_expr ~ (add ~ const_mult_expr)*
}

// factor (* factor)*
const_mult_expr = {
    const_term_expr ~ (mul ~ const_term_expr)*
}

// Leaves: a natural-number literal or a `$`-name. There is no parenthesised
// form at this level.
const_term_expr = _{ nat_literal | cgvar_name }
// expression
// Any expression (silent). Control-flow forms are tried first, then the
// operator expressions.
expr = _{
    cflow_expr
  | bin_expr
}

// Keyword-introduced expression forms (silent), tried in the order written.
// `fold_expr` and `for_expr` share the prefix `for <name> in <expr>`; the
// fold form is attempted first.
cflow_expr = _{
    let_expr
  | let_q_expr
  | if_expr
  | loop_expr
  | fail_expr
  | assert_expr
  | fold_expr
  | for_expr
}
// The bare keyword `fail!`.
fail_expr = { "fail!" }

// `assert! <expr> in <expr>`
assert_expr = {
    "assert!" ~ expr ~ "in" ~ expr
}

// `for <name> in <expr> collect <expr>`
for_expr = {
    "for" ~ var_name ~ "in" ~ expr
    ~ "collect" ~ expr
}

// `for <name> in <expr> fold <name> = <expr> with <expr>`
fold_expr = {
    "for" ~ var_name ~ "in" ~ expr
    ~ "fold" ~ var_name ~ "=" ~ expr
    ~ "with" ~ expr
}
// `let a = e1, b = e2, ... in body`
// At least one binding is required; further bindings are comma-separated.
let_expr = {
    "let" ~ var_name ~ "=" ~ expr
    ~ ("," ~ var_name ~ "=" ~ expr)*
    ~ "in" ~ expr
}

// `let? a = e1, b = e2, ... in body`
// Same shape as let_expr, introduced by `let?` instead of `let`.
let_q_expr = {
    "let?" ~ var_name ~ "=" ~ expr
    ~ ("," ~ var_name ~ "=" ~ expr)*
    ~ "in" ~ expr
}
// `if <expr> then <expr> else <expr>` -- the else branch is mandatory.
if_expr = {
    "if" ~ expr
    ~ "then" ~ expr
    ~ "else" ~ expr
}

// `loop <const-expr> do <assignments> return <expr>`
// The assignments are `;`-separated; the list may be empty and the last
// assignment may or may not be followed by a ";".
loop_expr = {
    "loop" ~ const_expr
    ~ "do" ~ (set_bang ~ ";")* ~ set_bang?
    ~ "return" ~ expr
}

// A single assignment inside a loop body: `<name> <- <expr>`.
set_bang = {
    var_name ~ "<-" ~ expr
}
// Operator expressions enter the precedence tower at its loosest level.
bin_expr = _{ logic_expr }

// The precedence tower, from *loosest-binding* to tightest-binding.
// In a PEG (unlike a CFG) each level is most naturally written as one operand
// followed by any number of (operator, operand) pairs, i.e. a rule of
// unbounded arity rather than a binary rule. The consumer therefore has to
// fold each flat list back into binary nodes.
// Where one operator token is a prefix of another on the same level, the
// longer one is listed first (`le` before `lt`, `ge` before `gt`, `append`
// before `add`, `exp` before `mul`).
logic_expr = {
    bitlogic_expr ~ ((lor | land) ~ bitlogic_expr)*
}
bitlogic_expr = {
    rel_expr ~ ((bor | band | bxor) ~ rel_expr)*
}
rel_expr = {
    shift_expr ~ ((equal | le | lt | ge | gt) ~ shift_expr)*
}
shift_expr = {
    add_expr ~ ((lshift | rshift) ~ add_expr)*
}
add_expr = {
    mult_expr ~ ((append | add | sub) ~ mult_expr)*
}
mult_expr = {
    uni_expr ~ ((exp | mul | div | modulo) ~ uni_expr)*
}

// At most one prefix operator (`!` or `~`) in front of an application
// expression.
uni_expr = {
    ((lnot | bnot) ~ apply_expr)
  | apply_expr
}
// A terminal expression followed by any number of postfix operations, applied
// left to right. The three "["-forms (ref, slice, update) are tried in that
// order.
apply_expr = {
    terminal_expr
    ~ (
        call_args
      | field_access
      | vector_ref
      | vector_slice
      | vector_update
      | as_type
      | into_type
      | tfish_call_args
    )*
}
// Call with explicit generic bindings: `<T = Type, $n = 3>(args)`.
// The binding list is non-empty, comma-separated, with no trailing comma, and
// is followed immediately by an ordinary argument list.
tfish_call_args = {
    "<"
    ~ ((tfish_type | tfish_cgvar) ~ ",")*
    ~ (tfish_type | tfish_cgvar)
    ~ ">"
    ~ call_args
}

// `TypeName = <type expression>`
tfish_type = {
    type_name ~ "=" ~ type_expr
}

// `$name = <const expression>`
tfish_cgvar = {
    cgvar_name ~ "=" ~ const_expr
}
// `(e1, e2, ...)` -- may be empty; a trailing comma is accepted.
call_args = {
    "(" ~ (expr ~ ",")* ~ expr? ~ ")"
}

// `.name`
field_access = {
    "." ~ var_name
}

// `[i]`
vector_ref = {
    "[" ~ expr ~ "]"
}

// `[i..j]`
vector_slice = {
    "[" ~ expr ~ ".." ~ expr ~ "]"
}

// `[i => v]`
vector_update = {
    "[" ~ expr ~ "=>" ~ expr ~ "]"
}

// `:: Type`
as_type = {
    "::" ~ type_expr
}

// `:! Type`
into_type = {
    ":!" ~ type_expr
}
// The leaves of an expression (silent). The order of alternatives matters:
//   * hex_literal comes before var_name, so `x"..."` is not read as the
//     variable `x`;
//   * is_type comes before var_name, since it begins with a var_name;
//   * extern_call_expr comes before extern_expr, since both begin with
//     "extern";
//   * vector_literal comes before for_literal, since both begin with "[".
// Control-flow forms are also accepted here, and a parenthesised expression
// is the last alternative.
terminal_expr = _{
    hex_literal
  | string_literal
  | is_type
  | unsafe_expr
  | extern_call_expr
  | extern_expr
  | nat_literal
  | cgvar_name
  | var_name
  | bytes_literal
  | vector_literal
  | struct_literal
  | for_literal
  | cflow_expr
  | "(" ~ expr ~ ")"
}
// `<name> is <type expression>`
is_type = {
    var_name ~ "is" ~ type_expr
}

// `[e1, e2, ...]` -- may be empty; a trailing comma is accepted.
vector_literal = {
    "[" ~ (expr ~ ",")* ~ expr? ~ "]"
}

// `%[e1, e2, ...]` -- same shape as vector_literal with a `%[` opener.
bytes_literal = {
    "%[" ~ (expr ~ ",")* ~ expr? ~ "]"
}

// `TypeName { field: expr, ... }` -- may have no fields; a trailing comma is
// accepted.
struct_literal = {
    type_name
    ~ "{"
    ~ (var_name ~ ":" ~ expr ~ ",")*
    ~ (var_name ~ ":" ~ expr)?
    ~ "}"
}

// `x"..."` containing an even number of hex digits (possibly none).
// Compound-atomic: no whitespace is allowed anywhere inside the literal.
hex_literal = ${
    "x\"" ~ (ASCII_HEX_DIGIT ~ ASCII_HEX_DIGIT)* ~ "\""
}

// `[<expr> for <name> in <expr>]`
for_literal = {
    "[" ~ expr ~ "for" ~ var_name ~ "in" ~ expr ~ "]"
}

// `extern "<string>"`
extern_expr = {
    "extern" ~ string_literal
}

// `extern call "<string>"(args)`
extern_call_expr = {
    "extern" ~ "call" ~ string_literal ~ call_args
}

// `unsafe <expr>`
// NOTE(review): nothing here requires a word boundary after "unsafe", and
// terminal_expr tries this rule before var_name, so an identifier such as
// `unsafely` appears to match as `unsafe` applied to `ly` -- confirm whether
// that is intended.
unsafe_expr = {
    "unsafe" ~ expr
}