// JCL Grammar v1.0 - PEG specification using Pest with Pratt Parser
// Refactored to eliminate left-recursion using Pratt parsing for expressions
// Implicit whitespace (silent): space, tab, carriage return, line feed.
WHITESPACE = _{
      " "
    | "\t"
    | "\r"
    | "\n"
}

// A single doc comment line: "///" followed by everything up to the end of
// the line, plus the line feed itself when present. Compound-atomic so the
// comment text is taken verbatim, with no whitespace/comment skipping inside.
doccomment = ${ "///" ~ (!"\n" ~ ANY)* ~ "\n"? }

// A run of one or more doc comment lines.
doccomments = { doccomment+ }

// Regular comments (silent): "#" to end of line, or a "/* ... */" block.
COMMENT = _{
      "#" ~ (!"\n" ~ ANY)*
    | "/*" ~ (!"*/" ~ ANY)* ~ "*/"
}
// Entry point: the entire input, from SOI to EOI, is a sequence of statements.
program = { SOI ~ statement* ~ EOI }

// A statement is a statement body optionally preceded by doc comments.
statement = { doccomments? ~ stmtbody }

// Statement kinds, tried in this order (PEG ordered choice). A bare expression
// is the last alternative, so it is only used when nothing else matches.
stmtbody = {
      import_stmt
    | function_def
    | assignment
    | for_loop
    | expression
}
// ============================================================================
// IMPORTS
// ============================================================================
// import <items> from "<path>"
import_stmt = { "import" ~ import_items ~ "from" ~ string }

// What is imported: a wildcard, a parenthesised comma-separated list of items,
// or a single item.
import_items = {
      "*"
    | "(" ~ import_item ~ ("," ~ import_item)* ~ ")"
    | import_item
}

// One imported name with an optional "as" alias.
import_item = { identifier ~ ("as" ~ identifier)? }
// ============================================================================
// FUNCTION DEFINITIONS
// ============================================================================
// fn name(params) [: return_type] = body_expression
function_def = {
    "fn" ~ identifier
    ~ "(" ~ param_list? ~ ")"
    ~ type_annotation?
    ~ "=" ~ expression
}

// Comma-separated parameters (at least one; no trailing comma).
param_list = { param ~ ("," ~ param)* }

// A parameter name with an optional type annotation.
param = { identifier ~ type_annotation? }

// ": <type>"
type_annotation = { ":" ~ type_expr }

// Type expressions: the built-in scalar names, list<T>, or map<K, V>.
type_expr = {
      "string"
    | "int"
    | "float"
    | "bool"
    | "any"
    | "list" ~ "<" ~ type_expr ~ ">"
    | "map" ~ "<" ~ type_expr ~ "," ~ type_expr ~ ">"
}
// ============================================================================
// ASSIGNMENTS
// ============================================================================
// [mut] name [: type] = expression
// NOTE(review): the literal "mut" carries no trailing word boundary, so it can
// also match the first three letters of a longer word - confirm this is intended.
assignment = { "mut"? ~ identifier ~ type_annotation? ~ "=" ~ expression }
// ============================================================================
// FOR LOOPS
// ============================================================================
// for <vars> in <iterables> ( <statements> )
// NOTE(review): call_args is a postfix of expression, so a "(" that follows the
// last iterable is first attempted as a call on that iterable; a body that also
// reads as an argument list is consumed that way - confirm this is acceptable.
for_loop = {
    "for" ~ for_variables
    ~ "in" ~ for_iterables
    ~ "(" ~ statement* ~ ")"
}

// One or more comma-separated loop variable names.
for_variables = { identifier ~ ("," ~ identifier)* }

// One or more comma-separated iterable expressions.
for_iterables = { expression ~ ("," ~ expression)* }
// ============================================================================
// EXPRESSIONS (Pratt Parser)
// ============================================================================
// Expression entry point, shaped for a Pratt parser: a flat sequence of
// operands separated by infix operators, where each operand is
//     prefix* primary postfix*
// The rule itself encodes no precedence or associativity. The operator rules
// are referenced through silent wrappers so their tokens appear directly as
// children of `expression`.
expression = {
    prefix* ~ primary ~ postfix*
    ~ (infix ~ prefix* ~ primary ~ postfix*)*
}
// Prefix (unary) operators. `prefix` is silent, so `neg` / `not` tokens appear
// directly inside `expression`.
prefix = _{ neg | not }

// Arithmetic negation.
neg = { "-" }

// Logical negation: the word `not` or the symbol `!`.
// The rule is atomic and the word form requires that no letter, digit or
// underscore follows, so a longer word that merely starts with "not"
// (e.g. `notify`) is no longer split into `not` + `ify`.
not = @{
      "not" ~ !(ASCII_ALPHANUMERIC | "_")
    | "!"
}
// Infix (binary) operators. `infix` is silent, so the individual operator
// tokens appear directly inside `expression`.
// Order matters (PEG ordered choice): "<=" / ">=" are tried before "<" / ">",
// and "??" before the ternary "?".
infix = _{
      or_op | and_op
    | eq_op | ne_op | le_op | ge_op | lt_op | gt_op
    | null_coalesce_op
    | add_op | sub_op
    | mul_op | div_op | mod_op
    | pipe_op
    | ternary_op
}

// Word operators are atomic and require that no letter, digit or underscore
// follows. Without that boundary, "or" / "and" would match the start of a
// following word such as `order` or `android` and swallow it as an operator.
or_op  = @{ "or" ~ !(ASCII_ALPHANUMERIC | "_") }
and_op = @{ "and" ~ !(ASCII_ALPHANUMERIC | "_") }

// Comparison operators.
eq_op = { "==" }
ne_op = { "!=" }
le_op = { "<=" }
ge_op = { ">=" }
lt_op = { "<" }
gt_op = { ">" }

// Null coalescing.
null_coalesce_op = { "??" }

// Arithmetic operators.
add_op = { "+" }
sub_op = { "-" }
mul_op = { "*" }
div_op = { "/" }
mod_op = { "%" }

// Pipe.
pipe_op = { "|" }

// Ternary `cond ? a : b`, modelled as an infix operator whose token carries the
// middle operand: the operator is "? <expression> :" and sits between the
// condition and the else-branch.
ternary_op = { "?" ~ expression ~ ":" }
// Postfix operators, applied to the operand on their left. `postfix` is silent,
// so the individual tokens appear directly inside `expression`.
postfix = _{
      optional_chain
    | member_access
    | index_access
    | call_args
}

// a?.b
optional_chain = { "?." ~ identifier }

// a.b
member_access = { "." ~ identifier }

// a[expr]
index_access = { "[" ~ expression ~ "]" }

// a(args...) - the argument list may be empty.
call_args = { "(" ~ argument_list? ~ ")" }

// One or more comma-separated argument expressions (no trailing comma).
argument_list = { expression ~ ("," ~ expression)* }
// Primary expressions: the operands of the Pratt expression. Several
// alternatives (parenthesised expression, if/when/try, lambda, collections)
// contain nested expressions, but each starts with a token or identifier, so
// the rule is not left-recursive.
//
// Alternatives are tried in order (PEG ordered choice):
// - `lambda` comes first. A parenthesised parameter list such as `(x)` is
//   itself a valid parenthesised expression, so if "(" ~ expression ~ ")" were
//   tried first it would succeed on `(x)` and `(x) => body` could never be
//   recognised as a lambda. `lambda` fails cleanly when no "=>" follows.
//   Consequence: a parameter list written directly before "=>" (bare or
//   parenthesised, in any expression position) is always read as a lambda.
// - `list_comprehension` is tried before `list` (both start with "[").
// - `quoted_string` is tried before `interpolated_string`; it rejects "${",
//   so strings containing interpolations fall through to the latter.
// - `identifier` is last, after every keyword-led form.
primary = {
      lambda
    | "(" ~ expression ~ ")"
    | if_expr
    | when_expr
    | list_comprehension
    | try_expr
    | list
    | map
    | multiline_string
    | quoted_string
    | interpolated_string
    | number
    | boolean
    | null
    | identifier
}
// ============================================================================
// CONTROL FLOW
// ============================================================================
// If expression: if <cond> then <value> [else <value>]
// The else branch is optional.
if_expr = {
    "if" ~ expression
    ~ "then" ~ expression
    ~ ("else" ~ expression)?
}
// When expression (pattern matching):
//     when <subject> ( <arm>, <arm>, ... )
// At least one arm is required; a trailing comma is allowed.
when_expr = {
    "when" ~ expression
    ~ "(" ~ when_arm ~ ("," ~ when_arm)* ~ ","? ~ ")"
}

// One arm: <pattern> [if <guard>] => <result>
when_arm = { when_pattern ~ ("if" ~ expression)? ~ "=>" ~ expression }

// Patterns: wildcard ("_" or "*"), a parenthesised tuple of expressions,
// a literal value, or an identifier.
// NOTE(review): the literal "_" is tried before `identifier`, so a name that
// starts with an underscore matches only its first character here - confirm
// whether such names are meant to be usable as patterns.
when_pattern = {
      "_"
    | "*"
    | "(" ~ expression ~ ("," ~ expression)* ~ ")"
    | literal_value
    | identifier
}

// Literal values usable as patterns (plain quoted strings only).
literal_value = {
      quoted_string
    | number
    | boolean
    | null
}
// Try expression: try(<expression>) or try(<expression>, <expression>)
// The second expression is optional.
try_expr = {
    "try" ~ "("
    ~ expression ~ ("," ~ expression)?
    ~ ")"
}
// ============================================================================
// FUNCTIONS
// ============================================================================
// Lambda: <params> => <body>
//     x => x * 2
//     (x, y) => x + y
lambda = { lambda_params ~ "=>" ~ expression }

// Either a single bare parameter name, or a parenthesised comma-separated list
// of one or more names.
lambda_params = {
      identifier
    | "(" ~ identifier ~ ("," ~ identifier)* ~ ")"
}
// ============================================================================
// COMPREHENSIONS
// ============================================================================
// List comprehension: [<element> for <vars> in <source> [if <filter>]]
//     [x * 2 for x in list if x > 0]
list_comprehension = {
    "[" ~ expression
    ~ "for" ~ comprehension_clause
    ~ comprehension_filter?
    ~ "]"
}

// One or two loop variable names, then "in" and the source expression.
comprehension_clause = { identifier ~ ("," ~ identifier)? ~ "in" ~ expression }

// Optional filter: "if" <expression>
comprehension_filter = { "if" ~ expression }
// ============================================================================
// LITERALS
// ============================================================================
// Identifiers: a letter or underscore followed by letters, digits, underscores
// or hyphens (atomic - no whitespace inside).
//
// A name is rejected only when it IS a keyword, i.e. a keyword that is not
// followed by a letter, digit or underscore. A bare `!keyword` would be a
// prefix test and would reject every name that merely starts with a keyword
// (`index`, `format`, `input`, `order`, ...).
// A keyword immediately followed by "-" is still treated as the keyword, so
// such text is not an identifier.
identifier = @{
    !(keyword ~ !(ASCII_ALPHANUMERIC | "_"))
    ~ (ASCII_ALPHA | "_")
    ~ (ASCII_ALPHANUMERIC | "_" | "-")*
}

// Keywords (type names are NOT keywords - they're only special in type contexts).
// No keyword in this list is a prefix of another, so the ordered choice cannot
// pick a shorter keyword and hide a longer one from the boundary check above.
keyword = {
      "if" | "then" | "else" | "when" | "match" | "for" | "in"
    | "fn" | "mut" | "import" | "from" | "as" | "try"
    | "true" | "false" | "null"
    | "and" | "or" | "not"
}
// Any string form. Triple-quoted strings are tried first, then interpolated
// strings, then plain quoted strings.
string = {
      multiline_string
    | interpolated_string
    | quoted_string
}

// Plain double-quoted string (atomic). The body stops at the closing quote and
// may not contain "${"; a backslash escape is consumed as a unit, so an escaped
// quote or an escaped "$" does not end or disqualify the string.
quoted_string = @{
    "\""
    ~ (!"\"" ~ !"${" ~ (escape_sequence | ANY))*
    ~ "\""
}

// Triple-quoted string (atomic): everything up to the next """ is taken
// verbatim, including line breaks.
multiline_string = @{
    "\"\"\""
    ~ (!"\"\"\"" ~ ANY)*
    ~ "\"\"\""
}

// Recognised backslash escapes: \n \t \r \" \\ \$
escape_sequence = @{
    "\\" ~ ("n" | "t" | "r" | "\"" | "\\" | "$")
}
// String interpolation: "Hello, ${name}!"
//
// The string is compound-atomic ($): no implicit WHITESPACE / COMMENT skipping
// happens between the quotes, while inner tokens are still produced. As a
// normal rule, spaces next to an interpolation (e.g. the one between the parts
// of "${a} ${b}") were silently skipped instead of being kept as literal
// text, and a "#" at such a position was treated as the start of a comment.
interpolated_string = ${ "\"" ~ interpolation_part+ ~ "\"" }

// One piece of an interpolated string: literal text or a ${...} interpolation.
interpolation_part = { string_literal_part | interpolation }

// Literal text (atomic): everything up to the next "${" or closing quote, with
// backslash escapes consumed as a unit.
string_literal_part = @{
    (!"${" ~ !"\"" ~ (escape_sequence | ANY))+
}

// ${ expression } - explicitly non-atomic (!) so that ordinary whitespace and
// comment skipping is restored for the embedded expression, even though the
// enclosing string is atomic.
interpolation = !{ "${" ~ expression ~ "}" }
// Numbers (atomic): optional leading minus, integer digits, optional fraction.
number = @{
    "-"? ~ ASCII_DIGIT+ ~ ("." ~ ASCII_DIGIT+)?
}

// Booleans. Atomic, and must not be followed by a letter, digit or underscore,
// so a longer word that merely starts with "true" / "false" is not cut in two.
boolean = @{ ("true" | "false") ~ !(ASCII_ALPHANUMERIC | "_") }

// Null. Same word-boundary requirement: without it `x = nullable` would parse
// as `x = null` followed by a separate statement `able`.
null = @{ "null" ~ !(ASCII_ALPHANUMERIC | "_") }
// ============================================================================
// COLLECTIONS
// ============================================================================
// Lists use square brackets []: empty, or comma-separated expressions with an
// optional trailing comma.
list = {
    "[" ~ (expression ~ ("," ~ expression)* ~ ","?)? ~ "]"
}

// Maps use parentheses (): empty, or comma-separated entries with an optional
// trailing comma.
map = {
    "(" ~ (map_entry ~ ("," ~ map_entry)* ~ ","?)? ~ ")"
}

// One map entry: key = value, or key : value.
map_entry = { identifier ~ ("=" | ":") ~ expression }