// Pikchr grammar for pest parser
// See https://pikchr.org/ for language reference
// === Top-level ===
// Entry point: a whole Pikchr script, anchored at both ends of the input.
program = {
    SOI ~ statement_list ~ EOI
}
// Zero or more statements, each pair separated by one or more EOL tokens.
// Leading and trailing EOLs are accepted, and the list may be empty.
statement_list = {
    EOL*
    ~ (statement ~ (EOL+ ~ statement)*)?
    ~ EOL*
}
// A single statement. Alternatives are tried top to bottom (PEG ordered
// choice), so the order below is significant.
statement = {
    labeled_statement
  | direction
  | assignment
  | define
  | assert_stmt
  | print_stmt
  | error_stmt
  | object_stmt
  // Keep macro_call last: it accepts any bare IDENT.
  | macro_call
}
// `error "message"` - raises a syntax error that carries the given message.
error_stmt = {
    "error" ~ STRING
}
// `Name: <position>` or `Name: <object statement>`.
// `position` is attempted before `object_stmt`.
labeled_statement = {
    PLACENAME ~ ":" ~ (position | object_stmt)
}
// === Directions ===
// A direction keyword must end at a word boundary: the next character may not
// be one that can continue an identifier (letter, digit, or "_"). Checking only
// for alphanumerics would let `left_margin = 3` be consumed as the direction
// `left`, leaving `_margin = 3` as an unparseable remainder.
// The rule is atomic (@) so that no implicit whitespace is skipped between the
// keyword and the boundary check.
direction = @{
    ("up" | "down" | "left" | "right") ~ !(ASCII_ALPHANUMERIC | "_")
}
// === Assignments ===
// `lvalue op rvalue`, e.g. `$x = 3` or `fill = Red`.
assignment = {
    lvalue ~ assign_op ~ rvalue
}
// Assignment operators; the compound forms are listed ahead of plain "=".
assign_op = {
    "+=" | "-=" | "*=" | "/="
  | "="
}
// Assignment target: a variable, or one of the literal names
// fill / color / thickness.
lvalue = {
    variable
  | "fill"
  | "color"
  | "thickness"
}
// Assigned value. Besides numeric expressions this accepts a HEX_COLOR such as
// #ff0000 and a PLACENAME, which is how color names like "Red" are written.
rvalue = {
    expr
  | HEX_COLOR
  | PLACENAME
}
// Variable reference, with or without a leading "$".
variable = {
    "$" ~ IDENT
  | IDENT
}
// Hex color literal: "#" followed by 3 to 8 hex digits.
// The intended spellings are #rgb, #rrggbb, and #rrggbbaa.
HEX_COLOR = @{
    "#" ~ ASCII_HEX_DIGIT{3,8}
}
// === Macros ===
// Macro definition: `define name { body }`.
define = {
    "define" ~ IDENT ~ CODEBLOCK
}
// Brace-delimited macro body. Nested braces stay balanced because
// CODEBLOCK_INNER recurses back into CODEBLOCK. The rule is atomic (@), so the
// body is matched character by character with no implicit whitespace skipping.
CODEBLOCK = @{
    "{" ~ CODEBLOCK_INNER* ~ "}"
}
// One unit of a code block body: either a nested block or any single
// character that is not a brace.
CODEBLOCK_INNER = {
    CODEBLOCK
  | (!("{" | "}") ~ ANY)
}
// Macro invocation: `name(arg1, arg2, ...)`, `name()`, or a bare `name`.
macro_call = {
    IDENT ~ ("(" ~ macro_args? ~ ")")?
}
// Non-empty, comma-separated argument list.
macro_args = {
    macro_arg ~ ("," ~ macro_arg)*
}
// A single macro argument; the alternatives are tried in this order.
macro_arg = {
    STRING
  | expr
  | IDENT
}
// === Assert & Print ===
// `assert(a == b)`: the operands are parsed as two expressions first and,
// if that fails, as two positions.
assert_stmt = {
    "assert" ~ "("
    ~ ( expr ~ "==" ~ expr
      | position ~ "==" ~ position
      )
    ~ ")"
}
// `print arg, arg, ...`
print_stmt = {
    "print" ~ print_args
}
// Non-empty, comma-separated list of things to print.
print_args = {
    print_arg ~ ("," ~ print_arg)*
}
// A printable item: a STRING, an expression, or a PLACENAME (used for color
// names such as Red or Orange).
print_arg = {
    STRING
  | expr
  | PLACENAME
}
// === Objects ===
// An object: its base type followed by an optional run of attributes.
object_stmt = {
    basetype ~ attribute_list?
}
// What kind of object is being created: a named class, a text string with
// optional text-position modifiers, or a bracketed sub-drawing.
basetype = {
    CLASSNAME
  | STRING ~ textposition?
  | sublist
}
// `[ ... ]` - a nested statement list.
sublist = {
    "[" ~ statement_list ~ "]"
}
// Built-in object class names.
// A class name is only recognized as a whole word: the character that follows
// must not be able to continue an identifier (letter, digit, or "_"). Without
// this check an identifier such as `boxes` or `dot_size` is split into the
// class `box` / `dot` plus a trailing expression, so it never reaches
// macro_call.
// The rule is atomic (@) so that no implicit whitespace is skipped between the
// name and the lookahead; otherwise `box wid 1` would fail the check on "w".
// The rule contains only literals, so atomicity does not change the tokens it
// produces.
CLASSNAME = @{
    ( "arc" | "arrow" | "box" | "circle" | "cylinder" | "diamond" | "dot"
    | "ellipse" | "file" | "line" | "move" | "oval" | "spline" | "text"
    ) ~ !(ASCII_ALPHANUMERIC | "_")
}
// === Attributes ===
// One or more attributes following a base type.
attribute_list = {
    attribute+
}
// A single object attribute. This is a PEG ordered choice, so the order of
// alternatives is significant: forms that share a leading keyword are listed
// with the longer / more specific form first.
attribute = {
  // --- properties ---
    numproperty ~ relexpr
  | dashproperty ~ expr?
  | colorproperty ~ rvalue
  | boolproperty
  // --- movement in a direction, optionally introduced by "go" ---
  | "go"? ~ direction ~ "until" ~ "even" ~ "with"? ~ position
  | "go"? ~ direction ~ "even" ~ "with"? ~ position
  | "go"? ~ direction ~ optrelexpr
  | "go"? ~ optrelexpr ~ "heading" ~ expr
  | "close"
  | "chop"
  // --- path endpoints ---
  | "from" ~ position
  | "then" ~ "to" ~ position
  | "to" ~ position
  // --- "then", optionally followed by a movement clause ---
  | "then" ~ (
        direction ~ "until" ~ "even" ~ "with"? ~ position
      | direction ~ "even" ~ "with"? ~ position
      | direction ~ optrelexpr
      | optrelexpr ~ "heading" ~ expr
      | optrelexpr ~ EDGEPT
    )?
  // --- placement and miscellany ---
  | "at" ~ position
  | "with" ~ withclause
  | "same" ~ ("as" ~ object)?
  | STRING ~ textposition?
  | "fit"
  | "behind" ~ object
  // Bare expression: movement in the default direction. Kept last.
  | relexpr
}
// Body of a `with` attribute: an edge (written `.edge` or `edge`) placed at a
// position. The two spellings cannot overlap, since dot_edge always begins
// with "." and EDGEPT never does.
withclause = {
    (dot_edge | EDGEPT) ~ "at" ~ position
}
// Properties that take a numeric value. Each long spelling is listed before
// its abbreviation.
numproperty = {
    "height" | "ht"
  | "width" | "wid"
  | "radius" | "rad"
  | "diameter"
  | "thickness"
}
// Line-style properties (followed by an optional expression in `attribute`).
dashproperty = {
    "dotted" | "dashed"
}
// Properties whose value is a color.
colorproperty = {
    "fill" | "color"
}
// Flag-style attributes that take no value.
// PEG ordered choice: when one literal is a prefix of another, the longer one
// must come first.
boolproperty = {
    "cw" | "ccw"
  // "invisible" before "invis": otherwise `invisible` is matched as `invis`
  // and the leftover `ible` is parsed as a separate expression attribute.
  | "invisible" | "invis"
  | "thick" | "thin" | "solid"
  // ASCII arrows; "<->" must precede "<-".
  | "<->" | "->" | "<-"
  // HTML entity arrow tokens (spelled out as entity text, distinct from the
  // Unicode characters below).
  | "&leftrightarrow;" | "&larr;" | "&rarr;"
  | "&leftarrow;" | "&rightarrow;"
  // Unicode arrow tokens
  | "↔" | "←" | "→"
}
// One or more text modifiers following a string.
textposition = {
    textattr+
}
// A single text modifier.
textattr = {
    "above" | "below" | "center"
  | "ljust" | "rjust"
  | "bold" | "italic"
  // "monospace" has to be tried before its prefix "mono".
  | "monospace" | "mono"
  | "big" | "small"
  | "aligned"
}
// === Expressions ===
// An expression optionally followed by "%".
relexpr = {
    expr ~ percent?
}
// A relexpr that may be omitted entirely (matches the empty string).
optrelexpr = {
    relexpr?
}
// The percent sign, kept as its own rule so it shows up in the parse tree.
percent = {
    "%"
}
// Arithmetic expressions, layered to encode precedence:
//   expr - sums and differences of terms (binds loosest)
//   term - products and quotients of optionally signed primaries
expr = {
    term ~ (add_op ~ term)*
}
// Additive operators.
add_op = {
    "+" | "-"
}
term = {
    prefix? ~ primary
    ~ (mul_op ~ prefix? ~ primary)*
}
// Multiplicative operators.
mul_op = {
    "*" | "/"
}
// Unary sign in front of a primary.
prefix = {
    "-" | "+"
}
// Legacy operator rule, retained for backward compatibility during the
// transition to add_op / mul_op. No other rule in this grammar refers to it.
infix = {
    "+" | "-" | "*" | "/"
}
// Atomic operand of an expression. Alternatives are tried in order.
// NOTE: a bare place/object is NOT an expression - use `position` for places.
primary = {
    // Parenthesized sub-expression
    "(" ~ expr ~ ")"
    // Parenthesized setting name: (fill), (color), (thickness)
  | "(" ~ ("fill" | "color" | "thickness") ~ ")"
  | func_call
  | dist_call
  | NUMBER
    // e.g. `2nd vertex of spline.x`
  | NTH ~ "vertex" ~ "of" ~ object ~ dot_xy
    // e.g. `C4.n.x` - coordinate of an edge point
  | object ~ dot_edge ~ dot_xy
    // e.g. `C4.x` - coordinate of an object
  | object ~ dot_xy
    // e.g. `C4.width`, `C4.height`
  | object ~ dot_prop
    // User variables such as `$x` or `linewid`
  | variable
}
// Built-in function call with one or two expression arguments.
func_call = {
    FUNC1 ~ "(" ~ expr ~ ")"
  | FUNC2 ~ "(" ~ expr ~ "," ~ expr ~ ")"
}
// `dist(p1, p2)` - takes two positions rather than expressions.
dist_call = {
    "dist" ~ "(" ~ position ~ "," ~ position ~ ")"
}
// Names of the one-argument functions.
FUNC1 = {
    "abs" | "cos" | "sin" | "int" | "sqrt"
}
// Names of the two-argument functions.
FUNC2 = {
    "max" | "min"
}
// === Positions ===
// A position. Every alternative is its own named rule so that the consumer
// (parse_position) can tell which pattern matched. Tried in the order listed.
position = {
    pos_tuple
  | pos_group
  | pos_place_offset_paren
  | pos_place_offset
  | pos_between
  | pos_bracket
  | pos_above_below
  | pos_left_right
  | pos_heading
  | pos_edgept_of
  | pos_coords
  | pos_place
}
// (position, position)
pos_tuple = {
    "(" ~ position ~ "," ~ position ~ ")"
}
// (position)
pos_group = {
    "(" ~ position ~ ")"
}
// place +/- (dx, dy)
pos_place_offset_paren = {
    place ~ ("+" | "-") ~ "(" ~ expr ~ "," ~ expr ~ ")"
}
// place +/- dx, dy
pos_place_offset = {
    place ~ ("+" | "-") ~ expr ~ "," ~ expr
}
// expr between P1 and P2 / expr [of the] way between P1 and P2
pos_between = {
    expr
    ~ ("between" | ("of" ~ "the")? ~ "way" ~ "between")
    ~ position ~ "and" ~ position
}
// expr < P1, P2 >
pos_bracket = {
    expr ~ "<" ~ position ~ "," ~ position ~ ">"
}
// expr above|below P
pos_above_below = {
    expr ~ above_below ~ position
}
// expr left of|right of P
pos_left_right = {
    expr ~ left_right_of ~ position
}
// expr [on] heading (edge | expr) of|from P
pos_heading = {
    expr ~ "on"? ~ "heading"
    ~ (EDGEPT | expr)
    ~ ("of" | "from") ~ position
}
// expr edge of P
pos_edgept_of = {
    expr ~ EDGEPT ~ "of" ~ position
}
// x, y
pos_coords = {
    expr ~ "," ~ expr
}
// A plain place reference.
pos_place = {
    place
}
// A reference to a point on an object, or to the object itself.
// `object ~ dot_edge` is listed before the bare `object` it extends.
place = {
    NTH ~ "vertex" ~ "of" ~ object
  | EDGEPT ~ "of" ~ object
  | object ~ dot_edge
  | object
}
// === Objects & References ===
// An object reference: by name, or by ordinal.
object = {
    objectname
  | nth
}
// `this` or a PLACENAME, followed by any number of `.Name` qualifiers.
objectname = {
    "this" ~ dot_name*
  | PLACENAME ~ dot_name*
}
// Ordinal object reference, e.g. `2nd box`, `3rd last circle`, `last []`,
// `first`, `previous`. The `[` `]` forms are listed before the forms with an
// optional class name so the brackets are not left unconsumed.
nth = {
    NTH ~ "last"? ~ CLASSNAME
  | NTH ~ "last"? ~ "[" ~ "]"
  | "first" ~ "[" ~ "]"
  | "first" ~ CLASSNAME?
  | "last" ~ "[" ~ "]"
  | "last" ~ CLASSNAME?
  | "previous"
}
// Ordinal token: digits immediately followed by st / nd / rd / th.
// Atomic (@), so no whitespace is allowed between the digits and the suffix.
NTH = @{
    ASCII_DIGIT+ ~ ("st" | "nd" | "rd" | "th")
}
// === Edge Points ===
// IMPORTANT: longer keywords must come before shorter ones in PEG!
// Position modifiers. These are separate (non-silent) rules so that the
// modifier that matched is captured in the parse tree.
above_below = {
    "above" | "below"
}
left_right_of = {
    ("left" | "right") ~ "of"
}
// Edge-point names.
// Multi-letter names come first so that they win over the single-letter
// abbreviations they start with. A single-letter name is only accepted when
// the next character is not alphanumeric. The rule is atomic (@) so that no
// implicit whitespace is skipped before that boundary check.
EDGEPT = @{
    "north" | "south" | "east" | "west"
  | "start" | "end" | "center"
  | "bottom" | "bot"
  | "top" | "left" | "right"
  | "ne" | "nw" | "se" | "sw"
  | ("n" | "s" | "e" | "w" | "c" | "t") ~ !ASCII_ALPHANUMERIC
}
// Dot accessors. All start with "."; what follows the dot tells them apart.
// .x / .y
dot_xy = {
    "." ~ ("x" | "y")
}
// .n, .ne, .center, ...
dot_edge = {
    "." ~ EDGEPT
}
// .width, .dashed, .fill, ...
dot_prop = {
    "." ~ (numproperty | dashproperty | colorproperty)
}
// .Name - qualifier selecting a named sub-object
dot_name = {
    "." ~ PLACENAME
}
// === Tokens ===
// Place / label name: an uppercase ASCII letter followed by any number of
// letters, digits, or underscores.
PLACENAME = @{
    ASCII_ALPHA_UPPER ~ (ASCII_ALPHANUMERIC | "_")*
}
// Reserved words. IDENT uses `!keyword` so that none of these can be used as
// an identifier.
//
// Two PEG details matter here:
//  * The word-boundary lookahead is applied after the whole choice, and PEG
//    never re-enters a choice that already succeeded. A short word listed
//    before a longer word that starts with it would therefore hide the longer
//    one (e.g. "to" matching the start of `top`, then failing the boundary).
//    So "top" precedes "to", "invisible" precedes "invis", "thickness"
//    precedes "thick", and "monospace" precedes "mono".
//  * The boundary excludes every character that can continue an IDENT -
//    letters, digits, AND "_" - so identifiers such as `to_x` or `fill_level`
//    are not mistaken for the keyword they begin with.
//
// NOTE(review): `thickness` is now reserved like `fill` and `color`; inside an
// expression it must be written `(thickness)`, as `primary` provides for.
keyword = @{
    ( "last" | "first" | "previous" | "this"
    | "above" | "below" | "left" | "right" | "up" | "down"
    | "at" | "from" | "top" | "to" | "with" | "then" | "and" | "of"
    | "north" | "south" | "east" | "west" | "start" | "end" | "center"
    | "bottom" | "ne" | "nw" | "se" | "sw"
    | "chop" | "close" | "same" | "fit" | "behind" | "go" | "even" | "between" | "way" | "the" | "heading" | "on"
    | "cw" | "ccw" | "invisible" | "invis" | "thickness" | "thick" | "thin" | "solid"
    | "bold" | "italic" | "monospace" | "mono" | "big" | "small" | "aligned" | "ljust" | "rjust"
    | "height" | "ht" | "width" | "wid" | "radius" | "rad" | "diameter"
    | "fill" | "color" | "dotted" | "dashed"
    | "define" | "assert" | "print" | "error"
    | "abs" | "cos" | "sin" | "int" | "sqrt" | "max" | "min" | "dist"
    ) ~ !(ASCII_ALPHANUMERIC | "_")
}
// Identifier: must not be a keyword; starts with a lowercase ASCII letter,
// "_" or "@", and continues with letters, digits, or underscores.
IDENT = @{
    !keyword
    ~ (ASCII_ALPHA_LOWER | "_" | "@")
    ~ (ASCII_ALPHANUMERIC | "_")*
}
// Numeric literal. Three forms, tried in order:
//   1. hexadecimal:  0x1F / 0X1f
//   2. digits, with optional fraction, exponent, and unit - rejected when the
//      digits are directly followed by an ordinal suffix (6th, 2nd, ...) so
//      that such text is left for NTH
//   3. leading-dot fraction (.5), with optional exponent and unit
NUMBER = @{
    ("0x" | "0X") ~ ASCII_HEX_DIGIT+
  | ASCII_DIGIT+
    ~ !("st" | "nd" | "rd" | "th")
    ~ ("." ~ ASCII_DIGIT*)?
    ~ (("e" | "E") ~ ("+" | "-")? ~ ASCII_DIGIT+)?
    ~ UNIT?
  | "." ~ ASCII_DIGIT+
    ~ (("e" | "E") ~ ("+" | "-")? ~ ASCII_DIGIT+)?
    ~ UNIT?
}
// Unit suffixes accepted directly after a number.
UNIT = {
    "in" | "cm" | "mm" | "pt" | "px" | "pc"
}
// Double-quoted string. Inside the quotes, a backslash escapes whatever single
// character follows it; any other character except `"` and `\` is taken
// literally.
STRING = @{
    "\""
    ~ ( "\\" ~ ANY
      | !("\"" | "\\") ~ ANY
      )*
    ~ "\""
}
// === Whitespace & Comments ===
// Implicit whitespace: space, tab, carriage return, or a line continuation.
// A bare "\n" is deliberately absent - it is significant (see EOL).
WHITESPACE = _{
    " "
  | "\t"
  | "\r"
  | line_continuation
}
// Backslash immediately followed by a newline joins two physical lines.
line_continuation = {
    "\\" ~ NEWLINE
}
// Implicit comments, in three styles:
//   // to end of line
//   #  to end of line - but only when "#" is NOT followed by a hex digit,
//      so that hex colors such as #ff0000 are not swallowed as comments
//   /* ... */ block comment
COMMENT = _{
    "//" ~ (!NEWLINE ~ ANY)*
  | "#" ~ !ASCII_HEX_DIGIT ~ (!NEWLINE ~ ANY)*
  | "/*" ~ (!"*/" ~ ANY)* ~ "*/"
}
// Statement terminator: a newline or a semicolon.
EOL = {
    NEWLINE
  | ";"
}
// Line feed only; "\r" is treated as ordinary whitespace (see WHITESPACE).
NEWLINE = _{
    "\n"
}