// pikru 1.2.0
//
// A pure Rust implementation of pikchr, a PIC-like diagram markup language that generates SVG.
// Documentation
// Pikchr grammar for pest parser
// See https://pikchr.org/ for language reference

// === Top-level ===
// Entry rule: the entire input, from SOI to EOI, must be one statement_list.
program = { SOI ~ statement_list ~ EOI }

// Zero or more statements separated by one or more EOL tokens.
// Leading and trailing EOLs are accepted, and the list itself may be empty.
statement_list = { EOL* ~ (statement ~ (EOL+ ~ statement)*)? ~ EOL* }

// A single statement. PEG choice is ordered: alternatives are tried top to
// bottom and the first one that matches is taken, so the order below is
// significant (for example, assignment is tried before object_stmt).
statement = {
    labeled_statement
  | direction
  | assignment
  | define
  | assert_stmt
  | print_stmt
  | error_stmt
  | object_stmt
  | macro_call  // must be last - matches any IDENT
}

// Error statement - produces a syntax error with message
// Syntax: the literal "error" followed by a STRING.
error_stmt = { "error" ~ STRING }

// A PLACENAME label, a colon, then either a position or an object statement.
// position is tried first; object_stmt is used only if position does not match.
labeled_statement = { PLACENAME ~ ":" ~ (position | object_stmt) }

// === Directions ===
// Direction keywords must not be followed by alphanumeric (word boundary)
// The rule is atomic (@) so no implicit whitespace is skipped between the
// keyword and the negative lookahead; the lookahead itself consumes nothing.
direction = @{ ("up" | "down" | "left" | "right") ~ !ASCII_ALPHANUMERIC }

// === Assignments ===
// lvalue, an assignment operator, then rvalue.
assignment = { lvalue ~ assign_op ~ rvalue }
// Compound operators are listed before plain "=".
assign_op = { "+=" | "-=" | "*=" | "/=" | "=" }
// Assignment target: a variable, or one of the literal names "fill", "color",
// "thickness". variable is tried first.
lvalue = { variable | "fill" | "color" | "thickness" }
rvalue = { expr | HEX_COLOR | PLACENAME }  // PLACENAME for color names like "Red", HEX_COLOR for #ff0000
// An IDENT with an optional leading "$". The rule is not atomic, so implicit
// whitespace may appear between "$" and the IDENT.
variable = { "$" ~ IDENT | IDENT }
// Atomic: "#" immediately followed by 3 to 8 hex digits. Any length in that
// range is accepted, not only the 3/6/8-digit forms named below.
HEX_COLOR = @{ "#" ~ ASCII_HEX_DIGIT{3,8} }  // #rgb, #rrggbb, or #rrggbbaa

// === Macros ===
// Macro definition: "define", the macro name, then a brace-delimited body.
define = { "define" ~ IDENT ~ CODEBLOCK }
// CODEBLOCK handles nested braces by recursively matching balanced {}
// The rule is atomic, so the body is captured verbatim: no implicit
// whitespace or comment skipping happens inside it. Braces are balanced
// purely textually; STRING and COMMENT are not consulted, so a brace inside a
// string literal or comment still counts toward nesting.
CODEBLOCK = @{ "{" ~ CODEBLOCK_INNER* ~ "}" }
// One unit of body content: a nested CODEBLOCK, or any single character that
// is not a brace.
CODEBLOCK_INNER = { CODEBLOCK | (!("{" | "}") ~ ANY) }

// Macro invocation: macroname(arg1, arg2, ...) or just macroname
// The parenthesised list is optional, and may be empty when present.
macro_call = { IDENT ~ ("(" ~ macro_args? ~ ")")? }
// One or more comma-separated arguments.
macro_args = { macro_arg ~ ("," ~ macro_arg)* }
// NOTE(review): expr already accepts a bare IDENT via primary -> variable, so
// the trailing IDENT alternative looks unreachable — confirm before relying on it.
macro_arg = { STRING | expr | IDENT }

// === Assert & Print ===
// assert( a == b ) where both sides are expressions or both are positions.
// The expr == expr form is tried first; position == position is the fallback.
assert_stmt = { "assert" ~ "(" ~ (expr ~ "==" ~ expr | position ~ "==" ~ position) ~ ")" }
// "print" followed by at least one argument.
print_stmt = { "print" ~ print_args }
// Print args can include expr, STRING, or PLACENAME (for color names like Red, Orange)
// Arguments are comma-separated.
print_args = { print_arg ~ ("," ~ print_arg)* }
// Tried in order: STRING, then expr, then a bare PLACENAME.
print_arg = { STRING | expr | PLACENAME }

// === Objects ===
// An object statement: a base type followed by an optional attribute list.
object_stmt = { basetype ~ attribute_list? }

// What an object statement starts with: a class keyword, a string literal
// (optionally followed by text-position attributes), or a bracketed sublist.
basetype = {
    CLASSNAME
  | STRING ~ textposition?
  | sublist
}

// A nested statement_list enclosed in square brackets.
sublist = { "[" ~ statement_list ~ "]" }

// Built-in object class keywords.
// Atomic (@) with a trailing word-boundary check, using the same continuation
// characters IDENT accepts (alphanumerics and "_"). Without the check, an
// identifier that merely starts with a class name (e.g. a macro called
// "boxes" or "dot_grid") would be split into a class name plus leftover text
// and parsed as an object statement instead of reaching macro_call.
// The lookahead consumes nothing, so the matched span is still exactly the
// class keyword.
CLASSNAME = @{
    ( "arc" | "arrow" | "box" | "circle" | "cylinder" | "diamond" | "dot"
    | "ellipse" | "file" | "line" | "move" | "oval" | "spline" | "text"
    ) ~ !(ASCII_ALPHANUMERIC | "_")
}

// === Attributes ===
// One or more attributes following a basetype.
attribute_list = { attribute+ }

// A single attribute. Alternatives are tried top to bottom and the first
// match wins, so the order is significant:
//   - The "until even with" and "even with" direction forms come before the
//     plain `direction ~ optrelexpr` form. optrelexpr can match nothing, so
//     the plain form would otherwise succeed on the direction alone and leave
//     "even with ..." unparsed.
//   - "then to position" comes before the general "then" form, whose tail is
//     optional and would otherwise match just the word "then".
//   - The bare relexpr fallback is last.
attribute = {
    numproperty ~ relexpr
  | dashproperty ~ expr?
  | colorproperty ~ rvalue
  | boolproperty
  | "go"? ~ direction ~ "until" ~ "even" ~ "with"? ~ position
  | "go"? ~ direction ~ "even" ~ "with"? ~ position
  | "go"? ~ direction ~ optrelexpr
  | "go"? ~ optrelexpr ~ "heading" ~ expr
  | "close"
  | "chop"
  | "from" ~ position
  | "then" ~ "to" ~ position
  | "to" ~ position
  | "then" ~ (direction ~ "until" ~ "even" ~ "with"? ~ position | direction ~ "even" ~ "with"? ~ position | direction ~ optrelexpr | optrelexpr ~ "heading" ~ expr | optrelexpr ~ EDGEPT)?
  | "at" ~ position
  | "with" ~ withclause
  | "same" ~ ("as" ~ object)?
  | STRING ~ textposition?
  | "fit"
  | "behind" ~ object
  | relexpr  // bare expression for default direction movement
}

// Argument of "with": an edge given either as ".edge" (dot_edge) or as a bare
// EDGEPT, followed by "at" and a position.
withclause = { dot_edge ~ "at" ~ position | EDGEPT ~ "at" ~ position }

// Property names used with a numeric argument (see attribute) and after "."
// (see dot_prop). Where one spelling is a prefix of another, the longer one is
// listed first ("width" before "wid", "radius" before "rad") so the ordered
// choice does not stop at the short form.
numproperty = { "height" | "ht" | "width" | "wid" | "radius" | "rad" | "diameter" | "thickness" }
// Line-style properties; in attribute they take an optional expr.
dashproperty = { "dotted" | "dashed" }
// Color properties; in attribute they are followed by an rvalue.
colorproperty = { "fill" | "color" }
// Flag-style attributes that take no argument: arc direction, visibility,
// line weight/style, and arrowhead markers in ASCII, HTML-entity and Unicode
// spellings.
// PEG choice commits to the first alternative that matches, so where one
// spelling is a prefix of another the longer one must come first:
//   - "invisible" before "invis" (otherwise "invisible" is consumed as "invis"
//     and the leftover "ible" is parsed as a separate bare-variable attribute)
//   - "<->" before "->" and "<-"
boolproperty = {
    "cw" | "ccw"
  | "invisible" | "invis"
  | "thick" | "thin" | "solid"
  | "<->" | "->" | "<-"
  // HTML entity arrow tokens
  | "&leftrightarrow;" | "&leftarrow;" | "&rightarrow;"
  | "&larr;" | "&rarr;"
  // Unicode arrow tokens
  | "↔" | "←" | "→"
}

// One or more text attributes following a STRING.
textposition = { textattr+ }
// A single text attribute keyword. "monospace" is listed before its prefix
// "mono" so the ordered choice matches the full word.
textattr = {
    "above" | "below" | "center"
  | "ljust" | "rjust"
  | "bold" | "italic" | "monospace" | "mono"  // monospace before mono!
  | "big" | "small"
  | "aligned"
}

// === Expressions ===
// An expression optionally followed by "%".
relexpr = { expr ~ percent? }
// An optional relexpr; this rule can match the empty string.
optrelexpr = { relexpr? }
percent = { "%" }

// Expression with proper operator precedence:
// Addition/subtraction has lower precedence than multiplication/division
// expr is a sequence of terms joined by add_op; each term is a sequence of
// primaries joined by mul_op, and every primary may carry a unary prefix.
expr = { term ~ (add_op ~ term)* }
add_op = { "+" | "-" }
term = { prefix? ~ primary ~ (mul_op ~ prefix? ~ primary)* }
mul_op = { "*" | "/" }
// Unary sign.
prefix = { "-" | "+" }
// Keep old infix for backward compatibility during transition
// No other rule in this grammar references infix.
infix = { "+" | "-" | "*" | "/" }

// The atoms of an expression. Alternatives are tried in the order listed;
// the object-based forms go from most specific (edge + coordinate) to least,
// and a plain variable is the final fallback.
primary = {
    "(" ~ expr ~ ")"
  | "(" ~ ("fill" | "color" | "thickness") ~ ")"
  | func_call
  | dist_call
  | NUMBER
  | NTH ~ "vertex" ~ "of" ~ object ~ dot_xy  // 2nd vertex of spline.x
  | object ~ dot_edge ~ dot_xy  // C4.n.x - edge point coordinate
  | object ~ dot_xy  // C4.x - object coordinate
  | object ~ dot_prop  // object.width, object.height, etc.
  | variable  // user-defined variables like $x, linewid
  // NOTE: bare place/object is NOT an expr! Use position rule for places.
}

// A built-in function call: one-argument functions (FUNC1) or two-argument
// functions (FUNC2) with a comma between the arguments.
func_call = { FUNC1 ~ "(" ~ expr ~ ")" | FUNC2 ~ "(" ~ expr ~ "," ~ expr ~ ")" }
// dist(position, position) - takes two positions rather than expressions.
dist_call = { "dist" ~ "(" ~ position ~ "," ~ position ~ ")" }

// Names of the one-argument functions.
FUNC1 = { "abs" | "cos" | "sin" | "int" | "sqrt" }
// Names of the two-argument functions.
FUNC2 = { "max" | "min" }

// === Positions ===
// Each alternative is a named rule so parse_position knows which pattern matched.
// Alternatives are tried in order; pos_place is the last fallback, and
// pos_tuple is tried before pos_group because both start with "(".
position = {
    pos_tuple
  | pos_group
  | pos_place_offset_paren
  | pos_place_offset
  | pos_between
  | pos_bracket
  | pos_above_below
  | pos_left_right
  | pos_heading
  | pos_edgept_of
  | pos_coords
  | pos_place
}

// ( position , position )
pos_tuple = { "(" ~ position ~ "," ~ position ~ ")" }
// ( position )
pos_group = { "(" ~ position ~ ")" }
// place +/- ( expr , expr )
pos_place_offset_paren = { place ~ ("+" | "-") ~ "(" ~ expr ~ "," ~ expr ~ ")" }
// place +/- expr , expr   (same as above without the parentheses)
pos_place_offset = { place ~ ("+" | "-") ~ expr ~ "," ~ expr }
// expr between P1 and P2, also spelled "expr way between" or
// "expr of the way between".
pos_between = { expr ~ ("between" | ("of" ~ "the")? ~ "way" ~ "between") ~ position ~ "and" ~ position }
// expr < position , position >
pos_bracket = { expr ~ "<" ~ position ~ "," ~ position ~ ">" }
// expr above|below position
pos_above_below = { expr ~ above_below ~ position }
// expr left of|right of position
pos_left_right = { expr ~ left_right_of ~ position }
// expr [on] heading (EDGEPT | expr) of|from position; EDGEPT is tried before expr.
pos_heading = { expr ~ "on"? ~ "heading" ~ (EDGEPT | expr) ~ ("of" | "from") ~ position }
// expr EDGEPT of position
pos_edgept_of = { expr ~ EDGEPT ~ "of" ~ position }
// expr , expr
pos_coords = { expr ~ "," ~ expr }
// A bare place.
pos_place = { place }

// A reference to a point on an object. Tried in order: an ordinal vertex of an
// object, "EDGEPT of object", "object.edge", and finally the bare object.
// "object ~ dot_edge" must precede plain "object" so the edge suffix is
// included when present.
place = {
    NTH ~ "vertex" ~ "of" ~ object
  | EDGEPT ~ "of" ~ object
  | object ~ dot_edge
  | object
}

// === Objects & References ===
// An object reference: either by name or by ordinal.
object = { objectname | nth }

// "this" or a PLACENAME, followed by zero or more ".PLACENAME" components.
objectname = {
    "this" ~ dot_name*
  | PLACENAME ~ dot_name*
}

// Ordinal references. For "first" and "last", the "[" "]" form is listed
// before the form with an optional CLASSNAME; since that CLASSNAME is
// optional, the second form would otherwise match the bare keyword and leave
// the brackets unparsed.
nth = {
    NTH ~ "last"? ~ CLASSNAME
  | NTH ~ "last"? ~ "[" ~ "]"
  | "first" ~ "[" ~ "]"
  | "first" ~ CLASSNAME?
  | "last" ~ "[" ~ "]"
  | "last" ~ CLASSNAME?
  | "previous"
}

// Ordinal literal: digits immediately followed by st/nd/rd/th. Atomic, so no
// whitespace is allowed between the digits and the suffix. The suffix is not
// checked against the number (e.g. "2th" matches).
NTH = @{ ASCII_DIGIT+ ~ ("st" | "nd" | "rd" | "th") }

// === Edge Points ===
// IMPORTANT: longer keywords must come before shorter ones in PEG!
// Position modifiers - need to be rules so they're captured in AST
above_below = { "above" | "below" }
// "left of" / "right of"; not atomic, so whitespace may separate the words.
left_right_of = { ("left" | "right") ~ "of" }

// Single-letter edge points must not be followed by identifier chars
// Rule is atomic (@) to prevent implicit whitespace before boundary check
// Longer names are listed before any name they start with ("bottom" before
// "bot", every multi-letter name before the single letters).
// NOTE(review): only the single-letter alternatives carry the
// !ASCII_ALPHANUMERIC check; the multi-letter names will also match as a
// prefix of a longer word — confirm this is intended.
EDGEPT = @{
    "north" | "south" | "east" | "west"
  | "start" | "end" | "center"
  | "bottom" | "bot"
  | "top" | "left" | "right"
  | "ne" | "nw" | "se" | "sw"
  | ("n" | "s" | "e" | "w" | "c" | "t") ~ !ASCII_ALPHANUMERIC
}

// Dot accessors - differentiated by what follows
// These rules are not atomic, so implicit whitespace may appear after the ".".
// .x or .y
dot_xy = { "." ~ ("x" | "y") }
// .<edge point>
dot_edge = { "." ~ EDGEPT }
// .<property name>
dot_prop = { "." ~ (numproperty | dashproperty | colorproperty) }
// .<PLACENAME> - a further component of an object name
dot_name = { "." ~ PLACENAME }

// === Tokens ===
// An uppercase ASCII letter followed by any number of alphanumerics or "_".
// Atomic: no whitespace inside the name.
PLACENAME = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHANUMERIC | "_")* }

// IDENT must not match keywords - use negative lookahead
// Keywords: last, first, previous, this, and all direction/edge/attribute keywords
// Keywords must be followed by non-alphanumeric to be recognized (word boundary)
//
// Ordering matters inside the group: PEG choice commits to the first literal
// that matches and does not retry later alternatives when the trailing
// !ASCII_ALPHANUMERIC check fails. A keyword that is a prefix of another must
// therefore be listed AFTER the longer one, or the longer word is never
// recognised as a keyword. Hence "top" before "to", "invisible" before
// "invis", "monospace" before "mono" (and, as before, "then" before "the",
// "width" before "wid", "radius" before "rad").
//
// NOTE(review): "thick" still precedes "thickness", so "thickness" is not
// rejected here and remains usable as an IDENT. That is left unchanged on
// purpose: print_arg and primary reach a bare `thickness` only through
// variable -> IDENT, so reordering would make e.g. `print thickness` stop
// parsing. Decide deliberately before changing it.
keyword = @{
    ("last" | "first" | "previous" | "this"
  | "above" | "below" | "left" | "right" | "up" | "down"
  | "at" | "from" | "top" | "to" | "with" | "then" | "and" | "of"
  | "north" | "south" | "east" | "west" | "start" | "end" | "center"
  | "bottom" | "ne" | "nw" | "se" | "sw"
  | "chop" | "close" | "same" | "fit" | "behind" | "go" | "even" | "between" | "way" | "the" | "heading" | "on"
  | "cw" | "ccw" | "invisible" | "invis" | "thick" | "thin" | "solid"
  | "bold" | "italic" | "monospace" | "mono" | "big" | "small" | "aligned" | "ljust" | "rjust"
  | "height" | "ht" | "width" | "wid" | "radius" | "rad" | "diameter" | "thickness"
  | "fill" | "color" | "dotted" | "dashed"
  | "define" | "assert" | "print" | "error"
  | "abs" | "cos" | "sin" | "int" | "sqrt" | "max" | "min" | "dist") ~ !ASCII_ALPHANUMERIC
}

// Identifier: must not be a keyword at this position, starts with a lowercase
// ASCII letter, "_" or "@", and continues with alphanumerics or "_".
// Atomic: no whitespace inside the name.
IDENT = @{ !keyword ~ (ASCII_ALPHA_LOWER | "_" | "@") ~ (ASCII_ALPHANUMERIC | "_")* }

// NUMBER must not match ordinals like 6th, 2nd, etc.
// Three forms, tried in order:
//   1. hexadecimal: 0x / 0X followed by one or more hex digits (no unit)
//   2. decimal starting with digits: the digits must not be immediately
//      followed by an ordinal suffix; then an optional fraction ("." with
//      zero or more digits), optional exponent, optional UNIT
//   3. decimal starting with ".": at least one digit, optional exponent,
//      optional UNIT
// Atomic: no whitespace is allowed anywhere inside the literal, including
// before the unit.
NUMBER = @{
    ("0x" | "0X") ~ ASCII_HEX_DIGIT+
  | ASCII_DIGIT+ ~ !("st" | "nd" | "rd" | "th") ~ ("." ~ ASCII_DIGIT*)? ~ (("e" | "E") ~ ("+" | "-")? ~ ASCII_DIGIT+)? ~ UNIT?
  | "." ~ ASCII_DIGIT+ ~ (("e" | "E") ~ ("+" | "-")? ~ ASCII_DIGIT+)? ~ UNIT?
}
// Unit suffixes accepted directly after a decimal number.
UNIT = { "in" | "cm" | "mm" | "pt" | "px" | "pc" }

// String with proper escape handling - all escape sequences
// A double-quoted literal. Inside it, a backslash followed by any single
// character is accepted as a pair (so \" does not end the string); any other
// character except a bare quote or backslash is accepted as-is. Atomic, so
// whitespace and comment-like text inside the quotes are kept verbatim.
STRING = @{ "\"" ~ ("\\" ~ ANY | !("\"" | "\\") ~ ANY)* ~ "\"" }

// === Whitespace & Comments ===
// Implicit whitespace skipped between tokens of non-atomic rules: space, tab,
// carriage return, or a line continuation. "\n" is deliberately absent
// because it is a statement separator (see EOL). Silent (_): produces no
// pair of its own.
WHITESPACE = _{ " " | "\t" | "\r" | line_continuation }
// A backslash immediately followed by a newline joins two physical lines.
line_continuation = { "\\" ~ NEWLINE }
// Note: # comments must NOT start with hex digits (to allow #ff0000 hex colors)
// Three comment forms: "//" to end of line, "#" to end of line, and
// "/* ... */". A "#" directly followed by any of 0-9, a-f, A-F is not treated
// as a comment, so "#" comments need a space or other non-hex character
// after the "#".
COMMENT = _{ "//" ~ (!NEWLINE ~ ANY)* | "#" ~ !ASCII_HEX_DIGIT ~ (!NEWLINE ~ ANY)* | "/*" ~ (!"*/" ~ ANY)* ~ "*/" }
// Statement separator: a newline or a semicolon.
EOL = { NEWLINE | ";" }
// Only "\n" counts as a newline here; "\r" is consumed as WHITESPACE above.
NEWLINE = _{ "\n" }