// ────────────────────────────────
// RON (Rusty Object Notation)
// PEG grammar for pest ≥2.7
// ────────────────────────────────
// Notes
// • Silent rules (declared with the “_” modifier, as in `name = _{ … }`)
//   still match input but produce no pair in the parse tree.
// • Top-level rule is ron.
// • Raw strings accept any number of `#` up to 16; expand if you
// need more. Keeping them bounded lets the PEG stay regular.
// • Identifiers rely on Unicode XID_Start/XID_Continue classes,
// which pest exposes as built-ins.
// ────────────────────────────────
//////////////////////////////////
// LEXICAL LAYER //
//////////////////////////////////
// ── whitespace & comments ──
// Implicit whitespace. pest inserts this rule between the terms of `~`
// sequences and between repetitions in every non-atomic rule.
// First row: tab, LF, vertical tab, form feed, CR, space.
// Second row: U+0085, U+200E, U+200F, U+2028, U+2029.
WHITESPACE = _{
    "\t" | "\n" | "\u{000B}" | "\u{000C}" | "\r" | " "
  | "\u{0085}" | "\u{200E}" | "\u{200F}" | "\u{2028}" | "\u{2029}"
}
// Line comment: "//" followed by everything up to the next NEWLINE, which is
// consumed as part of the comment. A comment that runs to end of input is
// accepted as well.
LINE_COMMENT = _{ "//" ~ (!NEWLINE ~ ANY)* ~ (NEWLINE | EOI) }
// Block comment: "/*" … "*/". The recursive alternative is tried before the
// catch-all character so that an inner "/*" opens a nested comment instead of
// being eaten one character at a time; do not swap the two alternatives.
BLOCK_COMMENT = _{ "/*" ~ (BLOCK_COMMENT | (!"*/" ~ ANY))* ~ "*/" }
// Like WHITESPACE, pest skips COMMENT implicitly inside non-atomic rules.
COMMENT = _{ LINE_COMMENT | BLOCK_COMMENT }
// Line terminator used by LINE_COMMENT: LF or CRLF (a lone CR is not one).
// NOTE(review): this has the same name as pest's built-in NEWLINE rule —
// confirm that overriding the built-in is intended.
NEWLINE = _{ "\n" | "\r\n" }
//////////////////////////////////
// ATOMS //
//////////////////////////////////
// ── digits ──
// Single-digit character classes, expressed through pest's built-in ASCII
// classes. All are silent: they never show up as pairs.
// decimal: 0-9
digit = _{ ASCII_DIGIT }
// binary: 0-1
digit_binary = _{ ASCII_BIN_DIGIT }
// octal: 0-7
digit_octal = _{ ASCII_OCT_DIGIT }
// hexadecimal: 0-9, a-f, A-F
digit_hexadecimal = _{ ASCII_HEX_DIGIT }
// ── integer & float suffix helpers ──
// Optional leading sign shared by integer and float literals.
sign = { "+" | "-" }
// Integer type suffix: i8 … i128 / u8 … u128.
// Atomic (`@`) so that implicit WHITESPACE/COMMENT cannot be skipped between
// the letter and the width; a plain `{ … }` rule would accept "i 32".
int_suffix = @{ ( "i" | "u" ) ~ ( "8" | "16" | "32" | "64" | "128" ) }
// Float type suffix: f32 / f64. Atomic for the same reason as int_suffix.
float_suffix = @{ "f" ~ ( "32" | "64" ) }
// ── escapes ──
// Escape payloads, i.e. what may follow a backslash. The backslash itself is
// matched by the rule that references these helpers.
// Single-character escapes: \n \r \t \0 \\ \" \'
escape_ascii = _{ "n" | "r" | "t" | "0" | "\\" | "\"" | "'" }
// \xHH — exactly two hexadecimal digits.
escape_byte = _{ "x" ~ digit_hexadecimal{2} }
// \uHHHH … \uHHHHHH — four to six hexadecimal digits.
escape_unicode = _{ "u" ~ digit_hexadecimal{4,6} }
// ── identifier ──
// Plain identifier: an XID_START character or "_" followed by any number of
// XID_CONTINUE characters.
ident_std = @{ ( XID_START | "_" ) ~ XID_CONTINUE* }
// Raw identifier: "r#" followed by one or more of XID_CONTINUE, "+", "-", ".".
ident_raw = @{ "r#" ~ ( "+" | "-" | "." | XID_CONTINUE )+ }
// ident_raw has to be tried first: ident_std would otherwise match just the
// leading "r" of a raw identifier.
ident = {
    ident_raw
  | ident_std
}
// ── helpers ──
// Element separator used by lists, maps, tuples, struct fields and extension
// names. Silent, so it never appears as a pair in the parse tree.
comma = _{ "," }
//////////////////////////////////
// LITERALS //
//////////////////////////////////
// ── unsigned integers ──
// Unsigned integer literals in the four supported radices. Each starts with a
// real digit (after the radix prefix, if any) and may then contain "_" as a
// digit separator.
//
// The per-radix rules are atomic (`@`): in a non-atomic rule pest skips
// WHITESPACE/COMMENT between every term and every repetition, so
// `digit ~ ( digit | "_" )*` would accept "1 2 3" or "1/*c*/2" as one literal.
// The inner digit rules are silent, so making these atomic does not change
// the pairs that are produced.
unsigned_binary = @{ "0b" ~ digit_binary ~ ( digit_binary | "_" )* }
unsigned_octal = @{ "0o" ~ digit_octal ~ ( digit_octal | "_" )* }
unsigned_hexadecimal = @{ "0x" ~ digit_hexadecimal ~ ( digit_hexadecimal | "_" )* }
unsigned_decimal = @{ digit ~ ( digit | "_" )* }
// Prefixed forms are tried before decimal; otherwise unsigned_decimal would
// match the leading "0" of "0b…", "0o…" or "0x…".
unsigned = { unsigned_binary
           | unsigned_octal
           | unsigned_hexadecimal
           | unsigned_decimal }
// ── integer ──
// Integer literal: optional sign, unsigned magnitude, optional type suffix.
//
// Compound-atomic (`$`): no implicit WHITESPACE/COMMENT is skipped between the
// parts (so "- 1 u8" is not an integer), while the inner sign / unsigned /
// int_suffix pairs are still produced.
//
// The trailing negative lookahead rejects a match that is really the start of
// a float ("1.5", "1e5", "1f32"). `value` tries integer before float, so
// without it the integer prefix would win and the float could never parse.
integer = ${ sign? ~ unsigned ~ int_suffix? ~ !( "." | "e" | "E" | float_suffix ) }
// ── float ──
// Float literals. All rules here are atomic (`@`) or compound-atomic (`$`) so
// that implicit WHITESPACE/COMMENT cannot be skipped inside a literal; the
// `$` form is used wherever an inner pair (float_int, sign, …) must still be
// produced.
// Integer part: a digit followed by digits or "_" separators.
float_int = @{ digit ~ ( digit | "_" )* }
// "1." or "1.5": integer part, dot, optional fractional digits.
float_std = ${ float_int ~ "." ~ ( digit ~ ( digit | "_" )* )? }
// ".5": leading dot followed by at least one digit.
float_frac = @{ "." ~ digit ~ ( digit | "_" )* }
// Exponent: e/E, optional sign, at least one digit.
float_exp = ${ ( "e" | "E" ) ~ sign? ~ digit ~ ( digit | "_" )* }
// float_std must precede float_int, which would otherwise match only the
// integer part of "1.5".
float_num = ${ ( float_std | float_frac | float_int ) ~ float_exp? }
// The keyword forms "inf" / "NaN" must not be followed by an identifier
// character (after the optional suffix). Otherwise an identifier such as
// `info` would be consumed as the float "inf" plus trailing garbage.
float = ${ sign? ~ ( ( ( "inf" | "NaN" ) ~ float_suffix? ~ !XID_CONTINUE )
                   | ( float_num ~ float_suffix? ) ) }
// ── byte ──
// A literal (unescaped) byte: any ASCII character except "'" and "\".
// Restricted to U+0000..U+007F because a byte literal cannot hold a
// multi-byte character; plain ANY would accept e.g. b'é'.
ascii_char = _{ !( "'" | "\\" ) ~ '\u{00}'..'\u{7F}' }
// Either a literal ASCII character or a backslash escape (\n, \xHH, …).
byte_content = { ascii_char | "\\" ~ ( escape_ascii | escape_byte ) }
// b'…' — atomic, so nothing is skipped inside the quotes.
byte = @{ "b'" ~ byte_content ~ "'" }
// ── char ──
// Contents of a char literal: either one of the two escapes \\ and \', or any
// single character other than "'" and "\".
char_body = {
    "\\" ~ ( "'" | "\\" )
  | !( "\\" | "'" ) ~ ANY
}
// '…' — atomic, so nothing is skipped inside the quotes.
char = @{ "'" ~ char_body ~ "'" }
// ── string (standard) ──
// Any single character that is neither the closing quote nor a backslash.
string_char = { !( "\\" | "\"" ) ~ ANY }
// Backslash followed by a unicode, byte or single-character escape payload.
string_escape = { "\\" ~ ( escape_unicode | escape_byte | escape_ascii ) }
// "…" — atomic, so whitespace inside the quotes is part of the string.
string_std = @{
    "\""
  ~ ( string_escape | string_char )*
  ~ "\""
}
// Zero to sixteen "#" characters: the delimiter padding of a raw string.
hashes = _{ "#"{0,16} }
// Raw string: r"…", r#"…"#, r##"…"##, …
//
// The opening run of hashes is pushed on pest's stack, so the body ends only
// at a quote followed by that same run (PEEK), and the closing delimiter must
// repeat it exactly (POP). Matching `hashes` independently on both sides would
// end the body at the first `"` and accept mismatched delimiters like r"a"#.
string_raw = @{ "r" ~ PUSH(hashes) ~ "\"" ~ ( !( "\"" ~ PEEK ) ~ ANY )* ~ "\"" ~ POP }
// ── byte string ──
// b"…" — compound-atomic so that no WHITESPACE/COMMENT may sit between the
// "b" prefix and the opening quote, while the inner string_std pair is kept.
byte_string_std = ${ "b" ~ string_std }
// br"…", br#"…"#, … — the opening run of hashes is pushed on pest's stack, so
// the body ends only at a quote followed by that same run (PEEK), and the
// closing delimiter must repeat it exactly (POP).
byte_string_raw = @{ "br" ~ PUSH(hashes) ~ "\"" ~ ( !( "\"" ~ PEEK ) ~ ANY )* ~ "\"" ~ POP }
byte_string = { byte_string_std | byte_string_raw }
// ── bool ──
// Boolean keyword. Atomic, with a trailing lookahead so the keyword is only
// recognised as a whole word: an identifier such as `trueColor` must not be
// read as the bool "true" followed by trailing garbage.
bool = @{ ( "true" | "false" ) ~ !XID_CONTINUE }
//////////////////////////////////
// COMPOSITES //
//////////////////////////////////
// ── option ──
// Some(value)
option_some = { "Some" ~ "(" ~ value ~ ")" }
// None | Some(value)
// The leading negative lookahead stops "None" from matching as a mere prefix
// of a longer identifier (e.g. `NoneSelected`), which would leave the rest of
// the identifier unparsed. It is placed before the keyword, not after it, so
// the span of the resulting pair never includes trailing whitespace.
option = { ( !( "None" ~ XID_CONTINUE ) ~ "None" ) | option_some }
// ── structs ──
// name: value
named_field = { ident ~ ":" ~ value }
// A bare name, or the unit literal "()".
unit_struct = { "()" | ident }
// Optional name followed by a parenthesised value list.
tuple_struct = { ident? ~ tuple }
// Optional name followed by parenthesised `name: value` fields; a trailing
// comma is allowed and the field list may be empty.
named_struct = {
    ident?
  ~ "("
  ~ ( named_field ~ ( comma ~ named_field )* ~ comma? )?
  ~ ")"
}
// Tried from most to least specific; unit_struct is last because a bare
// ident is a prefix of both other forms.
struct = {
    named_struct
  | tuple_struct
  | unit_struct
}
// ── collections ──
// All three collections share one shape: opening bracket, optional
// comma-separated items with an optional trailing comma, closing bracket.
// [ value, … ]
list = {
    "["
  ~ ( value ~ ( comma ~ value )* ~ comma? )?
  ~ "]"
}
// key: value
map_entry = { value ~ ":" ~ value }
// { key: value, … }
map = {
    "{"
  ~ ( map_entry ~ ( comma ~ map_entry )* ~ comma? )?
  ~ "}"
}
// ( value, … )
tuple = {
    "("
  ~ ( value ~ ( comma ~ value )* ~ comma? )?
  ~ ")"
}
// ── enums ──
// Variant
enum_variant_unit = { ident }
// Variant( value, … )
enum_variant_tuple = { ident ~ tuple }
// Variant( name: value, … ) — the field list may be empty and may end with a
// trailing comma.
enum_variant_named = {
    ident
  ~ "("
  ~ ( named_field ~ ( comma ~ named_field )* ~ comma? )?
  ~ ")"
}
// The bare-name form is tried last because it is a prefix of the other two.
enum_variant = {
    enum_variant_named
  | enum_variant_tuple
  | enum_variant_unit
}
// ── top-level “value” union ──
// Any RON value. This is an ordered choice: the first alternative that matches
// wins and later ones are not tried, so the order below is significant.
// • integer is tried before float.
// • byte, string_raw and byte_string come before struct / enum_variant, whose
//   ident would otherwise match the leading `b` / `r` of those literals.
// • bool and option come before struct / enum_variant for the same reason
//   (their keywords are also valid identifiers).
// • tuple comes before struct, so an unnamed "( … )" of plain values is
//   reported as a tuple.
// NOTE(review): every form enum_variant accepts (ident, ident + tuple,
// ident + named fields) is also accepted by struct, which is tried first —
// enum_variant looks unreachable from this rule; confirm that is intended.
value = { integer | byte | float | string_std | string_raw | byte_string
| char | bool | option | list | map
| tuple | struct | enum_variant }
// ── extensions (“#![enable(foo, bar)]”) ──
// Name of one extension inside enable(…).
extension_name = { ident }
// enable(name, name, …) — at least one name, optional trailing comma.
extensions_inner = { "enable" ~ "(" ~ extension_name ~ ( comma ~ extension_name )* ~ comma? ~ ")" }
// One `#![enable(…)]` attribute. "#" and "!" are separate tokens because RON
// permits whitespace between them; a single fused "#!" literal rejects "# !".
extensions = { "#" ~ "!" ~ "[" ~ extensions_inner ~ "]" }
//////////////////////////////////
// ENTRY POINT //
//////////////////////////////////
// A complete RON document: any number of `#![enable(…)]` attributes followed
// by exactly one value. RON allows several extension attributes in a row, so
// `extensions*` is used rather than `extensions?`. SOI / EOI anchor the match
// to the whole input; implicit WHITESPACE/COMMENT is skipped around each part.
ron = { SOI ~ extensions* ~ value ~ EOI }