// eure-parol 0.1.7
//
// Eure parser implementation using Parol
// Documentation
// Parser entry point and generator settings.
%start Eure
%title "Eure"
%grammar_type 'LL(k)'
// Comment delimiters, declared at top level (outside any `%scanner` block).
%line_comment '//'
%block_comment '/*' '*/'

// Scanner-state transitions declared at top level (outside any `%scanner` block).
// `%enter` switches to the named state. `%push` also switches, but pairs with a
// later `%pop` (see InLitStr1..3) that returns to the previously active state.
%on At %enter Header
%on TextStart %enter Text
// Fenced code blocks: one scanner state per fence length (3 to 6 backticks).
%on CodeBlockStart3 %enter InCodeBlock3
%on CodeBlockStart4 %enter InCodeBlock4
%on CodeBlockStart5 %enter InCodeBlock5
%on CodeBlockStart6 %enter InCodeBlock6
// Delimited literal strings can be opened from INITIAL or from Header (the Header
// scanner repeats these three transitions), hence %push here and %pop on the end token.
%on LitStr1Start %push InLitStr1
%on LitStr2Start %push InLitStr2
%on LitStr3Start %push InLitStr3
// Delimited code: the matching end tokens use `%enter INITIAL` rather than `%pop`.
%on DelimCodeStart1 %enter InDelimCode1
%on DelimCodeStart2 %enter InDelimCode2
%on DelimCodeStart3 %enter InDelimCode3

// Scanner state entered on `@` (see `%on At %enter Header`), i.e. while lexing a section header.
// Automatic newline skipping is off, so line breaks reach the parser as tokens: either
// `GrammarNewline`, or folded into `NewlineBind` / `NewlineTextStart`.
%scanner Header {
  %auto_newline_off
  // `=` (optionally preceded by line breaks) starts a value: back to INITIAL.
  %on NewlineBind %enter INITIAL
  // `:` (optionally preceded by line breaks) starts a text binding: on to Text.
  %on NewlineTextStart %enter Text
  // A bare line break ends the header line.
  %on GrammarNewline %enter INITIAL
  // `{` opens a block body, which is lexed in INITIAL.
  %on Begin %enter INITIAL
  // Delimited literal strings may appear as keys; %push so that %pop returns to Header.
  %on LitStr1Start %push InLitStr1
  %on LitStr2Start %push InLitStr2
  %on LitStr3Start %push InLitStr3
}
// Scanner state for the right-hand side of a text binding (entered on `TextStart` or
// `NewlineTextStart`). Both automatic newline and whitespace skipping are off, so the
// `Ws`, `Text` and `GrammarNewline` tokens are produced explicitly.
%scanner Text {
  %auto_newline_off
  %auto_ws_off
  // The text token itself, or a line break with no text before it, ends this state.
  %on Text %enter INITIAL
  %on GrammarNewline %enter INITIAL
}
// Bodies of fenced code blocks, one scanner state per fence length.
// Automatic whitespace and newline skipping are both disabled inside a fence;
// the closing fence of the matching length switches back to INITIAL.
%scanner InCodeBlock3 {
  %auto_newline_off
  %auto_ws_off
  %on CodeBlockEnd3 %enter INITIAL
}
%scanner InCodeBlock4 {
  %auto_newline_off
  %auto_ws_off
  %on CodeBlockEnd4 %enter INITIAL
}
%scanner InCodeBlock5 {
  %auto_newline_off
  %auto_ws_off
  %on CodeBlockEnd5 %enter INITIAL
}
%scanner InCodeBlock6 {
  %auto_newline_off
  %auto_ws_off
  %on CodeBlockEnd6 %enter INITIAL
}
// Bodies of delimited literal strings. These states are entered with %push,
// so the end token pops back to whichever state opened the string.
%scanner InLitStr1 {
  %auto_newline_off
  %auto_ws_off
  %on LitStr1End %pop
}
%scanner InLitStr2 {
  %auto_newline_off
  %auto_ws_off
  %on LitStr2End %pop
}
%scanner InLitStr3 {
  %auto_newline_off
  %auto_ws_off
  %on LitStr3End %pop
}
// Bodies of delimited inline code. The end token of the matching width
// switches the scanner back to INITIAL.
%scanner InDelimCode1 {
  %auto_newline_off
  %auto_ws_off
  %on DelimCodeEnd1 %enter INITIAL
}
%scanner InDelimCode2 {
  %auto_newline_off
  %auto_ws_off
  %on DelimCodeEnd2 %enter INITIAL
}
%scanner InDelimCode3 {
  %auto_newline_off
  %auto_ws_off
  %on DelimCodeEnd3 %enter INITIAL
}

%%

// Document root: an optional keyless binding, then keyed bindings, then `@` sections.
Eure
  : [ TopLevelBinding ] { Binding } { Section }
  ;

// Keyless binding permitted at the very start of a document (or block).
TopLevelBinding
  : ValueBinding
  | TextBinding
  ;

// Keyless binding written directly after a section's keys. Its first token is
// one of the Header-only tokens that may absorb preceding line breaks.
RootBinding
  : RootValueBinding
  | RootTextBinding
  ;

RootValueBinding
  : NewlineBind Value
  ;

RootTextBinding
  : NewlineTextStart [ Ws^ ] [ Text ] [ GrammarNewline ]
  ;

// A keyed binding: a key path followed by one of three right-hand sides.
Binding
  : Keys BindingRhs
  ;

BindingRhs
  : ValueBinding
  | SectionBinding
  | TextBinding
  ;

// `= value`
ValueBinding
  : Bind Value
  ;

// `{ ... }` holding a nested document
SectionBinding
  : Begin Eure End
  ;

// `: text up to the end of the line`
TextBinding
  : TextStart [ Ws^ ] [ Text ] [ GrammarNewline ]
  ;

// A section: `@`, a key path, then either a flat body or a braced block body.
Section
  : At Keys SectionBody
  ;

SectionBody
  : [ FlatBody ]
  | BlockBody
  ;

// Flat body: a head followed by any number of keyed bindings.
FlatBody
  : SectionHead { Binding }
  ;

SectionHead
  : RootBinding
  | NewlineHead
  ;

// Header line ended by a line break, optionally followed by a keyless binding.
NewlineHead
  : GrammarNewline [ FlatRootBinding ]
  ;

FlatRootBinding
  : ValueBinding
  | TextBinding
  ;

BlockBody
  : Begin Eure End
  ;

// Key path: a first segment followed by `.key` or `[...]` segments.
Keys
  : FirstKey { KeyTail }
  ;

FirstKey
  : Key
  | ArrayMarker
  ;

KeyTail
  : DotKey
  | ArrayMarker
  ;

DotKey
  : Dot Key
  ;

// `[]` or `[n]`
ArrayMarker
  : ArrayBegin [ Integer ] ArrayEnd
  ;

Key
  : KeyIdent
  | ExtensionNameSpace
  | String
  | Integer
  | Float
  | KeyTuple
  | TupleIndex
  ;

TupleIndex
  : '#' Integer
  ;

// An extension name is always a plain identifier, never a quoted one.
ExtensionNameSpace
  : Ext KeyIdent
  ;

// Tuple in key position; its elements are limited to types that can be
// converted to ObjectKey.
KeyTuple
  : LParen [ KeyTupleElements ] RParen
  ;

KeyTupleElements
  : KeyValue [ KeyTupleElementsTail ]
  ;

KeyTupleElementsTail
  : Comma [ KeyTupleElements ]
  ;

// Values that may serve as object keys.
KeyValue
  : Integer
  | Boolean
  | Str
  | KeyTuple
  ;

// `true`, `false` and `null` count as ordinary identifiers in key position.
KeyIdent
  : Ident
  | True
  | False
  | Null
  ;

// Every form a value can take.
Value
  : Object
  | Array
  | Tuple
  | Number
  | Boolean
  | Null
  | Strings
  | Hole
  | CodeBlock
  | InlineCode
  ;

// `{ [= value [,]] key => value [,] ... }`
Object
  : Begin [ ValueBinding [ Comma ] ] { Keys MapBind Value [ Comma ] } End
  ;

MapBind
  : '=>'
  ;

// `[ v, v, ... ]`; the recursive tail permits a trailing comma.
Array
  : ArrayBegin [ ArrayElements ] ArrayEnd
  ;

ArrayElements
  : Value [ ArrayElementsTail ]
  ;

ArrayElementsTail
  : Comma [ ArrayElements ]
  ;

// `( v, v, ... )`; same element/tail shape as Array.
Tuple
  : LParen [ TupleElements ] RParen
  ;

TupleElements
  : Value [ TupleElementsTail ]
  ;

TupleElementsTail
  : Comma [ TupleElements ]
  ;
  // Number: unified numeric type (Float before Integer for longest match)
  Number: Float | Integer | Inf | NaN ;
  // Integer: optional sign, one digit, then any mix of digits and underscores.
  // Recognized in INITIAL and Header because integers are also valid keys.
  Integer: <INITIAL, Header>/[-+]?\d[\d_]*/ ;
  // Float: requires decimal point or exponent, optional f32/f64 suffix
  // NOTE: Requires at least one digit after decimal to avoid conflicts with dot-separated integer keys (e.g., a.1.x)
  // First alternative: digits '.' digits [exponent] [suffix]; second: digits exponent [suffix].
  Float: <INITIAL, Header>/[-+]?\d[\d_]*\.\d+([eE][-+]?\d+)?(f32|f64)?|[-+]?\d[\d_]*[eE][-+]?\d+(f32|f64)?/ ;
  // Infinity and NaN as separate tokens.
  // Neither carries a scanner-state prefix, so they are only recognized in the INITIAL scanner.
  // NOTE(review): `inf` / `nan` also fit the Ident pattern and are declared before Ident;
  // confirm that a top-level key spelled `inf` or `nan` is meant to be rejected.
  Inf: /[-+]?[Ii]nf/ ;
  NaN: /[Nn]a[Nn]/ ;
  Boolean: True | False ;
  // Keyword literals; KeyIdent also accepts them so they can be used as key names.
  True: <INITIAL, Header>'true' ;
  False: <INITIAL, Header>'false';
  Null: <INITIAL, Header>'null' ;
  // Hole: `!` optionally followed by a name shaped like Ident (the leading character is optional here).
  Hole: /![\p{XID_Start}_]?[\p{XID_Continue}-]*/ ;

// A single string literal, shared by keys and values:
//   - escaped strings ("...")
//   - literal strings ('...')
//   - delimited literal strings, for content that itself contains single quotes
String
  : Str
  | LitStr
  | LitStr1
  | LitStr2
  | LitStr3
  ;

// One or more string literals joined by the continuation token; the pieces
// may be of different string kinds.
Strings
  : String { Continue String }
  ;

// Escaped string: "..." with backslash escapes.
// The body is a sequence of units, each either an ordinary character (anything
// except `"` and `\`) or a backslash followed by any one character. Keeping the
// backslash out of the ordinary-character class makes every escape pair atomic:
//   - `\"` can never be read as "a backslash, then the closing quote", and
//   - `\\` right before the closing quote (e.g. "C:\\") ends the string there
//     instead of pairing the second backslash with the quote and running on
//     to some later `"` in the input.
Str: <INITIAL, Header>/"([^"\\]|\\[\s\S])*"/ ;

// Literal string: '...' with no escape processing (single quote cannot appear inside)
// Use the delimited forms (LitStr1..3) when the content needs a single quote.
LitStr: <INITIAL, Header>/'[^']*'/ ;

// No escape character in text
// Text is everything up to, but not including, the next line break; it is only
// recognized in the Text scanner.
// NOTE(review): the pattern uses `*` and can therefore match the empty string; both
// uses already wrap it in `[Text]` — confirm how the scanner treats an empty match.
Text: <Text>/[^\r\n]*/ ;

// Inline code: delimited code or language-prefixed code (optional language)
InlineCode: DelimCode | InlineCode1 ;
// InlineCode1 with OPTIONAL language prefix (0+ chars before backtick)
// Single token: [language] ` content ` where the content contains neither a backtick
// nor a line break. The prefix class is ASCII letters, digits, `-` and `_`.
InlineCode1: /[a-zA-Z0-9-_]*`[^`\r\n]*`/ ;

// Delimited literal strings: <'...'>, <<'...'>>, <<<'...'>>>
// SQuote must stay a single character: the end delimiters are '>, '>> and '>>>,
// i.e. a quote followed by the closing angle brackets.
LitStr1
  : LitStr1Start { NoSQuote | SQuote } LitStr1End
  ;

LitStr2
  : LitStr2Start { NoSQuote | SQuote } LitStr2End
  ;

LitStr3
  : LitStr3Start { NoSQuote | SQuote } LitStr3End
  ;

// Delimited code: <`...`>, <<`...`>>, <<<`...`>>> with optional language prefix
// BacktickDelim must stay a single character: the end delimiters are `>, `>> and `>>>,
// i.e. a backtick followed by the closing angle brackets.
DelimCode
  : DelimCode3
  | DelimCode2
  | DelimCode1
  ;

DelimCode1
  : DelimCodeStart1 { NoBacktick | BacktickDelim } DelimCodeEnd1
  ;

DelimCode2
  : DelimCodeStart2 { NoBacktick | BacktickDelim } DelimCodeEnd2
  ;

DelimCode3
  : DelimCodeStart3 { NoBacktick | BacktickDelim } DelimCodeEnd3
  ;

// Fenced code block with a fence of 3 to 6 backticks. Inside a fence of
// length N, backtick runs shorter than N are ordinary content.
CodeBlock
  : CodeBlock3
  | CodeBlock4
  | CodeBlock5
  | CodeBlock6
  ;

CodeBlock3
  : CodeBlockStart3 { NoBacktick | Backtick2 } CodeBlockEnd3
  ;

CodeBlock4
  : CodeBlockStart4 { NoBacktick | Backtick3 } CodeBlockEnd4
  ;

CodeBlock5
  : CodeBlockStart5 { NoBacktick | Backtick4 } CodeBlockEnd5
  ;

CodeBlock6
  : CodeBlockStart6 { NoBacktick | Backtick5 } CodeBlockEnd6
  ;

// Start tokens for delimited literal strings (order: longest first)
// Recognized in INITIAL and Header, since strings may also be used as keys.
LitStr3Start: <INITIAL, Header>"<<<'" ;
LitStr2Start: <INITIAL, Header>"<<'" ;
LitStr1Start: <INITIAL, Header>"<'" ;

// Start tokens for delimited code with optional language prefix (order: longest first)
// The prefix (ASCII letters, digits, `-`, `_`) is part of the start token itself.
// No scanner-state prefix: recognized in the INITIAL scanner only.
DelimCodeStart3: /[a-zA-Z0-9-_]*<<<`/ ;
DelimCodeStart2: /[a-zA-Z0-9-_]*<<`/ ;
DelimCodeStart1: /[a-zA-Z0-9-_]*<`/ ;

// Opening fence of a code block: N backticks, an optional language tag, optional
// trailing whitespace other than CR/LF, and the terminating line break. The line
// break belongs to the start token, not to the block content.
CodeBlockStart3: /`{3}[a-zA-Z0-9-_]*[\s--\r\n]*(\r\n|\r|\n)/;
CodeBlockStart4: /`{4}[a-zA-Z0-9-_]*[\s--\r\n]*(\r\n|\r|\n)/;
CodeBlockStart5: /`{5}[a-zA-Z0-9-_]*[\s--\r\n]*(\r\n|\r|\n)/;
CodeBlockStart6: /`{6}[a-zA-Z0-9-_]*[\s--\r\n]*(\r\n|\r|\n)/;

// Order matters here. Scanner should eagerly match the longest pattern.
// Inside a 3-backtick fence: a run of 3 backticks closes the block; runs of 1 or 2 are content.
CodeBlockEnd3: <InCodeBlock3>/`{3}/;
Backtick2: <InCodeBlock3>/`{1,2}/;

// Order matters here. Scanner should eagerly match the longest pattern.
// Inside a 4-backtick fence: a run of 4 closes the block; runs of 1 to 3 are content.
CodeBlockEnd4: <InCodeBlock4>/`{4}/;
Backtick3: <InCodeBlock4>/`{1,3}/;

// Order matters here. Scanner should eagerly match the longest pattern.
// Inside a 5-backtick fence: a run of 5 closes the block; runs of 1 to 4 are content.
CodeBlockEnd5: <InCodeBlock5>/`{5}/;
Backtick4: <InCodeBlock5>/`{1,4}/;

// Order matters here. Scanner should eagerly match the longest pattern.
// Inside a 6-backtick fence: a run of 6 closes the block; runs of 1 to 5 are content.
CodeBlockEnd6: <InCodeBlock6>/`{6}/;
Backtick5: <InCodeBlock6>/`{1,5}/;

// NoBacktick shared across code blocks and delimited code
// A maximal run of characters other than a backtick; this includes whitespace and line breaks.
NoBacktick: <InCodeBlock3, InCodeBlock4, InCodeBlock5, InCodeBlock6, InDelimCode1, InDelimCode2, InDelimCode3>/[^`]+/ ;

// Delimited literal string content (end tokens must come before squote token for eager matching)
// Each end token exists only in the scanner state of its own delimiter width.
LitStr3End: <InLitStr3>"'>>>" ;
LitStr2End: <InLitStr2>"'>>" ;
LitStr1End: <InLitStr1>"'>" ;
// A single quote that is not part of an end delimiter is ordinary content.
SQuote: <InLitStr1, InLitStr2, InLitStr3>"'" ;
// A maximal run of characters other than a single quote.
NoSQuote: <InLitStr1, InLitStr2, InLitStr3>/[^']+/ ;

// Delimited code content (end tokens must come before backtick token for eager matching)
// Each end token exists only in the scanner state of its own delimiter width.
DelimCodeEnd3: <InDelimCode3>'`>>>' ;
DelimCodeEnd2: <InDelimCode2>'`>>' ;
DelimCodeEnd1: <InDelimCode1>'`>' ;
// A backtick that is not part of an end delimiter is ordinary content.
BacktickDelim: <InDelimCode1, InDelimCode2, InDelimCode3>'`' ;

// Explicit line break; a token only in Text and Header, the two scanners that list it
// and that turn automatic newline skipping off.
GrammarNewline: <Text, Header>/\r\n|\r|\n/ ;
// Horizontal whitespace (any whitespace except CR/LF) in the Text scanner, where
// automatic whitespace skipping is off.
Ws: <Text>/[\s--\r\n]+/ ;
// Punctuation shared by the INITIAL and Header scanners.
At: <INITIAL, Header>'@' ;
Ext: <INITIAL, Header>'$' ;
Dot: <INITIAL, Header>'.' ;
Begin: <INITIAL, Header>'{' ;
// No scanner-state prefix: `}` is recognized in INITIAL only.
End: '}' ;
ArrayBegin: <INITIAL, Header>'[' ;
ArrayEnd: <INITIAL, Header>']' ;
LParen: <INITIAL, Header>'(' ;
RParen: <INITIAL, Header>')' ;
// Header-only `=`; the token also absorbs any line breaks in front of it.
NewlineBind: <Header>/(\r\n|\r|\n)*=/ ;
// Plain `=`, recognized in INITIAL only.
Bind: '=' ;
Comma: <INITIAL, Header>',' ;
// String continuation marker used by `Strings`: a backslash between two string literals.
Continue: '\' ;
// Header-only `:`; the token also absorbs any line breaks in front of it.
NewlineTextStart: <Header>/(\r\n|\r|\n)*:/ ;
// Plain `:`, recognized in INITIAL only; it switches the scanner to Text.
TextStart: ":" ;
// Identifier: an XID_Start character or `_`, then XID_Continue characters or `-`.
Ident: <INITIAL, Header>/[\p{XID_Start}_][\p{XID_Continue}-]*/ ;