// Parol grammar declarations for the Eure language.
// `Eure` is the start symbol; the grammar is processed as LL(k).
%start Eure
%title "Eure"
%grammar_type 'LL(k)'
// Comment syntax declared at the top level: `//` line comments and `/* */` block comments.
// NOTE(review): these top-level declarations are not repeated inside the named
// scanner states below - confirm whether comments are meant to be recognised there.
%line_comment '//'
%block_comment '/*' '*/'
// Scanner-state transitions declared at the top level.
// `%enter` switches to the named scanner state; `%push` additionally remembers
// the current state so that the `%pop` in the InLitStr* scanners can return to it.
// `@` starts a section header.
%on At %enter Header
// `:` starts a text binding; the rest of the line is scanned in the Text state.
%on TextStart %enter Text
// Fenced code blocks with 3 to 6 backticks, one scanner state per fence length.
%on CodeBlockStart3 %enter InCodeBlock3
%on CodeBlockStart4 %enter InCodeBlock4
%on CodeBlockStart5 %enter InCodeBlock5
%on CodeBlockStart6 %enter InCodeBlock6
// Delimited literal strings; pushed (not entered) because they are also opened from Header.
%on LitStr1Start %push InLitStr1
%on LitStr2Start %push InLitStr2
%on LitStr3Start %push InLitStr3
// Delimited inline code with 1 to 3 angle brackets.
%on DelimCodeStart1 %enter InDelimCode1
%on DelimCodeStart2 %enter InDelimCode2
%on DelimCodeStart3 %enter InDelimCode3
// Scanner state for a section header, entered when `At` (`@`) is scanned.
// Automatic newline skipping is switched off, so line breaks reach the parser
// as GrammarNewline / NewlineBind / NewlineTextStart tokens.
%scanner Header {
%auto_newline_off
// Optional line breaks followed by `=`: the section binds a value; back to INITIAL.
%on NewlineBind %enter INITIAL
// Optional line breaks followed by `:`: the section binds text; scan the rest of the line as Text.
%on NewlineTextStart %enter Text
// A plain line break ends the header.
%on GrammarNewline %enter INITIAL
// `{` opens a block body, whose content is scanned in INITIAL.
%on Begin %enter INITIAL
// Delimited literal strings may appear as keys; push so that `%pop` returns to Header.
%on LitStr1Start %push InLitStr1
%on LitStr2Start %push InLitStr2
%on LitStr3Start %push InLitStr3
}
// Scanner state for the remainder of a text-binding line.
// Neither whitespace nor newlines are skipped automatically here, so both
// are visible to the grammar as Ws / Text / GrammarNewline tokens.
%scanner Text {
%auto_newline_off
%auto_ws_off
// Either the text itself or the terminating line break returns to INITIAL.
%on Text %enter INITIAL
%on GrammarNewline %enter INITIAL
}
// Bodies of fenced code blocks, one state per fence length (3 to 6 backticks).
// Whitespace and line breaks are not skipped inside a fence; the end token of
// the same length switches back to INITIAL.
%scanner InCodeBlock3 {
    %auto_newline_off
    %auto_ws_off
    %on CodeBlockEnd3 %enter INITIAL
}
%scanner InCodeBlock4 {
    %auto_newline_off
    %auto_ws_off
    %on CodeBlockEnd4 %enter INITIAL
}
%scanner InCodeBlock5 {
    %auto_newline_off
    %auto_ws_off
    %on CodeBlockEnd5 %enter INITIAL
}
%scanner InCodeBlock6 {
    %auto_newline_off
    %auto_ws_off
    %on CodeBlockEnd6 %enter INITIAL
}
// Bodies of delimited literal strings (1 to 3 angle brackets).
// These states are reached via `%push`, so the end token uses `%pop` to
// return to whichever state opened the string.
%scanner InLitStr1 {
    %auto_newline_off
    %auto_ws_off
    %on LitStr1End %pop
}
%scanner InLitStr2 {
    %auto_newline_off
    %auto_ws_off
    %on LitStr2End %pop
}
%scanner InLitStr3 {
    %auto_newline_off
    %auto_ws_off
    %on LitStr3End %pop
}
// Bodies of delimited inline code (1 to 3 angle brackets).
// Whitespace and line breaks are kept; the matching end token switches back to INITIAL.
%scanner InDelimCode1 {
    %auto_newline_off
    %auto_ws_off
    %on DelimCodeEnd1 %enter INITIAL
}
%scanner InDelimCode2 {
    %auto_newline_off
    %auto_ws_off
    %on DelimCodeEnd2 %enter INITIAL
}
%scanner InDelimCode3 {
    %auto_newline_off
    %auto_ws_off
    %on DelimCodeEnd3 %enter INITIAL
}
%%
// A document: an optional key-less binding, then keyed bindings, then sections.
Eure
    : [ TopLevelBinding ] { Binding } { Section }
    ;

// Key-less binding at the very start of a document or block.
TopLevelBinding
    : ValueBinding
    | TextBinding
    ;

// Key-less binding that directly follows a section header; it uses the
// Header-state variants of `=` and `:`.
RootBinding
    : RootValueBinding
    | RootTextBinding
    ;

RootValueBinding
    : NewlineBind Value
    ;

// Ws is clipped (`^`); text and the trailing line break are both optional.
RootTextBinding
    : NewlineTextStart [ Ws^ ] [ Text ] [ GrammarNewline ]
    ;

// Keyed binding: a key path followed by one of the three right-hand sides.
Binding
    : Keys BindingRhs
    ;

BindingRhs
    : ValueBinding
    | SectionBinding
    | TextBinding
    ;

// `= value`
ValueBinding
    : Bind Value
    ;

// `{ ... }` containing a nested document.
SectionBinding
    : Begin Eure End
    ;

// `: text` up to the end of the line; Ws is clipped (`^`).
TextBinding
    : TextStart [ Ws^ ] [ Text ] [ GrammarNewline ]
    ;
// Section: `@`, a key path, then a body.
Section
    : At Keys SectionBody
    ;

// The body is either flat (possibly absent) or a `{ ... }` block.
SectionBody
    : [ FlatBody ]
    | BlockBody
    ;

// Flat body: a head, then any number of keyed bindings.
FlatBody
    : SectionHead { Binding }
    ;

// The head is either a binding attached to the header itself or a line break.
SectionHead
    : RootBinding
    | NewlineHead
    ;

// Line break ending the header, optionally followed by a key-less binding.
NewlineHead
    : GrammarNewline [ FlatRootBinding ]
    ;

FlatRootBinding
    : ValueBinding
    | TextBinding
    ;

// Block body: a nested document in braces.
BlockBody
    : Begin Eure End
    ;
// Key path: a first segment followed by any number of tail segments.
Keys
    : FirstKey { KeyTail }
    ;

// The first segment has no leading dot.
FirstKey
    : Key
    | ArrayMarker
    ;

// Later segments are either `.key` or an array marker.
KeyTail
    : DotKey
    | ArrayMarker
    ;

DotKey
    : Dot Key
    ;

// `[]` or `[n]`
ArrayMarker
    : ArrayBegin [ Integer ] ArrayEnd
    ;

// All forms a single key segment can take.
Key
    : KeyIdent
    | ExtensionNameSpace
    | String
    | Integer
    | Float
    | KeyTuple
    | TupleIndex
    ;
// Tuple index key: `#` followed by an integer (e.g. `#0`).
// The `#` terminal is registered in both INITIAL and Header, like every other
// token a Key can start with. Key paths also occur in section headers
// (`Section: At Keys ...`), which are scanned in the Header state; with `#`
// known only to INITIAL a header such as `@ a.#0` could not be tokenized.
TupleIndex: <INITIAL, Header>'#' Integer ;
// Extension key: `$` followed by an identifier. Only KeyIdent is accepted
// here, so the extension name cannot be a quoted string.
ExtensionNameSpace
    : Ext KeyIdent
    ;

// Tuple used as a key. Its elements are restricted to KeyValue, i.e. to
// types that can be converted to ObjectKey.
KeyTuple
    : LParen [ KeyTupleElements ] RParen
    ;

KeyTupleElements
    : KeyValue [ KeyTupleElementsTail ]
    ;

// A comma may be followed by more elements or stand as a trailing comma.
KeyTupleElementsTail
    : Comma [ KeyTupleElements ]
    ;

// Values permitted inside a key tuple.
KeyValue
    : Integer
    | Boolean
    | Str
    | KeyTuple
    ;

// In key position the words true, false and null count as identifiers.
KeyIdent
    : Ident
    | True
    | False
    | Null
    ;
// Every form a value can take.
Value
    : Object
    | Array
    | Tuple
    | Number
    | Boolean
    | Null
    | Strings
    | Hole
    | CodeBlock
    | InlineCode
    ;

// Inline object: an optional leading `= value`, then `keys => value` entries.
// Each entry may be followed by a comma.
Object
    : Begin [ ValueBinding [ Comma ] ] { Keys MapBind Value [ Comma ] } End
    ;

MapBind
    : '=>'
    ;

// Array: `[ ... ]` with comma-separated values; a trailing comma is accepted.
Array
    : ArrayBegin [ ArrayElements ] ArrayEnd
    ;

ArrayElements
    : Value [ ArrayElementsTail ]
    ;

ArrayElementsTail
    : Comma [ ArrayElements ]
    ;

// Tuple: `( ... )` with comma-separated values; a trailing comma is accepted.
Tuple
    : LParen [ TupleElements ] RParen
    ;

TupleElements
    : Value [ TupleElementsTail ]
    ;

TupleElementsTail
    : Comma [ TupleElements ]
    ;
// Number: unified numeric value - a float, an integer, infinity or NaN.
// Float is listed (and declared) before Integer; together with longest-match
// scanning this keeps `1.5` from being split into integer tokens.
Number: Float | Integer | Inf | NaN ;
// Integer: optional sign, one leading digit, then any mix of digits and underscores.
// Registered in INITIAL and Header because integers are also valid key segments.
Integer: <INITIAL, Header>/[-+]?\d[\d_]*/ ;
// Float: requires a decimal point or an exponent; an optional f32/f64 suffix may follow.
// NOTE: At least one digit is required after the decimal point to avoid conflicts
// with dot-separated integer keys (e.g., a.1.x).
Float: <INITIAL, Header>/[-+]?\d[\d_]*\.\d+([eE][-+]?\d+)?(f32|f64)?|[-+]?\d[\d_]*[eE][-+]?\d+(f32|f64)?/ ;
// Infinity (optionally signed) and NaN are separate tokens.
// Neither carries a scanner-state list, unlike Integer and Float above.
Inf: /[-+]?[Ii]nf/ ;
NaN: /[Nn]a[Nn]/ ;
// Boolean value: one of the two keyword tokens below.
Boolean: True | False ;
// Keyword tokens. They are registered in INITIAL and Header because KeyIdent
// also accepts them as key names.
True: <INITIAL, Header>'true' ;
False: <INITIAL, Header>'false';
Null: <INITIAL, Header>'null' ;
// Hole: `!` optionally followed by an identifier-like name
// (XID_Start or `_`, then XID_Continue characters or `-`).
Hole: /![\p{XID_Start}_]?[\p{XID_Continue}-]*/ ;
// One string, usable as a key or as a value. It is either an escaped string
// ("..."), a plain literal string ('...'), or one of the delimited literal
// forms for content that itself contains single quotes.
String
    : Str
    | LitStr
    | LitStr1
    | LitStr2
    | LitStr3
    ;

// A string value with continuation: further strings of any kind may be
// appended, each introduced by a Continue token.
Strings
    : String { Continue String }
    ;
// Escaped string: "..." with backslash escapes.
// The body is a sequence of either
//   - any character other than `"` and `\`, or
//   - a backslash followed by exactly one character (the escape pair).
// Consuming escapes as pairs makes the closing quote unambiguous: `\"` never
// terminates the string, and a string ending in an escaped backslash
// (e.g. "a\\") ends at its own closing quote instead of running on to the
// next `"` in the input. Whether a given escape is valid is not decided here.
Str: <INITIAL, Header>/"([^"\\]|\\[\s\S])*"/ ;
// Literal string: '...' with no escape processing, so a single quote cannot
// appear inside it. Registered in INITIAL and Header.
LitStr: <INITIAL, Header>/'[^']*'/ ;
// Text of a text binding: everything up to (not including) the next line break.
// There is no escape character in text.
// NOTE(review): the pattern can also match leading horizontal whitespace and
// the empty string - confirm how the scanner arbitrates between Text and Ws.
Text: <Text>/[^\r\n]*/ ;
// Inline code: either delimited code (<`...`> and longer forms) or the plain
// single-backtick form.
InlineCode: DelimCode | InlineCode1 ;
// Plain inline code as one token: an optional language prefix (zero or more
// characters from the class below), an opening backtick, content without
// backticks or line breaks, and a closing backtick.
InlineCode1: /[a-zA-Z0-9-_]*`[^`\r\n]*`/ ;
// Delimited literal strings: <'...'>, <<'...'>>, <<<'...'>>>
// The content is any sequence of NoSQuote runs and single SQuote characters.
// SQuote matches exactly one quote; the end delimiters '>, '>> and '>>>
// begin with that same quote character.
LitStr1
    : LitStr1Start { NoSQuote | SQuote } LitStr1End
    ;

LitStr2
    : LitStr2Start { NoSQuote | SQuote } LitStr2End
    ;

LitStr3
    : LitStr3Start { NoSQuote | SQuote } LitStr3End
    ;
// Delimited code: <`...`>, <<`...`>>, <<<`...`>>>; the start token may carry
// a language prefix.
// The content is any sequence of NoBacktick runs and single BacktickDelim
// characters. BacktickDelim matches exactly one backtick; the end delimiters
// `>, `>> and `>>> begin with that same backtick.
DelimCode
    : DelimCode3
    | DelimCode2
    | DelimCode1
    ;

DelimCode1
    : DelimCodeStart1 { NoBacktick | BacktickDelim } DelimCodeEnd1
    ;

DelimCode2
    : DelimCodeStart2 { NoBacktick | BacktickDelim } DelimCodeEnd2
    ;

DelimCode3
    : DelimCodeStart3 { NoBacktick | BacktickDelim } DelimCodeEnd3
    ;
// Fenced code block with a fence of 3 to 6 backticks.
// Inside a fence of length N the content may contain backtick runs shorter
// than N (BacktickN-1) between runs of non-backtick characters.
CodeBlock
    : CodeBlock3
    | CodeBlock4
    | CodeBlock5
    | CodeBlock6
    ;

CodeBlock3
    : CodeBlockStart3 { NoBacktick | Backtick2 } CodeBlockEnd3
    ;

CodeBlock4
    : CodeBlockStart4 { NoBacktick | Backtick3 } CodeBlockEnd4
    ;

CodeBlock5
    : CodeBlockStart5 { NoBacktick | Backtick4 } CodeBlockEnd5
    ;

CodeBlock6
    : CodeBlockStart6 { NoBacktick | Backtick5 } CodeBlockEnd6
    ;
// Start tokens for delimited literal strings (declared longest first).
// Registered in INITIAL and Header; both states push the matching InLitStr* state.
LitStr3Start: <INITIAL, Header>"<<<'" ;
LitStr2Start: <INITIAL, Header>"<<'" ;
LitStr1Start: <INITIAL, Header>"<'" ;
// Start tokens for delimited code (declared longest first): an optional
// language prefix, then 3, 2 or 1 `<` characters and a backtick.
DelimCodeStart3: /[a-zA-Z0-9-_]*<<<`/ ;
DelimCodeStart2: /[a-zA-Z0-9-_]*<<`/ ;
DelimCodeStart1: /[a-zA-Z0-9-_]*<`/ ;
// Start tokens for fenced code blocks: exactly N backticks, an optional
// language tag, optional whitespace other than CR/LF, and the line break that
// ends the opening fence line. The line break is part of the token.
CodeBlockStart3: /`{3}[a-zA-Z0-9-_]*[\s--\r\n]*(\r\n|\r|\n)/;
CodeBlockStart4: /`{4}[a-zA-Z0-9-_]*[\s--\r\n]*(\r\n|\r|\n)/;
CodeBlockStart5: /`{5}[a-zA-Z0-9-_]*[\s--\r\n]*(\r\n|\r|\n)/;
CodeBlockStart6: /`{6}[a-zA-Z0-9-_]*[\s--\r\n]*(\r\n|\r|\n)/;
// Tokens valid inside a 3-backtick block. Order matters: the end token is
// declared before the shorter backtick run, and the scanner should eagerly
// match the longest pattern, so ``` closes the block while ` and `` are content.
CodeBlockEnd3: <InCodeBlock3>/`{3}/;
Backtick2: <InCodeBlock3>/`{1,2}/;
// Same scheme for a 4-backtick block: runs of 1 to 3 backticks are content.
CodeBlockEnd4: <InCodeBlock4>/`{4}/;
Backtick3: <InCodeBlock4>/`{1,3}/;
// Same scheme for a 5-backtick block: runs of 1 to 4 backticks are content.
CodeBlockEnd5: <InCodeBlock5>/`{5}/;
Backtick4: <InCodeBlock5>/`{1,4}/;
// Same scheme for a 6-backtick block: runs of 1 to 5 backticks are content.
CodeBlockEnd6: <InCodeBlock6>/`{6}/;
Backtick5: <InCodeBlock6>/`{1,5}/;
// A run of one or more non-backtick characters (line breaks included).
// Shared by all fenced code block states and all delimited code states.
NoBacktick: <InCodeBlock3, InCodeBlock4, InCodeBlock5, InCodeBlock6, InDelimCode1, InDelimCode2, InDelimCode3>/[^`]+/ ;
// Delimited literal string content. Each end token exists only in its own
// InLitStr* state and is declared before SQuote so that it is matched eagerly.
LitStr3End: <InLitStr3>"'>>>" ;
LitStr2End: <InLitStr2>"'>>" ;
LitStr1End: <InLitStr1>"'>" ;
// A single quote that is part of the content, and a run of non-quote characters.
SQuote: <InLitStr1, InLitStr2, InLitStr3>"'" ;
NoSQuote: <InLitStr1, InLitStr2, InLitStr3>/[^']+/ ;
// Delimited code content. Each end token exists only in its own InDelimCode*
// state and is declared before BacktickDelim so that it is matched eagerly.
DelimCodeEnd3: <InDelimCode3>'`>>>' ;
DelimCodeEnd2: <InDelimCode2>'`>>' ;
DelimCodeEnd1: <InDelimCode1>'`>' ;
// A single backtick that is part of the content.
BacktickDelim: <InDelimCode1, InDelimCode2, InDelimCode3>'`' ;
// Line break as a grammar token. It exists only in the Text and Header states,
// where automatic newline skipping is switched off.
GrammarNewline: <Text, Header>/\r\n|\r|\n/ ;
// Whitespace other than CR/LF inside the Text state.
Ws: <Text>/[\s--\r\n]+/ ;
// Punctuation shared by INITIAL and Header (tokens that can occur in key paths).
At: <INITIAL, Header>'@' ;
Ext: <INITIAL, Header>'$' ;
Dot: <INITIAL, Header>'.' ;
Begin: <INITIAL, Header>'{' ;
// No scanner-state list is given for End, unlike Begin.
End: '}' ;
ArrayBegin: <INITIAL, Header>'[' ;
ArrayEnd: <INITIAL, Header>']' ;
LParen: <INITIAL, Header>'(' ;
RParen: <INITIAL, Header>')' ;
// Header-only form of `=`: any number of preceding line breaks are part of the token.
NewlineBind: <Header>/(\r\n|\r|\n)*=/ ;
// Plain `=` (no scanner-state list given).
Bind: '=' ;
Comma: <INITIAL, Header>',' ;
// String continuation marker: a single backslash (no scanner-state list given).
Continue: '\' ;
// Header-only form of `:`: any number of preceding line breaks are part of the token.
NewlineTextStart: <Header>/(\r\n|\r|\n)*:/ ;
// Plain `:` (no scanner-state list given); it triggers the switch to the Text state.
TextStart: ":" ;
// Identifier: XID_Start or `_`, followed by XID_Continue characters or `-`.
// Declared last, after the keyword tokens true/false/null.
Ident: <INITIAL, Header>/[\p{XID_Start}_][\p{XID_Continue}-]*/ ;