pkl-parser 0.8.1

A Rust Pkl parser!
Documentation
// Grammar entry point: zero or more statements, anchored at the start of the
// input (SOI) and required to reach the end of the input (EOI).
file = { SOI ~ stmt* ~ EOI }

// TODO: add support for:
//   - `when` statements
//   - `for` statements
//   - and similar constructs
//
// Known limitation: the only file in the Pkl stdlib
// that does not work yet is Benchmark.pkl.
// One top-level statement. The alternatives are tried in the order listed
// (ordered choice), so `property` is the last resort.
stmt = {
    typealias
  | module
  | import
  | extends
  | amends
  | function
  | class
  | property
}

// Reserved words of the language.
// Each word appears exactly once, and the list includes every word that other
// rules of this grammar match as a keyword ("external" from PREFIX_KEYWORD,
// "if"/"else", "as", "when", "for", "out").
// No alternative is a prefix of a later one, so the order is not significant.
// NOTE(review): no rule visible in this grammar references KEYWORDS; it is
// presumably consumed from the Rust side — confirm against the callers.
KEYWORDS = {
    "typealias" | "module" | "import" | "extends" | "amends" | "function" | "in" | "let" | "is" |
    "local" | "fixed" | "const" | "hidden" | "open" | "true" | "false" | "null" | "Infinity" | "NaN" |
    "abstract" | "class" | "new" | "this" |
    "external" | "if" | "else" | "as" | "when" | "for" | "out"
}

// pest's implicit whitespace rule: skipped automatically between the tokens of
// non-atomic rules. Semicolons are treated as whitespace too.
WHITESPACE = _{ " " | "\t" | "\n" | "\r" | ";" }
// Horizontal whitespace only (no line breaks).
SPACE      = _{ " " | "\t" }

// Explicit "whitespace or comment" separators for the atomic rules that spell
// out their own spacing. The ";" alternative is already covered by WHITESPACE.
WHITESPACE_COMMENT = { WHITESPACE | COMMENT | ";" }
SPACE_COMMENT      = { SPACE | COMMENT }

// `=` and `:` tokens, each optionally preceded by whitespace/comments.
EQUAL = !{ WHITESPACE_COMMENT* ~ "=" }
COLON = !{ WHITESPACE_COMMENT* ~ ":" }

// Two-slash comment: everything up to (not including) the line break.
line_comment      = _{ "//" ~ (!"\n" ~ ANY)* }
// Three-slash doc comment. A line break only terminates it when the next line
// (after optional spaces/tabs) does not start with three slashes again, so a
// run of consecutive doc-comment lines is matched as one unit.
doc_comment       = @{ "///" ~ (!("\n" ~ !(SPACE* ~ "///")) ~ ANY)* }
// Block comment; ends at the first `*/`.
multiline_comment = _{ "/*" ~ (!"*/" ~ ANY)* ~ "*/" }
// `@Name` annotation with an optional `{ ... }` body made of properties.
annotation        = !{ "@" ~ ident ~ ("{" ~ property* ~ "}")? }

// pest's implicit comment rule. Annotations are handled as comments as well.
// `doc_comment` must stay ahead of `line_comment`, which would otherwise
// match the first two slashes of a doc comment.
COMMENT = {
    doc_comment
  | line_comment
  | multiline_comment
  | annotation
}

// a stmt with a doc comment

// Modifier keywords that may precede a statement.
// The grammar accepts every modifier in front of any statement; whether a
// given modifier is actually allowed before a given statement is meant to be
// checked later.
PREFIX_KEYWORD = @{
    "local" | "fixed" | "const" | "external" | "hidden" | "abstract" | "open"
}


// ********
//  *****
//  exprs
//  *****
// ********

// An expression: one operand with optional prefix/postfix operators, followed
// by any number of `infix operand` pairs or `is`/`as` type operations.
// Operator precedence is not encoded here; the result is a flat sequence.
expr = !{
    prefix? ~ primary_expr ~ postfix?
    ~ (
        (infix ~ prefix? ~ primary_expr ~ postfix?)
      | (type_op ~ TYPE)
    )*
}

// Operand of an expression. Order matters (ordered choice): the longer,
// more specific forms are tried before `literal` and the bare `ident`.
primary_expr = {
    if_else_expr
  | let_expr
  | function_expr
  | indexing_expr
  | class_instance
  | fn_call
  | literal
  | ident
  | amended_object
  | parenthesized
}

// Expression wrapped in parentheses.
parenthesized = !{ "(" ~ expr ~ ")" }

// if/else expression: `if (condition) value else other`.
// `condition`, `if` and `else` are thin wrappers around `expr`, giving each
// of the three parts its own rule name.
condition    = { expr }
if           = { expr }
else         = { expr }
if_else_expr = { "if" ~ "(" ~ condition ~ ")" ~ if ~ "else" ~ else }

// let expression: `let (name [: Type] = value) body`.
// `let_value` wraps the bound value so it has its own rule name, distinct
// from the body expression that follows the closing parenthesis.
let_value = { expr }
let_expr  = {
    "let" ~ "(" ~ ident ~ (COLON ~ TYPE)? ~ EQUAL ~ let_value ~ ")"
    ~ expr
}

// Object instantiation: `new`, an optional type, then the object body.
// The rule is compound-atomic, so separators are spelled out explicitly:
// at least one whitespace/comment is required between `new` and the type,
// while the object body may follow with or without separation.
class_instance = ${
    "new"
    ~ (WHITESPACE_COMMENT+ ~ TYPE)?
    ~ WHITESPACE_COMMENT*
    ~ object
}

// One parameter of an anonymous function: a name with an optional type.
function_expr_arg = { ident ~ (COLON ~ TYPE)? }

// Anonymous function: `(a, b: Type) -> expr`.
// The parameter list as a whole is optional (`?`): parameters must be
// comma-separated and a single trailing comma is allowed, the same shape
// `fn_call` uses for its argument list. Using a repetition here instead would
// let several parameter groups follow each other with no comma in between,
// e.g. `(a b) -> a`.
function_expr = !{
    "(" ~ (function_expr_arg ~ ("," ~ function_expr_arg)* ~ ","?)? ~ ")"
    ~ "->" ~ expr
}

// Callee names containing `?` or `*`, which `ident` cannot match.
nullable_read  = _{ "read?" }
globbed_read   = _{ "read*" }
globbed_import = _{ "import*" }

// Function call: a callee name followed by a parenthesized argument list.
// The list may be empty; arguments are comma-separated and a trailing comma
// is allowed.
fn_call = !{
    (nullable_read | globbed_read | globbed_import | ident)
    ~ "(" ~ (expr ~ ("," ~ expr)* ~ ","?)? ~ ")"
}

// Operators

// Binary operators. Order matters (ordered choice): an operator that starts
// with another operator's text must come first, e.g. `pow` ("**") before
// `mul` ("*") and ">=" / "<=" before ">" / "<".
infix = _{
    add
  | sub
  | pow
  | mul
  | div
  | div_r
  | modulo
  | and
  | or
  | pipe
  | comp_equal
  | comp_not_equal
  | comp_greater_equal
  | comp_greater
  | comp_less_equal
  | comp_less
  | null_coalescing
}
// Unary operators written before the operand.
prefix  = _{ neg | logical_not }
// Unary operators written after the operand.
postfix = { non_null }

// `is` / `as` followed by at least one whitespace/comment; the caller
// (`expr`) expects a TYPE right after it.
type_op = ${ (is_op | as_op) ~ WHITESPACE_COMMENT+ }

// Postfix / prefix operators
non_null = { "!!" } // Non-null assertion
neg      = { "-" }  // Arithmetic negation

// Arithmetic
add    = { "+" }  // Addition
sub    = { "-" }  // Subtraction
mul    = { "*" }  // Multiplication
div    = { "/" }  // Division
div_r  = { "~/" } // Division rounded
pow    = { "**" } // Exponentiation
modulo = { "%" }  // Modulo

// Comparison
comp_equal         = { "==" }
comp_not_equal     = { "!=" }
comp_greater       = { ">" }
comp_greater_equal = { ">=" }
comp_less          = { "<" }
comp_less_equal    = { "<=" }

// Pipe
pipe = { "|>" }

// Boolean logic
and = { "&&" }
or  = { "||" }

// Type test / type cast keywords (silent; surfaced through `type_op`).
is_op = _{ "is" }
as_op = _{ "as" }

// NOTE(review): both rules below are silent, so unlike the other operators
// they do not get a rule of their own in the parse output — confirm that the
// consumer of the parse tree accounts for this.
logical_not     = _{ "!" }
null_coalescing = _{ "??" }



// Possibly qualified name: `a`, `a.b`, `a.b.c`, ...
ident_with_opt_dots = !{ ident ~ ("." ~ ident)* }

// Identifier starting with `_` or `$`; at least one more character is needed.
ident_with_start_symbol    = { ("_" | "$") ~ (ASCII_ALPHA | ASCII_DIGIT | "_")+ }
// Identifier starting with a letter, continued by letters, digits or `_`.
ident_without_start_symbol = { ASCII_ALPHA+ ~ (ASCII_ALPHA | ASCII_DIGIT | "_")* }
basic_ident = @{ ident_without_start_symbol | ident_with_start_symbol }

// Backtick-quoted identifier: any non-empty text between two backticks.
illegal_ident = { "`" ~ (!"`" ~ ANY)+ ~ "`" }
// A lone underscore.
blank_ident   = _{ "_" }

// Any identifier. `blank_ident` comes last so that a name such as `_foo` is
// matched in full by `basic_ident` rather than as a lone `_`.
// NOTE(review): keywords are not excluded here.
ident = @{ basic_ident | illegal_ident | blank_ident }

// Member access / subscript chain: a base expression followed by one or more
// accessors, each being either
//   a[anything]
//   a.anything
//   a?.anything
// `.` and `?.` may be preceded by any whitespace or comments (line breaks
// included), whereas `[` may only be preceded by spaces, tabs or comments.
indexing_expr = ${
    (class_instance | literal | index_expr | fn_call | ident | amended_object | parenthesized)
    ~ (
        (WHITESPACE_COMMENT* ~ (null_propagation | dot_expr))
      | (SPACE_COMMENT* ~ index_expr)
    )+
}
// `?.name` or `?.call(...)`
null_propagation = !{ "?." ~ (fn_call | ident) }

// `.name` or `.call(...)`
dot_expr = !{ "." ~ (fn_call | ident) }
// Subscript, e.g. listing[index]
index_expr = !{ "[" ~ expr ~ "]" }

// Literal values. Order matters: `float` is tried before `int` so that a
// number with a fraction or an exponent is not cut short at its integer part.
literal = !{ bool | null | float | int | string }

// NOTE(review): `null` has no word-boundary check, so it also matches the
// first four characters of an identifier such as `nullable`. It is left as is
// because it is a silent rule; as far as I can tell, guarding it would mean
// making it atomic, which changes the parse tree it produces — confirm with
// the consumer before changing.
null = _{ "null" }
// `true` / `false`. The negative lookahead rejects a match that is only the
// prefix of a longer identifier (e.g. `trueValue`, `false_positive`), so
// `literal` fails there and the identifier alternatives get their turn.
// The lookahead consumes nothing, so the matched span is exactly the keyword.
bool = @{ ("true" | "false") ~ !(ASCII_ALPHA | ASCII_DIGIT | "_") }

// Strings

// The 1 to 6 hex digits inside a `\u{...}` escape.
unicode_digits   = @{ ASCII_HEX_DIGIT{1, 6} }
// Backslash escapes: tab, newline, carriage return, quote, backslash and
// Unicode code point.
escape_sequences = {
    "\\t"
  | "\\n"
  | "\\r"
  | "\\\""
  | "\\\\"
  | ("\\u" ~ "{" ~ unicode_digits ~ "}")
}

// String interpolation: backslash, then a parenthesized expression.
interpolation = !{ "\\(" ~ expr ~ ")" }

// Single-line string: everything up to the next unescaped double quote.
// Interpolations and escapes are tried before falling back to any character.
basic_string_true_content = { (!"\"" ~ (interpolation | escape_sequences | ANY))* }
basic_string = ${ "\"" ~ basic_string_true_content ~ "\"" }


// Multi-line string: opens with three quotes at the end of a line (at most
// one space/tab may follow them) and closes with three quotes on a line of
// their own (optionally indented).
multiline_content = { (!multiline_end ~ (interpolation | escape_sequences | ANY))* }
multiline_end     = { "\n" ~ SPACE* ~ "\"\"\"" }
multiline_start   = { "\"\"\"" ~ SPACE? ~ "\n" }
multiline_string  = ${ multiline_start ~ multiline_content ~ multiline_end }

// String delimited by a run of `#` plus a quote on each side.
// The opening run of `#` is pushed on pest's stack; the string ends at the
// first quote followed by that same run, which POP then consumes.
custom_escaped = @{ "\\#" | escape_sequences }
custom_string = ${
    PUSH("#"*) ~ "\""
        ~ raw_string_interior
        ~ "\"" ~ POP
}
raw_string_interior = {
    (
        // Stop at a quotation mark followed by the pushed run of number
        // signs; anything else is content.
        !("\"" ~ PEEK)
        ~ (interpolation | custom_escaped | ANY)
    )*
}

// Any string literal. `multiline_string` must come before `basic_string`,
// which would otherwise match the first two of the three opening quotes as
// an empty string.
string = { multiline_string | basic_string | custom_string }

// Numbers

// Integer literals. `_` may separate groups of digits, but every group needs
// at least one digit.
hex_int    = @{ "0x" ~ ASCII_HEX_DIGIT+ ~ ("_" ~ ASCII_HEX_DIGIT+)* }
octal_int  = @{ "0o" ~ ASCII_OCT_DIGIT+ ~ ("_" ~ ASCII_OCT_DIGIT+)* }
binary_int = @{ "0b" ~ ASCII_BIN_DIGIT+ ~ ("_" ~ ASCII_BIN_DIGIT+)* }
basic_int  = @{ ASCII_DIGIT+ ~ ("_" ~ ASCII_DIGIT+)* }
// Prefixed forms first: `basic_int` alone would stop right after the
// leading `0` of `0x...`, `0o...` or `0b...`.
int = { hex_int | octal_int | binary_int | basic_int }

// Exponent part of a float: `e`/`E`, optional sign, digits.
exponent = { ("e" | "E") ~ ("+" | "-")? ~ basic_int+ }
// Float literal: `NaN`, `Infinity`, a number with a fractional part (the
// integer part is optional, the exponent too), or an integer with an exponent.
// The negative lookahead after `NaN` / `Infinity` rejects a match that is
// only the prefix of a longer identifier (e.g. `NaNCount`, `Infinity_2`), so
// such names can still be parsed as identifiers. It consumes nothing, so the
// matched span is exactly the keyword.
float = @{
    (("NaN" | "Infinity") ~ !(ASCII_ALPHA | ASCII_DIGIT | "_"))
  | (basic_int? ~ "." ~ basic_int+ ~ exponent?)
  | (basic_int ~ exponent)
}

// Object bodies.
// Bare expressions are accepted as members so that listings and mappings
// parse; whether the members are valid for the object at hand is checked in
// the evaluation step.
obj_body = {
    "{"
    ~ amending_function?
    ~ (
        property
      | bracket_field
      | field_amending
      | iterable_spread
      | when_generator
      | for_generator
      | expr
    )*
    ~ "}"
}
// Parameter list at the start of a body: `a, b ->`.
amending_function = !{ ident ~ ("," ~ ident)* ~ "->" }
// One body followed by any number of further bodies.
object = !{ obj_body ~ obj_body* }

// TODO (original note): add support for objects amended with {} {} syntax
// Amends expression: `(name) { ... }` or `(qualified.name) { ... }`.
amended_object = !{ "(" ~ ident_with_opt_dots ~ ")" ~ object }

// Remainder of `[key] ...` once `bracket_field` has consumed the opening `[`:
// the key expression, `]`, then an object body or `= value`.
mapping_field   = !{ expr ~ "]" ~ (object | (EQUAL ~ (field_amending | expr))) }
// Remainder of `[[predicate]] ...` once the first `[` has been consumed.
predicate_match = !{ "[" ~ expr ~ "]" ~ "]" ~ (object | (EQUAL ~ expr)) }
// Entry starting with `[`. `predicate_match` (double bracket) is tried first.
bracket_field   = !{ "[" ~ (predicate_match | mapping_field) }

// `(this[key]) { ... }`
field_amending  = !{ "(" ~ "this" ~ "[" ~ expr ~ "]" ~ ")" ~ object }
is_nullable     = _{ "?" }
// Spread: `...expr` or `...?expr`.
iterable_spread = !{ "..." ~ is_nullable? ~ expr }

// `when (cond) { ... }` with an optional `else { ... }`.
when_generator = !{ "when" ~ "(" ~ expr ~ ")" ~ obj_body ~ ("else" ~ obj_body)? }
// `in` keyword of a for generator; at least one whitespace must follow it.
for_in = @{ "in" ~ WHITESPACE+ }
// `for (a in xs) { ... }` or `for (a, b in xs) { ... }`.
for_generator = !{
    "for" ~ "(" ~ ident ~ ("," ~ ident)* ~ for_in ~ expr ~ ")"
    ~ obj_body
}

// ********
//  *****
//  stmts
//  *****
// ********

// Property: optional modifiers, a name, and then one of
//   - an object body:            name { ... }
//   - an optionally typed value: name = expr   /   name: Type = expr
//   - a type only:               name: Type
// Compound-atomic: separators are explicit, and every modifier must be
// followed by at least one whitespace/comment.
property = ${
    (PREFIX_KEYWORD ~ WHITESPACE_COMMENT+)*
    ~ ident
    ~ WHITESPACE_COMMENT*
    ~ (
        object
      | ((COLON ~ WHITESPACE_COMMENT* ~ TYPE)? ~ EQUAL ~ WHITESPACE_COMMENT* ~ expr)
      | (COLON ~ WHITESPACE_COMMENT* ~ TYPE)
    )
}

// Comma-separated type parameter names of a typealias, with optional
// surrounding whitespace.
typealias_params = !{ WHITESPACE* ~ ident ~ ("," ~ ident)* ~ WHITESPACE* }
// `typealias Name = Type` or `typealias Name<A, B> = Type`.
typealias = ${
    "typealias" ~ WHITESPACE_COMMENT+
    ~ ident
    ~ ("<" ~ typealias_params ~ ">")?
    ~ EQUAL ~ WHITESPACE_COMMENT*
    ~ TYPE
}

// `module` keyword, optionally preceded by `open`.
module_keyword = @{ ("open" ~ WHITESPACE_COMMENT+)? ~ "module" }
// Module declaration: `module a.b.c` / `open module a.b.c`.
module = ${
    module_keyword
    ~ WHITESPACE_COMMENT+
    ~ ident_with_opt_dots
}

// Quoted, non-empty source location used by amends/extends/import.
// No escape handling: it runs to the next double quote.
src = { "\"" ~ (!"\"" ~ ANY)+ ~ "\"" }

// `amends "..."` and `extends "..."` clauses.
amends  = ${ "amends" ~ WHITESPACE_COMMENT+ ~ src }
extends = ${ "extends" ~ WHITESPACE_COMMENT+ ~ src }

// Alias name in `import "..." as name`.
import_as = { ident }
// `import "..."` or `import* "..."`, with an optional `as name` suffix.
// `globbed_import` is tried first; plain "import" would otherwise match the
// start of `import*` and leave the `*` behind.
import = ${
    (globbed_import | "import")
    ~ WHITESPACE_COMMENT+
    ~ src
    ~ (WHITESPACE_COMMENT+ ~ "as" ~ WHITESPACE_COMMENT+ ~ import_as)?
}

// One declared parameter: a name with an optional type.
arg = !{ ident ~ (COLON ~ TYPE)? }
// Comma-separated parameters (no trailing comma).
function_args = !{ arg ~ ("," ~ arg)* }
// `: ReturnType`
return_type_annotation = !{ COLON ~ TYPE }
function_name = { ident }
// Function declaration:
//   [modifiers] function name[<T, ...>]([args]) [: Type] [= body]
// Compound-atomic, hence the explicit separators between the parts.
function = ${
    (PREFIX_KEYWORD ~ WHITESPACE_COMMENT+)*
    ~ "function" ~ WHITESPACE_COMMENT+
    ~ function_name ~ WHITESPACE_COMMENT*
    ~ (type_attribute ~ WHITESPACE_COMMENT*)?
    ~ "(" ~ WHITESPACE_COMMENT* ~ function_args? ~ WHITESPACE_COMMENT* ~ ")"
    ~ return_type_annotation?
    ~ (EQUAL ~ WHITESPACE_COMMENT* ~ expr)?
}

// Type parameters marked with `out` / `in`.
out_type = ${ "out" ~ WHITESPACE_COMMENT+ ~ TYPE }
in_type  = ${ "in" ~ WHITESPACE_COMMENT+ ~ TYPE }
// Type parameter list of a class: `<A, out B, in C>`.
class_attribute = !{
    "<" ~ (out_type | in_type | TYPE)
    ~ ("," ~ (out_type | in_type | TYPE))*
    ~ ">"
}

// `extends SuperType`
class_extends = ${ "extends" ~ WHITESPACE_COMMENT+ ~ TYPE }
// Members of a class: properties and functions, in any order.
class_body = !{ (property | function)* }
// Class declaration:
//   [modifiers] class Name[<...>] [extends Type] [{ members }]
// Compound-atomic, hence the explicit separators between the parts.
class = ${
    (PREFIX_KEYWORD ~ WHITESPACE_COMMENT+)*
    ~ "class" ~ WHITESPACE_COMMENT+
    ~ ident
    ~ class_attribute?
    ~ (WHITESPACE_COMMENT* ~ class_extends)?
    ~ (WHITESPACE_COMMENT* ~ "{" ~ WHITESPACE_COMMENT* ~ class_body ~ WHITESPACE_COMMENT* ~ "}")?
}

// ********
//  *****
//  types
//  *****
// ********

// Named TYPE rather than `type`, which is a Rust keyword.
//
// A type is one or more members separated by `|` (union). Every member may be
// marked as the default with a leading `*` and as nullable with a trailing
// `?`. The same set of member forms, parenthesized types included, is
// accepted at every position of the union, so `A|(B|C)` parses just like
// `(A|B)|C`.
// `function_type` is tried before `paren_type`: both start with `(`, and
// only the former continues with `->`.
TYPE = !{
    (default_prefix? ~ (imported_type | basic_type | string | function_type | paren_type) ~ nullable?)
    ~ ("|" ~ (default_prefix? ~ (imported_type | basic_type | string | function_type | paren_type) ~ nullable?))*
}
nullable = _{ "?" }
default_prefix = _{ "*" }
// Plain type name, optionally with type arguments and/or constraints.
// The most specific form is tried first.
basic_type = { type_with_restraints | type_with_attr | ident }
// Type argument list: `<A, B>`.
type_attribute = { "<" ~ TYPE ~ ("," ~ TYPE)* ~ ">" }
type_with_attr = !{ ident ~ type_attribute }
// Type followed by constraint expressions: `Name(expr, ...)`.
type_with_restraints = !{ (type_with_attr | ident) ~ "(" ~ expr ~ ("," ~ expr)* ~ ")" }

// Function type: `(A, B) -> R`.
// The parameter list as a whole is optional (`?`): parameter types must be
// comma-separated and a single trailing comma is allowed. A repetition here
// instead would let parameter groups follow each other with no comma in
// between, e.g. `(A B) -> R`.
function_type = !{ "(" ~ (TYPE ~ ("," ~ TYPE)* ~ ","?)? ~ ")" ~ "->" ~ TYPE }
paren_type = !{ "(" ~ TYPE ~ ")" }
// Type qualified by zero or more `name.` prefixes, ex: semver.Version
imported_type = { (ident ~ ".")* ~ basic_type }