// Config
// Implicit skipping: pest matches WHITESPACE and COMMENT between the tokens of
// every normal (non-atomic) rule. Both rules are silent, so they never show up
// in the parse tree.
// "\r" is accepted so that input with CRLF line endings parses the same way as
// LF-only input.
WHITESPACE = _{ " " | "\n" | "\r" | "\t" }
// Line comment: from "#" up to, but not including, the next "\n".
COMMENT    = _{ "#" ~ (!"\n" ~ ANY)* }

// Literals
// `true`, `false` and `null` are atomic and end with a negative lookahead so
// that they only match as whole words. Without the lookahead, an identifier
// such as `true_count` or `nullable` is cut right after the keyword prefix and
// the rest of the word is left unparsed: a PEG choice never goes back into an
// alternative that already succeeded, so `ident` would not get a chance.
// The rules must be atomic (@); in a normal rule, implicit whitespace would be
// skipped before the lookahead and `true and x` would be rejected.
true_lit   = @{ "true" ~ !ident_char }
false_lit  = @{ "false" ~ !ident_char }
// Silent wrapper: only true_lit / false_lit appear in the parse tree.
bool       = _{ true_lit | false_lit }
null       = @{ "null" ~ !ident_char }
// A lone "_" character.
underscore =  { "_" }

// Numbers
// A number starts with a digit; later digits may be grouped with underscores
// (e.g. 1_000). `digit` and `positive_int` are silent helpers.
digit        = _{ ASCII_DIGIT }
positive_int = _{ digit ~ ("_" | digit)* }
// Atomic: no implicit whitespace is skipped inside an integer literal.
int          = @{ positive_int }
plus         =  { "+" }
minus        =  { "-" }
// Exponent suffix: case-insensitive "e", an optional sign, then an integer.
exp          = _{ ^"e" ~ (plus | minus)? ~ int }
// Two shapes are accepted:
//   - integer, ".", optional fraction, optional exponent ("1." is a float);
//   - integer directly followed by an exponent ("1e5").
float        = @{
    int ~ "." ~ positive_int? ~ exp?
  | int ~ exp
}

// Strings
// Unescaped runs: one or more characters that are neither a backslash nor the
// closing quote of the surrounding literal.
raw_single_quoted_string   =  { (!("'" | "\\") ~ ANY)+ }
raw_double_quoted_string   =  { (!("\"" | "\\") ~ ANY)+ }
raw_backtick_quoted_string =  { (!("`" | "\\") ~ ANY)+ }

// Escape sequences: a backslash followed by one of
//   - a predefined single character (n r t \ " ' ` 0),
//   - "x" and exactly two hex digits,
//   - "u{...}" with one to six hex digits.
hex                        =  { ASCII_HEX_DIGIT }
unicode_hex                =  { hex{1, 6} }
predefined                 =  { "n" | "r" | "t" | "\\" | "\"" | "'" | "`" | "0" }
byte                       =  { "x" ~ hex{2} }
unicode                    =  { "u" ~ "{" ~ unicode_hex ~ "}" }
escape                     =  { "\\" ~ (predefined | byte | unicode) }

// Quoted bodies are silent: their raw runs and escapes appear directly under
// `string` / `binary_string` in the parse tree.
single_quoted_string       = _{ "'" ~ (raw_single_quoted_string | escape)* ~ "'" }
double_quoted_string       = _{ "\"" ~ (raw_double_quoted_string | escape)* ~ "\"" }
backtick_quoted_string     = _{ "`" ~ (raw_backtick_quoted_string | escape)* ~ "`" }

// Compound-atomic ($): no implicit whitespace or comment skipping inside a
// literal, while the inner rules still produce pairs.
string                     = ${
    single_quoted_string
  | double_quoted_string
  | backtick_quoted_string
}
// Same three quote styles, prefixed with "b".
binary_string              = ${
    "b" ~ (
        single_quoted_string
      | double_quoted_string
      | backtick_quoted_string
    )
}

// Regexes
// Unescaped run: one or more characters that are neither a backslash nor the
// closing "/".
raw_regex_string =  { (!("\\" | "/") ~ ANY)+ }
// A backslash followed by any single character, so "\/" does not end the regex.
escape_regex     =  { "\\" ~ ANY }
// "i" is the only flag character accepted by the grammar.
regex_flag       =  { "i" }
// /body/flags. Compound-atomic ($): no implicit whitespace or comment skipping
// inside the literal, while the inner rules still produce pairs.
regex            = ${ "/" ~ (raw_regex_string | escape_regex)* ~ "/" ~ regex_flag* }

// Identifiers
// Characters allowed after the first one: ASCII letters, digits and "_".
ident_char = _{ ASCII_ALPHANUMERIC | "_" }
// An identifier starts with an ASCII letter, or with "_" followed by at least
// one more identifier character (so a lone "_" is not an identifier). A single
// trailing "?" is allowed. Atomic: no whitespace is skipped inside the name.
ident      = @{
    (ASCII_ALPHA ~ ident_char* | "_" ~ ident_char+) ~ "?"?
}

// Operators
// NOTE: order IS important. pest choices are ordered and the first alternative
// that matches wins, so an operator whose text begins with another operator's
// text must be listed before it: "++" before "+", "**" before "*", "//" before
// "/", "<=" / ">=" before "<" / ">", and "||" (part of `or`) before "|".
// The rule is silent: the matched operator rule itself appears in the tree.
binary_operator = _{
    concat
  | add
  | sub
  | pow
  | mul
  | idiv
  | div
  | rem
  | gen_eq
  | gen_ne
  | gen_le
  | gen_lt
  | gen_ge
  | gen_gt
  | str_eq
  | str_ne
  | str_le
  | str_lt
  | str_ge
  | str_gt
  | in_op
  | not_in
  | and
  | or
  | pipe
  | point
}

// Symbolic comparison operators.
gen_eq = { "==" }
gen_ne = { "!=" }
gen_lt = { "<" }
gen_le = { "<=" }
gen_gt = { ">" }
gen_ge = { ">=" }

// Word-style comparison operators.
// NOTE(review): these are plain literals with no word-boundary check, so "eq"
// also matches the first two letters of a longer word — confirm this is
// acceptable wherever an expression can be directly followed by an identifier.
str_eq = { "eq" }
str_ne = { "ne" }
str_lt = { "lt" }
str_le = { "le" }
str_gt = { "gt" }
str_ge = { "ge" }

// Arithmetic operators.
add  = { "+" }
sub  = { "-" }
mul  = { "*" }
div  = { "/" }
idiv = { "//" }
rem  = { "%" }
pow  = { "**" }

concat = { "++" }

point = { "." }
pipe  = { "|" }

// Each logical operator has a symbolic and a word spelling.
and = { "&&" | "and" }
or  = { "||" | "or" }

in_op  = { "in" }
// The literal contains exactly one space character, so "not" and "in" must be
// separated by a single space (not a tab, newline or several spaces).
not_in = { "not in" }

// Prefix operators. `neg` shares its text with `sub`; the two are told apart
// only by where the enclosing rule allows a unary vs. a binary operator.
unary_operator = _{ not | neg }
not            =  { "!" }
neg            =  { "-" }

// Opening bracket of an index/slice suffix; a normal rule, so it produces a
// pair in the parse tree.
open_indexing = { "[" }

// Functions
// A function argument, optionally named: `name = expr`.
// The trailing `| expr` is NOT redundant. pest's `?` never backtracks: for an
// argument like `a == b` the optional prefix consumes `a =`, the remaining
// `= b` is not an expression, and the whole first alternative fails. Only the
// second alternative can then re-parse the argument as a plain expression.
func_arg   = { ((ident ~ "=")? ~ expr) | expr }
// Call syntax: name(arg, arg, ...). The argument list is optional as a whole,
// so `f()` is still accepted while a dangling leading comma such as `f(, x)`
// is rejected.
func       = { ident ~ "(" ~ (func_arg ~ ("," ~ func_arg)*)? ~ ")" }
// One or more calls joined by ".".
func_chain = { func ~ (point ~ func)* }
// `x => expr` or `(x, y) => expr`; commas between parameters are optional.
lambda     = { (ident | ("(" ~ (ident ~ ","?)* ~ ")")) ~ "=>" ~ expr }

// Expressions
// A slice bound: one term with optional unary prefixes (no binary operators).
slice_term  =  { unary_operator* ~ term }
// start:end
full_slice  =  { slice_term ~ ":" ~ slice_term }
// start:
start_slice =  { slice_term ~ ":" }
// :end
end_slice   =  { ":" ~ slice_term }
// full_slice is tried first; otherwise "a:b" would be cut short as "a:".
slice       = _{ full_slice | start_slice | end_slice }
// A flat sequence: a first operand (with optional unary prefixes) followed by
// any number of `binary_operator operand` pairs or `[...]` suffixes. Inside
// the brackets `slice` is attempted before `expr`.
// The rule does not encode operator precedence or associativity — presumably
// that is resolved by the consumer of the parse tree (TODO confirm).
expr        =  { unary_operator* ~ term ~ (binary_operator ~ unary_operator* ~ term | open_indexing ~ (slice | expr) ~ "]")* }
// Ordered choice: earlier alternatives win over later ones sharing a prefix.
//   - func and lambda come before ident, since both can start with an
//     identifier; lambda also comes before the parenthesised expression,
//     since `(a, b) => ...` starts with "(" too;
//   - binary_string comes before string and ident, otherwise its "b" prefix
//     would be read as an identifier;
//   - float comes before int, otherwise "1.5" would be read as the int "1".
term        = _{
    func
  | lambda
  | bool
  | null
  | regex
  | binary_string
  | string
  | float
  | int
  | ident
  | underscore
  | list
  | map
  | "(" ~ expr ~ ")"
}

// Lists & Maps
// Comma-separated items. Silent, so the items appear directly under `list`.
list_body = _{ expr ~ ("," ~ expr)* }
// `[]` or `[a, b, ...]`. The body is optional (`?`) rather than repeated: with
// `*`, several bodies could follow one another, so `[1 2]` was accepted
// without a separating comma.
list      =  { "[" ~ list_body? ~ "]" }
// `key: value`, where the key is a string literal or a bare identifier.
map_entry =  { (string | ident) ~ ":" ~ expr }
// Comma-separated entries. Silent, so entries appear directly under `map`.
map_body  = _{ map_entry ~ ("," ~ map_entry)* }
// `{}` or `{k: v, ...}`. Optional body for the same reason as in `list`:
// `{a: 1 b: 2}` must not parse without a comma.
map       =  { "{" ~ map_body? ~ "}" }

// End-chain parsers
// Entry point: exactly one expression spanning the whole input.
full_expr = _{ SOI ~ expr ~ EOI }

// A parenthesised group of names; the commas between names are optional.
expr_name_tuple =  { "(" ~ ((ident | string) ~ ","?)+ ~ ")" }
// Target of an `as` clause: an identifier, a string literal or a tuple of them.
expr_name       =  { ident | string | expr_name_tuple }
// `expr as name`
named_expr      =  { expr ~ "as" ~ expr_name }
// named_expr is tried first; a bare expr is the fallback when no `as` follows.
opt_named_expr  = _{ named_expr | expr }
// Entry point: a comma-separated list of optionally named expressions, with an
// optional trailing comma, spanning the whole input.
named_exprs     = _{ SOI ~ opt_named_expr ~ ("," ~ opt_named_expr)* ~ ","? ~ EOI }

// Same shape as named_expr / named_exprs, but every item must be a function
// call rather than an arbitrary expression.
named_func     =  { func ~ "as" ~ expr_name }
opt_named_func = _{ named_func | func }
named_aggs     = _{ SOI ~ opt_named_func ~ ("," ~ opt_named_func)* ~ ","? ~ EOI }

// Characters allowed inside a selector after its first character.
css_char     = _{ LETTER | NUMBER | "*" | "_" | "-" | "[" | "]" | ":" | "." | "#" | ">" | "~" | "+" | "," | "\"" | "'" | "=" | "^" | "/" | "(" | ")" | "$" }
// A selector starts with an ASCII letter or one of : . # [ * & and continues
// with css_char. This is a normal rule, so implicit whitespace is skipped
// between characters and ends up inside the matched span — presumably how
// space-separated selectors such as `ul li` are supported (TODO confirm).
// NOTE(review): "#" is accepted here, but implicit COMMENT skipping also starts
// at "#" and runs both before and inside this rule. A selector such as
// `div#main` or `#main` may therefore be swallowed as a comment up to the end
// of the line — confirm with a test.
css_selector =  { ('a'..'z' | 'A'..'Z' | ":" | "." | "#" | "[" | "*" | "&") ~ css_char* }

// func_chain is tried first; css_selector is the fallback.
scraping_select   = _{ func_chain | css_selector }
// `name: extractor`, optionally followed by `, expr` and an optional ";".
// The extractor is a function call or a bare identifier.
scraping_leaf     =  { expr_name ~ ":" ~ (func | ident) ~ ("," ~ expr)? ~ ";"? }
// `selection { ... }`; the braces hold any mix of nested brackets and leaves.
scraping_brackets =  { scraping_select ~ "{" ~ (scraping_brackets | scraping_leaf)* ~ "}" }
// Entry point: one or more top-level bracket blocks spanning the whole input.
scraping_expr     = _{ SOI ~ scraping_brackets+ ~ EOI }