// JSONPath Parser
// This grammar defines the structure for parsing JSONPath expressions.
// It follows a structured approach with grouped rules for clarity and maintainability.
// Entry Point
// Defines the overall structure of a JSONPath expression, starting with SOI (start of input) and ending with EOI (end of input).
// Top-level rule. Silent, so the pairs produced by the query's segments
// appear at the top of the parse result without a wrapping pair.
jsonpath = _{
    SOI ~ jsonpath_query ~ EOI
}
// Core Query Structure
// The root identifier followed by a (possibly empty) list of segments.
jsonpath_query = _{
    root_identifier ~ segments
}
// Segments
// Zero or more segments; each one may be preceded by optional whitespace.
segments = _{
    (S ~ segment)*
}
// Whitespace Handling
// A single blank character.
B = _{
    "\x20"    // space
  | "\x09"    // horizontal tab
  | "\x0A"    // newline (line feed)
  | "\x0D"    // carriage return
}
// Optional whitespace: any run of blanks, including the empty run.
S = _{
    B*
}
// Identifiers
// Root node identifier; every jsonpath_query and abs_singular_query starts with it.
root_identifier = _{
    "$"
}
// Current node identifier; starts the relative queries (rel_query, rel_singular_query).
current_node_identifier = _{
    "@"
}
// Segment Types
// A segment is either a child or a descendant segment. The child form is tried
// first; on input starting with ".." it fails after the first "." and the
// descendant form is tried instead.
segment = _{
    child_segment
  | descendant_segment
}
// Child segment: a bracketed selection, or "." followed by a wildcard or a
// shorthand member name.
child_segment = {
    bracketed_selection
  | "." ~ (wildcard_selector | member_name_shorthand)
}
// Descendant segment: ".." followed by a bracketed selection, a wildcard or a
// shorthand member name.
descendant_segment = {
    ".." ~ (bracketed_selection | wildcard_selector | member_name_shorthand)
}
// Bracketed Selection
// One or more comma-separated selectors enclosed in "[" and "]". Optional
// whitespace is accepted after "[", around each ",", and before "]".
bracketed_selection = {
    "[" ~ S
  ~ selector ~ (S ~ "," ~ S ~ selector)*
  ~ S ~ "]"
}
// Selectors
// Any selector allowed inside a bracketed selection. Alternatives are tried in
// order; slice_selector precedes index_selector because a slice may itself
// begin with an integer.
selector = _{
    name_selector
  | wildcard_selector
  | slice_selector
  | index_selector
  | filter_selector
}
// Wildcard: matches any element.
wildcard_selector = {
    "*"
}
// Name selector: a quoted string naming a specific member. Silent, so only the
// string body pair is emitted.
name_selector = _{
    string_literal
}
// Index selector: a single integer. Atomic, so no inner `int` pair is produced.
index_selector = @{
    int
}
// Array slice: optional start, ":", optional stop, then an optional second ":"
// with an optional step. Whitespace is allowed between the parts.
slice_selector = {
    (start ~ S)? ~ ":" ~ S
  ~ (stop ~ S)?
  ~ (":" ~ (S ~ step)?)?
}
// Filter: "?" followed by a logical expression.
filter_selector = {
    "?" ~ S ~ logical_expr
}
// Integer and Slice Components
// Integer: either a lone "0", or an optional "-" followed by a digit string
// that does not start with zero. A negated zero ("-0") is not matched here.
int = {
    "0"
  | "-"? ~ ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*
}
// Slice bounds and stride. Each is an atomic wrapper around `int`, so each one
// shows up as a single, distinctly named pair.
start = @{
    int
}
stop = @{
    int
}
step = @{
    int
}
// String Literals
// A string in double quotes (\x22) or single quotes (\x27). The rule is silent,
// so only the inner double_quoted / single_quoted pair is emitted; the
// surrounding quote characters are not part of that pair.
string_literal = _{ ("\x22" ~ double_quoted ~ "\x22") | ("\x27" ~ single_quoted ~ "\x27") }
// Body of a double-quoted string; may be empty.
double_quoted = { double_quoted_char* }
// Body of a single-quoted string; may be empty.
single_quoted = { single_quoted_char* }
// One character inside double quotes: an unescaped character, a bare single
// quote, an escaped double quote, or a backslash followed by an escapable sequence.
double_quoted_char = _{ unescaped | "\x27" | (ESC ~ "\x22") | (ESC ~ escapable) }
// One character inside single quotes: an unescaped character, a bare double
// quote, an escaped single quote, or a backslash followed by an escapable sequence.
single_quoted_char = _{ unescaped | "\x22" | (ESC ~ "\x27") | (ESC ~ escapable) }
ESC = _{ "\x5C" } // Escape character (backslash)
// Escapable Characters
// What may follow the backslash in an escape: b, f, n, r, t (given as hex
// codes), "/", a backslash, or "u" (\x75, lowercase only) plus a hexchar.
escapable = _{ "\x62" | "\x66" | "\x6E" | "\x72" | "\x74" | "/" | "\\" | ("\x75" ~ hexchar) }
// Payload of a \u escape: four hex digits outside the surrogate range, or a
// high surrogate followed by a literal `\u` and a low surrogate.
hexchar = _{ non_surrogate | (high_surrogate ~ "\\u" ~ low_surrogate) }
// Four hex digits outside D800-DFFF: either the first digit is not D, or it is
// D followed by a digit 0-7. `^"..."` matches letters case-insensitively.
non_surrogate = _{ ((ASCII_DIGIT | ^"A" | ^"B" | ^"C" | ^"E" | ^"F") ~ ASCII_HEX_DIGIT{3}) | (^"D" ~ '\u{30}'..'\u{37}' ~ ASCII_HEX_DIGIT{2}) }
// Four hex digits in D800-DBFF.
high_surrogate = _{ ^"D" ~ ("8" | "9" | ^"A" | ^"B") ~ ASCII_HEX_DIGIT{2} }
// Four hex digits in DC00-DFFF.
low_surrogate = _{ ^"D" ~ (^"C" | ^"D" | ^"E" | ^"F") ~ ASCII_HEX_DIGIT{2} }
// Unescaped Characters
// Characters that may appear in a string body without escaping. The ranges
// leave out everything below U+0020, the double quote (U+0022), the single
// quote (U+0027), the backslash (U+005C) and the surrogate block U+D800-U+DFFF.
// The quote characters are re-admitted per quote style by double_quoted_char
// and single_quoted_char.
unescaped = _{ '\u{20}'..'\u{21}' | '\u{23}'..'\u{26}' | '\u{28}'..'\u{5B}' | '\u{5D}'..'\u{D7FF}' | '\u{E000}'..'\u{10FFFF}' }
// Logical Expressions
// Entry point for logical expressions; a silent alias for the "or" level.
logical_expr = _{
    logical_or_expr
}
// One or more "and" expressions joined by "||".
logical_or_expr = {
    logical_and_expr ~ (S ~ "||" ~ S ~ logical_and_expr)*
}
// One or more basic expressions joined by "&&".
logical_and_expr = {
    basic_expr ~ (S ~ "&&" ~ S ~ basic_expr)*
}
// The operand of "&&" / "||". comparison_expr is tried before test_expr so that
// a query followed by a comparison operator is read as a comparison rather
// than as an existence test that stops before the operator.
basic_expr = _{
    paren_expr
  | comparison_expr
  | test_expr
}
// A parenthesised logical expression, optionally negated with "!".
paren_expr = {
    (logical_not_op ~ S)? ~ "(" ~ S ~ logical_expr ~ S ~ ")"
}
// Negation operator.
logical_not_op = {
    "!"
}
// A filter query or a function call used as a test, optionally negated with "!".
test_expr = {
    (logical_not_op ~ S)? ~ (filter_query | function_expr)
}
// Queries in Filters
// A query used inside a filter: the relative form ("@...") is tried before the
// absolute form ("$...").
filter_query = _{
    rel_query
  | root_query
}
// Absolute query. Wraps the silent jsonpath_query so that it appears as a pair
// of its own.
root_query = {
    jsonpath_query
}
// Relative query: the current node identifier followed by any segments.
rel_query = {
    current_node_identifier ~ segments
}
// Comparison Expressions
// A comparison: two comparables around an operator, with optional whitespace
// on either side of the operator.
comparison_expr = { comparable ~ S ~ comparison_op ~ S ~ comparable }
// Comparison operators. "<=" and ">=" are listed before "<" and ">" so that
// ordered choice does not stop after the first character of the longer form.
comparison_op = { "==" | "!=" | "<=" | ">=" | "<" | ">" | "contains" | "in" }
// Either operand of a comparison.
// function_expr is tried first on purpose. Ordered choice commits to the first
// alternative that matches, so with `literal` first a call whose name begins
// with a keyword (for example `nullable(@) == 1`) was cut off after `null`, the
// operator could not be found, and the whole comparison was rejected. A bare
// literal still parses as before: function_expr needs a "(" directly after the
// name, fails without one, and the choice moves on to `literal`.
comparable = _{ function_expr | literal | singular_query }
// Literals
// Any literal value usable in a comparison or as a function argument.
literal = _{
    number
  | string_literal
  | array_literal
  | true_literal
  | false_literal
  | null
}
// Boolean and null keywords.
true_literal = {
    "true"
}
false_literal = {
    "false"
}
null = {
    "null"
}
// Number: an integer part, then an optional fraction and an optional exponent.
// "-0" is listed separately because `int` does not match a negated zero.
number = {
    (int | "-0") ~ frac? ~ exp?
}
// Fraction: "." followed by at least one digit.
frac = {
    "." ~ ASCII_DIGIT+
}
// Exponent: "e" or "E", an optional sign, then at least one digit.
exp = {
    ^"e" ~ ("-" | "+")? ~ ASCII_DIGIT+
}
// Array literal: zero or more comma-separated literals between "[" and "]",
// with optional whitespace around each element.
array_literal = {
    "[" ~ S
  ~ (literal ~ (S ~ "," ~ S ~ literal)*)?
  ~ S ~ "]"
}
// Singular Queries
// A query restricted to name and index segments; relative form first, then absolute.
singular_query = _{
    rel_singular_query
  | abs_singular_query
}
// Singular query starting at the current node ("@").
rel_singular_query = {
    current_node_identifier ~ singular_query_segments
}
// Singular query starting at the root ("$").
abs_singular_query = {
    root_identifier ~ singular_query_segments
}
// Zero or more name/index segments, each optionally preceded by whitespace.
singular_query_segments = _{
    (S ~ (name_segment | index_segment))*
}
// Name segment: a bracketed name selector, or "." plus a shorthand member name.
// No whitespace is accepted inside the brackets here.
name_segment = {
    "[" ~ name_selector ~ "]"
  | "." ~ member_name_shorthand
}
// Index segment: a bracketed index selector, with no inner whitespace.
index_segment = {
    "[" ~ index_selector ~ "]"
}
// Function Expressions
// Function call: a name, then a parenthesised, comma-separated (possibly empty)
// argument list. Whitespace is allowed inside the parentheses and around the
// commas, but not between the name and "(".
function_expr = { function_name ~ "(" ~ S ~ (function_argument ~ (S ~ "," ~ S ~ function_argument)*)? ~ S ~ ")" }
// Function name: a lowercase ASCII letter followed by lowercase letters,
// digits or underscores.
function_name = { function_name_first ~ function_name_char* }
function_name_first = _{ ASCII_ALPHA_LOWER }
function_name_char = _{ function_name_first | "_" | ASCII_DIGIT }
// A single function argument.
// A bare filter_query is accepted only when the argument ends right after it,
// i.e. when the lookahead sees optional whitespace and then "," or ")".
// Without that guard, ordered choice committed to the query prefix of inputs
// such as `f(@.a == 1)` or `f(@.a && @.b)`, and the enclosing call then failed
// at the operator instead of trying logical_expr. Arguments that parsed before
// keep the same parse tree, because the lookahead consumes no input.
// NOTE(review): logical_expr reaches function_expr through test_expr, so the
// explicit function_expr alternative below looks unreachable; it is kept so
// the rule's set of alternatives stays as it was.
function_argument = _{
    (filter_query ~ &(S ~ ("," | ")")))
  | logical_expr
  | function_expr
  | literal
}
// Member Names
// Unquoted member name as used after "." or "..": a first character followed
// by any number of name characters.
member_name_shorthand = {
    name_first ~ name_char*
}
// First character of a shorthand name: an ASCII letter, "_", or any code point
// from U+0080 upward except the surrogate block U+D800-U+DFFF.
name_first = _{
    ASCII_ALPHA
  | "_"
  | '\u{80}'..'\u{D7FF}'
  | '\u{E000}'..'\u{10FFFF}'
}
// Later characters: anything allowed first, plus ASCII digits.
name_char = _{
    name_first
  | ASCII_DIGIT
}