interstellar 0.2.0

// GQL Grammar - Extended with WHERE clause and full expression support
// Supports:
// - Node patterns: (n:Label {prop: value})
// - Edge patterns: -[e:TYPE]->, <-[e:TYPE]-, -[e:TYPE]-
// - Multiple patterns: (a), (b)
// - Path quantifiers: *1..3
// - Extended RETURN: n.name, n.age AS personAge
// - WHERE clause with comparison, logical, and string operators
// - Arithmetic expressions
// - Aggregate functions with DISTINCT: count(DISTINCT p.city)

// Implicit whitespace. pest consults this silent rule between the tokens of
// every non-atomic rule, so spaces, tabs, carriage returns, and newlines may
// appear between tokens without being mentioned in the rules themselves.
WHITESPACE = _{
    " "
    | "\t"
    | "\r"
    | "\n"
}

// Keywords (case-insensitive) with word boundary check
// Each keyword is atomic (@) so no implicit whitespace is allowed inside it,
// and ends with a negative lookahead that makes it match whole words only.
// e.g., OR must not match the "OR" in "ORDER", and NOT must not match the
// "not" in "not_found".
//
// The boundary is !(ASCII_ALPHANUMERIC | "_") rather than !ASCII_ALPHANUMERIC
// because identifiers and variables may contain "_". With an alphanumeric-only
// boundary a keyword matched the prefix of names such as not_found,
// true_positive, distinct_ids, or path_len, and the rule that consumed the
// prefix then failed on the leftover "_...". No token in this grammar can
// start with "_", so the stricter boundary only turns those failures into
// successful parses.
MATCH    = @{ ^"match" ~ !(ASCII_ALPHANUMERIC | "_") }
RETURN   = @{ ^"return" ~ !(ASCII_ALPHANUMERIC | "_") }
WHERE    = @{ ^"where" ~ !(ASCII_ALPHANUMERIC | "_") }
ORDER    = @{ ^"order" ~ !(ASCII_ALPHANUMERIC | "_") }
BY       = @{ ^"by" ~ !(ASCII_ALPHANUMERIC | "_") }
GROUP    = @{ ^"group" ~ !(ASCII_ALPHANUMERIC | "_") }
LIMIT    = @{ ^"limit" ~ !(ASCII_ALPHANUMERIC | "_") }
OFFSET   = @{ ^"offset" ~ !(ASCII_ALPHANUMERIC | "_") }
AS       = @{ ^"as" ~ !(ASCII_ALPHANUMERIC | "_") }
AND      = @{ ^"and" ~ !(ASCII_ALPHANUMERIC | "_") }
OR       = @{ ^"or" ~ !(ASCII_ALPHANUMERIC | "_") }
NOT      = @{ ^"not" ~ !(ASCII_ALPHANUMERIC | "_") }
TRUE     = @{ ^"true" ~ !(ASCII_ALPHANUMERIC | "_") }
FALSE    = @{ ^"false" ~ !(ASCII_ALPHANUMERIC | "_") }
NULL     = @{ ^"null" ~ !(ASCII_ALPHANUMERIC | "_") }
ASC      = @{ ^"asc" ~ !(ASCII_ALPHANUMERIC | "_") }
DESC     = @{ ^"desc" ~ !(ASCII_ALPHANUMERIC | "_") }
IN       = @{ ^"in" ~ !(ASCII_ALPHANUMERIC | "_") }
IS       = @{ ^"is" ~ !(ASCII_ALPHANUMERIC | "_") }
CONTAINS = @{ ^"contains" ~ !(ASCII_ALPHANUMERIC | "_") }
STARTS   = @{ ^"starts" ~ !(ASCII_ALPHANUMERIC | "_") }
ENDS     = @{ ^"ends" ~ !(ASCII_ALPHANUMERIC | "_") }
WITH     = @{ ^"with" ~ !(ASCII_ALPHANUMERIC | "_") }
DISTINCT = @{ ^"distinct" ~ !(ASCII_ALPHANUMERIC | "_") }
EXISTS   = @{ ^"exists" ~ !(ASCII_ALPHANUMERIC | "_") }
CASE     = @{ ^"case" ~ !(ASCII_ALPHANUMERIC | "_") }
WHEN     = @{ ^"when" ~ !(ASCII_ALPHANUMERIC | "_") }
THEN     = @{ ^"then" ~ !(ASCII_ALPHANUMERIC | "_") }
ELSE     = @{ ^"else" ~ !(ASCII_ALPHANUMERIC | "_") }
END      = @{ ^"end" ~ !(ASCII_ALPHANUMERIC | "_") }
UNION    = @{ ^"union" ~ !(ASCII_ALPHANUMERIC | "_") }
ALL      = @{ ^"all" ~ !(ASCII_ALPHANUMERIC | "_") }
OPTIONAL = @{ ^"optional" ~ !(ASCII_ALPHANUMERIC | "_") }
PATH     = @{ ^"path" ~ !(ASCII_ALPHANUMERIC | "_") }
UNWIND   = @{ ^"unwind" ~ !(ASCII_ALPHANUMERIC | "_") }
LET      = @{ ^"let" ~ !(ASCII_ALPHANUMERIC | "_") }
SKIP     = @{ ^"skip" ~ !(ASCII_ALPHANUMERIC | "_") }
HAVING   = @{ ^"having" ~ !(ASCII_ALPHANUMERIC | "_") }
ANY_KW   = @{ ^"any" ~ !(ASCII_ALPHANUMERIC | "_") }
NONE_KW  = @{ ^"none" ~ !(ASCII_ALPHANUMERIC | "_") }
SINGLE   = @{ ^"single" ~ !(ASCII_ALPHANUMERIC | "_") }

// Subquery keywords
CALL     = @{ ^"call" ~ !(ASCII_ALPHANUMERIC | "_") }
FOREACH  = @{ ^"foreach" ~ !(ASCII_ALPHANUMERIC | "_") }
YIELD    = @{ ^"yield" ~ !(ASCII_ALPHANUMERIC | "_") }

// Mutation keywords
CREATE   = @{ ^"create" ~ !(ASCII_ALPHANUMERIC | "_") }
SET      = @{ ^"set" ~ !(ASCII_ALPHANUMERIC | "_") }
REMOVE   = @{ ^"remove" ~ !(ASCII_ALPHANUMERIC | "_") }
DELETE   = @{ ^"delete" ~ !(ASCII_ALPHANUMERIC | "_") }
DETACH   = @{ ^"detach" ~ !(ASCII_ALPHANUMERIC | "_") }
MERGE    = @{ ^"merge" ~ !(ASCII_ALPHANUMERIC | "_") }
ON       = @{ ^"on" ~ !(ASCII_ALPHANUMERIC | "_") }

// Schema/DDL keywords
// The _KW suffix only disambiguates the rule name; the matched text is the
// bare word (from, to, drop, warn).
TYPE       = @{ ^"type" ~ !(ASCII_ALPHANUMERIC | "_") }
NODE       = @{ ^"node" ~ !(ASCII_ALPHANUMERIC | "_") }
EDGE       = @{ ^"edge" ~ !(ASCII_ALPHANUMERIC | "_") }
GRAPH      = @{ ^"graph" ~ !(ASCII_ALPHANUMERIC | "_") }
FROM_KW    = @{ ^"from" ~ !(ASCII_ALPHANUMERIC | "_") }
TO_KW      = @{ ^"to" ~ !(ASCII_ALPHANUMERIC | "_") }
ALTER      = @{ ^"alter" ~ !(ASCII_ALPHANUMERIC | "_") }
DROP_KW    = @{ ^"drop" ~ !(ASCII_ALPHANUMERIC | "_") }
ADD        = @{ ^"add" ~ !(ASCII_ALPHANUMERIC | "_") }
ALLOW      = @{ ^"allow" ~ !(ASCII_ALPHANUMERIC | "_") }
ADDITIONAL = @{ ^"additional" ~ !(ASCII_ALPHANUMERIC | "_") }
PROPERTIES = @{ ^"properties" ~ !(ASCII_ALPHANUMERIC | "_") }
VALIDATION = @{ ^"validation" ~ !(ASCII_ALPHANUMERIC | "_") }
SCHEMA     = @{ ^"schema" ~ !(ASCII_ALPHANUMERIC | "_") }
STRICT     = @{ ^"strict" ~ !(ASCII_ALPHANUMERIC | "_") }
CLOSED     = @{ ^"closed" ~ !(ASCII_ALPHANUMERIC | "_") }
WARN_KW    = @{ ^"warn" ~ !(ASCII_ALPHANUMERIC | "_") }
DEFAULT    = @{ ^"default" ~ !(ASCII_ALPHANUMERIC | "_") }

// Property type keywords
// ANY_TYPE matches the same text as ANY_KW; they are separate rules so the
// type position and the list-predicate position yield distinct rule names.
STRING_TYPE = @{ ^"string" ~ !(ASCII_ALPHANUMERIC | "_") }
INT_TYPE    = @{ ^"int" ~ !(ASCII_ALPHANUMERIC | "_") }
FLOAT_TYPE  = @{ ^"float" ~ !(ASCII_ALPHANUMERIC | "_") }
BOOL_TYPE   = @{ ^"bool" ~ !(ASCII_ALPHANUMERIC | "_") }
LIST_TYPE   = @{ ^"list" ~ !(ASCII_ALPHANUMERIC | "_") }
MAP_TYPE    = @{ ^"map" ~ !(ASCII_ALPHANUMERIC | "_") }
ANY_TYPE    = @{ ^"any" ~ !(ASCII_ALPHANUMERIC | "_") }

// Index DDL keywords
INDEX      = @{ ^"index" ~ !(ASCII_ALPHANUMERIC | "_") }
UNIQUE     = @{ ^"unique" ~ !(ASCII_ALPHANUMERIC | "_") }

// Top-level entry rule. The whole input (SOI through EOI) must form exactly
// one statement. Alternatives are tried in order: DDL, then mutation, then
// read query (with optional UNION).
statement = {
    SOI ~
    (ddl_statement | mutation_statement | read_statement) ~
    EOI
}

// Read-only statement: one query, optionally followed by further queries,
// each introduced by a union_clause.
read_statement = {
    query ~
    (union_clause ~ query)*
}

// ============================================================
// Mutation Statements
// ============================================================
// Three statement shapes are accepted, tried in this order:
//   1. CREATE-only:       CREATE (n:Label) [RETURN ...]
//   2. MATCH + mutations: MATCH (n) SET n.prop = val [RETURN ...]
//   3. MERGE:             MERGE (n:Label) ON CREATE SET ... ON MATCH SET ... [RETURN ...]
mutation_statement = { create_only_statement | match_mutation_statement | merge_statement }

// One or more CREATE clauses with no preceding MATCH, then an optional RETURN.
// e.g., CREATE (n:Person {name: 'Alice'}) RETURN n
create_only_statement = {
    create_clause+ ~
    return_clause?
}

// MATCH (plus any OPTIONAL MATCH clauses and an optional WHERE) followed by
// at least one mutation clause. FOREACH clauses are only accepted after the
// mutation clauses; RETURN is optional and comes last.
// e.g., MATCH (n:Person) WHERE n.name = 'Alice' SET n.age = 31 RETURN n
match_mutation_statement = { match_clause ~ optional_match_clause* ~ where_clause? ~ mutation_clause+ ~ foreach_clause* ~ return_clause? }

// MERGE statement (upsert): the MERGE pattern, any number of
// ON CREATE / ON MATCH actions, then an optional RETURN.
// e.g., MERGE (n:Person {name: 'Alice'}) ON CREATE SET n.created = 123
merge_statement = {
    merge_clause ~
    merge_action* ~
    return_clause?
}

// ============================================================
// DDL Statements (Schema Definition)
// ============================================================

// Any schema or index definition statement. Several alternatives begin with
// the same keyword (CREATE, ALTER, DROP); PEG ordered choice tries them in
// the order written and backtracks when a later token does not fit.
ddl_statement = {
    create_index | drop_index
    | create_node_type | create_edge_type
    | alter_node_type | alter_edge_type
    | drop_node_type | drop_edge_type
    | set_schema_validation
}

// Node type definition; the parenthesised property list may be empty.
// e.g., CREATE NODE TYPE Person (name STRING NOT NULL, age INT)
create_node_type = { CREATE ~ NODE ~ TYPE ~ identifier ~ "(" ~ property_def_list? ~ ")" }

// Edge type definition; same shape as a node type plus a mandatory
// FROM ... TO ... endpoint clause.
// e.g., CREATE EDGE TYPE KNOWS (since INT) FROM Person TO Person
create_edge_type = { CREATE ~ EDGE ~ TYPE ~ identifier ~ "(" ~ property_def_list? ~ ")" ~ edge_endpoint_clause }

// Endpoint clause: one list of type names after FROM and one after TO.
// e.g., FROM Person, Employee TO Company, Startup
edge_endpoint_clause = {
    FROM_KW ~ type_name_list ~
    TO_KW ~ type_name_list
}

// One or more type names separated by commas.
type_name_list = {
    identifier ~
    ("," ~ identifier)*
}

// One or more property definitions separated by commas.
property_def_list = {
    property_def ~
    ("," ~ property_def)*
}

// A single property definition: name, type, then the optional modifiers in
// fixed order (NOT NULL before DEFAULT).
// e.g., name STRING NOT NULL DEFAULT 'unknown'
property_def = { identifier ~ property_type ~ not_null_modifier? ~ default_modifier? }

// NOT NULL modifier
not_null_modifier = { NOT ~ NULL }

// DEFAULT modifier; the default value must be a literal.
default_modifier = { DEFAULT ~ literal }

// Property types. The parameterisable list/map forms are tried before the
// scalar type keywords.
property_type = { list_type | map_type | STRING_TYPE | INT_TYPE | FLOAT_TYPE | BOOL_TYPE | ANY_TYPE }

// LIST or LIST<STRING>; the element type is itself a property_type, so
// nesting such as LIST<LIST<INT>> is accepted.
list_type = {
    LIST_TYPE ~
    ("<" ~ property_type ~ ">")?
}

// MAP or MAP<INT>; a single optional type parameter, same shape as list_type.
map_type = {
    MAP_TYPE ~
    ("<" ~ property_type ~ ">")?
}

// ALTER on a node type: the type name followed by exactly one action.
// e.g., ALTER NODE TYPE Person ALLOW ADDITIONAL PROPERTIES
// e.g., ALTER NODE TYPE Person ADD email STRING
// e.g., ALTER NODE TYPE Person DROP email
alter_node_type = { ALTER ~ NODE ~ TYPE ~ identifier ~ alter_type_action }

// ALTER on an edge type: same actions as for node types.
// e.g., ALTER EDGE TYPE KNOWS ALLOW ADDITIONAL PROPERTIES
alter_edge_type = { ALTER ~ EDGE ~ TYPE ~ identifier ~ alter_type_action }

// The single action an ALTER statement carries.
alter_type_action = { allow_additional_properties | add_property_action | drop_property_action }

// ALLOW ADDITIONAL PROPERTIES
allow_additional_properties = {
    ALLOW ~ ADDITIONAL ~ PROPERTIES
}

// ADD followed by a full property definition (name, type, modifiers).
add_property_action = {
    ADD ~ property_def
}

// DROP followed by the property name.
drop_property_action = {
    DROP_KW ~ identifier
}

// e.g., DROP NODE TYPE Person
drop_node_type = {
    DROP_KW ~ NODE ~ TYPE ~ identifier
}

// e.g., DROP EDGE TYPE KNOWS
drop_edge_type = {
    DROP_KW ~ EDGE ~ TYPE ~ identifier
}

// e.g., SET SCHEMA VALIDATION STRICT
set_schema_validation = {
    SET ~ SCHEMA ~ VALIDATION ~ validation_mode
}

// Accepted validation modes: STRICT, CLOSED, WARN, NONE.
validation_mode = {
    STRICT
    | CLOSED
    | WARN_KW
    | NONE_KW
}

// ============================================================
// Index DDL Statements
// ============================================================
// CREATE INDEX idx_name ON :Label(property)
// CREATE UNIQUE INDEX idx_name ON :Label(property)
// CREATE RTREE INDEX idx_name ON :Label(property)   (spec-56)
// CREATE INDEX idx_name ON (property)  -- all labels
// DROP INDEX idx_name

// CREATE [UNIQUE | RTREE] INDEX name ON [:Label](property)
// At most one of UNIQUE / RTREE may appear. The first identifier is the index
// name, the one in parentheses is the indexed property.
create_index = {
    CREATE ~ (UNIQUE | RTREE)? ~ INDEX ~ identifier ~
    ON ~ index_target ~ "(" ~ identifier ~ ")"
}

// RTREE keyword (spec-56).
// Atomic, case-insensitive, and word-bounded like every other keyword in this
// grammar, so "create rtree index ..." is accepted just as
// "create unique index ..." is. The boundary includes "_" because identifiers
// may contain it.
RTREE = @{ ^"rtree" ~ !(ASCII_ALPHANUMERIC | "_") }

// Index target: :Label for specific label, or empty for all labels
// :Person means only vertices with label Person
// :KNOWS means only edges with label KNOWS
// The rule can match the empty string, so it always succeeds inside
// create_index.
index_target = { (":" ~ identifier)? }

// DROP INDEX name
drop_index = { DROP_KW ~ INDEX ~ identifier }

// ============================================================
// Mutation Clauses
// ============================================================
// Any single mutation clause usable after MATCH.
mutation_clause = {
    create_clause
    | set_clause
    | remove_clause
    | delete_clause
    | detach_delete_clause
}

// FOREACH clause: binds a variable to each element of a list expression and
// applies one or more mutations.
//   FOREACH (variable IN expression | mutation+)
// e.g., FOREACH (n IN nodes(p) | SET n.visited = true)
// e.g., FOREACH (i IN items | SET i.processed = true REMOVE i.pending)
foreach_clause = {
    FOREACH ~ "(" ~
    identifier ~ IN ~ expression ~
    pipe_token ~ foreach_mutation+ ~
    ")"
}

// Mutations permitted in a FOREACH body, including a nested FOREACH.
foreach_mutation = {
    set_clause
    | remove_clause
    | delete_clause
    | detach_delete_clause
    | create_clause
    | nested_foreach
}

// FOREACH appearing inside another FOREACH body. Same syntax as
// foreach_clause, under its own rule name.
nested_foreach = {
    FOREACH ~ "(" ~
    identifier ~ IN ~ expression ~
    pipe_token ~ foreach_mutation+ ~
    ")"
}

// CREATE clause: one or more comma-separated patterns (vertices and edges).
// e.g., CREATE (n:Person {name: 'Alice'}), (m:Person {name: 'Bob'})
create_clause = {
    CREATE ~
    pattern ~ ("," ~ pattern)*
}

// SET clause: one or more comma-separated property assignments.
// e.g., SET n.age = 31, n.status = 'active'
set_clause = {
    SET ~
    set_item ~ ("," ~ set_item)*
}
// A single assignment; the target must be a property access (var.prop).
set_item = {
    property_access ~ "=" ~ expression
}

// REMOVE clause: one or more comma-separated property accesses.
// e.g., REMOVE n.age, n.status
remove_clause = {
    REMOVE ~
    remove_item ~ ("," ~ remove_item)*
}
remove_item = {
    property_access
}

// DELETE clause: deletes elements (fails if vertex has edges).
// e.g., DELETE n, m
delete_clause = {
    DELETE ~
    variable ~ ("," ~ variable)*
}

// DETACH DELETE clause: deletes vertices with automatic edge removal.
// e.g., DETACH DELETE n
detach_delete_clause = {
    DETACH ~ DELETE ~
    variable ~ ("," ~ variable)*
}

// MERGE clause: upsert of a single pattern (match or create).
// e.g., MERGE (n:Person {name: 'Alice'})
merge_clause = {
    MERGE ~ pattern
}

// MERGE actions: SET lists applied on create or on match.
merge_action = {
    on_create_action
    | on_match_action
}
on_create_action = {
    ON ~ CREATE ~ SET ~
    set_item ~ ("," ~ set_item)*
}
on_match_action = {
    ON ~ MATCH ~ SET ~
    set_item ~ ("," ~ set_item)*
}

// UNION clause: joins the results of two queries.
// Plain UNION deduplicates results; UNION ALL keeps duplicates.
union_clause = {
    UNION ~ ALL?
}

// A single read query. Only MATCH and RETURN are mandatory; the other clauses
// are optional but must appear in exactly the order listed here.
query = {
    match_clause ~            // required MATCH
    optional_match_clause* ~  // OPTIONAL MATCH clauses follow the main MATCH
    with_path_clause? ~       // WITH PATH enables path tracking
    unwind_clause* ~          // UNWIND expands lists into rows
    where_clause? ~
    call_clause* ~            // CALL { ... } subqueries
    call_procedure_clause* ~  // CALL name(args) YIELD ...
    let_clause* ~             // LET binds computed values for use in RETURN
    with_clause* ~            // WITH pipes results between query parts
    return_clause ~           // required RETURN
    group_by_clause? ~        // GROUP BY comes after RETURN (GQL/SQL style)
    having_clause? ~          // HAVING filters groups after aggregation
    order_clause? ~
    limit_clause?
}

// ============================================================
// CALL Subquery Clause
// ============================================================
// CALL { subquery } executes a nested query for each row in the outer query.
// Variables can be imported from outer scope using WITH at the start.
// CALL subqueries must end with a RETURN clause.

call_clause = {
    CALL ~
    "{" ~ call_body ~ "}"
}

// ============================================================
// CALL Procedure Clause
// ============================================================
// Calls a named procedure with arguments and yields named results.
// e.g., CALL interstellar.shortestPath(source, target) YIELD path, distance

call_procedure_clause = {
    CALL ~ procedure_name ~
    "(" ~ procedure_args? ~ ")" ~
    yield_clause?
}
// Procedure name: identifiers joined by dots.
procedure_name = {
    identifier ~ ("." ~ identifier)*
}
// Comma-separated argument expressions.
procedure_args = {
    expression ~ ("," ~ expression)*
}
// YIELD list: one or more result names, each with an optional alias.
yield_clause = {
    YIELD ~
    yield_item ~ ("," ~ yield_item)*
}
yield_item = {
    identifier ~ (AS ~ identifier)?
}

// Body of a CALL subquery: a single call_query, or several joined by UNION.
call_body = {
    call_query ~
    (union_clause ~ call_query)*
}

// Query inside CALL. An importing WITH may come first; it is told apart from
// a regular WITH purely by that position. MATCH is optional here (the
// subquery may only transform imported variables), RETURN is mandatory.
call_query = { importing_with? ~ match_clause? ~ optional_match_clause* ~ where_clause? ~ call_clause* ~ with_clause* ~ return_clause ~ order_clause? ~ limit_clause? }

// Importing WITH: brings variables from the outer scope into a CALL subquery.
// It is distinguished from a regular WITH by being first in call_query.
// NOTE: the !PATH lookahead keeps it from matching a WITH PATH clause.
importing_with = {
    WITH ~ !PATH ~
    return_item ~ ("," ~ return_item)*
}

// WITH clause: pipes results between query parts.
// WITH projects columns forward, resetting variable scope.
// After the projection list it may carry, in this order, a WHERE filter on
// the projected columns, an ORDER BY, and a LIMIT.
// NOTE: the !PATH lookahead keeps it from matching a WITH PATH clause.
with_clause = {
    WITH ~ !PATH ~ DISTINCT? ~
    return_item ~ ("," ~ return_item)* ~
    with_where_clause? ~
    order_clause? ~
    with_limit_clause?
}

// WHERE inside WITH (filters on projected columns).
with_where_clause = {
    WHERE ~ expression
}

// LIMIT inside WITH, kept separate from the main limit_clause to avoid
// ambiguity. Only the LIMIT-first form is accepted here.
with_limit_clause = {
    LIMIT ~ integer ~
    ((OFFSET | SKIP) ~ integer)?
}

// WITH PATH clause: enables path tracking and collection, so that the path()
// function can be used in RETURN. An alias may be given with AS.
with_path_clause = {
    WITH ~ PATH ~
    (AS ~ identifier)?
}

// UNWIND clause: expands a list into individual rows.
//   UNWIND expression AS variable
unwind_clause = {
    UNWIND ~ expression ~
    AS ~ identifier
}

// LET clause: binds a computed value to a variable.
//   LET variable = expression
let_clause = {
    LET ~ identifier ~
    "=" ~ expression
}

// MATCH clause: one or more patterns separated by commas.
match_clause = {
    MATCH ~
    pattern ~ ("," ~ pattern)*
}

// OPTIONAL MATCH clause: matches if possible, produces nulls if not.
// May reference variables from earlier MATCH / OPTIONAL MATCH clauses.
optional_match_clause = {
    OPTIONAL ~ MATCH ~
    pattern ~ ("," ~ pattern)*
}

// WHERE clause
where_clause = {
    WHERE ~ expression
}

// Pattern: a node, then any number of (edge, node) pairs.
// e.g., (a)-[:KNOWS]->(b)-[:WORKS_WITH]->(c)
pattern = {
    node_pattern ~
    (edge_pattern ~ node_pattern)*
}

// Node pattern; every part inside the parentheses is optional, but the parts
// must appear in this order.
// e.g., (variable:Label:Label2 {prop: value, prop2: value2} WHERE expr)
node_pattern = {
    "(" ~
    variable? ~
    label_filter? ~
    property_filter? ~
    inline_where? ~
    ")"
}

// Edge pattern with direction; the square brackets are mandatory.
// -[variable:TYPE {prop: value} WHERE expr]->  (outgoing)
// <-[variable:TYPE {prop: value} WHERE expr]-  (incoming)
// -[variable:TYPE {prop: value} WHERE expr]-   (both/undirected)
// "-[" and "]-" are single tokens, so no whitespace may separate the dash
// from the bracket.
edge_pattern = {
    left_arrow? ~
    "-[" ~
    variable? ~
    label_filter? ~
    quantifier? ~
    property_filter? ~
    inline_where? ~
    "]-" ~
    right_arrow?
}

// Inline WHERE for patterns (distinct from the main WHERE clause).
// e.g., (n:Person WHERE n.age > 21)
inline_where = {
    WHERE ~ expression
}

// Direction markers on either side of an edge.
left_arrow = {
    "<"
}
right_arrow = {
    ">"
}

// Labels: :Person or :Person:Employee (multiple labels)
label_filter = {
    (":" ~ identifier)+
}

// Properties: {name: 'Alice', age: 30}
// At least one entry is required, and values are restricted to literals.
property_filter = {
    "{" ~
    property ~ ("," ~ property)* ~
    "}"
}
property = {
    identifier ~ ":" ~ literal
}

// Path quantifier for variable-length paths: *1..3, *, *2, *..5
quantifier = {
    "*" ~ range?
}
// Either a bounded form (each bound optional around "..") or one exact count.
range = {
    (integer? ~ ".." ~ integer?)
    | integer
}

// ============================================================
// Expressions with correct precedence (lowest to highest)
// ============================================================
// Each level is written in terms of the next tighter-binding level:
//   OR < AND < NOT < comparison < || < + - < * / % < ^ < unary minus

// Entry point for all expressions.
expression = {
    or_expr
}

// OR has the lowest precedence.
or_expr = {
    and_expr ~
    (OR ~ and_expr)*
}

// AND binds tighter than OR.
and_expr = {
    not_expr ~
    (AND ~ not_expr)*
}

// NOT is a unary prefix operator and may be repeated.
not_expr = {
    NOT* ~ comparison
}

// Comparison forms. The specialised forms are tried first; comparison_expr is
// the fallback and also matches a bare concat_expr with no operator.
comparison = { is_null_expr | in_expr | regex_expr | comparison_expr }

// IS NULL / IS NOT NULL
is_null_expr = {
    concat_expr ~
    IS ~ NOT? ~ NULL
}

// IN list / NOT IN list (the right-hand side must be a list literal)
in_expr = {
    concat_expr ~
    NOT? ~ IN ~
    list_expr
}

// Regex match: expr =~ 'pattern'
regex_expr = {
    concat_expr ~
    regex_op ~
    concat_expr
}
regex_op = {
    "=~"
}

// Binary comparison; the operator and right-hand side are optional and at
// most one comparison is accepted (no chaining such as a < b < c).
comparison_expr = {
    concat_expr ~
    (comp_op ~ concat_expr)?
}

// String concatenation operator (between comparison and additive).
// Precedence: comparison < concat < additive
concat_expr = {
    additive ~
    (concat_op ~ additive)*
}
concat_op = {
    "||"
}

// Comparison operators. Two-character symbols are listed before their
// one-character prefixes so that "<=" is not read as "<".
comp_op = { neq | lte | gte | lt | gt | eq | CONTAINS | starts_with | ends_with }

// One rule per operator symbol, to avoid ambiguity.
neq = {
    "<>"
    | "!="
}
lte = {
    "<="
}
gte = {
    ">="
}
lt = {
    "<"
}
gt = {
    ">"
}
eq = {
    "="
}

// Two-word string operators.
starts_with = {
    STARTS ~ WITH
}
ends_with = {
    ENDS ~ WITH
}

// Additive operators: + -
additive = {
    multiplicative ~
    (add_op ~ multiplicative)*
}
add_op = {
    "+"
    | "-"
}

// Multiplicative operators: * / %
multiplicative = {
    power ~
    (mul_op ~ power)*
}
mul_op = {
    "*"
    | "/"
    | "%"
}

// Power operator: ^ (higher precedence than *, /, %)
power = {
    unary ~
    (pow_op ~ unary)*
}
pow_op = {
    "^"
}

// Unary minus. It is part of the operand of ^, so in "-a ^ b" the minus
// groups with "a".
unary = {
    neg_op? ~ postfix_expr
}
neg_op = {
    "-"
}

// Postfix expression: a primary followed by zero or more index/slice accesses.
// e.g., list[0], list[-1], list[1..3], p.scores[0], [1,2,3][-1]
postfix_expr = {
    primary ~
    index_access*
}

// Index or slice access: [expr] or [start..end].
// slice_range is tried first because it can start with an expression.
index_access = {
    "[" ~
    (slice_range | expression) ~
    "]"
}

// Slice range: start..end, ..end, start.., or ..
// Both bounds are optional.
// e.g., [1..3] [..3] [2..] [..]
slice_range = {
    slice_start? ~
    ".." ~
    slice_end?
}

// Slice start: a restricted expression form (atoms joined by + or -) that
// stops before the "..".
slice_start = {
    slice_atom ~
    (add_op ~ slice_atom)*
}

// Slice end: same restricted form as slice_start.
slice_end = {
    slice_atom ~
    (add_op ~ slice_atom)*
}

// Atoms allowed in slice bounds, each with an optional leading minus:
// function calls, literals, property access, variables, and parenthesised
// expressions.
slice_atom = {
    neg_op? ~
    (function_call | literal | property_access | variable | "(" ~ expression ~ ")")
}

// Primary expressions (highest precedence).
// This is a PEG ordered choice: the first alternative that matches wins, so
// the order below is significant and must not be rearranged casually.
primary = {
    case_expr
    | exists_expr
    | reduce_expr            // REDUCE(acc = init, x IN list | expr)
    | all_predicate          // ALL(x IN list WHERE cond)
    | any_predicate          // ANY(x IN list WHERE cond)
    | none_predicate         // NONE(x IN list WHERE cond)
    | single_predicate       // SINGLE(x IN list WHERE cond)
    | parameter
    | geo_function           // point.distance(...) etc. (spec-56); before function_call
    | geo_constructor        // point({...}), polygon([...]) (spec-56); before function_call
    | distance_literal       // 5km, 100m, 3.2mi (spec-56); before literal
    | literal
    | function_call
    | pattern_comprehension  // [(pattern) | expr]; before list_comprehension
    | list_comprehension     // [x IN list | expr]; before list_expr
    | property_access
    | variable
    | paren_expr
    | list_expr
    | map_expr               // {key: value, ...}
}

// Pattern comprehension: [(pattern) | transform_expression]
// Matches a pattern and transforms each match into a list element.
// The pattern typically references variables from the outer scope.
// An optional WHERE between the pattern and the pipe filters the matches.
// e.g., [(p)-[:FRIEND]->(f) | f.name]
// e.g., [(p)-[r:KNOWS]->(other) | {person: other.name, since: r.since}]
pattern_comprehension = {
    "[" ~
    pattern ~
    pattern_comp_where? ~
    pipe_token ~
    expression ~
    "]"
}
pattern_comp_where = {
    WHERE ~ expression
}

// Map literal expression: {key: value, key2: value2}; may be empty ({}).
// Keys can be identifiers or string literals; values are full expressions.
// e.g., {name: 'Alice', age: 30}
// e.g., {personName: p.name, personAge: p.age}
map_expr = {
    "{" ~
    (map_entry ~ ("," ~ map_entry)*)? ~
    "}"
}
map_entry = {
    map_key ~ ":" ~ expression
}
map_key = {
    identifier
    | string
}

// Parameter reference: $paramName
// Used for parameterized queries with safe value injection.
// Atomic, so no whitespace is allowed between "$" and the name.
parameter = @{ "$" ~ identifier }

// List comprehension: transforms and optionally filters a list.
//   [x IN list | expression]
//   [x IN list WHERE condition | expression]
// e.g., [p IN people | p.name]
// e.g., [p IN people WHERE p.age > 18 | p.name]
list_comprehension = {
    "[" ~
    identifier ~ IN ~ list_comp_source ~
    list_comp_where? ~
    pipe_token ~
    expression ~
    "]"
}
list_comp_where = {
    WHERE ~ list_comp_filter
}
// The literal "|" as a named rule, distinct from pest's choice operator.
pipe_token = {
    "|"
}

// Source and filter of a list comprehension use a restricted expression form
// (terms joined by list_comp_binop) that stops at WHERE or "|". This avoids
// greedy matching that would consume the "|" token.
list_comp_source = {
    list_comp_term ~
    (list_comp_binop ~ list_comp_term)*
}
list_comp_filter = {
    list_comp_term ~
    (list_comp_binop ~ list_comp_term)*
}

// Binary operators allowed between list comprehension terms (excludes "|").
list_comp_binop = { AND | OR | comp_op | add_op | mul_op | pow_op }

// A term: optional NOT, a primary, and an optional IS [NOT] NULL suffix.
list_comp_term = {
    NOT? ~
    list_comp_primary ~
    (IS ~ NOT? ~ NULL)?
}

// Primaries allowed inside a list comprehension term.
list_comp_primary = {
    function_call
    | list_expr           // list literals such as [1, 2, 3]
    | list_comprehension  // nested comprehensions such as [x IN y | z]
    | literal
    | property_access
    | variable
    | "(" ~ expression ~ ")"
}

// CASE expression (searched form only):
//   CASE WHEN condition THEN result [WHEN ... THEN ...] [ELSE default] END
// At least one WHEN branch is required.
case_expr = {
    CASE ~
    case_when_clause+ ~
    case_else_clause? ~
    END
}
case_when_clause = {
    WHEN ~ expression ~
    THEN ~ expression
}
case_else_clause = {
    ELSE ~ expression
}

// EXISTS expression: EXISTS { pattern } or NOT EXISTS { pattern }
// Also supports the explicit-MATCH subquery form:
//   EXISTS { MATCH pattern [WHERE expression] }
// Used to check whether a subpattern matches from the current element. The
// optional WHERE inside the braces filters the matched subgraph before the
// EXISTS predicate evaluates true. The MATCH form is tried first.
exists_expr = {
    NOT? ~ EXISTS ~
    "{" ~
    ((MATCH ~ pattern ~ (WHERE ~ expression)?) | pattern) ~
    "}"
}

// ALL/ANY/NONE/SINGLE list predicates; all four share one shape:
//   KEYWORD(x IN list WHERE condition)
// ALL    - true if all elements satisfy condition
// ANY    - true if at least one element satisfies condition
// NONE   - true if no elements satisfy condition
// SINGLE - true if exactly one element satisfies condition
// The WHERE part is mandatory.
all_predicate = {
    ALL ~ "(" ~
    identifier ~ IN ~ list_comp_source ~
    WHERE ~ expression ~
    ")"
}
any_predicate = {
    ANY_KW ~ "(" ~
    identifier ~ IN ~ list_comp_source ~
    WHERE ~ expression ~
    ")"
}
none_predicate = {
    NONE_KW ~ "(" ~
    identifier ~ IN ~ list_comp_source ~
    WHERE ~ expression ~
    ")"
}
single_predicate = {
    SINGLE ~ "(" ~
    identifier ~ IN ~ list_comp_source ~
    WHERE ~ expression ~
    ")"
}

// REDUCE expression: accumulates a value over a list (fold/reduce).
//   REDUCE(accumulator = initial, variable IN list | expression)
// e.g., REDUCE(total = 0, x IN prices | total + x)
// e.g., REDUCE(s = '', name IN names | s || name || ', ')
// The initial value uses reduce_initial so it cannot consume the comma, and
// the list uses list_comp_source so it cannot consume the pipe token.
reduce_expr = { REDUCE ~ "(" ~ identifier ~ "=" ~ reduce_initial ~ "," ~ identifier ~ IN ~ list_comp_source ~ pipe_token ~ expression ~ ")" }

// Initial value in REDUCE: limited to function calls, literals, property
// access, variables, and parenthesised expressions.
reduce_initial = { function_call | literal | property_access | variable | "(" ~ expression ~ ")" }

// REDUCE keyword (case-insensitive, atomic).
// The trailing lookahead makes it match only as a whole word. "_" counts as
// a word character because identifiers may contain it; otherwise REDUCE would
// match the prefix of a name such as reduce_total.
REDUCE = @{ ^"reduce" ~ !(ASCII_ALPHANUMERIC | "_") }

// Parenthesised expression: restarts precedence at the top-level expression
// rule.
paren_expr = {
    "(" ~
    expression ~
    ")"
}

// ============================================================
// Geospatial Constructors and Functions (spec-56)
// ============================================================

// Geometry constructors. The words "point" and "polygon" are plain string
// literals here, so they are matched case-sensitively.
// e.g., point({longitude: -122.4, latitude: 37.7}) or point({lon: -122.4, lat: 37.7})
// e.g., polygon([[-122.6, 37.6], [-122.3, 37.6], ...])
geo_constructor = {
    geo_point_constructor
    | geo_polygon_constructor
}
geo_point_constructor = {
    "point" ~ "(" ~ map_expr ~ ")"
}
geo_polygon_constructor = {
    "polygon" ~ "(" ~
    "[" ~ geo_point_pair ~ ("," ~ geo_point_pair)* ~ "]" ~
    ")"
}
// One polygon vertex: a bracketed pair of numbers.
geo_point_pair = {
    "[" ~ geo_number ~ "," ~ geo_number ~ "]"
}
// float is tried before integer so that "1.5" is not read as the integer 1.
geo_number = {
    float
    | integer
}

// Namespaced geospatial functions:
// point.distance(expr, expr), point.within_bbox(...), point.within_distance(...)
geo_function = { geo_distance_fn | geo_within_bbox_fn | geo_within_distance_fn }
// Two arguments.
geo_distance_fn = {
    "point" ~ "." ~ "distance" ~
    "(" ~ expression ~ "," ~ expression ~ ")"
}
// Four arguments.
geo_within_bbox_fn = {
    "point" ~ "." ~ "within_bbox" ~
    "(" ~ expression ~ "," ~ expression ~ "," ~ expression ~ "," ~ expression ~ ")"
}
// Three arguments.
geo_within_distance_fn = {
    "point" ~ "." ~ "within_distance" ~
    "(" ~ expression ~ "," ~ expression ~ "," ~ expression ~ ")"
}

// Distance literal: 5km, 100m, 3.2mi, 10nmi
// Compound-atomic ($), so no whitespace is allowed between number and unit.
distance_literal = ${ (float | integer) ~ distance_unit }
// Longer unit names come first so that "nmi" and "mi" are not read as "m".
distance_unit = @{ "nmi" | "km" | "mi" | "m" }

// Function call: count(*), sum(x), count(DISTINCT x), etc.
function_call = {
    identifier ~
    "(" ~ function_args? ~ ")"
}
// Either a lone "*" or a comma-separated expression list with an optional
// leading DISTINCT.
function_args = {
    star
    | (DISTINCT? ~ expression ~ ("," ~ expression)*)
}
star = {
    "*"
}

// List expression: [1, 2, 3] or ['a', 'b']; the empty list [] is accepted.
list_expr = {
    "[" ~
    (expression ~ ("," ~ expression)*)? ~
    "]"
}

// Property access: n.name
// Exactly one level: a variable, a dot, and a property name.
property_access = {
    variable ~ "." ~ identifier
}

// Identifiers and variables
// Both rules are atomic and accept either of two forms:
//   - backtick-quoted: one or more characters other than "`" between backticks
//   - bare: an ASCII letter followed by ASCII letters, digits, or "_"
//
// variable additionally rejects a bare name that is exactly a keyword.
// The backtick form bypasses that check, so users can write e.g. `desc` to
// use a reserved word as a variable name.
//
// The lookahead rejects a name only when a keyword is followed by a true word
// boundary, i.e. by neither an alphanumeric character nor "_". Treating "_"
// as a word character is what lets names such as node_a, end_date, type_name,
// or order_total be variables; with an alphanumeric-only boundary they were
// refused because they start with a keyword.
variable = @{ ("`" ~ (!"`" ~ ANY)+ ~ "`") | (!(keyword ~ !(ASCII_ALPHANUMERIC | "_")) ~ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")*) }

// identifier performs no keyword check: labels, property names, aliases, and
// function names may coincide with reserved words.
identifier = @{ ("`" ~ (!"`" ~ ANY)+ ~ "`") | (ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")*) }

// All keywords - used to exclude them from variable names
// NOTE(review): YIELD and RTREE are defined as keywords elsewhere in this
// file but are not listed here, so they remain usable as bare variable
// names. Confirm whether that is intentional before adding them, since doing
// so would reject queries that currently parse.
keyword = {
    MATCH | RETURN | WHERE | ORDER | BY | GROUP | LIMIT | OFFSET | SKIP | HAVING |
    AS | AND | OR | NOT | TRUE | FALSE | NULL | ASC | DESC |
    IN | IS | CONTAINS | STARTS | ENDS | WITH | DISTINCT | EXISTS |
    CASE | WHEN | THEN | ELSE | END | UNION | ALL | OPTIONAL |
    PATH | UNWIND | CREATE | SET | REMOVE | DELETE | DETACH | MERGE | ON | LET |
    REDUCE | ANY_KW | NONE_KW | SINGLE | CALL | FOREACH |
    // DDL keywords
    TYPE | NODE | EDGE | GRAPH | FROM_KW | TO_KW | ALTER | DROP_KW | ADD |
    ALLOW | ADDITIONAL | PROPERTIES | VALIDATION | SCHEMA | STRICT | CLOSED | WARN_KW | DEFAULT |
    STRING_TYPE | INT_TYPE | FLOAT_TYPE | BOOL_TYPE | LIST_TYPE | MAP_TYPE | ANY_TYPE |
    // Index DDL keywords
    INDEX | UNIQUE
}

// ============================================================
// RETURN clause - multiple items with optional aliases
// ============================================================
// e.g., RETURN n, n.name, n.age AS personAge
// e.g., RETURN DISTINCT n.city
return_clause = {
    RETURN ~ DISTINCT? ~
    return_item ~ ("," ~ return_item)*
}
// One projected expression with an optional AS alias.
return_item = {
    expression ~
    (AS ~ identifier)?
}

// ============================================================
// GROUP BY clause
// ============================================================
// Specifies which expressions to group by when using aggregate functions.
// Non-aggregated expressions in RETURN should appear in GROUP BY.
// e.g., MATCH (p:player) RETURN p.position, count(*) GROUP BY p.position
group_by_clause = {
    GROUP ~ BY ~
    expression ~ ("," ~ expression)*
}

// ============================================================
// HAVING clause
// ============================================================
// Filters groups after aggregation. Can only be used with GROUP BY.
// e.g., MATCH (p:person) RETURN p.city, count(*) GROUP BY p.city HAVING count(*) > 5
having_clause = {
    HAVING ~ expression
}

// ============================================================
// ORDER BY clause
// ============================================================
order_clause = {
    ORDER ~ BY ~
    order_item ~ ("," ~ order_item)*
}
// One sort key with an optional ASC / DESC direction.
order_item = {
    expression ~
    (ASC | DESC)?
}

// ============================================================
// LIMIT clause
// ============================================================
// SKIP is an alias for OFFSET (Cypher compatibility).
// Supports: LIMIT n, LIMIT n OFFSET m, LIMIT n SKIP m, SKIP n LIMIT m, OFFSET n LIMIT m
// The second alternative also accepts a lone OFFSET n / SKIP n.
limit_clause = {
    (LIMIT ~ integer ~ ((OFFSET | SKIP) ~ integer)?)
    | ((OFFSET | SKIP) ~ integer ~ (LIMIT ~ integer)?)
}

// ============================================================
// Literals
// ============================================================
// float is tried before integer so that "1.5" is not read as the integer 1
// followed by stray input.
literal = {
    string
    | float
    | integer
    | boolean
    | NULL
}
boolean = {
    TRUE
    | FALSE
}

// Strings may use single or double quotes. The string rules are
// compound-atomic ($): no implicit whitespace is skipped inside them, and the
// *_inner rule holds the text between the quotes.
string = ${ single_quoted_string | double_quoted_string }

// Inside a quoted string the quote character is written by doubling it
// ('' or ""); any other character is taken as-is.
single_quoted_string = ${ "'" ~ single_quoted_inner ~ "'" }
single_quoted_inner = @{ ((!"'" ~ ANY) | "''")* }
double_quoted_string = ${ "\"" ~ double_quoted_inner ~ "\"" }
double_quoted_inner = @{ ((!"\"" ~ ANY) | "\"\"")* }

// Numbers: optional leading minus, decimal digits only. A float requires
// digits on both sides of the decimal point.
integer = @{
    "-"? ~ ASCII_DIGIT+
}
float = @{
    "-"? ~ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+
}