// GQL Grammar - Extended with WHERE clause and full expression support
// Supports:
// - Node patterns: (n:Label {prop: value})
// - Edge patterns: -[e:TYPE]->, <-[e:TYPE]-, -[e:TYPE]-
// - Multiple patterns: (a), (b)
// - Path quantifiers: *1..3
// - Extended RETURN: n.name, n.age AS personAge
// - WHERE clause with comparison, logical, and string operators
// - Arithmetic expressions
// - Aggregate functions with DISTINCT: count(DISTINCT p.city)
// Implicit whitespace. pest inserts this silent rule between the terms of
// sequences and repetitions in every non-atomic rule below.
WHITESPACE = _{
    "\n" | "\r" | "\t" | " "
}
// Keywords (case-insensitive) with word boundary check.
// Each keyword is atomic and ends with a negative lookahead that rejects the
// match when the next character could continue an identifier: an ASCII
// letter, a digit, or "_". Identifiers in this grammar may contain "_", so
// the underscore has to be part of the boundary.
// e.g., OR does not match the "OR" in "ORDER", IS does not match the "is" in
// "is_active", and TRUE does not match the "true" in "true_count".
MATCH = @{ ^"match" ~ !(ASCII_ALPHANUMERIC | "_") }
RETURN = @{ ^"return" ~ !(ASCII_ALPHANUMERIC | "_") }
WHERE = @{ ^"where" ~ !(ASCII_ALPHANUMERIC | "_") }
ORDER = @{ ^"order" ~ !(ASCII_ALPHANUMERIC | "_") }
BY = @{ ^"by" ~ !(ASCII_ALPHANUMERIC | "_") }
GROUP = @{ ^"group" ~ !(ASCII_ALPHANUMERIC | "_") }
LIMIT = @{ ^"limit" ~ !(ASCII_ALPHANUMERIC | "_") }
OFFSET = @{ ^"offset" ~ !(ASCII_ALPHANUMERIC | "_") }
AS = @{ ^"as" ~ !(ASCII_ALPHANUMERIC | "_") }
AND = @{ ^"and" ~ !(ASCII_ALPHANUMERIC | "_") }
OR = @{ ^"or" ~ !(ASCII_ALPHANUMERIC | "_") }
NOT = @{ ^"not" ~ !(ASCII_ALPHANUMERIC | "_") }
TRUE = @{ ^"true" ~ !(ASCII_ALPHANUMERIC | "_") }
FALSE = @{ ^"false" ~ !(ASCII_ALPHANUMERIC | "_") }
NULL = @{ ^"null" ~ !(ASCII_ALPHANUMERIC | "_") }
ASC = @{ ^"asc" ~ !(ASCII_ALPHANUMERIC | "_") }
DESC = @{ ^"desc" ~ !(ASCII_ALPHANUMERIC | "_") }
IN = @{ ^"in" ~ !(ASCII_ALPHANUMERIC | "_") }
IS = @{ ^"is" ~ !(ASCII_ALPHANUMERIC | "_") }
CONTAINS = @{ ^"contains" ~ !(ASCII_ALPHANUMERIC | "_") }
STARTS = @{ ^"starts" ~ !(ASCII_ALPHANUMERIC | "_") }
ENDS = @{ ^"ends" ~ !(ASCII_ALPHANUMERIC | "_") }
WITH = @{ ^"with" ~ !(ASCII_ALPHANUMERIC | "_") }
DISTINCT = @{ ^"distinct" ~ !(ASCII_ALPHANUMERIC | "_") }
EXISTS = @{ ^"exists" ~ !(ASCII_ALPHANUMERIC | "_") }
CASE = @{ ^"case" ~ !(ASCII_ALPHANUMERIC | "_") }
WHEN = @{ ^"when" ~ !(ASCII_ALPHANUMERIC | "_") }
THEN = @{ ^"then" ~ !(ASCII_ALPHANUMERIC | "_") }
ELSE = @{ ^"else" ~ !(ASCII_ALPHANUMERIC | "_") }
END = @{ ^"end" ~ !(ASCII_ALPHANUMERIC | "_") }
UNION = @{ ^"union" ~ !(ASCII_ALPHANUMERIC | "_") }
ALL = @{ ^"all" ~ !(ASCII_ALPHANUMERIC | "_") }
OPTIONAL = @{ ^"optional" ~ !(ASCII_ALPHANUMERIC | "_") }
PATH = @{ ^"path" ~ !(ASCII_ALPHANUMERIC | "_") }
UNWIND = @{ ^"unwind" ~ !(ASCII_ALPHANUMERIC | "_") }
LET = @{ ^"let" ~ !(ASCII_ALPHANUMERIC | "_") }
SKIP = @{ ^"skip" ~ !(ASCII_ALPHANUMERIC | "_") }
HAVING = @{ ^"having" ~ !(ASCII_ALPHANUMERIC | "_") }
ANY_KW = @{ ^"any" ~ !(ASCII_ALPHANUMERIC | "_") }
NONE_KW = @{ ^"none" ~ !(ASCII_ALPHANUMERIC | "_") }
SINGLE = @{ ^"single" ~ !(ASCII_ALPHANUMERIC | "_") }
// Subquery keywords
// Case-insensitive and atomic. The lookahead treats "_" as a word character
// too, so e.g. CALL does not match the "call" in "call_count".
CALL = @{ ^"call" ~ !(ASCII_ALPHANUMERIC | "_") }
FOREACH = @{ ^"foreach" ~ !(ASCII_ALPHANUMERIC | "_") }
YIELD = @{ ^"yield" ~ !(ASCII_ALPHANUMERIC | "_") }
// Mutation keywords
// Case-insensitive and atomic. The lookahead treats "_" as a word character
// too, so e.g. SET does not match the "set" in "set_size" and ON does not
// match the "on" in "on_call".
CREATE = @{ ^"create" ~ !(ASCII_ALPHANUMERIC | "_") }
SET = @{ ^"set" ~ !(ASCII_ALPHANUMERIC | "_") }
REMOVE = @{ ^"remove" ~ !(ASCII_ALPHANUMERIC | "_") }
DELETE = @{ ^"delete" ~ !(ASCII_ALPHANUMERIC | "_") }
DETACH = @{ ^"detach" ~ !(ASCII_ALPHANUMERIC | "_") }
MERGE = @{ ^"merge" ~ !(ASCII_ALPHANUMERIC | "_") }
ON = @{ ^"on" ~ !(ASCII_ALPHANUMERIC | "_") }
// Schema/DDL keywords
// Case-insensitive and atomic. The lookahead treats "_" as a word character
// too, so e.g. NODE does not match the "node" in "node_id" and TO_KW does not
// match the "to" in "to_id".
TYPE = @{ ^"type" ~ !(ASCII_ALPHANUMERIC | "_") }
NODE = @{ ^"node" ~ !(ASCII_ALPHANUMERIC | "_") }
EDGE = @{ ^"edge" ~ !(ASCII_ALPHANUMERIC | "_") }
GRAPH = @{ ^"graph" ~ !(ASCII_ALPHANUMERIC | "_") }
FROM_KW = @{ ^"from" ~ !(ASCII_ALPHANUMERIC | "_") }
TO_KW = @{ ^"to" ~ !(ASCII_ALPHANUMERIC | "_") }
ALTER = @{ ^"alter" ~ !(ASCII_ALPHANUMERIC | "_") }
DROP_KW = @{ ^"drop" ~ !(ASCII_ALPHANUMERIC | "_") }
ADD = @{ ^"add" ~ !(ASCII_ALPHANUMERIC | "_") }
ALLOW = @{ ^"allow" ~ !(ASCII_ALPHANUMERIC | "_") }
ADDITIONAL = @{ ^"additional" ~ !(ASCII_ALPHANUMERIC | "_") }
PROPERTIES = @{ ^"properties" ~ !(ASCII_ALPHANUMERIC | "_") }
VALIDATION = @{ ^"validation" ~ !(ASCII_ALPHANUMERIC | "_") }
SCHEMA = @{ ^"schema" ~ !(ASCII_ALPHANUMERIC | "_") }
STRICT = @{ ^"strict" ~ !(ASCII_ALPHANUMERIC | "_") }
CLOSED = @{ ^"closed" ~ !(ASCII_ALPHANUMERIC | "_") }
WARN_KW = @{ ^"warn" ~ !(ASCII_ALPHANUMERIC | "_") }
DEFAULT = @{ ^"default" ~ !(ASCII_ALPHANUMERIC | "_") }
// Property type keywords
// Case-insensitive and atomic. The lookahead treats "_" as a word character
// too, so e.g. INT_TYPE does not match the "int" in "int_value".
STRING_TYPE = @{ ^"string" ~ !(ASCII_ALPHANUMERIC | "_") }
INT_TYPE = @{ ^"int" ~ !(ASCII_ALPHANUMERIC | "_") }
FLOAT_TYPE = @{ ^"float" ~ !(ASCII_ALPHANUMERIC | "_") }
BOOL_TYPE = @{ ^"bool" ~ !(ASCII_ALPHANUMERIC | "_") }
LIST_TYPE = @{ ^"list" ~ !(ASCII_ALPHANUMERIC | "_") }
MAP_TYPE = @{ ^"map" ~ !(ASCII_ALPHANUMERIC | "_") }
ANY_TYPE = @{ ^"any" ~ !(ASCII_ALPHANUMERIC | "_") }
// Index DDL keywords
// Case-insensitive and atomic. The lookahead treats "_" as a word character
// too, so e.g. INDEX does not match the "index" in "index_name".
INDEX = @{ ^"index" ~ !(ASCII_ALPHANUMERIC | "_") }
UNIQUE = @{ ^"unique" ~ !(ASCII_ALPHANUMERIC | "_") }
// Entry point: exactly one statement spanning the whole input (SOI to EOI).
// The alternatives are tried in order: DDL, then mutation, then read-only.
statement = {
    SOI ~
    (ddl_statement | mutation_statement | read_statement) ~
    EOI
}
// Read-only statement: a query, optionally followed by further queries that
// are each joined to the previous one by a UNION clause.
read_statement = {
    query ~ (union_clause ~ query)*
}
// ============================================================
// Mutation Statements
// ============================================================
// A mutation statement takes one of three shapes, tried in this order:
//   1. CREATE-only:       CREATE (n:Label) [RETURN ...]
//   2. MATCH + mutations: MATCH (n) SET n.prop = val [RETURN ...]
//   3. MERGE:             MERGE (n:Label) ON CREATE SET ... ON MATCH SET ... [RETURN ...]
mutation_statement = {
    create_only_statement | match_mutation_statement | merge_statement
}
// One or more CREATE clauses with no preceding MATCH; RETURN is optional.
// e.g., CREATE (n:Person {name: 'Alice'}) RETURN n
create_only_statement = {
    create_clause+ ~
    return_clause?
}
// MATCH, any OPTIONAL MATCH clauses and an optional WHERE, followed by at
// least one mutation clause. FOREACH clauses are accepted only after those
// mutation clauses; RETURN is optional.
// e.g., MATCH (n:Person) WHERE n.name = 'Alice' SET n.age = 31 RETURN n
match_mutation_statement = {
    match_clause ~ optional_match_clause* ~ where_clause? ~
    mutation_clause+ ~ foreach_clause* ~
    return_clause?
}
// MERGE statement (upsert): the MERGE clause, any number of ON CREATE /
// ON MATCH actions, and an optional RETURN.
// e.g., MERGE (n:Person {name: 'Alice'}) ON CREATE SET n.created = 123
merge_statement = {
    merge_clause ~
    merge_action* ~
    return_clause?
}
// ============================================================
// DDL Statements (Schema Definition)
// ============================================================
// Any one schema- or index-definition statement. The alternatives are listed
// pairwise: index create/drop, type create, type alter, type drop, and
// finally the schema validation setting.
ddl_statement = {
    create_index | drop_index |
    create_node_type | create_edge_type |
    alter_node_type | alter_edge_type |
    drop_node_type | drop_edge_type |
    set_schema_validation
}
// Node type definition; the parenthesized property list may be empty.
// e.g., CREATE NODE TYPE Person (name STRING NOT NULL, age INT)
create_node_type = {
    CREATE ~ NODE ~ TYPE ~ identifier ~ "(" ~ property_def_list? ~ ")"
}
// Edge type definition; same shape as a node type, followed by a mandatory
// FROM ... TO ... endpoint clause.
// e.g., CREATE EDGE TYPE KNOWS (since INT) FROM Person TO Person
create_edge_type = {
    CREATE ~ EDGE ~ TYPE ~ identifier ~ "(" ~ property_def_list? ~ ")" ~ edge_endpoint_clause
}
// Endpoint clause: one list of type names after FROM and one after TO.
// e.g., FROM Person, Employee TO Company, Startup
edge_endpoint_clause = {
    FROM_KW ~ type_name_list ~
    TO_KW ~ type_name_list
}
// One or more type names separated by commas
type_name_list = {
    identifier ~ ("," ~ identifier)*
}
// One or more property definitions separated by commas
property_def_list = {
    property_def ~ ("," ~ property_def)*
}
// A single property: name, type, then the optional NOT NULL and DEFAULT
// modifiers in that fixed order.
// e.g., name STRING NOT NULL DEFAULT 'unknown'
property_def = {
    identifier ~
    property_type ~
    not_null_modifier? ~
    default_modifier?
}
// NOT NULL modifier
not_null_modifier = {
    NOT ~ NULL
}
// DEFAULT modifier; the default value must be a literal
default_modifier = {
    DEFAULT ~ literal
}
// Property types: the two container types first, then the scalar type
// keywords and ANY.
property_type = {
    list_type | map_type |
    STRING_TYPE | INT_TYPE | FLOAT_TYPE | BOOL_TYPE | ANY_TYPE
}
// LIST, optionally parameterized with a property type: LIST or LIST<STRING>
list_type = {
    LIST_TYPE ~ ("<" ~ property_type ~ ">")?
}
// MAP, optionally parameterized with a property type: MAP or MAP<INT>
map_type = {
    MAP_TYPE ~ ("<" ~ property_type ~ ">")?
}
// ALTER NODE TYPE <name> followed by exactly one alter action.
// e.g., ALTER NODE TYPE Person ALLOW ADDITIONAL PROPERTIES
// e.g., ALTER NODE TYPE Person ADD email STRING
// e.g., ALTER NODE TYPE Person DROP email
alter_node_type = {
    ALTER ~ NODE ~ TYPE ~ identifier ~
    alter_type_action
}
// ALTER EDGE TYPE <name> followed by exactly one alter action.
// e.g., ALTER EDGE TYPE KNOWS ALLOW ADDITIONAL PROPERTIES
alter_edge_type = {
    ALTER ~ EDGE ~ TYPE ~ identifier ~
    alter_type_action
}
// The actions shared by ALTER NODE TYPE and ALTER EDGE TYPE
alter_type_action = {
    allow_additional_properties | add_property_action | drop_property_action
}
allow_additional_properties = {
    ALLOW ~ ADDITIONAL ~ PROPERTIES
}
add_property_action = {
    ADD ~ property_def
}
drop_property_action = {
    DROP_KW ~ identifier
}
// e.g., DROP NODE TYPE Person
drop_node_type = {
    DROP_KW ~ NODE ~ TYPE ~ identifier
}
// e.g., DROP EDGE TYPE KNOWS
drop_edge_type = {
    DROP_KW ~ EDGE ~ TYPE ~ identifier
}
// e.g., SET SCHEMA VALIDATION STRICT
set_schema_validation = {
    SET ~ SCHEMA ~ VALIDATION ~ validation_mode
}
// Accepted validation modes
validation_mode = {
    STRICT | CLOSED | WARN_KW | NONE_KW
}
// ============================================================
// Index DDL Statements
// ============================================================
// CREATE INDEX idx_name ON :Label(property)
// CREATE UNIQUE INDEX idx_name ON :Label(property)
// CREATE INDEX idx_name ON (property) -- all labels
// DROP INDEX idx_name
// CREATE [UNIQUE] INDEX name ON [:Label](property)
// CREATE [RTREE] INDEX name ON [:Label](property) (spec-56)
// UNIQUE and RTREE are mutually exclusive: at most one of them may appear
// between CREATE and INDEX.
create_index = {
    CREATE ~ (UNIQUE | RTREE)? ~ INDEX ~ identifier ~
    ON ~ index_target ~ "(" ~ identifier ~ ")"
}
// RTREE index-kind keyword. Written like every other keyword in this grammar:
// atomic, case-insensitive, and followed by a word-boundary lookahead so it
// cannot match the front of a longer word (e.g. the "RTREE" in "RTREEINDEX").
RTREE = @{ ^"rtree" ~ !(ASCII_ALPHANUMERIC | "_") }
// Index target: :Label for specific label, or empty for all labels
// :Person means only vertices with label Person
// :KNOWS means only edges with label KNOWS
// The whole target is optional, so this rule can match the empty string.
index_target = { (":" ~ identifier)? }
// DROP INDEX name
drop_index = { DROP_KW ~ INDEX ~ identifier }
// ============================================================
// Mutation Clauses
// ============================================================
// Any single mutation clause usable after MATCH
mutation_clause = {
    create_clause |
    set_clause |
    remove_clause |
    delete_clause |
    detach_delete_clause
}
// FOREACH clause: binds a variable to each element of a list expression and
// applies one or more mutations per element.
// Shape: FOREACH (variable IN expression | mutation+)
// e.g., FOREACH (n IN nodes(p) | SET n.visited = true)
// e.g., FOREACH (i IN items | SET i.processed = true REMOVE i.pending)
foreach_clause = {
    FOREACH ~ "(" ~
    identifier ~ IN ~ expression ~ pipe_token ~
    foreach_mutation+ ~
    ")"
}
// Mutations permitted in a FOREACH body, including a nested FOREACH
foreach_mutation = {
    set_clause |
    remove_clause |
    delete_clause |
    detach_delete_clause |
    create_clause |
    nested_foreach
}
// FOREACH nested inside another FOREACH body; same shape as foreach_clause
nested_foreach = {
    FOREACH ~ "(" ~
    identifier ~ IN ~ expression ~ pipe_token ~
    foreach_mutation+ ~
    ")"
}
// CREATE clause: one or more comma-separated patterns to create
// e.g., CREATE (n:Person {name: 'Alice'}), (m:Person {name: 'Bob'})
create_clause = {
    CREATE ~ pattern ~ ("," ~ pattern)*
}
// SET clause: one or more comma-separated property assignments
// e.g., SET n.age = 31, n.status = 'active'
set_clause = {
    SET ~ set_item ~ ("," ~ set_item)*
}
// A single assignment: property access on the left, any expression on the right
set_item = {
    property_access ~ "=" ~ expression
}
// REMOVE clause: one or more comma-separated properties to remove
// e.g., REMOVE n.age, n.status
remove_clause = {
    REMOVE ~ remove_item ~ ("," ~ remove_item)*
}
remove_item = {
    property_access
}
// DELETE clause: deletes the named elements (fails if vertex has edges)
// e.g., DELETE n, m
delete_clause = {
    DELETE ~ variable ~ ("," ~ variable)*
}
// DETACH DELETE clause: deletes vertices with automatic edge removal
// e.g., DETACH DELETE n
detach_delete_clause = {
    DETACH ~ DELETE ~ variable ~ ("," ~ variable)*
}
// MERGE clause: upsert of a single pattern (match or create)
// e.g., MERGE (n:Person {name: 'Alice'})
merge_clause = {
    MERGE ~ pattern
}
// MERGE actions: assignments to apply when the pattern was created or matched
merge_action = {
    on_create_action | on_match_action
}
on_create_action = {
    ON ~ CREATE ~ SET ~
    set_item ~ ("," ~ set_item)*
}
on_match_action = {
    ON ~ MATCH ~ SET ~
    set_item ~ ("," ~ set_item)*
}
// UNION clause: joins two queries.
// Plain UNION deduplicates results; UNION ALL keeps duplicates.
union_clause = {
    UNION ~ ALL?
}
// A single query. The clauses must appear in exactly this order; only MATCH
// and RETURN are mandatory:
//   MATCH, then OPTIONAL MATCH clauses
//   WITH PATH (enables path tracking), then UNWIND clauses (expand lists into rows)
//   WHERE
//   CALL { ... } subqueries, then CALL procedure clauses
//   LET bindings (computed values for use in RETURN), then WITH clauses
//   RETURN
//   GROUP BY (after RETURN, GQL/SQL style), HAVING, ORDER BY, LIMIT
query = {
    match_clause ~ optional_match_clause* ~
    with_path_clause? ~ unwind_clause* ~
    where_clause? ~
    call_clause* ~ call_procedure_clause* ~
    let_clause* ~ with_clause* ~
    return_clause ~
    group_by_clause? ~ having_clause? ~ order_clause? ~ limit_clause?
}
// ============================================================
// CALL Subquery Clause
// ============================================================
// CALL { subquery } executes a nested query for each row in the outer query.
// Outer-scope variables are imported with a WITH at the start of the body,
// and every query in the body must contain a RETURN clause.
call_clause = {
    CALL ~ "{" ~ call_body ~ "}"
}
// ============================================================
// CALL Procedure Clause
// ============================================================
// Calls a named procedure with arguments and optionally yields named results.
// e.g., CALL interstellar.shortestPath(source, target) YIELD path, distance
call_procedure_clause = {
    CALL ~ procedure_name ~
    "(" ~ procedure_args? ~ ")" ~
    yield_clause?
}
// Procedure name: one or more identifiers joined by "."
procedure_name = {
    identifier ~ ("." ~ identifier)*
}
// Comma-separated argument expressions
procedure_args = {
    expression ~ ("," ~ expression)*
}
// YIELD followed by comma-separated result names, each with an optional alias
yield_clause = {
    YIELD ~ yield_item ~ ("," ~ yield_item)*
}
yield_item = {
    identifier ~ (AS ~ identifier)?
}
// Body of a CALL subquery: a single query or a UNION of queries
call_body = {
    call_query ~ (union_clause ~ call_query)*
}
// Query inside CALL. An importing WITH, if present, must be the first clause;
// that position is what distinguishes it from a regular WITH. MATCH is
// optional here (the body may only transform imported variables), while
// RETURN is required.
call_query = {
    importing_with? ~ match_clause? ~ optional_match_clause* ~
    where_clause? ~
    call_clause* ~ with_clause* ~
    return_clause ~ order_clause? ~ limit_clause?
}
// Importing WITH: brings variables from the outer scope into a CALL subquery.
// Distinguished from a regular WITH by position (first in call_query).
// NOTE: the negative lookahead !PATH keeps this from matching WITH PATH.
importing_with = {
    WITH ~ !PATH ~
    return_item ~ ("," ~ return_item)*
}
// WITH clause: pipes results between query parts.
// Projects columns forward, resetting variable scope. Accepts DISTINCT, and
// after the projected items an optional WHERE, ORDER BY and LIMIT.
// NOTE: the negative lookahead !PATH keeps this from matching WITH PATH.
with_clause = {
    WITH ~ !PATH ~ DISTINCT? ~
    return_item ~ ("," ~ return_item)* ~
    with_where_clause? ~ order_clause? ~ with_limit_clause?
}
// WHERE inside WITH (filters on the projected columns)
with_where_clause = {
    WHERE ~ expression
}
// LIMIT inside WITH. Kept separate from the main limit_clause to avoid
// ambiguity; only the LIMIT-first form is accepted here.
with_limit_clause = {
    LIMIT ~ integer ~
    ((OFFSET | SKIP) ~ integer)?
}
// WITH PATH clause: enables path tracking and collection, so that path() can
// be used in RETURN to get the traversed path. The alias is optional.
with_path_clause = {
    WITH ~ PATH ~ (AS ~ identifier)?
}
// UNWIND clause: expands a list into individual rows
// Shape: UNWIND expression AS variable
unwind_clause = {
    UNWIND ~ expression ~ AS ~ identifier
}
// LET clause: binds a computed value to a variable
// Shape: LET variable = expression
let_clause = {
    LET ~ identifier ~ "=" ~ expression
}
// MATCH clause: one or more patterns separated by commas
match_clause = {
    MATCH ~ pattern ~ ("," ~ pattern)*
}
// OPTIONAL MATCH clause: matches if possible, produces nulls if not.
// Can reference variables from previous MATCH or OPTIONAL MATCH clauses.
optional_match_clause = {
    OPTIONAL ~ MATCH ~ pattern ~ ("," ~ pattern)*
}
// WHERE clause: a single filter expression
where_clause = {
    WHERE ~ expression
}
// Pattern: a node, followed by any number of (edge, node) pairs
// e.g., (a)-[:KNOWS]->(b)-[:WORKS_WITH]->(c)
pattern = {
    node_pattern ~ (edge_pattern ~ node_pattern)*
}
// Node pattern; every part inside the parentheses is optional, in this order:
// (variable:Label:Label2 {prop: value, prop2: value2} WHERE expr)
node_pattern = {
    "(" ~
    variable? ~ label_filter? ~ property_filter? ~ inline_where? ~
    ")"
}
// Edge pattern; direction is given by the optional arrow heads:
//   -[variable:TYPE {prop: value} WHERE expr]->   (outgoing)
//   <-[variable:TYPE {prop: value} WHERE expr]-   (incoming)
//   -[variable:TYPE {prop: value} WHERE expr]-    (both/undirected)
// The path quantifier, when present, sits between the labels and the
// property filter.
edge_pattern = {
    left_arrow? ~
    "-[" ~ variable? ~ label_filter? ~ quantifier? ~ property_filter? ~ inline_where? ~ "]-" ~
    right_arrow?
}
// Inline WHERE for patterns (distinct from the main WHERE clause)
// e.g., (n:Person WHERE n.age > 21)
inline_where = {
    WHERE ~ expression
}
left_arrow = {
    "<"
}
right_arrow = {
    ">"
}
// Labels: :Person or :Person:Employee (one or more labels)
label_filter = {
    (":" ~ identifier)+
}
// Properties: {name: 'Alice', age: 30}; at least one entry is required
property_filter = {
    "{" ~ property ~ ("," ~ property)* ~ "}"
}
// A property filter entry; the value must be a literal
property = {
    identifier ~ ":" ~ literal
}
// Path quantifier for variable-length paths: *1..3, *, *2, *..5
quantifier = {
    "*" ~ range?
}
// Range: both bounds around ".." are optional; a lone integer is also accepted
range = {
    (integer? ~ ".." ~ integer?) | integer
}
// ============================================================
// Expressions with correct precedence (lowest to highest)
// ============================================================
// Each level is written as "operand ~ (operator ~ operand)*", so a chain of
// same-level operators is parsed as one flat sequence of operands.
// expression is the entry point for all expressions
expression = { or_expr }
// OR has lowest precedence
or_expr = { and_expr ~ (OR ~ and_expr)* }
// AND has higher precedence than OR
and_expr = { not_expr ~ (AND ~ not_expr)* }
// NOT is a unary operator
// NOT* accepts any number of stacked NOT keywords in front of the comparison.
not_expr = { NOT* ~ comparison }
// Comparison operators
// Ordered choice. Every alternative begins with a concat_expr operand, so
// when an earlier alternative fails after its operand, the next alternative
// parses that operand again from the same position. comparison_expr must stay
// last: its operator part is optional, so it also matches a bare concat_expr.
comparison = {
is_null_expr
| in_expr
| regex_expr
| comparison_expr
}
// IS NULL / IS NOT NULL
is_null_expr = { concat_expr ~ IS ~ NOT? ~ NULL }
// IN list / NOT IN list
// The right-hand side must be a bracketed list_expr.
in_expr = { concat_expr ~ NOT? ~ IN ~ list_expr }
// Regex match: expr =~ 'pattern'
regex_expr = { concat_expr ~ regex_op ~ concat_expr }
regex_op = { "=~" }
// Binary comparison operators
// At most one comparison operator: a chain like a < b < c is not matched here.
comparison_expr = { concat_expr ~ (comp_op ~ concat_expr)? }
// String concatenation operator (between comparison and additive)
// Precedence: comparison < concat < additive
concat_expr = { additive ~ (concat_op ~ additive)* }
concat_op = { "||" }
// Two-character operators (neq, lte, gte) are listed before lt, gt and eq so
// that e.g. "<=" is not consumed as "<" with a stray "=" left over.
comp_op = {
neq | lte | gte | lt | gt | eq
| CONTAINS
| starts_with
| ends_with
}
// Use specific rules for operators to avoid ambiguity
neq = { "<>" | "!=" }
lte = { "<=" }
gte = { ">=" }
lt = { "<" }
gt = { ">" }
eq = { "=" }
// Two-keyword operators: STARTS WITH / ENDS WITH
starts_with = { STARTS ~ WITH }
ends_with = { ENDS ~ WITH }
// Additive level: + and -
additive = {
    multiplicative ~ (add_op ~ multiplicative)*
}
add_op = {
    "+" | "-"
}
// Multiplicative level: *, / and %
multiplicative = {
    power ~ (mul_op ~ power)*
}
mul_op = {
    "*" | "/" | "%"
}
// Power level: ^ (binds tighter than *, / and %)
power = {
    unary ~ (pow_op ~ unary)*
}
pow_op = {
    "^"
}
// Unary level: an optional single leading minus
unary = {
    neg_op? ~ postfix_expr
}
neg_op = {
    "-"
}
// Postfix expression: a primary followed by zero or more index/slice accesses
// e.g., list[0], list[-1], list[1..3], p.scores[0], [1,2,3][-1]
postfix_expr = {
    primary ~ index_access*
}
// Index or slice access: [expr] or [start..end].
// slice_range is tried first because a slice can also begin with an
// expression.
index_access = {
    "[" ~
    (slice_range | expression) ~
    "]"
}
// Slice range with both bounds optional: start..end, ..end, start.., or ..
// e.g., [1..3] [..3] [2..] [..]
slice_range = {
    slice_start? ~ ".." ~ slice_end?
}
// Slice start: a restricted expression form (atoms joined by + or -) so that
// the bound stops before ".." instead of consuming it.
slice_start = {
    slice_atom ~ (add_op ~ slice_atom)*
}
// Slice end: same restricted form as slice_start
slice_end = {
    slice_atom ~ (add_op ~ slice_atom)*
}
// Atoms allowed in slice bounds, each with an optional leading minus:
// function calls, literals, property access, variables, or a parenthesized
// full expression.
slice_atom = {
    neg_op? ~
    (function_call | literal | property_access | variable | "(" ~ expression ~ ")")
}
// Primary expressions (highest precedence)
// Ordered choice: the first alternative that matches wins, so the order below
// is significant. Keyword-led forms (CASE, EXISTS, REDUCE, ALL/ANY/NONE/SINGLE)
// come first. literal precedes function_call, property_access and variable.
// The bracketed forms are tried as pattern_comprehension, then
// list_comprehension, then list_expr.
primary = {
case_expr
| exists_expr
| reduce_expr // REDUCE function for list accumulation
| all_predicate // ALL(x IN list WHERE cond)
| any_predicate // ANY(x IN list WHERE cond)
| none_predicate // NONE(x IN list WHERE cond)
| single_predicate // SINGLE(x IN list WHERE cond)
| parameter
| geo_function // point.distance(...) etc. (spec-56, before function_call)
| geo_constructor // point({...}), polygon([...]) (spec-56, before function_call)
| distance_literal // 5km, 100m, 3.2mi (spec-56, before literal)
| literal
| function_call
| pattern_comprehension // [(pattern) | expr] - must come before list_comprehension
| list_comprehension // Must come before list_expr to take precedence
| property_access
| variable
| paren_expr
| list_expr
| map_expr // Map literal expression
}
// Pattern comprehension: [(pattern) | transform_expression]
// Matches a pattern and transforms each match into a list element. The
// pattern typically references variables from the outer scope. A WHERE
// filter between the pattern and the pipe is optional.
// e.g., [(p)-[:FRIEND]->(f) | f.name]
// e.g., [(p)-[r:KNOWS]->(other) | {person: other.name, since: r.since}]
pattern_comprehension = {
    "[" ~
    pattern ~ pattern_comp_where? ~ pipe_token ~ expression ~
    "]"
}
pattern_comp_where = {
    WHERE ~ expression
}
// Map literal expression: {key: value, key2: value2}; may be empty ({}).
// e.g., {name: 'Alice', age: 30}
// e.g., {personName: p.name, personAge: p.age}
map_expr = {
    "{" ~
    (map_entry ~ ("," ~ map_entry)*)? ~
    "}"
}
map_entry = {
    map_key ~ ":" ~ expression
}
// Map keys can be identifiers or string literals
map_key = {
    identifier | string
}
// Parameter reference: $paramName
// Used for parameterized queries with safe value injection. Atomic, so no
// whitespace is allowed between "$" and the name.
parameter = @{
    "$" ~ identifier
}
// List comprehension: transforms and optionally filters a list.
// Shapes: [x IN list | expression]
//         [x IN list WHERE condition | expression]
// e.g., [p IN people | p.name]
// e.g., [p IN people WHERE p.age > 18 | p.name]
// Note: pipe_token names the literal "|" so that it is not confused with
// pest's choice operator.
list_comprehension = {
    "[" ~
    identifier ~ IN ~ list_comp_source ~ list_comp_where? ~
    pipe_token ~ expression ~
    "]"
}
list_comp_where = {
    WHERE ~ list_comp_filter
}
pipe_token = {
    "|"
}
// Source expression of a comprehension: a restricted expression form that
// stops at WHERE or "|". This avoids greedy matching that would consume
// the "|" token.
list_comp_source = {
    list_comp_term ~ (list_comp_binop ~ list_comp_term)*
}
// Filter expression after WHERE; same restricted form as the source
list_comp_filter = {
    list_comp_term ~ (list_comp_binop ~ list_comp_term)*
}
// Binary operators allowed between comprehension terms (excludes "|")
list_comp_binop = {
    AND | OR | comp_op | add_op | mul_op | pow_op
}
// A comprehension term: optional leading NOT, a primary, and an optional
// trailing IS [NOT] NULL test
list_comp_term = {
    NOT? ~
    list_comp_primary ~
    (IS ~ NOT? ~ NULL)?
}
// Primaries allowed inside comprehension terms. This includes list literals
// such as [1, 2, 3] and nested comprehensions such as [x IN y | z].
list_comp_primary = {
    function_call |
    list_expr |
    list_comprehension |
    literal |
    property_access |
    variable |
    "(" ~ expression ~ ")"
}
// CASE expression (searched form only):
// CASE WHEN condition THEN result [WHEN ... THEN ...] [ELSE default] END
// At least one WHEN branch is required; ELSE is optional.
case_expr = {
    CASE ~
    case_when_clause+ ~
    case_else_clause? ~
    END
}
case_when_clause = {
    WHEN ~ expression ~ THEN ~ expression
}
case_else_clause = {
    ELSE ~ expression
}
// EXISTS expression: EXISTS { pattern } or NOT EXISTS { pattern }
// Also supports the explicit-MATCH subquery form:
// EXISTS { MATCH pattern [WHERE expression] }
// Used to check if a subpattern matches from the current element. The optional
// WHERE clause inside the braces filters the matched subgraph before the
// EXISTS predicate evaluates true.
// The MATCH form is tried before the bare-pattern form. Only the MATCH form
// accepts a WHERE clause.
// NOTE(review): in this file exists_expr is only reachable through primary,
// and every path to primary passes through not_expr, whose NOT* consumes any
// leading NOT first. The NOT? below therefore looks unreachable - confirm
// before relying on it in the AST builder.
exists_expr = { NOT? ~ EXISTS ~ "{" ~ (MATCH ~ pattern ~ (WHERE ~ expression)? | pattern) ~ "}" }
// ALL/ANY/NONE/SINGLE list predicates. All four share one shape,
//   KEYWORD(x IN list WHERE condition)
// and the WHERE condition is mandatory:
//   ALL    - true if all elements satisfy condition
//   ANY    - true if at least one element satisfies condition
//   NONE   - true if no elements satisfy condition
//   SINGLE - true if exactly one element satisfies condition
// The list is parsed with list_comp_source.
all_predicate = {
    ALL ~ "(" ~
    identifier ~ IN ~ list_comp_source ~ WHERE ~ expression ~
    ")"
}
any_predicate = {
    ANY_KW ~ "(" ~
    identifier ~ IN ~ list_comp_source ~ WHERE ~ expression ~
    ")"
}
none_predicate = {
    NONE_KW ~ "(" ~
    identifier ~ IN ~ list_comp_source ~ WHERE ~ expression ~
    ")"
}
single_predicate = {
    SINGLE ~ "(" ~
    identifier ~ IN ~ list_comp_source ~ WHERE ~ expression ~
    ")"
}
// REDUCE expression: accumulates a value over a list (fold/reduce operation)
// Shape: REDUCE(accumulator = initial, variable IN list | expression)
// e.g., REDUCE(total = 0, x IN prices | total + x)
// e.g., REDUCE(s = '', name IN names | s || name || ', ')
// Note: the initial value uses reduce_initial so that it does not consume the
// comma, and the list uses list_comp_source so that it does not consume the
// pipe token.
reduce_expr = {
    REDUCE ~ "(" ~ identifier ~ "=" ~ reduce_initial ~ "," ~
    identifier ~ IN ~ list_comp_source ~ pipe_token ~ expression ~ ")"
}
// Initial value in REDUCE, limited to forms that cannot consume the comma:
// function calls, literals, property access, variables, or a parenthesized
// full expression.
reduce_initial = {
    function_call |
    literal |
    property_access |
    variable |
    "(" ~ expression ~ ")"
}
// REDUCE keyword
// Case-insensitive and atomic. The lookahead treats "_" as a word character
// too, so REDUCE does not match the "reduce" in "reduce_total".
REDUCE = @{ ^"reduce" ~ !(ASCII_ALPHANUMERIC | "_") }
// Parenthesized expression: any full expression wrapped in ( )
paren_expr = {
    "(" ~ expression ~ ")"
}
// NOTE: function calls (count(*), sum(x), count(DISTINCT x), etc.) are matched by the function_call rule, defined after the geospatial rules below.
// ============================================================
// Geospatial Constructors and Functions (spec-56)
// ============================================================
// Constructors: point(...) or polygon(...)
geo_constructor = {
    geo_point_constructor | geo_polygon_constructor
}
// Point constructor: takes a single map literal.
// e.g., point({longitude: -122.4, latitude: 37.7}) or point({lon: -122.4, lat: 37.7})
geo_point_constructor = {
    "point" ~ "(" ~ map_expr ~ ")"
}
// Polygon constructor: takes a bracketed list of one or more number pairs.
// e.g., polygon([[-122.6, 37.6], [-122.3, 37.6], ...])
geo_polygon_constructor = {
    "polygon" ~ "(" ~
    "[" ~ geo_point_pair ~ ("," ~ geo_point_pair)* ~ "]" ~
    ")"
}
// One coordinate pair: [number, number]
geo_point_pair = {
    "[" ~ geo_number ~ "," ~ geo_number ~ "]"
}
// float is tried before integer
geo_number = {
    float | integer
}
// Geospatial functions in the point.* namespace:
// point.distance(expr, expr), point.within_bbox(...), point.within_distance(...)
geo_function = {
    geo_distance_fn | geo_within_bbox_fn | geo_within_distance_fn
}
// point.distance: exactly two argument expressions
geo_distance_fn = {
    "point" ~ "." ~ "distance" ~
    "(" ~ expression ~ "," ~ expression ~ ")"
}
// point.within_bbox: exactly four argument expressions
geo_within_bbox_fn = {
    "point" ~ "." ~ "within_bbox" ~
    "(" ~ expression ~ "," ~ expression ~ "," ~ expression ~ "," ~ expression ~ ")"
}
// point.within_distance: exactly three argument expressions
geo_within_distance_fn = {
    "point" ~ "." ~ "within_distance" ~
    "(" ~ expression ~ "," ~ expression ~ "," ~ expression ~ ")"
}
// Distance literal: 5km, 100m, 3.2mi, 10nmi
// Compound-atomic: no whitespace is allowed between the number and its unit.
distance_literal = ${
    (float | integer) ~ distance_unit
}
// Longer unit names come before the shorter names they contain
// ("nmi" and "mi" before "m").
distance_unit = @{
    "nmi" | "km" | "mi" | "m"
}
// Function call: name(args), e.g. count(*), sum(x), count(DISTINCT x)
// The argument list may be empty.
function_call = {
    identifier ~ "(" ~ function_args? ~ ")"
}
// Arguments: either a lone "*", or comma-separated expressions with an
// optional leading DISTINCT
function_args = {
    star | (DISTINCT? ~ expression ~ ("," ~ expression)*)
}
star = {
    "*"
}
// List expression: [1, 2, 3] or ['a', 'b']; the empty list [] is accepted
list_expr = {
    "[" ~
    (expression ~ ("," ~ expression)*)? ~
    "]"
}
// Property access: a variable, a dot, and a property name, e.g. n.name
property_access = {
    variable ~ "." ~ identifier
}
// Identifiers and variables
// Both rules accept either a backtick-escaped name or a plain name: an ASCII
// letter followed by ASCII letters, digits or "_".
// NOTE: Only `variable` excludes keywords (via the keyword rule); `identifier`
// performs no keyword check at all.
// The exclusion applies only when the keyword is the WHOLE word: the
// lookahead after `keyword` treats "_" as a word character, so names that
// merely start with a keyword, such as order_id or is_active, remain valid
// variables.
// Backtick-escaped form bypasses the keyword check, so users can write
// e.g. `desc` to use a reserved word as a variable name.
variable = @{
    ("`" ~ (!"`" ~ ANY)+ ~ "`")
  | (!(keyword ~ !(ASCII_ALPHANUMERIC | "_")) ~ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")*)
}
identifier = @{ ("`" ~ (!"`" ~ ANY)+ ~ "`") | (ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")*) }
// All keywords - used to exclude them from variable names
// This is the reserved-word list consulted by the `variable` rule.
// NOTE: YIELD and RTREE are defined in this file but are not listed here, so
// both remain usable as plain variable names.
// NOTE: ANY_KW and ANY_TYPE match the same text ("any"); both are listed.
keyword = {
MATCH | RETURN | WHERE | ORDER | BY | GROUP | LIMIT | OFFSET | SKIP | HAVING |
AS | AND | OR | NOT | TRUE | FALSE | NULL | ASC | DESC |
IN | IS | CONTAINS | STARTS | ENDS | WITH | DISTINCT | EXISTS |
CASE | WHEN | THEN | ELSE | END | UNION | ALL | OPTIONAL |
PATH | UNWIND | CREATE | SET | REMOVE | DELETE | DETACH | MERGE | ON | LET |
REDUCE | ANY_KW | NONE_KW | SINGLE | CALL | FOREACH |
// DDL keywords
TYPE | NODE | EDGE | GRAPH | FROM_KW | TO_KW | ALTER | DROP_KW | ADD |
ALLOW | ADDITIONAL | PROPERTIES | VALIDATION | SCHEMA | STRICT | CLOSED | WARN_KW | DEFAULT |
STRING_TYPE | INT_TYPE | FLOAT_TYPE | BOOL_TYPE | LIST_TYPE | MAP_TYPE | ANY_TYPE |
// Index DDL keywords
INDEX | UNIQUE
}
// ============================================================
// RETURN clause - one or more items, each with an optional alias
// ============================================================
// e.g., RETURN n, n.name, n.age AS personAge
// e.g., RETURN DISTINCT n.city
return_clause = {
    RETURN ~ DISTINCT? ~
    return_item ~ ("," ~ return_item)*
}
// A returned expression, optionally renamed with AS
return_item = {
    expression ~ (AS ~ identifier)?
}
// ============================================================
// GROUP BY clause
// ============================================================
// Specifies which expressions to group by when using aggregate functions.
// Non-aggregated expressions in RETURN should appear in GROUP BY.
// e.g., MATCH (p:player) RETURN p.position, count(*) GROUP BY p.position
group_by_clause = {
    GROUP ~ BY ~
    expression ~ ("," ~ expression)*
}
// ============================================================
// HAVING clause
// ============================================================
// Filters groups after aggregation. Can only be used with GROUP BY.
// e.g., MATCH (p:person) RETURN p.city, count(*) GROUP BY p.city HAVING count(*) > 5
having_clause = {
    HAVING ~ expression
}
// ============================================================
// ORDER BY clause
// ============================================================
// One or more comma-separated sort keys
order_clause = {
    ORDER ~ BY ~
    order_item ~ ("," ~ order_item)*
}
// A sort key with an optional ASC or DESC direction
order_item = {
    expression ~ (ASC | DESC)?
}
// ============================================================
// LIMIT clause
// ============================================================
// SKIP is an alias for OFFSET (Cypher compatibility).
// Two orderings are accepted:
//   LIMIT first:       LIMIT n, LIMIT n OFFSET m, LIMIT n SKIP m
//   OFFSET/SKIP first: OFFSET n, SKIP n, OFFSET n LIMIT m, SKIP n LIMIT m
limit_clause = {
    (LIMIT ~ integer ~ ((OFFSET | SKIP) ~ integer)?)
  | ((OFFSET | SKIP) ~ integer ~ (LIMIT ~ integer)?)
}
// ============================================================
// Literals
// ============================================================
// Any literal value. float is tried before integer, so the integer rule does
// not stop at the "." of a number such as 1.5.
literal = {
    string | float | integer | boolean | NULL
}
boolean = {
    TRUE | FALSE
}
// Strings are single- or double-quoted. The string rules are compound-atomic
// and the *_inner rules atomic, so whitespace inside the quotes is kept as
// content rather than skipped.
string = ${
    single_quoted_string | double_quoted_string
}
single_quoted_string = ${
    "'" ~ single_quoted_inner ~ "'"
}
// Content of a single-quoted string: any character except "'", or a doubled
// quote ('') which is consumed as part of the content
single_quoted_inner = @{
    ((!"'" ~ ANY) | "''")*
}
double_quoted_string = ${
    "\"" ~ double_quoted_inner ~ "\""
}
// Content of a double-quoted string: any character except the double quote,
// or a doubled double quote which is consumed as part of the content
double_quoted_inner = @{
    ((!"\"" ~ ANY) | "\"\"")*
}
// Integer: optional leading minus, then one or more ASCII digits
integer = @{
    "-"? ~ ASCII_DIGIT+
}
// Float: optional leading minus, with digits required on both sides of "."
float = @{
    "-"? ~ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+
}