// VelesQL Grammar - SQL-like query language for VelesDB
// Version 3.6.0 — FLUSH + ANALYZE, TRUNCATE, ALTER COLLECTION + DDL + graph mutations + DELETE FROM + multi-row INSERT + UPSERT
// Whitespace and comments
// Both rules are silent (`_`), so they never show up as pairs in the parse tree.
// pest skips WHITESPACE and COMMENT implicitly wherever `~` or a repetition
// appears inside a non-atomic rule; atomic (`@`) rules disable that skipping.
WHITESPACE = _{ " " | "\t" | "\r" | "\n" }
// Line comment: "--" followed by everything up to (not including) the next "\n".
COMMENT = _{ "--" ~ (!"\n" ~ ANY)* }
// LET clause: named score bindings evaluated once (VelesQL v1.10 Phase 3)
// Shape: LET <name> = <expression>. The right-hand side reuses the ORDER BY
// arithmetic grammar (order_by_arithmetic), so it accepts the same atoms and operators.
let_clause = { ^"LET" ~ identifier ~ "=" ~ order_by_arithmetic }
// Top-level entry point: one statement, anchored at both ends of the input (SOI … EOI),
// optionally preceded by LET clauses and optionally terminated by ";".
// Statement families: SELECT, MATCH, DML (INSERT/UPSERT/UPDATE/DELETE), DDL (CREATE/DROP),
// admin (ANALYZE/TRUNCATE/ALTER/FLUSH), TRAIN, and introspection (SHOW/DESCRIBE/EXPLAIN).
//
// The alternatives form a PEG ordered choice, so their order is significant:
// - introspection and admin statements come first; their leading keywords
//   (SHOW/DESCRIBE/EXPLAIN/ANALYZE/TRUNCATE/ALTER/FLUSH) clash with no other statement
// - select_edges_stmt precedes compound_query (both begin with SELECT; EDGES disambiguates)
// - insert_node_stmt precedes insert_edge_stmt (both begin with INSERT; NODE vs EDGE disambiguates)
// - insert_edge_stmt precedes insert_stmt, and delete_edge_stmt precedes delete_stmt
// - upsert_stmt follows insert_stmt; UPSERT and INSERT have distinct first tokens
query = {
    SOI ~
    let_clause* ~
    (
        show_collections_stmt
      | describe_stmt
      | explain_stmt
      | analyze_stmt
      | truncate_stmt
      | alter_collection_stmt
      | flush_stmt
      | match_query
      | select_edges_stmt
      | compound_query
      | train_stmt
      | create_index_stmt
      | create_collection_stmt
      | drop_index_stmt
      | drop_collection_stmt
      | insert_node_stmt
      | insert_edge_stmt
      | delete_edge_stmt
      | delete_stmt
      | insert_stmt
      | upsert_stmt
      | update_stmt
    ) ~
    ";"? ~
    EOI
}
// ──────────────────────────────────────────────────────────────
// Introspection statements (VelesQL v3.4)
// ──────────────────────────────────────────────────────────────

// Shared keyword helper used by DESCRIBE / ANALYZE / TRUNCATE below.
// The rule is atomic and ends with a negative lookahead, so COLLECTION only
// matches as a whole word: a name such as "collection_data" is NOT split into
// COLLECTION + "_data". flush_full_kw applies the same pattern to FULL.
collection_kw = @{
    ^"COLLECTION" ~ !(ASCII_ALPHANUMERIC | "_")
}

// SHOW COLLECTIONS — lists all collections in the database
show_collections_stmt = {
    ^"SHOW" ~ ^"COLLECTIONS"
}

// DESCRIBE [COLLECTION] name — returns collection metadata
describe_stmt = {
    ^"DESCRIBE" ~ collection_kw? ~ identifier
}

// EXPLAIN <query> — returns the query execution plan without executing.
// Only SELECT-based (compound) queries can be explained.
explain_stmt = {
    ^"EXPLAIN" ~ compound_query
}

// ANALYZE [COLLECTION] name — computes CBO statistics for query optimizer
analyze_stmt = {
    ^"ANALYZE" ~ collection_kw? ~ identifier
}

// TRUNCATE [COLLECTION] name — deletes all rows from a collection
truncate_stmt = {
    ^"TRUNCATE" ~ collection_kw? ~ identifier
}
// ALTER COLLECTION name SET (options) — modifies collection settings
// `&collection_kw` is a zero-width check that the COLLECTION keyword ends at a word
// boundary. Without it, "ALTER collection_data SET (...)" was accepted and read as
// COLLECTION followed by the collection name "_data".
// A lookahead emits no pair, so the inner pairs are still: identifier, create_option_list.
alter_collection_stmt = { ^"ALTER" ~ &collection_kw ~ ^"COLLECTION" ~ identifier ~ ^"SET" ~ "(" ~ create_option_list ~ ")" }
// FLUSH [FULL] [collection] — persist all or specific collection to disk
// flush_full_kw is a named rule so the parser can detect it in inner pairs.
// It is atomic and ends with a negative lookahead on identifier characters
// (ASCII alphanumerics and "_"), so the "full" prefix of a collection name such
// as "fulltext" or "full_index" is not taken as the FULL keyword.
flush_stmt = { ^"FLUSH" ~ flush_full_kw? ~ identifier? }
flush_full_kw = @{ ^"FULL" ~ !(ASCII_ALPHANUMERIC | "_") }
// MATCH query for graph pattern matching (EPIC-045 US-001)
// Syntax: MATCH pattern [WHERE condition] RETURN items [ORDER BY ...] [LIMIT n]
// RETURN is the only mandatory clause after the pattern; clauses must appear in this order.
match_query = { ^"MATCH" ~ graph_pattern ~ where_clause? ~ return_clause ~ order_by_clause? ~ limit_clause? }
// Graph pattern: (node)-[rel]->(node) chains
// One node, followed by zero or more (relationship, node) pairs.
graph_pattern = { node_pattern ~ (relationship_pattern ~ node_pattern)* }
// "()" is a valid anonymous node: node_spec is optional, and so is each of its parts.
node_pattern = { "(" ~ node_spec? ~ ")" }
// Parts must appear in this order: alias, :Label(s), @collection, {properties}.
node_spec = { node_alias? ~ node_labels? ~ collection_annotation? ~ node_properties? }
// Atomic: a plain identifier (letter or "_" first, then letters, digits, "_").
node_alias = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
// One or more labels, each introduced by ":" (e.g. :Person:Employee).
node_labels = { ":" ~ label_name ~ (":" ~ label_name)* }
label_name = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
// "@name" annotation attached to a node.
collection_annotation = { "@" ~ collection_ref }
collection_ref = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
// Property map { key: value, ... }; at least one property is required inside the braces.
node_properties = { "{" ~ property_list ~ "}" }
property_list = { property ~ ("," ~ property)* }
property = { identifier ~ ":" ~ property_value }
// Literal or $parameter. float is tried before integer so "1.5" is not cut short at "1".
property_value = { string | float | integer | boolean | null_value | parameter }
// Relationship pattern: -[r:TYPE*1..3]->
// Ordered choice: rel_outgoing must be tried before rel_undirected, because the
// closing "-" of the undirected form is a prefix of the "->" of the outgoing form.
relationship_pattern = { rel_incoming | rel_outgoing | rel_undirected }
// The bracketed rel_spec is optional in all three forms: "<--", "-->" and "--" are valid.
rel_incoming = { "<-" ~ rel_spec? ~ "-" }
rel_outgoing = { "-" ~ rel_spec? ~ "->" }
rel_undirected = { "-" ~ rel_spec? ~ "-" }
rel_spec = { "[" ~ rel_details? ~ "]" }
// Parts must appear in this order: alias, :TYPE(s), *range, {properties}.
rel_details = { rel_alias? ~ rel_types? ~ rel_range? ~ node_properties? }
rel_alias = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
// Alternative relationship types are separated by "|" (e.g. :KNOWS|LIKES).
rel_types = { ":" ~ rel_type_name ~ ("|" ~ rel_type_name)* }
rel_type_name = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
// Variable-length marker: "*" alone, or "*" followed by a range_spec.
rel_range = { "*" ~ range_spec? }
// Accepted forms: "m..n", "m..", "..n", or a single integer.
// NOTE(review): the single-integer form uses `integer`, which permits a leading "-";
// presumably negative hop counts are rejected downstream — confirm.
range_spec = { range_bound ~ ".." ~ range_bound? | ".." ~ range_bound | integer }
// Unsigned digits only.
range_bound = @{ ASCII_DIGIT+ }
// RETURN clause for MATCH queries
return_clause = { ^"RETURN" ~ return_item_list }
return_item_list = { return_item ~ ("," ~ return_item)* }
// Each returned expression may carry an alias: expr AS name.
return_item = { return_expr ~ (^"AS" ~ identifier)? }
// Ordered choice: similarity() and alias.property are tried before a bare identifier,
// since a bare identifier is a prefix of both.
return_expr = { similarity_return | property_access | identifier | "*" }
// Zero-argument similarity().
similarity_return = { ^"similarity" ~ "(" ~ ")" }
// Atomic: exactly two identifiers joined by "." with no whitespace around the dot.
property_access = @{ identifier ~ "." ~ identifier }
// Compound query: SELECT with zero or more UNION/INTERSECT/EXCEPT
// A single SELECT is a compound query with no set operators.
compound_query = { select_stmt ~ (set_operator ~ select_stmt)* }
// "UNION ALL" is listed before "UNION" so the two-word form is matched as a whole.
set_operator = { ^"UNION" ~ ^"ALL" | ^"UNION" | ^"INTERSECT" | ^"EXCEPT" }
// One parenthesised tuple of values: (v1, v2, ...). At least one value is required.
values_row = { "(" ~ value ~ ("," ~ value)* ~ ")" }

// INSERT INTO table (col1, col2) VALUES (v1, v2)[, (v3, v4)]
// The column list is mandatory; one or more comma-separated value rows follow VALUES.
insert_stmt = {
    ^"INSERT" ~ ^"INTO" ~ identifier
    ~ "(" ~ identifier ~ ("," ~ identifier)* ~ ")"
    ~ ^"VALUES" ~ values_row ~ ("," ~ values_row)*
}

// UPSERT INTO table (col1, col2) VALUES (v1, v2)[, (v3, v4)]
// Same shape as insert_stmt; only the leading keyword differs.
upsert_stmt = {
    ^"UPSERT" ~ ^"INTO" ~ identifier
    ~ "(" ~ identifier ~ ("," ~ identifier)* ~ ")"
    ~ ^"VALUES" ~ values_row ~ ("," ~ values_row)*
}
// A single "column = value" pair inside SET.
assignment = { identifier ~ "=" ~ value }

// UPDATE table SET col1 = v1, col2 = v2 [WHERE ...]
// At least one assignment is required; the WHERE clause is optional.
update_stmt = { ^"UPDATE" ~ identifier ~ ^"SET" ~ assignment ~ ("," ~ assignment)* ~ where_clause? }
// TRAIN QUANTIZER ON collection WITH (params)
// The WITH (...) option list is mandatory here.
train_stmt = { ^"TRAIN" ~ ^"QUANTIZER" ~ ^"ON" ~ identifier ~ with_clause }
// ──────────────────────────────────────────────────────────────
// DDL statements (VelesQL v3.3)
// ──────────────────────────────────────────────────────────────
// CREATE INDEX ON collection (field) — secondary metadata index
// Exactly one field is accepted between the parentheses.
create_index_stmt = { ^"CREATE" ~ ^"INDEX" ~ ^"ON" ~ identifier ~ "(" ~ identifier ~ ")" }
// DROP INDEX ON collection (field) — remove secondary metadata index
// Mirrors create_index_stmt: same shape, DROP instead of CREATE.
drop_index_stmt = { ^"DROP" ~ ^"INDEX" ~ ^"ON" ~ identifier ~ "(" ~ identifier ~ ")" }
// CREATE COLLECTION: vector (default), graph, or metadata-only
// Examples:
// CREATE COLLECTION docs (dimension = 768, metric = 'cosine');
// CREATE COLLECTION docs (dimension = 768, metric = 'cosine') WITH (storage = 'sq8', m = 16);
// CREATE GRAPH COLLECTION kg (dimension = 768, metric = 'cosine') SCHEMALESS;
// CREATE METADATA COLLECTION tags;
//
// `&collection_kw` is a zero-width check that the COLLECTION keyword ends at a word
// boundary. Without it, "CREATE collection_data (...)" was accepted and read as
// COLLECTION followed by the collection name "_data".
// A lookahead emits no pair, so the inner pairs are still:
// collection_kind_kw?, identifier, create_body?.
create_collection_stmt = {
    ^"CREATE" ~ collection_kind_kw? ~ &collection_kw ~ ^"COLLECTION" ~ identifier ~
    create_body?
}
// Optional kind keyword placed between CREATE and COLLECTION.
collection_kind_kw = { ^"GRAPH" | ^"METADATA" }
// Parenthesised option list, optionally followed by one suffix clause.
create_body = { "(" ~ create_option_list ~ ")" ~ create_suffix? }
create_option_list = { create_option ~ ("," ~ create_option)* }
create_option = { identifier ~ "=" ~ create_option_value }
// float is tried before integer so "1.5" is not cut short at "1"; boolean is tried
// before identifier so TRUE/FALSE are read as booleans rather than as names.
create_option_value = { string | float | integer | boolean | identifier }
// At most one suffix: SCHEMALESS, WITH SCHEMA (...), or WITH (...).
create_suffix = { schemaless_clause | with_schema_clause | with_clause }
schemaless_clause = { ^"SCHEMALESS" }
// WITH SCHEMA for typed graph collections
with_schema_clause = { ^"WITH" ~ ^"SCHEMA" ~ "(" ~ schema_def_list ~ ")" }
schema_def_list = { schema_def ~ ("," ~ schema_def)* }
schema_def = { node_type_def | edge_type_def }
// NODE TypeName (prop: TYPE, ...) — at least one property definition is required.
node_type_def = { ^"NODE" ~ identifier ~ "(" ~ property_def_list ~ ")" }
// EDGE TypeName FROM SourceType TO TargetType
edge_type_def = { ^"EDGE" ~ identifier ~ ^"FROM" ~ identifier ~ ^"TO" ~ identifier }
property_def_list = { property_def ~ ("," ~ property_def)* }
property_def = { identifier ~ ":" ~ type_name }
// Property types accepted in schema definitions (case-insensitive).
type_name = { ^"STRING" | ^"INTEGER" | ^"FLOAT" | ^"BOOLEAN" | ^"VECTOR" }
// DROP COLLECTION [IF EXISTS] name
// `&collection_kw` is a zero-width check that the COLLECTION keyword ends at a word
// boundary. Without it, "DROP collection_data" was accepted and read as COLLECTION
// followed by the name "_data", i.e. it targeted a different collection.
// A lookahead emits no pair, so the inner pairs are still: if_exists_clause?, identifier.
drop_collection_stmt = {
    ^"DROP" ~ &collection_kw ~ ^"COLLECTION" ~ if_exists_clause? ~ identifier
}
// Optional guard; its presence is visible to the parser as an if_exists_clause pair.
if_exists_clause = { ^"IF" ~ ^"EXISTS" }
// One "name = value" pair inside the edge (or node) field list.
edge_field = { identifier ~ "=" ~ value }

// Comma-separated edge fields; at least one is required.
edge_field_list = { edge_field ~ ("," ~ edge_field)* }

// Optional trailing clause: WITH PROPERTIES (key = val, ...)
// Property pairs reuse create_option_list.
edge_properties_clause = { ^"WITH" ~ ^"PROPERTIES" ~ "(" ~ create_option_list ~ ")" }

// INSERT EDGE INTO collection (source = N, target = N, label = 'L')
// [WITH PROPERTIES (key = val, ...)]
insert_edge_stmt = {
    ^"INSERT" ~ ^"EDGE" ~ ^"INTO" ~ identifier
    ~ "(" ~ edge_field_list ~ ")"
    ~ edge_properties_clause?
}
// DELETE FROM collection WHERE condition
// The WHERE clause is required by the grammar (no "?"), which prevents an
// accidental full-collection deletion.
delete_stmt = { ^"DELETE" ~ ^"FROM" ~ identifier ~ where_clause }

// DELETE EDGE edge_id FROM collection
// The edge id is parsed with the general `value` rule.
delete_edge_stmt = { ^"DELETE" ~ ^"EDGE" ~ value ~ ^"FROM" ~ identifier }
// SELECT EDGES FROM collection [WHERE source=N / target=N / label='X'] [LIMIT n]
// Queries edges from a graph collection; both the filter and the limit are optional.
select_edges_stmt = {
    ^"SELECT" ~ ^"EDGES" ~ ^"FROM" ~ identifier
    ~ where_clause?
    ~ limit_clause?
}

// INSERT NODE INTO collection (id = N, payload = '{"key": "value"}')
// Inserts or updates a node payload in a graph collection.
// The field list reuses edge_field_list ("name = value" pairs).
insert_node_stmt = {
    ^"INSERT" ~ ^"NODE" ~ ^"INTO" ~ identifier
    ~ "(" ~ edge_field_list ~ ")"
}
// SELECT statement.
// Only SELECT <list> FROM <source> is mandatory; every later clause is optional and,
// when present, must appear in the order listed below:
// JOIN(s), WHERE, GROUP BY, HAVING, ORDER BY, LIMIT, OFFSET, WITH, USING FUSION.
// EPIC-052 US-001: Added DISTINCT support
// EPIC-052 US-003: Added FROM alias support for Self-JOIN
select_stmt = {
    ^"SELECT" ~ distinct_modifier? ~ select_list ~
    ^"FROM" ~ from_clause ~
    join_clause* ~
    where_clause? ~
    group_by_clause? ~
    having_clause? ~
    order_by_clause? ~
    limit_clause? ~
    offset_clause? ~
    with_clause? ~
    using_fusion_clause?
}
// FROM clause with optional alias (EPIC-052 US-003: Self-JOIN support)
// Supports: FROM table, FROM table AS alias
// Note: "FROM table alias" without AS is intentionally NOT supported to avoid
// ambiguity with JOIN keywords. Use "FROM table AS alias" syntax.
// The FROM keyword itself is consumed by select_stmt; this rule starts at the table name.
from_clause = { identifier ~ from_alias? }
from_alias = { ^"AS" ~ identifier }
// DISTINCT modifier (EPIC-052 US-001)
// Atomic, with a negative lookahead on identifier characters, so that DISTINCT only
// matches as a whole word. Without the boundary, "SELECT distinct_count FROM t" was
// parsed as DISTINCT followed by a column named "_count" (same pattern as collection_kw).
// The rule has no inner rules, so making it atomic leaves the emitted pair unchanged.
distinct_modifier = @{ ^"DISTINCT" ~ !(ASCII_ALPHANUMERIC | "_") }
// USING FUSION clause for hybrid search (EPIC-040 US-005)
// The parenthesised option list is optional: a bare "USING FUSION" is valid.
using_fusion_clause = { ^"USING" ~ ^"FUSION" ~ fusion_options? }
fusion_options = { "(" ~ fusion_option_list ~ ")" }
fusion_option_list = { fusion_option ~ ("," ~ fusion_option)* }
fusion_option = { identifier ~ "=" ~ fusion_value }
// Option values are string or numeric literals; float is tried before integer.
fusion_value = { string | float | integer }
// GROUP BY clause (EPIC-017 US-003, EPIC-052 US-005: nested fields support)
group_by_clause = { ^"GROUP" ~ ^"BY" ~ group_by_list }
group_by_list = { group_by_column ~ ("," ~ group_by_column)* }
// Support both simple identifiers (including quoted) and nested paths
// Non-atomic, so each path segment is emitted as its own identifier pair.
group_by_column = { identifier ~ ("." ~ identifier)* }
// HAVING clause for filtering groups (EPIC-017 US-006)
// Supports both AND and OR logical operators
having_clause = { ^"HAVING" ~ having_condition }
// Flat list of terms joined by AND/OR; this grammar has no parentheses or
// precedence levels for HAVING.
having_condition = { having_term ~ (having_logical_op ~ having_term)* }
// BUG-6 FIX: Named rule so pest emits tokens for AND/OR operators
having_logical_op = { ^"AND" | ^"OR" }
// Each term compares an aggregate function call with a value.
having_term = { aggregate_function ~ compare_op ~ value }
// JOIN clause for cross-store queries (EPIC-031 US-004, extended EPIC-040 US-003)
// Shape: [join type] JOIN table [AS alias] (ON a.x = b.y | USING (col, ...))
join_clause = { join_type? ~ ^"JOIN" ~ identifier ~ alias_clause? ~ join_spec }
// OUTER is optional after LEFT/RIGHT/FULL; a bare JOIN has no join_type pair.
join_type = { (^"LEFT" ~ ^"OUTER"?) | (^"RIGHT" ~ ^"OUTER"?) | (^"FULL" ~ ^"OUTER"?) | ^"INNER" }
join_spec = { on_clause | using_clause }
on_clause = { ^"ON" ~ join_condition }
using_clause = { ^"USING" ~ "(" ~ identifier ~ ("," ~ identifier)* ~ ")" }
alias_clause = { ^"AS" ~ identifier }
// Only equality between two qualified columns is supported.
join_condition = { column_ref ~ "=" ~ column_ref }
// Atomic: "table.column" with no whitespace around the dot; both parts are required.
column_ref = @{ identifier ~ "." ~ identifier }
// ORDER BY clause (EPIC-040 US-002: supports columns, aggregates, similarity)
// EPIC-042: Extended with arithmetic expressions for custom scoring
order_by_clause = { ^"ORDER" ~ ^"BY" ~ order_by_item ~ ("," ~ order_by_item)* }
// The sort direction is optional; when omitted no sort_direction pair is emitted.
order_by_item = { order_by_expr ~ sort_direction? }
// Ordered choice: aggregate call, then "a.b" property access, then arithmetic.
// Plain columns and similarity() are reached through order_by_arithmetic.
order_by_expr = { aggregate_function | property_access | order_by_arithmetic }
// similarity(field, vector) with explicit arguments.
order_by_similarity = { ^"similarity" ~ "(" ~ similarity_field ~ "," ~ vector_value ~ ")" }
// similarity() zero-arg in ORDER BY: uses pre-computed search score
order_by_similarity_bare = { ^"similarity" ~ "(" ~ ")" }
sort_direction = { ^"DESC" | ^"ASC" }
// Arithmetic expressions for ORDER BY (EPIC-042)
// Precedence: additive (+, -) < multiplicative (*, /)
// arithmetic_atom subsumes: float, integer, similarity(), identifier
order_by_arithmetic = { arithmetic_additive }
arithmetic_additive = { arithmetic_multiplicative ~ ((add_op | sub_op) ~ arithmetic_multiplicative)* }
arithmetic_multiplicative = { arithmetic_atom ~ ((mul_op | div_op) ~ arithmetic_atom)* }
// Ordered choice: float before integer (so "1.5" is not cut short at "1"), both
// similarity forms before identifier (so "similarity" is not taken as a column name).
// A parenthesised sub-expression restarts at the additive level.
arithmetic_atom = { float | integer | order_by_similarity | order_by_similarity_bare | "(" ~ arithmetic_additive ~ ")" | identifier }
// Operators are named rules so that each one is emitted as its own pair.
add_op = { "+" }
sub_op = { "-" }
mul_op = { "*" }
div_op = { "/" }
// WITH clause for query-time configuration overrides
// Shape: WITH (key = value, ...); at least one option is required.
with_clause = { ^"WITH" ~ "(" ~ with_option_list ~ ")" }
with_option_list = { with_option ~ ("," ~ with_option)* }
with_option = { identifier ~ "=" ~ with_value }
// float is tried before integer; boolean is tried before identifier so TRUE/FALSE
// are read as booleans rather than as names.
with_value = { string | float | integer | boolean | identifier }
// Select list: * or mixed items (columns and/or aggregations for GROUP BY)
select_list = { "*" | select_item_list }
// Mixed select items: columns, aggregations, similarity(), and qualified wildcards
select_item_list = { select_item ~ ("," ~ select_item)* }
// Ordered choice: function-like items and "alias.*" are tried before a plain column,
// which is the final fallback.
select_item = { similarity_select | window_item | aggregation_item | qualified_wildcard | column }
// similarity() zero-arg in SELECT: SELECT similarity() [AS alias]
similarity_select = { ^"similarity" ~ "(" ~ ")" ~ (^"AS" ~ identifier)? }
// Qualified wildcard: SELECT alias.* (e.g., SELECT ctx.*)
qualified_wildcard = { identifier ~ "." ~ "*" }
// Aggregate functions: FIRST, COUNT, SUM, AVG, MIN, MAX
// An aggregate in the SELECT list may carry an alias: COUNT(*) AS n.
aggregation_item = { aggregate_function ~ (^"AS" ~ identifier)? }
// Exactly one argument is required between the parentheses.
aggregate_function = { aggregate_type ~ "(" ~ aggregate_arg ~ ")" }
aggregate_type = { ^"FIRST" | ^"COUNT" | ^"SUM" | ^"AVG" | ^"MIN" | ^"MAX" }
// Argument: "*", the score keyword, or a column path.
// The score alternative is guarded by a zero-width `&aggregate_score_kw` check so it only
// applies when "score" is the whole argument. Without the guard, PEG ordered choice
// committed to the "score" prefix of names such as "score_total" or "score.value", the
// closing ")" then failed to match, and e.g. SUM(score_total) was a parse error.
// A lookahead emits no pair, so aggregate_arg still has no inner pair for the keyword
// form and a column_name inner pair otherwise.
aggregate_arg = { "*" | &aggregate_score_kw ~ ^"score" | column_name }
// Lookahead-only helper: "score" not followed by an identifier character or a path dot.
aggregate_score_kw = @{ ^"score" ~ !(ASCII_ALPHANUMERIC | "_" | ".") }
// ──────────────────────────────────────────────────────────────
// Window functions (Issue #386 Phase 1)
// ──────────────────────────────────────────────────────────────
// Window function expression in SELECT list
// Example: ROW_NUMBER() OVER (PARTITION BY source ORDER BY score DESC) AS rn
window_item = { window_function_call ~ ^"OVER" ~ "(" ~ over_clause ~ ")" ~ (^"AS" ~ identifier)? }
// Window function call (zero-arg for Phase 1 ranking functions)
window_function_call = { window_function_name ~ "(" ~ ")" }
window_function_name = { ^"ROW_NUMBER" | ^"DENSE_RANK" | ^"RANK" }
// OVER clause: optional PARTITION BY + optional ORDER BY
// Both parts are optional, so an empty "OVER ()" is accepted.
over_clause = { partition_by_clause? ~ window_order_by_clause? }
// PARTITION BY: one or more columns
partition_by_clause = { ^"PARTITION" ~ ^"BY" ~ partition_by_list }
partition_by_list = { column_name ~ ("," ~ column_name)* }
// ORDER BY inside OVER(): separate rule to avoid ambiguity with outer ORDER BY
window_order_by_clause = { ^"ORDER" ~ ^"BY" ~ window_order_by_item ~ ("," ~ window_order_by_item)* }
window_order_by_item = { window_order_by_expr ~ sort_direction? }
// Only zero-arg similarity() or a column path; no arithmetic inside OVER().
window_order_by_expr = { order_by_similarity_bare | column_name }
// Plain column in the SELECT list, with an optional alias.
column = { column_name ~ (^"AS" ~ identifier)? }
// EPIC-052 US-005: Support nested field paths like metadata.source or profile.address.city
// Atomic: the whole dotted path is one token with no whitespace around the dots.
column_name = @{ identifier ~ ("." ~ identifier)* }
// WHERE clause
where_clause = { ^"WHERE" ~ or_expr }
// Conditions with precedence (OR < AND < primary)
// AND binds tighter than OR because and_expr is nested inside or_expr.
or_expr = { and_expr ~ (^"OR" ~ and_expr)* }
and_expr = { primary_expr ~ (^"AND" ~ primary_expr)* }
// Column path used on the left-hand side of predicates. Non-atomic, so each path
// segment is emitted as its own identifier pair.
where_column = { identifier ~ ("." ~ identifier)* }
// One predicate. Alternatives are tried top to bottom (PEG ordered choice) and the
// first one that matches wins, so the order below is significant. A parenthesised
// sub-expression restarts at or_expr; compare_expr is the final fallback.
primary_expr = {
"(" ~ or_expr ~ ")" |
not_expr |
graph_match_expr |
similarity_expr |
vector_fused_search |
sparse_vector_search |
vector_search |
match_expr |
in_expr |
between_expr |
like_expr |
is_null_expr |
contains_text_expr |
contains_expr |
geo_distance_expr |
geo_bbox_expr |
compare_expr
}
// NOT <predicate>: negates a single primary expression.
// The zero-width `&not_expr_kw` check requires NOT to end at a word boundary. Without it,
// a predicate on a column whose name merely starts with "not" was misparsed: for example
// "notes = 'x'" was read as NOT followed by the predicate "es = 'x'".
// A lookahead emits no pair, so the only inner pair is still primary_expr.
not_expr = { &not_expr_kw ~ ^"NOT" ~ primary_expr }
// Lookahead-only helper: NOT as a whole word (same pattern as collection_kw).
not_expr_kw = @{ ^"NOT" ~ !(ASCII_ALPHANUMERIC | "_") }
// Graph predicate inside SELECT WHERE clause:
// WHERE ... AND MATCH (a)-[:REL]->(b)
// Reuses the graph_pattern rule of the standalone MATCH query.
graph_match_expr = { ^"MATCH" ~ graph_pattern }
// Similarity function: similarity(field, vector) op threshold
// Used in hybrid graph-vector queries
// Note: threshold accepts both float (0.8) and integer (1) for user convenience
similarity_expr = {
^"similarity" ~ "(" ~ similarity_field ~ "," ~ vector_value ~ ")" ~ compare_op ~ numeric_threshold
}
// float is tried before integer so "0.8" is not cut short at "0".
numeric_threshold = { float | integer }
// Atomic field path: starts with a letter or "_"; later characters may include ".".
similarity_field = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_" | ".")* }
// Sparse vector search: vector SPARSE_NEAR sparse_value [USING 'index-name']
sparse_vector_search = {
^"vector" ~ ^"SPARSE_NEAR" ~ sparse_value ~ (^"USING" ~ string)?
}
// Sparse value: inline literal {12: 0.8, 45: 0.3} or bind parameter $sv
sparse_value = { sparse_literal | parameter }
// At least one entry is required between the braces.
sparse_literal = { "{" ~ sparse_entry ~ ("," ~ sparse_entry)* ~ "}" }
// index: weight. The weight must be written as a float (e.g. 1.0); a bare integer
// weight is not accepted by this rule.
sparse_entry = { integer ~ ":" ~ float }
// Vector search: vector NEAR vector_value
// Note: Distance metric is defined at collection creation, not per-query
vector_search = {
^"vector" ~ ^"NEAR" ~ vector_value
}
// Multi-vector fusion search: vector NEAR_FUSED [v1, v2, ...] USING FUSION 'strategy' (params)
// The USING FUSION part is optional.
vector_fused_search = {
^"vector" ~ ^"NEAR_FUSED" ~ vector_array ~ fusion_clause?
}
// Bracketed list of one or more vectors (each a literal or a $parameter).
vector_array = { "[" ~ vector_value ~ ("," ~ vector_value)* ~ "]" }
// Unlike using_fusion_clause, the strategy here is a mandatory string literal.
fusion_clause = { ^"USING" ~ ^"FUSION" ~ fusion_strategy ~ fusion_params? }
fusion_strategy = { string }
fusion_params = { "(" ~ fusion_param_list ~ ")" }
fusion_param_list = { fusion_param ~ ("," ~ fusion_param)* }
fusion_param = { identifier ~ "=" ~ fusion_param_value }
// Numeric only; float is tried before integer.
fusion_param_value = { float | integer }
// A vector is either an inline literal or a bind parameter.
vector_value = { vector_literal | parameter }
vector_component = { float | integer }
// Inline vector: [c1, c2, ...] with at least one component.
vector_literal = { "[" ~ vector_component ~ ("," ~ vector_component)* ~ "]" }
// Full-text search: column MATCH 'query'
match_expr = { where_column ~ ^"MATCH" ~ string }
// IN / NOT IN expression: column [NOT] IN (value, ...)
// The negated form emits a not_kw pair, which is how NOT IN is told apart from IN.
in_expr = { where_column ~ (not_kw ~ ^"IN" | ^"IN") ~ "(" ~ value_list ~ ")" }
// One or more comma-separated values; an empty list "()" is not accepted.
value_list = { value ~ ("," ~ value)* }
// BETWEEN expression: column BETWEEN value AND value
between_expr = { where_column ~ ^"BETWEEN" ~ value ~ ^"AND" ~ value }
// LIKE / ILIKE expression: column LIKE 'pattern' or column ILIKE 'pattern'
like_expr = { where_column ~ like_op ~ string }
// ILIKE is listed before LIKE; like_op is a named rule, so the operator is emitted as a pair.
like_op = { ^"ILIKE" | ^"LIKE" }
// CONTAINS_TEXT expression: strict text substring filter
contains_text_expr = { where_column ~ ^"CONTAINS_TEXT" ~ string }
// CONTAINS expression: column CONTAINS value | column CONTAINS ANY/ALL (values)
// The ALL and ANY list forms are tried before the single-value form.
contains_expr = {
where_column ~ ^"CONTAINS" ~ ^"ALL" ~ "(" ~ value_list ~ ")" |
where_column ~ ^"CONTAINS" ~ ^"ANY" ~ "(" ~ value_list ~ ")" |
where_column ~ ^"CONTAINS" ~ value
}
// GEO_DISTANCE expression: GEO_DISTANCE(column, lat, lng) op meters
// Numeric literal shared by both geo predicates; float is tried before integer.
geo_number = { float | integer }
geo_distance_expr = {
^"GEO_DISTANCE" ~ "(" ~ column_name ~ "," ~ geo_number ~ "," ~ geo_number ~ ")" ~ compare_op ~ geo_number
}
// GEO_BBOX expression: GEO_BBOX(column, lat_min, lng_min, lat_max, lng_max)
// A boolean predicate on its own: no comparison operator follows the call.
geo_bbox_expr = {
^"GEO_BBOX" ~ "(" ~ column_name ~ "," ~ geo_number ~ "," ~ geo_number ~ "," ~ geo_number ~ "," ~ geo_number ~ ")"
}
// IS NULL / IS NOT NULL
// The negated form emits a not_kw pair.
is_null_expr = { where_column ~ ^"IS" ~ not_kw? ~ ^"NULL" }
// Named rule so that the presence of NOT is visible in the parse tree.
not_kw = { ^"NOT" }
// Comparison: column op value
compare_expr = { where_column ~ compare_op ~ value }
// Two-character operators are listed before their one-character prefixes
// (">=" before ">", "<=" and "<>" before "<") so the longer operator wins.
compare_op = { ">=" | "<=" | "<>" | "!=" | "=" | ">" | "<" }
// LIMIT and OFFSET
// NOTE(review): both use `integer`, which permits a leading "-"; presumably negative
// values are rejected after parsing — confirm.
limit_clause = { ^"LIMIT" ~ integer }
offset_clause = { ^"OFFSET" ~ integer }
// Values - EPIC-038: Temporal, EPIC-039: Subquery
// Ordered choice: float is tried before integer so "1.5" is not cut short at "1".
// Identifiers are not values, so a column cannot appear on the right-hand side.
value = { subquery_expr | temporal_expr | float | integer | string | boolean | null_value | parameter }
// Scalar subquery expression (EPIC-039)
// A restricted SELECT in parentheses: single table, no alias, JOIN, ORDER BY or OFFSET.
subquery_expr = { "(" ~ ^"SELECT" ~ select_list ~ ^"FROM" ~ identifier ~ where_clause? ~ group_by_clause? ~ having_clause? ~ limit_clause? ~ ")" }
// Bind parameter: "$" immediately followed by an identifier (atomic, no whitespace).
parameter = @{ "$" ~ identifier }
null_value = { ^"NULL" }
boolean = { ^"TRUE" | ^"FALSE" }
// Temporal expressions (EPIC-038)
// temporal_arithmetic is tried first because it begins with NOW() or INTERVAL itself.
temporal_expr = { temporal_arithmetic | now_function | interval_expr }
// Exactly one binary operation; chains such as a + b - c are not covered by this rule.
temporal_arithmetic = { (now_function | interval_expr) ~ temporal_op ~ (now_function | interval_expr) }
temporal_op = { "+" | "-" }
now_function = { ^"NOW" ~ "(" ~ ")" }
// INTERVAL followed by a string literal; the string's content is not checked by the grammar.
interval_expr = { ^"INTERVAL" ~ string }
// Literals
// All three are atomic: no whitespace or comments are skipped inside a literal.
// Single-quoted string; a doubled quote ('') inside stands for one literal quote.
string = @{ "'" ~ (!"'" ~ ANY | "''")* ~ "'" }
// Optional leading "-", then one or more digits.
integer = @{ "-"? ~ ASCII_DIGIT+ }
// Digits are required on both sides of the "."; there is no exponent form.
float = @{ "-"? ~ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ }
// Identifiers - EPIC-044 US-005: Support quoted identifiers for reserved keywords
// Supports: regular identifiers, backtick-quoted (`select`), double-quoted ("from")
// The quoted forms are tried first; both start with a quote character that a regular
// identifier cannot begin with.
identifier = { quoted_identifier | regular_identifier }
// Letter or "_" first, then letters, digits or "_". Keywords are not excluded here.
regular_identifier = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
// Quoted identifiers for escaping reserved keywords
// Backtick style: `select`, `from`, `order`
backtick_identifier = @{ "`" ~ backtick_inner ~ "`" }
// At least one character; a backtick cannot appear inside (no escape form).
backtick_inner = @{ (!"`" ~ ANY)+ }
// Double-quote style (SQL standard): "select", "from", "order"
// Supports escaped quotes: "col""name" -> col"name
doublequote_identifier = @{ "\"" ~ doublequote_inner ~ "\"" }
// The escape alternative is tried first so a doubled quote does not end the identifier.
// Uses `*`, so the empty identifier "" is accepted by the grammar.
doublequote_inner = @{ (doublequote_escape | !("\"") ~ ANY)* }
doublequote_escape = @{ "\"\"" }
quoted_identifier = { backtick_identifier | doublequote_identifier }