// velesdb-core 1.15.0

// VelesQL Grammar - SQL-like query language for VelesDB
// Version 3.6.0 — FLUSH + ANALYZE, TRUNCATE, ALTER COLLECTION + DDL + graph mutations + DELETE FROM + multi-row INSERT + UPSERT

// Implicit skipping rules. Both are silent (`_`), so neither shows up in the
// parse tree.
// WHITESPACE: one space, tab, carriage return, or line feed.
WHITESPACE = _{
    " " | "\t" | "\r" | "\n"
}
// COMMENT: `--` followed by every character up to (not including) the next "\n".
// NOTE(review): because `--` opens a comment wherever implicit skipping runs,
// a bracket-less relationship written as `(a)--(b)` or `(a)-->(b)` looks like
// it would be consumed as a comment — confirm against the parser tests.
COMMENT = _{
    "--" ~ (!"\n" ~ ANY)*
}

// LET clause (VelesQL v1.10 Phase 3): a named score binding, evaluated once.
// Shape: LET <identifier> = <order_by_arithmetic>
let_clause = {
    ^"LET" ~ identifier ~ "=" ~ order_by_arithmetic
}

// Top-level entry rule. Accepts SELECT, MATCH, DML (INSERT/UPSERT/UPDATE/DELETE),
// DDL (CREATE/DROP), admin (ANALYZE/TRUNCATE/ALTER/FLUSH) and introspection
// (SHOW/DESCRIBE/EXPLAIN) statements.
// Shape: SOI, zero or more LET clauses, exactly one statement, an optional
// trailing ";", then EOI.
// The alternatives form a PEG ordered choice, so their order is significant:
//   - select_edges_stmt precedes compound_query (both open with SELECT; EDGES disambiguates)
//   - insert_node_stmt precedes insert_edge_stmt (both open with INSERT; NODE vs EDGE disambiguates)
//   - insert_edge_stmt precedes insert_stmt, and delete_edge_stmt precedes delete_stmt
//   - upsert_stmt follows insert_stmt; UPSERT and INSERT have distinct first tokens
//   - introspection/admin statements lead because SHOW/DESCRIBE/EXPLAIN/ANALYZE/
//     TRUNCATE/ALTER/FLUSH do not conflict with any other first token
query = {
    SOI ~
    let_clause* ~
    (
        show_collections_stmt
      | describe_stmt
      | explain_stmt
      | analyze_stmt
      | truncate_stmt
      | alter_collection_stmt
      | flush_stmt
      | match_query
      | select_edges_stmt
      | compound_query
      | train_stmt
      | create_index_stmt
      | create_collection_stmt
      | drop_index_stmt
      | drop_collection_stmt
      | insert_node_stmt
      | insert_edge_stmt
      | delete_edge_stmt
      | delete_stmt
      | insert_stmt
      | upsert_stmt
      | update_stmt
    ) ~
    ";"? ~
    EOI
}

// ──────────────────────────────────────────────────────────────
// Introspection statements (VelesQL v3.4)
// ──────────────────────────────────────────────────────────────

// SHOW COLLECTIONS — lists every collection in the database.
show_collections_stmt = {
    ^"SHOW" ~ ^"COLLECTIONS"
}

// DESCRIBE [COLLECTION] name — returns metadata for one collection.
// The optional keyword is matched through collection_kw, whose word-boundary
// lookahead stops a name such as "collection_data" from being read as
// COLLECTION followed by "_data" (flush_full_kw uses the same pattern).
describe_stmt = {
    ^"DESCRIBE" ~ collection_kw? ~ identifier
}

// EXPLAIN <query> — returns the execution plan without running the query.
// The explained query must be a compound_query (SELECT with optional set operators).
explain_stmt = {
    ^"EXPLAIN" ~ compound_query
}

// ANALYZE [COLLECTION] name — computes CBO statistics for the query optimizer.
analyze_stmt = {
    ^"ANALYZE" ~ collection_kw? ~ identifier
}

// TRUNCATE [COLLECTION] name — deletes all rows from a collection.
truncate_stmt = {
    ^"TRUNCATE" ~ collection_kw? ~ identifier
}

// COLLECTION keyword with a word boundary. The rule is atomic and ends with a
// negative lookahead that rejects a following ASCII alphanumeric or "_", so
// "collection_data" is not split into COLLECTION + "_data".
// flush_full_kw follows the same pattern.
collection_kw = @{
    ^"COLLECTION" ~ !(ASCII_ALPHANUMERIC | "_")
}

// ALTER COLLECTION name SET (options) — modifies collection settings.
// The option list reuses create_option_list (key = value pairs).
alter_collection_stmt = {
    ^"ALTER" ~ ^"COLLECTION" ~ identifier ~
    ^"SET" ~ "(" ~ create_option_list ~ ")"
}

// FLUSH [FULL] [collection] — persists all collections, or one named
// collection, to disk.
flush_stmt = {
    ^"FLUSH" ~ flush_full_kw? ~ identifier?
}
// FULL is a named rule so the parser can detect it among the inner pairs.
// The rule is atomic; its negative lookahead rejects a following ASCII
// alphanumeric or "_", so the "full" prefix of an identifier such as
// "fulltext" is not taken as the FULL keyword.
flush_full_kw = @{
    ^"FULL" ~ !(ASCII_ALPHANUMERIC | "_")
}

// MATCH query for graph pattern matching (EPIC-045 US-001).
// Syntax: MATCH pattern [WHERE condition] RETURN items [ORDER BY ...] [LIMIT n]
// Only the pattern and the RETURN clause are mandatory.
match_query = {
    ^"MATCH" ~ graph_pattern ~ where_clause? ~ return_clause ~ order_by_clause? ~ limit_clause?
}

// Graph pattern: a node followed by any number of (relationship, node) pairs,
// e.g. (node)-[rel]->(node).
graph_pattern = {
    node_pattern ~ (relationship_pattern ~ node_pattern)*
}

// A node is a pair of parentheses around an optional node_spec.
node_pattern = {
    "(" ~ node_spec? ~ ")"
}

// Every part of a node spec is optional; when present the parts appear in this
// order: alias, labels, @collection annotation, {properties}.
node_spec = {
    node_alias? ~ node_labels? ~ collection_annotation? ~ node_properties?
}

// Alias: ASCII letter or "_" first, then ASCII letters, digits or "_" (atomic).
node_alias = @{
    (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")*
}

// One or more labels, each introduced by ":".
node_labels = {
    ":" ~ label_name ~ (":" ~ label_name)*
}
label_name = @{
    (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")*
}

// "@" followed by a collection reference.
collection_annotation = {
    "@" ~ collection_ref
}
collection_ref = @{
    (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")*
}

// Braced, comma-separated, non-empty list of `name: value` properties.
node_properties = {
    "{" ~ property_list ~ "}"
}
property_list = {
    property ~ ("," ~ property)*
}
property = {
    identifier ~ ":" ~ property_value
}
// float is tried before integer so a decimal literal is not cut at the ".".
property_value = {
    string | float | integer | boolean | null_value | parameter
}

// Relationship pattern, e.g. -[r:TYPE*1..3]->
// Ordered choice: incoming (<-...-), then outgoing (-...->), then undirected (-...-).
relationship_pattern = {
    rel_incoming | rel_outgoing | rel_undirected
}
rel_incoming = {
    "<-" ~ rel_spec? ~ "-"
}
rel_outgoing = {
    "-" ~ rel_spec? ~ "->"
}
rel_undirected = {
    "-" ~ rel_spec? ~ "-"
}

// Bracketed relationship details; the brackets may be empty.
rel_spec = {
    "[" ~ rel_details? ~ "]"
}
// All parts optional, in this order: alias, types, range, {properties}.
rel_details = {
    rel_alias? ~ rel_types? ~ rel_range? ~ node_properties?
}
rel_alias = @{
    (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")*
}

// ":" then one or more type names separated by "|".
rel_types = {
    ":" ~ rel_type_name ~ ("|" ~ rel_type_name)*
}
rel_type_name = @{
    (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")*
}

// "*" with an optional range: `a..b`, `a..`, `..b`, or a single integer.
rel_range = {
    "*" ~ range_spec?
}
range_spec = {
    (range_bound ~ ".." ~ range_bound?)
  | (".." ~ range_bound)
  | integer
}
// Unsigned run of ASCII digits.
range_bound = @{
    ASCII_DIGIT+
}

// RETURN clause of a MATCH query: RETURN item [AS alias], ...
return_clause = {
    ^"RETURN" ~ return_item_list
}
return_item_list = {
    return_item ~ ("," ~ return_item)*
}
return_item = {
    return_expr ~ (^"AS" ~ identifier)?
}

// Ordered choice: similarity(), then alias.property, then a bare identifier,
// then "*". property_access must precede identifier, otherwise the identifier
// alternative would match only the part before the ".".
return_expr = {
    similarity_return | property_access | identifier | "*"
}
// Zero-argument similarity() call.
similarity_return = {
    ^"similarity" ~ "(" ~ ")"
}
// Atomic: no whitespace is allowed around the ".".
property_access = @{
    identifier ~ "." ~ identifier
}

// Compound query: one SELECT followed by zero or more
// (UNION [ALL] | INTERSECT | EXCEPT) SELECT pairs.
compound_query = {
    select_stmt ~ (set_operator ~ select_stmt)*
}
// UNION ALL is listed before plain UNION so the longer form is tried first.
set_operator = {
    (^"UNION" ~ ^"ALL")
  | ^"UNION"
  | ^"INTERSECT"
  | ^"EXCEPT"
}

// INSERT statement: INSERT INTO table (col1, col2) VALUES (v1, v2)[, (v3, v4)]
// The column list and the list of value rows must each have at least one entry.
insert_stmt = {
    ^"INSERT" ~ ^"INTO" ~ identifier ~
    "(" ~ identifier ~ ("," ~ identifier)* ~ ")" ~
    ^"VALUES" ~ values_row ~ ("," ~ values_row)*
}

// UPSERT statement: UPSERT INTO table (col1, col2) VALUES (v1, v2)[, (v3, v4)]
// Same shape as insert_stmt apart from the leading keyword.
upsert_stmt = {
    ^"UPSERT" ~ ^"INTO" ~ identifier ~
    "(" ~ identifier ~ ("," ~ identifier)* ~ ")" ~
    ^"VALUES" ~ values_row ~ ("," ~ values_row)*
}

// One parenthesised, comma-separated, non-empty row of values.
values_row = {
    "(" ~ value ~ ("," ~ value)* ~ ")"
}

// UPDATE statement: UPDATE table SET col1 = v1, col2 = v2 [WHERE ...]
// At least one assignment is required; the WHERE clause is optional.
update_stmt = {
    ^"UPDATE" ~ identifier ~ ^"SET" ~ assignment ~ ("," ~ assignment)* ~ where_clause?
}
// A single `column = value` pair.
assignment = {
    identifier ~ "=" ~ value
}

// TRAIN statement: TRAIN QUANTIZER ON collection WITH (params)
// The WITH clause is mandatory here.
train_stmt = {
    ^"TRAIN" ~ ^"QUANTIZER" ~ ^"ON" ~ identifier ~ with_clause
}

// ──────────────────────────────────────────────────────────────
// DDL statements (VelesQL v3.3)
// ──────────────────────────────────────────────────────────────

// CREATE INDEX ON collection (field) — adds a secondary metadata index.
// Exactly one field is accepted inside the parentheses.
create_index_stmt = {
    ^"CREATE" ~ ^"INDEX" ~ ^"ON" ~ identifier ~
    "(" ~ identifier ~ ")"
}

// DROP INDEX ON collection (field) — removes a secondary metadata index.
drop_index_stmt = {
    ^"DROP" ~ ^"INDEX" ~ ^"ON" ~ identifier ~
    "(" ~ identifier ~ ")"
}

// CREATE COLLECTION — vector (no kind keyword), GRAPH, or METADATA.
// Examples:
//   CREATE COLLECTION docs (dimension = 768, metric = 'cosine');
//   CREATE COLLECTION docs (dimension = 768, metric = 'cosine') WITH (storage = 'sq8', m = 16);
//   CREATE GRAPH COLLECTION kg (dimension = 768, metric = 'cosine') SCHEMALESS;
//   CREATE METADATA COLLECTION tags;
create_collection_stmt = {
    ^"CREATE" ~ collection_kind_kw? ~ ^"COLLECTION" ~ identifier ~ create_body?
}
// Optional kind keyword placed between CREATE and COLLECTION.
collection_kind_kw = {
    ^"GRAPH" | ^"METADATA"
}

// Parenthesised option list, optionally followed by a single suffix.
create_body = {
    "(" ~ create_option_list ~ ")" ~ create_suffix?
}
create_option_list = {
    create_option ~ ("," ~ create_option)*
}
create_option = {
    identifier ~ "=" ~ create_option_value
}
// float is tried before integer; a bare identifier is the last resort.
create_option_value = {
    string | float | integer | boolean | identifier
}

// Suffix after the option list: SCHEMALESS, WITH SCHEMA (...), or WITH (...).
create_suffix = {
    schemaless_clause | with_schema_clause | with_clause
}
schemaless_clause = {
    ^"SCHEMALESS"
}

// WITH SCHEMA (...) for typed graph collections: a comma-separated, non-empty
// list of NODE and EDGE type definitions.
with_schema_clause = {
    ^"WITH" ~ ^"SCHEMA" ~ "(" ~ schema_def_list ~ ")"
}
schema_def_list = {
    schema_def ~ ("," ~ schema_def)*
}
schema_def = {
    node_type_def | edge_type_def
}

// NODE name (prop: TYPE, ...)
node_type_def = {
    ^"NODE" ~ identifier ~ "(" ~ property_def_list ~ ")"
}
// EDGE name FROM node_type TO node_type
edge_type_def = {
    ^"EDGE" ~ identifier ~ ^"FROM" ~ identifier ~ ^"TO" ~ identifier
}

property_def_list = {
    property_def ~ ("," ~ property_def)*
}
property_def = {
    identifier ~ ":" ~ type_name
}
// Property types, matched case-insensitively.
type_name = {
    ^"STRING" | ^"INTEGER" | ^"FLOAT" | ^"BOOLEAN" | ^"VECTOR"
}

// DROP COLLECTION [IF EXISTS] name
drop_collection_stmt = {
    ^"DROP" ~ ^"COLLECTION" ~ if_exists_clause? ~ identifier
}
// Named rule so the IF EXISTS guard is visible in the parse tree.
if_exists_clause = {
    ^"IF" ~ ^"EXISTS"
}

// INSERT EDGE INTO collection (source = N, target = N, label = 'L')
//   [WITH PROPERTIES (key = val, ...)]
insert_edge_stmt = {
    ^"INSERT" ~ ^"EDGE" ~ ^"INTO" ~ identifier ~ "(" ~ edge_field_list ~ ")" ~ edge_properties_clause?
}
// Comma-separated, non-empty list of `name = value` fields.
edge_field_list = {
    edge_field ~ ("," ~ edge_field)*
}
edge_field = {
    identifier ~ "=" ~ value
}
// Optional property bag; reuses create_option_list for its key = value pairs.
edge_properties_clause = {
    ^"WITH" ~ ^"PROPERTIES" ~ "(" ~ create_option_list ~ ")"
}

// DELETE FROM collection WHERE condition
// The WHERE clause is required, which prevents an accidental
// full-collection deletion.
delete_stmt = {
    ^"DELETE" ~ ^"FROM" ~ identifier ~
    where_clause
}

// DELETE EDGE edge_id FROM collection
// The edge id is parsed with the general `value` rule.
delete_edge_stmt = {
    ^"DELETE" ~ ^"EDGE" ~ value ~
    ^"FROM" ~ identifier
}

// SELECT EDGES FROM collection [WHERE source=N / target=N / label='X'] [LIMIT n]
// Reads edges from a graph collection; both the filter and the limit are optional.
select_edges_stmt = {
    ^"SELECT" ~ ^"EDGES" ~ ^"FROM" ~ identifier ~
    where_clause? ~
    limit_clause?
}

// INSERT NODE INTO collection (id = N, payload = '{"key": "value"}')
// Inserts or updates a node payload in a graph collection. The field list
// reuses edge_field_list (`name = value` pairs).
insert_node_stmt = {
    ^"INSERT" ~ ^"NODE" ~ ^"INTO" ~ identifier ~
    "(" ~ edge_field_list ~ ")"
}

// SELECT statement.
// Mandatory: SELECT <list> FROM <source>. Every other clause is optional and,
// when present, must appear in exactly this order:
//   JOIN*, WHERE, GROUP BY, HAVING, ORDER BY, LIMIT, OFFSET, WITH, USING FUSION
// EPIC-052 US-001: DISTINCT support
// EPIC-052 US-003: FROM alias support for Self-JOIN
select_stmt = {
    ^"SELECT" ~ distinct_modifier? ~ select_list ~
    ^"FROM" ~ from_clause ~
    join_clause* ~
    where_clause? ~
    group_by_clause? ~
    having_clause? ~
    order_by_clause? ~
    limit_clause? ~
    offset_clause? ~
    with_clause? ~
    using_fusion_clause?
}

// FROM source with an optional alias (EPIC-052 US-003: Self-JOIN support).
// Accepted: FROM table, FROM table AS alias
// "FROM table alias" (no AS) is deliberately NOT accepted, because a bare
// alias would be ambiguous with the JOIN keywords. Write "FROM table AS alias".
from_clause = {
    identifier ~ from_alias?
}
from_alias = {
    ^"AS" ~ identifier
}

// DISTINCT modifier (EPIC-052 US-001)
// Atomic with a word-boundary lookahead: the keyword must not be followed by
// an ASCII alphanumeric or "_". Without the boundary, a column such as
// `distinct_id` was read as DISTINCT followed by the column `_id`.
// Same pattern as collection_kw and flush_full_kw; the emitted pair and its
// text are unchanged for real DISTINCT keywords.
distinct_modifier = @{ ^"DISTINCT" ~ !(ASCII_ALPHANUMERIC | "_") }

// USING FUSION [(key = value, ...)] — statement-level hybrid search fusion
// (EPIC-040 US-005). The parenthesised option list is optional.
using_fusion_clause = {
    ^"USING" ~ ^"FUSION" ~ fusion_options?
}
fusion_options = {
    "(" ~ fusion_option_list ~ ")"
}
fusion_option_list = {
    fusion_option ~ ("," ~ fusion_option)*
}
fusion_option = {
    identifier ~ "=" ~ fusion_value
}
// float is tried before integer so a decimal literal is not cut at the ".".
fusion_value = {
    string | float | integer
}

// GROUP BY col [, col ...] (EPIC-017 US-003; EPIC-052 US-005 adds nested fields).
group_by_clause = {
    ^"GROUP" ~ ^"BY" ~ group_by_list
}
group_by_list = {
    group_by_column ~ ("," ~ group_by_column)*
}
// A grouping column is a plain or quoted identifier, optionally extended into
// a dotted path (a.b.c).
group_by_column = {
    identifier ~ ("." ~ identifier)*
}

// HAVING clause: filters groups (EPIC-017 US-006).
// Terms are chained with AND / OR; there is no grouping with parentheses here.
having_clause = {
    ^"HAVING" ~ having_condition
}
having_condition = {
    having_term ~ (having_logical_op ~ having_term)*
}
// BUG-6 FIX: kept as a named rule so pest emits a token for each AND/OR.
having_logical_op = {
    ^"AND" | ^"OR"
}
// A single comparison: aggregate(...) op value.
having_term = {
    aggregate_function ~ compare_op ~ value
}

// JOIN clause for cross-store queries (EPIC-031 US-004, extended EPIC-040 US-003).
// Shape: [join type] JOIN table [AS alias] (ON a.x = b.y | USING (col, ...))
join_clause = {
    join_type? ~ ^"JOIN" ~ identifier ~ alias_clause? ~ join_spec
}
// LEFT / RIGHT / FULL may each be followed by OUTER; INNER stands alone.
join_type = {
    ^"LEFT" ~ ^"OUTER"?
  | ^"RIGHT" ~ ^"OUTER"?
  | ^"FULL" ~ ^"OUTER"?
  | ^"INNER"
}
join_spec = {
    on_clause | using_clause
}
on_clause = {
    ^"ON" ~ join_condition
}
using_clause = {
    ^"USING" ~ "(" ~ identifier ~ ("," ~ identifier)* ~ ")"
}
alias_clause = {
    ^"AS" ~ identifier
}
// Only an equality between two qualified columns is accepted.
join_condition = {
    column_ref ~ "=" ~ column_ref
}
// Atomic `table.column`: no whitespace is allowed around the ".".
column_ref = @{
    identifier ~ "." ~ identifier
}

// ORDER BY item [ASC|DESC], ... (EPIC-040 US-002: columns, aggregates, similarity).
// EPIC-042 added arithmetic expressions for custom scoring.
order_by_clause = {
    ^"ORDER" ~ ^"BY" ~ order_by_item ~ ("," ~ order_by_item)*
}
order_by_item = {
    order_by_expr ~ sort_direction?
}
// Ordered choice: aggregate call, then alias.property, then arithmetic.
order_by_expr = {
    aggregate_function | property_access | order_by_arithmetic
}
// Two-argument form: similarity(field, vector).
order_by_similarity = {
    ^"similarity" ~ "(" ~ similarity_field ~ "," ~ vector_value ~ ")"
}
// Zero-argument form in ORDER BY: uses the pre-computed search score.
order_by_similarity_bare = {
    ^"similarity" ~ "(" ~ ")"
}
sort_direction = {
    ^"DESC" | ^"ASC"
}

// Arithmetic expressions for ORDER BY (EPIC-042).
// Precedence is encoded by nesting: additive (+, -) binds looser than
// multiplicative (*, /).
order_by_arithmetic = {
    arithmetic_additive
}
arithmetic_additive = {
    arithmetic_multiplicative ~ ((add_op | sub_op) ~ arithmetic_multiplicative)*
}
arithmetic_multiplicative = {
    arithmetic_atom ~ ((mul_op | div_op) ~ arithmetic_atom)*
}
// Atoms, in ordered-choice order: float, integer, similarity(field, vector),
// similarity(), a parenthesised sub-expression, and finally an identifier.
arithmetic_atom = {
    float
  | integer
  | order_by_similarity
  | order_by_similarity_bare
  | ("(" ~ arithmetic_additive ~ ")")
  | identifier
}
// Operators are named rules, so each one produces its own token.
add_op = { "+" }
sub_op = { "-" }
mul_op = { "*" }
div_op = { "/" }

// WITH (key = value, ...) — query-time configuration overrides.
with_clause = {
    ^"WITH" ~ "(" ~ with_option_list ~ ")"
}
with_option_list = {
    with_option ~ ("," ~ with_option)*
}
with_option = {
    identifier ~ "=" ~ with_value
}
// float is tried before integer; a bare identifier is the last resort.
with_value = {
    string | float | integer | boolean | identifier
}

// Select list: either a lone "*" or a comma-separated list of items
// (columns and/or aggregations for GROUP BY).
select_list = {
    "*" | select_item_list
}

// Items may be mixed: columns, aggregations, window functions, similarity(),
// and qualified wildcards.
select_item_list = {
    select_item ~ ("," ~ select_item)*
}
// Ordered choice; the plain column comes last because its name pattern would
// otherwise match the leading identifier of the more specific forms.
select_item = {
    similarity_select
  | window_item
  | aggregation_item
  | qualified_wildcard
  | column
}

// Zero-argument similarity() in SELECT: SELECT similarity() [AS alias]
similarity_select = {
    ^"similarity" ~ "(" ~ ")" ~ (^"AS" ~ identifier)?
}

// Qualified wildcard: SELECT alias.* (e.g., SELECT ctx.*)
qualified_wildcard = {
    identifier ~ "." ~ "*"
}

// Aggregate functions: FIRST, COUNT, SUM, AVG, MIN, MAX
// aggregation_item is the SELECT-list form with an optional alias.
aggregation_item = { aggregate_function ~ (^"AS" ~ identifier)? }
aggregate_function = { aggregate_type ~ "(" ~ aggregate_arg ~ ")" }
aggregate_type = { ^"FIRST" | ^"COUNT" | ^"SUM" | ^"AVG" | ^"MIN" | ^"MAX" }
// Argument: "*", the `score` keyword, or a (possibly dotted) column name.
// The `score` alternative carries a word-boundary lookahead. Without it the
// ordered choice committed to `score` on columns such as `score_total` or
// `score.value`, and the enclosing aggregate_function then failed on the
// leftover text instead of trying column_name.
// The rule is compound-atomic ($) so no implicit whitespace is folded into the
// matched text and column_name still produces its inner pair.
aggregate_arg = ${ "*" | ^"score" ~ !(ASCII_ALPHANUMERIC | "_" | ".") | column_name }

// ──────────────────────────────────────────────────────────────
// Window functions (Issue #386 Phase 1)
// ──────────────────────────────────────────────────────────────

// Window function expression in the SELECT list.
// Example: ROW_NUMBER() OVER (PARTITION BY source ORDER BY score DESC) AS rn
window_item = {
    window_function_call ~
    ^"OVER" ~ "(" ~ over_clause ~ ")" ~
    (^"AS" ~ identifier)?
}

// Window function call; Phase 1 ranking functions take no arguments.
window_function_call = {
    window_function_name ~ "(" ~ ")"
}
window_function_name = {
    ^"ROW_NUMBER" | ^"DENSE_RANK" | ^"RANK"
}

// OVER body: optional PARTITION BY followed by optional ORDER BY.
// Both may be absent, so `OVER ()` is accepted.
over_clause = {
    partition_by_clause? ~ window_order_by_clause?
}

// PARTITION BY col [, col ...]
partition_by_clause = {
    ^"PARTITION" ~ ^"BY" ~ partition_by_list
}
partition_by_list = {
    column_name ~ ("," ~ column_name)*
}

// ORDER BY inside OVER(): a separate rule, which avoids ambiguity with the
// statement-level ORDER BY. Each item is similarity() or a column name.
window_order_by_clause = {
    ^"ORDER" ~ ^"BY" ~ window_order_by_item ~ ("," ~ window_order_by_item)*
}
window_order_by_item = {
    window_order_by_expr ~ sort_direction?
}
window_order_by_expr = {
    order_by_similarity_bare | column_name
}

// Plain select-list column with an optional alias: name [AS alias]
column = {
    column_name ~ (^"AS" ~ identifier)?
}
// EPIC-052 US-005: nested field paths such as metadata.source or
// profile.address.city. Atomic, so no whitespace is allowed around the dots.
column_name = @{
    identifier ~ ("." ~ identifier)*
}

// WHERE clause: the keyword followed by a boolean expression.
where_clause = {
    ^"WHERE" ~ or_expr
}

// Precedence is encoded by nesting (OR < AND < primary): an or_expr is a
// chain of and_exprs, and an and_expr is a chain of primary_exprs.
or_expr = {
    and_expr ~ (^"OR" ~ and_expr)*
}
and_expr = {
    primary_expr ~ (^"AND" ~ primary_expr)*
}
// Column reference used by predicates; a dotted path is allowed. The rule is
// non-atomic, so its identifiers appear as inner pairs.
where_column = {
    identifier ~ ("." ~ identifier)*
}

// A single predicate, a NOT-prefixed predicate, or a parenthesised expression.
// The alternatives are an ordered choice and are tried top to bottom:
// keyword-led forms (NOT, MATCH, similarity, vector ...) come before the
// column-led forms, contains_text_expr comes before contains_expr, and the
// generic compare_expr is last.
primary_expr = {
    ("(" ~ or_expr ~ ")")
  | not_expr
  | graph_match_expr
  | similarity_expr
  | vector_fused_search
  | sparse_vector_search
  | vector_search
  | match_expr
  | in_expr
  | between_expr
  | like_expr
  | is_null_expr
  | contains_text_expr
  | contains_expr
  | geo_distance_expr
  | geo_bbox_expr
  | compare_expr
}

// Negation: NOT <primary_expr>.
// The leading negative lookahead enforces a word boundary after NOT. Without
// it, a column whose name starts with "not" (e.g. `notes = 'x'`) was parsed
// as NOT followed by the predicate `es = 'x'`.
// The lookahead target is a separate atomic rule so no implicit whitespace is
// skipped between NOT and the character being checked. Nothing matched inside
// a lookahead is emitted, so the pairs produced by not_expr are unchanged.
not_expr = { !not_prefixed_word ~ ^"NOT" ~ primary_expr }
// Helper, used only inside the lookahead above: NOT immediately followed by
// an identifier character.
not_prefixed_word = @{ ^"NOT" ~ (ASCII_ALPHANUMERIC | "_") }

// Graph predicate usable inside a SELECT's WHERE clause, e.g.
//   WHERE ... AND MATCH (a)-[:REL]->(b)
graph_match_expr = {
    ^"MATCH" ~ graph_pattern
}

// Similarity predicate: similarity(field, vector) op threshold
// Used in hybrid graph-vector queries.
similarity_expr = {
    ^"similarity" ~ "(" ~ similarity_field ~ "," ~ vector_value ~ ")" ~
    compare_op ~ numeric_threshold
}
// The threshold may be written as a float (0.8) or an integer (1), for user
// convenience; float is tried first.
numeric_threshold = {
    float | integer
}
// Field name: ASCII letter or "_" first, then letters, digits, "_" or "." (atomic).
similarity_field = @{
    (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_" | ".")*
}

// Sparse vector search: vector SPARSE_NEAR sparse_value [USING 'index-name']
sparse_vector_search = {
    ^"vector" ~ ^"SPARSE_NEAR" ~ sparse_value ~ (^"USING" ~ string)?
}

// Sparse value: an inline literal such as {12: 0.8, 45: 0.3}, or a bind
// parameter such as $sv.
sparse_value = {
    sparse_literal | parameter
}
// Braced, comma-separated, non-empty list of entries.
sparse_literal = {
    "{" ~ sparse_entry ~ ("," ~ sparse_entry)* ~ "}"
}
// One entry: integer index, ":", float weight.
sparse_entry = {
    integer ~ ":" ~ float
}

// Dense vector search: vector NEAR vector_value
// The distance metric is fixed when the collection is created; it cannot be
// chosen per query.
vector_search = {
    ^"vector" ~ ^"NEAR" ~ vector_value
}

// Multi-vector fusion search:
//   vector NEAR_FUSED [v1, v2, ...] USING FUSION 'strategy' (params)
// The USING FUSION part is optional.
vector_fused_search = {
    ^"vector" ~ ^"NEAR_FUSED" ~ vector_array ~ fusion_clause?
}
// Bracketed, comma-separated, non-empty list of vectors.
vector_array = {
    "[" ~ vector_value ~ ("," ~ vector_value)* ~ "]"
}
// Predicate-level fusion: a quoted strategy name plus optional numeric parameters.
fusion_clause = {
    ^"USING" ~ ^"FUSION" ~ fusion_strategy ~ fusion_params?
}
fusion_strategy = {
    string
}
fusion_params = {
    "(" ~ fusion_param_list ~ ")"
}
fusion_param_list = {
    fusion_param ~ ("," ~ fusion_param)*
}
fusion_param = {
    identifier ~ "=" ~ fusion_param_value
}
fusion_param_value = {
    float | integer
}

// A vector is an inline literal or a bind parameter.
vector_value = {
    vector_literal | parameter
}
// One component; float is tried before integer.
vector_component = {
    float | integer
}
// Bracketed, comma-separated, non-empty list of components.
vector_literal = {
    "[" ~ vector_component ~ ("," ~ vector_component)* ~ "]"
}

// Full-text search: column MATCH 'query'
match_expr = {
    where_column ~ ^"MATCH" ~ string
}

// IN / NOT IN: column [NOT] IN (value, ...)
// The negated form is tried first; its NOT is emitted through not_kw.
in_expr = {
    where_column ~ ((not_kw ~ ^"IN") | ^"IN") ~ "(" ~ value_list ~ ")"
}
// Comma-separated, non-empty list of values.
value_list = {
    value ~ ("," ~ value)*
}

// BETWEEN: column BETWEEN value AND value
between_expr = {
    where_column ~ ^"BETWEEN" ~ value ~ ^"AND" ~ value
}

// LIKE / ILIKE: column LIKE 'pattern' or column ILIKE 'pattern'
like_expr = {
    where_column ~ like_op ~ string
}
// ILIKE is listed first so it is tried before LIKE.
like_op = {
    ^"ILIKE" | ^"LIKE"
}

// CONTAINS_TEXT: strict text substring filter — column CONTAINS_TEXT 'text'
contains_text_expr = {
    where_column ~ ^"CONTAINS_TEXT" ~ string
}

// CONTAINS: column CONTAINS value | column CONTAINS ANY/ALL (values)
// The ALL and ANY list forms are tried before the single-value form.
contains_expr = {
    (where_column ~ ^"CONTAINS" ~ ^"ALL" ~ "(" ~ value_list ~ ")")
  | (where_column ~ ^"CONTAINS" ~ ^"ANY" ~ "(" ~ value_list ~ ")")
  | (where_column ~ ^"CONTAINS" ~ value)
}

// Numeric argument shared by the geo predicates; float is tried before integer.
geo_number = {
    float | integer
}

// GEO_DISTANCE: GEO_DISTANCE(column, lat, lng) op meters
geo_distance_expr = {
    ^"GEO_DISTANCE" ~
    "(" ~ column_name ~ "," ~ geo_number ~ "," ~ geo_number ~ ")" ~
    compare_op ~ geo_number
}

// GEO_BBOX: GEO_BBOX(column, lat_min, lng_min, lat_max, lng_max)
geo_bbox_expr = {
    ^"GEO_BBOX" ~
    "(" ~ column_name ~
    "," ~ geo_number ~ "," ~ geo_number ~
    "," ~ geo_number ~ "," ~ geo_number ~
    ")"
}

// Null test: column IS NULL / column IS NOT NULL
is_null_expr = {
    where_column ~ ^"IS" ~ not_kw? ~ ^"NULL"
}
// NOT as a named rule, so its presence shows up as an inner pair.
not_kw = {
    ^"NOT"
}

// Comparison: column op value
compare_expr = {
    where_column ~ compare_op ~ value
}
// The two-character operators are listed before "=", ">" and "<" so that the
// longer spelling is tried first.
compare_op = {
    ">=" | "<=" | "<>" | "!=" | "=" | ">" | "<"
}

// LIMIT n and OFFSET n
limit_clause = {
    ^"LIMIT" ~ integer
}
offset_clause = {
    ^"OFFSET" ~ integer
}

// Values (EPIC-038 adds temporal expressions, EPIC-039 adds subqueries).
// Ordered choice: subquery and temporal forms first, then float before
// integer, then string, boolean, NULL, and finally a bind parameter.
value = {
    subquery_expr
  | temporal_expr
  | float
  | integer
  | string
  | boolean
  | null_value
  | parameter
}

// Scalar subquery expression (EPIC-039): a parenthesised SELECT over a single
// source, with optional WHERE, GROUP BY, HAVING and LIMIT (in that order).
subquery_expr = {
    "(" ~
    ^"SELECT" ~ select_list ~
    ^"FROM" ~ identifier ~
    where_clause? ~ group_by_clause? ~ having_clause? ~ limit_clause? ~
    ")"
}
// Bind parameter: "$" immediately followed by an identifier (atomic).
parameter = @{
    "$" ~ identifier
}
null_value = {
    ^"NULL"
}
// Boolean literal: TRUE or FALSE, case-insensitive.
// Atomic with a word-boundary lookahead: the keyword must not be followed by
// an ASCII alphanumeric or "_". Without the boundary, rules that try boolean
// before identifier (create_option_value, with_value) committed to the
// true/false prefix of a value such as `true_exact` and then failed on the
// leftover text. Same pattern as collection_kw and flush_full_kw; the emitted
// pair and its text are unchanged for real boolean literals.
boolean = @{ (^"TRUE" | ^"FALSE") ~ !(ASCII_ALPHANUMERIC | "_") }

// Temporal expressions (EPIC-038).
// The arithmetic form is tried first, so `NOW() - INTERVAL '...'` is not cut
// short after NOW().
temporal_expr = {
    temporal_arithmetic | now_function | interval_expr
}
// Exactly two operands joined by "+" or "-"; each operand is NOW() or an INTERVAL.
temporal_arithmetic = {
    (now_function | interval_expr) ~
    temporal_op ~
    (now_function | interval_expr)
}
temporal_op = {
    "+" | "-"
}
// NOW() takes no arguments.
now_function = {
    ^"NOW" ~ "(" ~ ")"
}
// INTERVAL followed by a quoted string.
interval_expr = {
    ^"INTERVAL" ~ string
}

// Literals (all atomic: no implicit whitespace inside).
// string: single-quoted; a quote inside the text is written doubled ('').
string = @{
    "'" ~ ((!"'" ~ ANY) | "''")* ~ "'"
}
// integer: optional leading "-", then one or more ASCII digits.
integer = @{
    "-"? ~ ASCII_DIGIT+
}
// float: optional leading "-", digits, ".", digits. Both digit runs are
// required, and there is no exponent form.
float = @{
    "-"? ~ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+
}

// Identifiers (EPIC-044 US-005: quoted identifiers allow reserved keywords).
// Accepted forms: regular (my_col), backtick-quoted (`select`), and
// double-quoted ("from"). The quoted forms are tried first.
identifier = {
    quoted_identifier | regular_identifier
}
// Regular form: ASCII letter or "_" first, then ASCII letters, digits or "_".
regular_identifier = @{
    (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")*
}

// Backtick style: `select`, `from`, `order`
// The content must be non-empty and cannot contain a backtick.
backtick_identifier = @{
    "`" ~ backtick_inner ~ "`"
}
backtick_inner = @{
    (!"`" ~ ANY)+
}

// Double-quote style (SQL standard): "select", "from", "order"
// A double quote inside the name is written doubled: "col""name" stands for col"name.
// The content may be empty.
doublequote_identifier = @{
    "\"" ~ doublequote_inner ~ "\""
}
doublequote_inner = @{
    (doublequote_escape | (!"\"" ~ ANY))*
}
doublequote_escape = @{
    "\"\""
}

// Either quoted style; backtick is tried before double-quote.
quoted_identifier = {
    backtick_identifier | doublequote_identifier
}