// VelesQL Grammar - SQL-like query language for VelesDB
// Version 3.1.0 — string escaping, NOT IN, vector literal integers, N-ary compound queries
// Implicit trivia. Both rules are silent (_), so they never appear in the parse tree.
// Whitespace: space, tab, carriage return, newline
WHITESPACE = _{
    " " | "\t" | "\r" | "\n"
}
// Line comment: two dashes, then everything up to (not including) the next newline
COMMENT = _{
    "--" ~ (!"\n" ~ ANY)*
}
// Top-level entry point. Exactly one statement must span the whole input:
// MATCH, SELECT (optionally chained with set operators), TRAIN QUANTIZER,
// INSERT or UPDATE, followed by an optional trailing semicolon.
query = {
    SOI
    ~ (match_query | compound_query | train_stmt | insert_stmt | update_stmt)
    ~ ";"?
    ~ EOI
}
// Graph pattern matching query (EPIC-045 US-001)
// Shape: MATCH pattern [WHERE condition] RETURN items [ORDER BY ...] [LIMIT n]
// Only RETURN is mandatory after the pattern; the other clauses are optional.
match_query = {
    ^"MATCH" ~ graph_pattern
    ~ where_clause?
    ~ return_clause
    ~ order_by_clause?
    ~ limit_clause?
}
// Graph pattern: one node, then any number of (relationship, node) hops
//   (node)-[rel]->(node)-[rel]->(node) ...
graph_pattern = {
    node_pattern ~ (relationship_pattern ~ node_pattern)*
}
// Node: ( [alias] [:Label[:Label ...]] [{key: value, ...}] )
// Every part is optional, so the empty node () is accepted.
node_pattern = {
    "(" ~ node_spec? ~ ")"
}
node_spec = {
    node_alias? ~ node_labels? ~ node_properties?
}
// Alias and label names are atomic: a letter or underscore, then letters, digits, underscores
node_alias = @{
    (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")*
}
// One or more labels, each introduced by a colon (:Person:Employee)
node_labels = {
    ":" ~ label_name ~ (":" ~ label_name)*
}
label_name = @{
    (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")*
}
// Property map: {key: value, key: value}; at least one property is required inside the braces
node_properties = {
    "{" ~ property_list ~ "}"
}
property_list = {
    property ~ ("," ~ property)*
}
property = {
    identifier ~ ":" ~ property_value
}
// float is listed before integer so that 1.5 is not read as the integer 1
property_value = {
    string
  | float
  | integer
  | boolean
  | null_value
  | parameter
}
// Relationship between two nodes, e.g. -[r:TYPE*1..3]->
// rel_outgoing is tried before rel_undirected: both open with "-", and the
// undirected terminator "-" is a prefix of the outgoing terminator "->".
// NOTE(review): "--" opens a COMMENT and trivia is skipped between a node and the
// relationship that follows it, so the bracket-less shorthands (a)--(b) and
// (a)-->(b) look like they are swallowed as a comment. Confirm; the bracketed
// forms -[]- and -[]-> do not have this problem.
relationship_pattern = {
    rel_incoming
  | rel_outgoing
  | rel_undirected
}
// <-[...]-
rel_incoming = {
    "<-" ~ rel_spec? ~ "-"
}
// -[...]->
rel_outgoing = {
    "-" ~ rel_spec? ~ "->"
}
// -[...]-
rel_undirected = {
    "-" ~ rel_spec? ~ "-"
}
// Bracketed details; the brackets may be empty
rel_spec = {
    "[" ~ rel_details? ~ "]"
}
// Order inside the brackets is fixed: alias, types, range, properties (each optional)
rel_details = {
    rel_alias? ~ rel_types? ~ rel_range? ~ node_properties?
}
rel_alias = @{
    (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")*
}
// One or more types; the first is introduced by ":" and further ones by "|" (:KNOWS|LIKES)
rel_types = {
    ":" ~ rel_type_name ~ ("|" ~ rel_type_name)*
}
rel_type_name = @{
    (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")*
}
// Variable-length marker: "*" alone, or "*" followed by a range
rel_range = {
    "*" ~ range_spec?
}
// Accepted ranges: 1..3, 2.., ..3, or a single number (the single number uses the integer rule)
range_spec = {
    range_bound ~ ".." ~ range_bound?
  | ".." ~ range_bound
  | integer
}
// Unsigned run of digits
range_bound = @{
    ASCII_DIGIT+
}
// RETURN clause of a MATCH query: RETURN item [AS alias] [, item [AS alias] ...]
return_clause = {
    ^"RETURN" ~ return_item_list
}
return_item_list = {
    return_item ~ ("," ~ return_item)*
}
return_item = {
    return_expr ~ (^"AS" ~ identifier)?
}
// property_access (a.b) is tried before a bare identifier (a) so the dotted form wins
return_expr = {
    similarity_return
  | property_access
  | identifier
  | "*"
}
// Zero-argument similarity()
similarity_return = {
    ^"similarity" ~ "(" ~ ")"
}
// Exactly one dot (alias.property); atomic, so no whitespace is allowed around the dot
property_access = @{
    identifier ~ "." ~ identifier
}
// Compound query: one SELECT, optionally chained with further SELECTs through set operators
// A plain SELECT is simply the case with zero operators.
compound_query = {
    select_stmt ~ (set_operator ~ select_stmt)*
}
// UNION ALL is listed before UNION so the longer form is tried first
set_operator = {
    ^"UNION" ~ ^"ALL"
  | ^"UNION"
  | ^"INTERSECT"
  | ^"EXCEPT"
}
// INSERT: INSERT INTO table (col1, col2, ...) VALUES (v1, v2, ...)
// Both lists need at least one entry; the grammar does not check that their lengths agree.
insert_stmt = {
    ^"INSERT" ~ ^"INTO" ~ identifier
    ~ "(" ~ identifier ~ ("," ~ identifier)* ~ ")"
    ~ ^"VALUES"
    ~ "(" ~ value ~ ("," ~ value)* ~ ")"
}
// UPDATE: UPDATE table SET col1 = v1 [, col2 = v2 ...] [WHERE ...]
update_stmt = {
    ^"UPDATE" ~ identifier
    ~ ^"SET" ~ assignment ~ ("," ~ assignment)*
    ~ where_clause?
}
// A single SET target: column = value
assignment = {
    identifier ~ "=" ~ value
}
// TRAIN: TRAIN QUANTIZER ON collection WITH (option = value, ...)
// The WITH clause is mandatory for this statement.
train_stmt = {
    ^"TRAIN" ~ ^"QUANTIZER" ~ ^"ON"
    ~ identifier
    ~ with_clause
}
// SELECT statement. Everything after FROM is optional but must appear in this order:
//   JOIN* -> WHERE -> GROUP BY -> HAVING -> ORDER BY -> LIMIT -> OFFSET -> WITH -> USING FUSION
// EPIC-052 US-001: DISTINCT modifier
// EPIC-052 US-003: FROM alias (enables self-joins)
select_stmt = {
    ^"SELECT" ~ distinct_modifier? ~ select_list
    ~ ^"FROM" ~ from_clause
    ~ join_clause*
    ~ where_clause?
    ~ group_by_clause?
    ~ having_clause?
    ~ order_by_clause?
    ~ limit_clause?
    ~ offset_clause?
    ~ with_clause?
    ~ using_fusion_clause?
}
// FROM target with an optional alias (EPIC-052 US-003: self-join support)
// Accepted: FROM table | FROM table AS alias
// The bare form "FROM table alias" is deliberately not supported: without AS the
// alias would be ambiguous with the JOIN keywords that may follow.
from_clause = {
    identifier ~ from_alias?
}
from_alias = {
    ^"AS" ~ identifier
}
// DISTINCT modifier (EPIC-052 US-001)
distinct_modifier = {
    ^"DISTINCT"
}
// Statement-level USING FUSION clause for hybrid search (EPIC-040 US-005)
// Form: USING FUSION [(name = value, name = value, ...)]
using_fusion_clause = {
    ^"USING" ~ ^"FUSION" ~ fusion_options?
}
// When the parentheses are present they must hold at least one option
fusion_options = {
    "(" ~ fusion_option_list ~ ")"
}
fusion_option_list = {
    fusion_option ~ ("," ~ fusion_option)*
}
fusion_option = {
    identifier ~ "=" ~ fusion_value
}
// float is listed before integer so that 0.5 is not read as the integer 0
fusion_value = {
    string
  | float
  | integer
}
// GROUP BY clause (EPIC-017 US-003; EPIC-052 US-005 added nested fields)
group_by_clause = {
    ^"GROUP" ~ ^"BY" ~ group_by_list
}
group_by_list = {
    group_by_column ~ ("," ~ group_by_column)*
}
// A plain identifier (quoted identifiers included) or a dotted path such as a.b.c
group_by_column = {
    identifier ~ ("." ~ identifier)*
}
// HAVING clause: filters groups (EPIC-017 US-006)
// Terms are joined left to right by AND / OR; the grammar itself imposes no
// precedence between the two operators and offers no parentheses here.
having_clause = {
    ^"HAVING" ~ having_condition
}
having_condition = {
    having_term ~ (having_logical_op ~ having_term)*
}
// BUG-6 FIX: kept as a named rule so pest emits a token for each AND / OR
having_logical_op = {
    ^"AND" | ^"OR"
}
// One comparison: aggregate(...) <op> value
having_term = {
    aggregate_function ~ compare_op ~ value
}
// JOIN clause for cross-store queries (EPIC-031 US-004, extended by EPIC-040 US-003)
// Form: [LEFT|RIGHT|FULL [OUTER] | INNER] JOIN table [AS alias] (ON a.x = b.y | USING (col, ...))
join_clause = {
    join_type?
    ~ ^"JOIN" ~ identifier
    ~ alias_clause?
    ~ join_spec
}
// OUTER is optional after LEFT / RIGHT / FULL
join_type = {
    (^"LEFT" ~ ^"OUTER"?)
  | (^"RIGHT" ~ ^"OUTER"?)
  | (^"FULL" ~ ^"OUTER"?)
  | ^"INNER"
}
// A join must state its condition through either ON or USING
join_spec = {
    on_clause | using_clause
}
on_clause = {
    ^"ON" ~ join_condition
}
using_clause = {
    ^"USING" ~ "(" ~ identifier ~ ("," ~ identifier)* ~ ")"
}
alias_clause = {
    ^"AS" ~ identifier
}
// Only equality between two qualified columns is expressible
join_condition = {
    column_ref ~ "=" ~ column_ref
}
// Qualified column (table.column); atomic, so no whitespace is allowed around the dot
column_ref = @{
    identifier ~ "." ~ identifier
}
// ORDER BY clause (EPIC-040 US-002: columns, aggregates, similarity)
// EPIC-042: arithmetic expressions for custom scoring
order_by_clause = {
    ^"ORDER" ~ ^"BY" ~ order_by_item ~ ("," ~ order_by_item)*
}
order_by_item = {
    order_by_expr ~ sort_direction?
}
// Tried in order: aggregate call, qualified property (a.b), then the arithmetic grammar.
// Plain columns, numbers and similarity(...) are reached through order_by_arithmetic.
order_by_expr = {
    aggregate_function
  | property_access
  | order_by_arithmetic
}
// similarity(field, vector) as a sort key
order_by_similarity = {
    ^"similarity" ~ "(" ~ similarity_field ~ "," ~ vector_value ~ ")"
}
// Zero-argument similarity() in ORDER BY: uses the pre-computed search score
order_by_similarity_bare = {
    ^"similarity" ~ "(" ~ ")"
}
sort_direction = {
    ^"DESC" | ^"ASC"
}
// Arithmetic expressions for ORDER BY (EPIC-042)
// Precedence: additive (+, -) binds looser than multiplicative (*, /)
order_by_arithmetic = {
    arithmetic_additive
}
arithmetic_additive = {
    arithmetic_multiplicative ~ ((add_op | sub_op) ~ arithmetic_multiplicative)*
}
arithmetic_multiplicative = {
    arithmetic_atom ~ ((mul_op | div_op) ~ arithmetic_atom)*
}
// Operands: float, integer, either similarity form, a parenthesised sub-expression, or an identifier
// The two-argument similarity form is tried before the zero-argument one.
arithmetic_atom = {
    float
  | integer
  | order_by_similarity
  | order_by_similarity_bare
  | "(" ~ arithmetic_additive ~ ")"
  | identifier
}
// Operators are named rules, so each one appears in the parse tree
add_op = { "+" }
sub_op = { "-" }
mul_op = { "*" }
div_op = { "/" }
// WITH clause: query-time configuration overrides
// Form: WITH (name = value, name = value, ...) — at least one option is required
with_clause = {
    ^"WITH" ~ "(" ~ with_option_list ~ ")"
}
with_option_list = {
    with_option ~ ("," ~ with_option)*
}
with_option = {
    identifier ~ "=" ~ with_value
}
// Tried in this order; a bare identifier is the last resort
with_value = {
    string
  | float
  | integer
  | boolean
  | identifier
}
// Select list: a lone * or a comma-separated list of items
select_list = {
    "*" | select_item_list
}
// Items may be mixed freely: columns, aggregations, similarity() and qualified wildcards
select_item_list = {
    select_item ~ ("," ~ select_item)*
}
// Plain column is tried last, after the more specific forms
select_item = {
    similarity_select
  | aggregation_item
  | qualified_wildcard
  | column
}
// Zero-argument similarity() in SELECT: SELECT similarity() [AS alias]
similarity_select = {
    ^"similarity" ~ "(" ~ ")" ~ (^"AS" ~ identifier)?
}
// Qualified wildcard: SELECT alias.* (for example SELECT ctx.*)
qualified_wildcard = {
    identifier ~ "." ~ "*"
}
// Aggregate call with optional alias: COUNT, SUM, AVG, MIN, MAX
aggregation_item = {
    aggregate_function ~ (^"AS" ~ identifier)?
}
aggregate_function = {
    aggregate_type ~ "(" ~ aggregate_arg ~ ")"
}
aggregate_type = {
    ^"COUNT"
  | ^"SUM"
  | ^"AVG"
  | ^"MIN"
  | ^"MAX"
}
// Argument is either * or a column name; the grammar accepts * for every aggregate type
aggregate_arg = {
    "*" | column_name
}
// Column with optional alias
column = {
    column_name ~ (^"AS" ~ identifier)?
}
// EPIC-052 US-005: nested field paths such as metadata.source or profile.address.city
// Atomic, so no whitespace is allowed around the dots.
column_name = @{
    identifier ~ ("." ~ identifier)*
}
// WHERE clause
where_clause = {
    ^"WHERE" ~ or_expr
}
// Boolean structure, from loosest to tightest binding: OR, then AND, then primary_expr
// The OR / AND keywords are anonymous literals here, so they produce no pairs of their own.
or_expr = {
    and_expr ~ (^"OR" ~ and_expr)*
}
and_expr = {
    primary_expr ~ (^"AND" ~ primary_expr)*
}
// Column reference used by predicates: a name or a dotted path (a.b.c)
// Not atomic, so whitespace around the dots is tolerated (column_name, by contrast, is atomic).
where_column = {
    identifier ~ ("." ~ identifier)*
}
// A single WHERE predicate, or a parenthesised sub-expression.
// pest uses ordered choice: alternatives are tried top to bottom and the first
// one that matches wins, so the order below is significant.
//   "(" or_expr ")"              grouping; restarts at the loosest precedence level
//   not_expr .. vector_search    forms introduced by a keyword
//                                (NOT, MATCH, similarity, vector)
//   match_expr .. compare_expr   forms that all begin with where_column and are told
//                                apart by what follows the column; the generic
//                                comparison is the final fallback
primary_expr = {
"(" ~ or_expr ~ ")" |
not_expr |
graph_match_expr |
similarity_expr |
vector_fused_search |
sparse_vector_search |
vector_search |
match_expr |
in_expr |
between_expr |
like_expr |
is_null_expr |
compare_expr
}
// Logical negation: NOT <primary_expr>
// The keyword must end at a word boundary. A case-insensitive literal has no
// boundary of its own, and not_expr is tried before the column predicates, so
// without the guard a column such as `notes` (or `nothing`, `not_before`, ...)
// is consumed as NOT followed by the column `es`, silently negating a
// different predicate than the one the user wrote.
// The guard is a negative lookahead: it consumes nothing and emits no tokens,
// so not_expr still yields exactly one inner pair (the negated primary_expr).
not_expr = { !not_keyword_prefix ~ ^"NOT" ~ primary_expr }
// Helper for the guard above: NOT immediately followed by an identifier
// character, i.e. an identifier that merely starts with "not". It has to be
// atomic so that no whitespace is skipped between the keyword and the next
// character; only ever used inside a negative lookahead.
not_keyword_prefix = @{ ^"NOT" ~ (ASCII_ALPHANUMERIC | "_") }
// Graph predicate embedded in a SELECT's WHERE clause:
//   WHERE ... AND MATCH (a)-[:REL]->(b)
graph_match_expr = {
    ^"MATCH" ~ graph_pattern
}
// Similarity predicate: similarity(field, vector) <op> threshold
// Used in hybrid graph-vector queries
similarity_expr = {
    ^"similarity" ~ "(" ~ similarity_field ~ "," ~ vector_value ~ ")" ~ compare_op ~ numeric_threshold
}
// The threshold may be written as a float (0.8) or an integer (1) for user convenience
numeric_threshold = {
    float | integer
}
// Field name; atomic: a letter or underscore, then letters, digits, underscores or dots
similarity_field = @{
    (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_" | ".")*
}
// Sparse vector search: vector SPARSE_NEAR sparse_value [USING 'index-name']
sparse_vector_search = {
    ^"vector" ~ ^"SPARSE_NEAR" ~ sparse_value ~ (^"USING" ~ string)?
}
// Sparse value: an inline literal such as {12: 0.8, 45: 0.3}, or a bind parameter ($sv)
sparse_value = {
    sparse_literal | parameter
}
// At least one entry is required between the braces
sparse_literal = {
    "{" ~ sparse_entry ~ ("," ~ sparse_entry)* ~ "}"
}
// index: weight — the weight must be written as a float (12: 1.0, not 12: 1)
sparse_entry = {
    integer ~ ":" ~ float
}
// Dense vector search: vector NEAR vector_value
// Note: the distance metric is fixed when the collection is created, not per query
vector_search = {
    ^"vector" ~ ^"NEAR" ~ vector_value
}
// Multi-vector fusion search: vector NEAR_FUSED [v1, v2, ...] [USING FUSION 'strategy' [(params)]]
vector_fused_search = {
    ^"vector" ~ ^"NEAR_FUSED" ~ vector_array ~ fusion_clause?
}
// Bracketed list of one or more vectors
vector_array = {
    "[" ~ vector_value ~ ("," ~ vector_value)* ~ "]"
}
// Predicate-level fusion clause: the strategy string is mandatory, the parameters optional
fusion_clause = {
    ^"USING" ~ ^"FUSION" ~ fusion_strategy ~ fusion_params?
}
fusion_strategy = {
    string
}
fusion_params = {
    "(" ~ fusion_param_list ~ ")"
}
fusion_param_list = {
    fusion_param ~ ("," ~ fusion_param)*
}
fusion_param = {
    identifier ~ "=" ~ fusion_param_value
}
// Numeric only; float is listed before integer so 0.5 is not read as the integer 0
fusion_param_value = {
    float | integer
}
// A vector is either an inline literal or a bind parameter
vector_value = {
    vector_literal | parameter
}
// Components may be floats or integers
vector_component = {
    float | integer
}
// Bracketed list of one or more components, e.g. [0.1, 2, -0.3]
vector_literal = {
    "[" ~ vector_component ~ ("," ~ vector_component)* ~ "]"
}
// Full-text search: column MATCH 'query'
match_expr = {
    where_column ~ ^"MATCH" ~ string
}
// Membership test: column [NOT] IN (value, ...)
// The negated form is listed first; not_kw is a named rule, so its presence is visible in the parse tree
in_expr = {
    where_column ~ (not_kw ~ ^"IN" | ^"IN") ~ "(" ~ value_list ~ ")"
}
// One or more values
value_list = {
    value ~ ("," ~ value)*
}
// Range test: column BETWEEN value AND value
between_expr = {
    where_column ~ ^"BETWEEN" ~ value ~ ^"AND" ~ value
}
// Pattern match: column LIKE 'pattern' or column ILIKE 'pattern'
like_expr = {
    where_column ~ like_op ~ string
}
like_op = {
    ^"ILIKE" | ^"LIKE"
}
// Null test: column IS NULL / column IS NOT NULL
is_null_expr = {
    where_column ~ ^"IS" ~ not_kw? ~ ^"NULL"
}
// Named NOT keyword, shared by in_expr and is_null_expr
not_kw = {
    ^"NOT"
}
// Generic comparison: column <op> value
compare_expr = {
    where_column ~ compare_op ~ value
}
// Two-character operators are listed before the one-character operators they start with
compare_op = {
    ">="
  | "<="
  | "<>"
  | "!="
  | "="
  | ">"
  | "<"
}
// LIMIT and OFFSET, each followed by an integer literal
limit_clause = {
    ^"LIMIT" ~ integer
}
offset_clause = {
    ^"OFFSET" ~ integer
}
// Values (EPIC-038: temporal expressions, EPIC-039: scalar subqueries)
// Tried in this order; float precedes integer so that 1.5 is not read as the integer 1
value = {
    subquery_expr
  | temporal_expr
  | float
  | integer
  | string
  | boolean
  | null_value
  | parameter
}
// Scalar subquery (EPIC-039): a parenthesised SELECT
// Supports WHERE, GROUP BY, HAVING and LIMIT only; the FROM target is a bare identifier.
subquery_expr = {
    "("
    ~ ^"SELECT" ~ select_list
    ~ ^"FROM" ~ identifier
    ~ where_clause?
    ~ group_by_clause?
    ~ having_clause?
    ~ limit_clause?
    ~ ")"
}
// Bind parameter: $name; atomic, so no whitespace is allowed after the dollar sign
parameter = @{
    "$" ~ identifier
}
null_value = {
    ^"NULL"
}
boolean = {
    ^"TRUE" | ^"FALSE"
}
// Temporal expressions (EPIC-038)
// The arithmetic form is tried first so that NOW() - INTERVAL '7 days' is not cut short at NOW()
temporal_expr = {
    temporal_arithmetic
  | now_function
  | interval_expr
}
// Exactly one operator between two operands; each operand is NOW() or an INTERVAL
temporal_arithmetic = {
    (now_function | interval_expr) ~ temporal_op ~ (now_function | interval_expr)
}
temporal_op = {
    "+" | "-"
}
now_function = {
    ^"NOW" ~ "(" ~ ")"
}
// INTERVAL followed by a string literal; the string's contents are not validated by the grammar
interval_expr = {
    ^"INTERVAL" ~ string
}
// Literals (all atomic: nothing is skipped inside them)
// String: single-quoted; a quote inside the string is written by doubling it ('it''s')
string = @{
    "'" ~ (!"'" ~ ANY | "''")* ~ "'"
}
// Integer: optional minus sign, then one or more digits
integer = @{
    "-"? ~ ASCII_DIGIT+
}
// Float: optional minus sign, digits, a dot, digits — digits are required on both sides of the dot
float = @{
    "-"? ~ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+
}
// Identifiers (EPIC-044 US-005: quoted identifiers allow reserved keywords as names)
// Three spellings: plain (name), backtick-quoted (`select`), double-quoted ("from")
// The quoted forms are tried first.
identifier = {
    quoted_identifier | regular_identifier
}
// Plain identifier: a letter or underscore, then letters, digits, underscores
regular_identifier = @{
    (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")*
}
// Backtick style: `select`, `from`, `order`
// The content must be non-empty and cannot contain a backtick.
backtick_identifier = @{
    "`" ~ backtick_inner ~ "`"
}
backtick_inner = @{
    (!"`" ~ ANY)+
}
// Double-quote style (SQL standard): "select", "from", "order"
// A quote inside the name is written by doubling it: "col""name" -> col"name
// Unlike the backtick form, the content may be empty ("").
doublequote_identifier = @{
    "\"" ~ doublequote_inner ~ "\""
}
doublequote_inner = @{
    (doublequote_escape | !("\"") ~ ANY)*
}
doublequote_escape = @{
    "\"\""
}
quoted_identifier = {
    backtick_identifier | doublequote_identifier
}