velesdb-core 1.13.7

High-performance vector database engine written in Rust
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
// VelesQL Grammar - SQL-like query language for VelesDB
// Version 3.6.0 — FLUSH + ANALYZE, TRUNCATE, ALTER COLLECTION + DDL + graph mutations + DELETE FROM + multi-row INSERT + UPSERT

// Implicit whitespace and comments.
// Both rules are silent (`_`): they are skipped between tokens and never emitted.
WHITESPACE = _{
    " " | "\t" | "\r" | "\n"
}
// Line comment: "--" followed by everything up to (not including) the next newline.
COMMENT = _{
    "--" ~ (!"\n" ~ ANY)*
}

// LET clause (VelesQL v1.10 Phase 3): `LET <name> = <arithmetic expression>`.
// Declares a named score binding that is evaluated once; the right-hand side
// reuses the ORDER BY arithmetic expression grammar.
let_clause = {
    ^"LET" ~ identifier ~ "=" ~ order_by_arithmetic
}

// Main entry point.
// Accepts, after zero or more LET clauses, exactly one statement followed by an
// optional ";" and end of input. Statement families:
//   - introspection: SHOW / DESCRIBE / EXPLAIN
//   - admin:         ANALYZE / TRUNCATE / ALTER / FLUSH
//   - queries:       MATCH, SELECT EDGES, SELECT (compound)
//   - TRAIN, DDL (CREATE / DROP), DML (INSERT / UPSERT / UPDATE / DELETE)
//
// The alternatives form a PEG ordered choice, so their order matters:
//   - select_edges_stmt before compound_query (both start with SELECT; EDGES disambiguates)
//   - insert_node_stmt before insert_edge_stmt (both start with INSERT; NODE vs EDGE disambiguates)
//   - insert_edge_stmt before insert_stmt, delete_edge_stmt before delete_stmt
//   - upsert_stmt follows insert_stmt; UPSERT and INSERT have distinct first tokens
//   - introspection and admin statements come first because their leading
//     keywords do not conflict with any other statement's first token
query = {
    SOI ~ let_clause* ~ (
        show_collections_stmt
      | describe_stmt
      | explain_stmt
      | analyze_stmt
      | truncate_stmt
      | alter_collection_stmt
      | flush_stmt
      | match_query
      | select_edges_stmt
      | compound_query
      | train_stmt
      | create_index_stmt
      | create_collection_stmt
      | drop_index_stmt
      | drop_collection_stmt
      | insert_node_stmt
      | insert_edge_stmt
      | delete_edge_stmt
      | delete_stmt
      | insert_stmt
      | upsert_stmt
      | update_stmt
    ) ~ ";"? ~ EOI
}

// ──────────────────────────────────────────────────────────────
// Introspection statements (VelesQL v3.4)
// ──────────────────────────────────────────────────────────────

// SHOW COLLECTIONS — lists all collections in the database
show_collections_stmt = { ^"SHOW" ~ ^"COLLECTIONS" }

// DESCRIBE [COLLECTION] name — returns collection metadata
//
// The optional keyword is written as an explicit ordered choice rather than
// `collection_kw? ~ identifier`. A PEG optional never backtracks, so with the
// `?` form the input `DESCRIBE collection` (a collection literally named
// "collection") consumed the name as the keyword and then failed for lack of
// an identifier. With the choice, the keyword form is tried first and the
// bare-identifier form is the fallback. The emitted inner pairs are the same
// as before: [collection_kw, identifier] or [identifier].
describe_stmt = { ^"DESCRIBE" ~ (collection_kw ~ identifier | identifier) }

// EXPLAIN <query> — returns the query execution plan without executing
explain_stmt = { ^"EXPLAIN" ~ compound_query }

// ANALYZE [COLLECTION] name — computes CBO statistics for query optimizer
// (same keyword/identifier fallback as describe_stmt)
analyze_stmt = { ^"ANALYZE" ~ (collection_kw ~ identifier | identifier) }

// TRUNCATE [COLLECTION] name — deletes all rows from a collection
// (same keyword/identifier fallback as describe_stmt)
truncate_stmt = { ^"TRUNCATE" ~ (collection_kw ~ identifier | identifier) }

// Word-boundary-safe COLLECTION keyword. The negative lookahead prevents
// "collection_data" from being parsed as COLLECTION + "_data".
// Same pattern as flush_full_kw.
collection_kw = @{ ^"COLLECTION" ~ !(ASCII_ALPHANUMERIC | "_") }

// ALTER COLLECTION <name> SET (<option> = <value>, ...)
// Modifies collection settings; the option list reuses create_option_list.
alter_collection_stmt = {
    ^"ALTER" ~ ^"COLLECTION" ~ identifier ~
    ^"SET" ~ "(" ~ create_option_list ~ ")"
}

// FLUSH [FULL] [collection] — persist all collections, or one named collection, to disk.
//
// FULL is a named rule (flush_full_kw) so the parser can detect it among the
// inner pairs. The rule is atomic and ends with a negative lookahead on
// (ASCII_ALPHANUMERIC | "_"), so the "full" prefix of an identifier such as
// "fulltext" is not taken as the FULL keyword.
flush_full_kw = @{ ^"FULL" ~ !(ASCII_ALPHANUMERIC | "_") }
flush_stmt = {
    ^"FLUSH" ~ flush_full_kw? ~ identifier?
}

// MATCH query: graph pattern matching (EPIC-045 US-001)
// Shape: MATCH <pattern> [WHERE <condition>] RETURN <items> [ORDER BY ...] [LIMIT n]
// RETURN is the only mandatory clause after the pattern.
match_query = {
    ^"MATCH" ~ graph_pattern
    ~ where_clause?
    ~ return_clause
    ~ order_by_clause?
    ~ limit_clause?
}

// Graph pattern: a node followed by any number of (relationship, node) hops,
// e.g. (a)-[r]->(b)-[s]->(c)
graph_pattern = {
    node_pattern ~ (relationship_pattern ~ node_pattern)*
}

// A node is a parenthesised, optional spec; every part of the spec is optional:
//   alias, :Label[:Label...], @collection, {prop: value, ...}
node_pattern = { "(" ~ node_spec? ~ ")" }
node_spec = {
    node_alias? ~ node_labels? ~ collection_annotation? ~ node_properties?
}

// Alias, label and collection names share one shape: a letter or underscore
// followed by letters, digits or underscores (atomic, no inner whitespace).
node_alias     = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
label_name     = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
collection_ref = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* }

node_labels = {
    ":" ~ label_name ~ (":" ~ label_name)*
}
collection_annotation = { "@" ~ collection_ref }

// Inline property map: {key: value, ...}
node_properties = { "{" ~ property_list ~ "}" }
property_list = {
    property ~ ("," ~ property)*
}
property = { identifier ~ ":" ~ property_value }
property_value = {
    string | float | integer | boolean | null_value | parameter
}

// Relationship pattern, e.g. -[r:TYPE*1..3]->
// Direction is tried in this order: incoming (<-...-), outgoing (-...->),
// then undirected (-...-). Undirected must stay last because it shares the
// leading "-" with outgoing.
relationship_pattern = {
    rel_incoming
  | rel_outgoing
  | rel_undirected
}
rel_incoming   = { "<-" ~ rel_spec? ~ "-" }
rel_outgoing   = { "-" ~ rel_spec? ~ "->" }
rel_undirected = { "-" ~ rel_spec? ~ "-" }

// Bracketed relationship details; every component is optional:
//   alias, :TYPE[|TYPE...], *range, {prop: value, ...}
rel_spec = { "[" ~ rel_details? ~ "]" }
rel_details = {
    rel_alias? ~ rel_types? ~ rel_range? ~ node_properties?
}
rel_alias     = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
rel_type_name = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
rel_types = {
    ":" ~ rel_type_name ~ ("|" ~ rel_type_name)*
}

// Variable-length range after "*": `*`, `*n`, `*a..`, `*a..b`, `*..b`
rel_range = { "*" ~ range_spec? }
range_spec = {
    range_bound ~ ".." ~ range_bound?
  | ".." ~ range_bound
  | integer
}
range_bound = @{ ASCII_DIGIT+ }

// RETURN clause of a MATCH query: comma-separated items, each with an optional alias.
return_clause = { ^"RETURN" ~ return_item_list }
return_item_list = {
    return_item ~ ("," ~ return_item)*
}
return_item = {
    return_expr ~ (^"AS" ~ identifier)?
}

// A returned expression is tried in this order: similarity(), alias.property,
// a bare identifier, or "*". property_access precedes identifier so that
// "a.b" is not cut short after "a".
return_expr = {
    similarity_return
  | property_access
  | identifier
  | "*"
}
similarity_return = { ^"similarity" ~ "(" ~ ")" }
// Atomic: no whitespace is permitted around the dot.
property_access = @{ identifier ~ "." ~ identifier }

// Compound query: one SELECT optionally chained to further SELECTs with
// UNION [ALL] / INTERSECT / EXCEPT.
compound_query = {
    select_stmt ~ (set_operator ~ select_stmt)*
}
// "UNION ALL" is listed before plain "UNION" so the longer form wins.
set_operator = {
    ^"UNION" ~ ^"ALL"
  | ^"UNION"
  | ^"INTERSECT"
  | ^"EXCEPT"
}

// INSERT INTO table (col1, col2) VALUES (v1, v2)[, (v3, v4)]
// One column list followed by one or more value rows.
insert_stmt = {
    ^"INSERT" ~ ^"INTO" ~ identifier
    ~ "(" ~ identifier ~ ("," ~ identifier)* ~ ")"
    ~ ^"VALUES" ~ values_row ~ ("," ~ values_row)*
}

// UPSERT INTO table (col1, col2) VALUES (v1, v2)[, (v3, v4)]
// Same shape as INSERT, with a different leading keyword.
upsert_stmt = {
    ^"UPSERT" ~ ^"INTO" ~ identifier
    ~ "(" ~ identifier ~ ("," ~ identifier)* ~ ")"
    ~ ^"VALUES" ~ values_row ~ ("," ~ values_row)*
}

// One parenthesised, comma-separated row of values.
values_row = {
    "(" ~ value ~ ("," ~ value)* ~ ")"
}

// UPDATE table SET col1 = v1, col2 = v2 [WHERE ...]
// At least one assignment is required; the WHERE clause is optional.
update_stmt = {
    ^"UPDATE" ~ identifier
    ~ ^"SET" ~ assignment ~ ("," ~ assignment)*
    ~ where_clause?
}
// A single `column = value` pair.
assignment = { identifier ~ "=" ~ value }

// TRAIN QUANTIZER ON <collection> WITH (<params>)
// The WITH clause is mandatory here.
train_stmt = { ^"TRAIN" ~ ^"QUANTIZER" ~ ^"ON" ~ identifier ~ with_clause }

// ──────────────────────────────────────────────────────────────
// DDL statements (VelesQL v3.3)
// ──────────────────────────────────────────────────────────────

// CREATE INDEX ON <collection> (<field>) — adds a secondary metadata index
create_index_stmt = {
    ^"CREATE" ~ ^"INDEX" ~ ^"ON" ~ identifier
    ~ "(" ~ identifier ~ ")"
}

// DROP INDEX ON <collection> (<field>) — removes a secondary metadata index
drop_index_stmt = {
    ^"DROP" ~ ^"INDEX" ~ ^"ON" ~ identifier
    ~ "(" ~ identifier ~ ")"
}

// CREATE [GRAPH | METADATA] COLLECTION <name> [(options) [suffix]]
// Without a kind keyword the collection is a vector collection (the default).
// Examples:
//   CREATE COLLECTION docs (dimension = 768, metric = 'cosine');
//   CREATE COLLECTION docs (dimension = 768, metric = 'cosine') WITH (storage = 'sq8', m = 16);
//   CREATE GRAPH COLLECTION kg (dimension = 768, metric = 'cosine') SCHEMALESS;
//   CREATE METADATA COLLECTION tags;
create_collection_stmt = {
    ^"CREATE" ~ collection_kind_kw? ~ ^"COLLECTION" ~ identifier ~ create_body?
}
collection_kind_kw = { ^"GRAPH" | ^"METADATA" }

// Parenthesised option list, optionally followed by one suffix clause.
create_body = {
    "(" ~ create_option_list ~ ")" ~ create_suffix?
}
create_option_list = {
    create_option ~ ("," ~ create_option)*
}
create_option = { identifier ~ "=" ~ create_option_value }
create_option_value = {
    string | float | integer | boolean | identifier
}

// Suffix: SCHEMALESS, WITH SCHEMA (...), or a plain WITH (...) clause.
// with_schema_clause is tried before with_clause; both begin with WITH.
create_suffix = {
    schemaless_clause
  | with_schema_clause
  | with_clause
}
schemaless_clause = { ^"SCHEMALESS" }

// WITH SCHEMA (...) — typed graph collections.
// The schema is a comma-separated list of node and edge type definitions:
//   NODE <name> (<prop>: <type>, ...)
//   EDGE <name> FROM <node type> TO <node type>
with_schema_clause = {
    ^"WITH" ~ ^"SCHEMA" ~ "(" ~ schema_def_list ~ ")"
}
schema_def_list = {
    schema_def ~ ("," ~ schema_def)*
}
schema_def = { node_type_def | edge_type_def }

node_type_def = {
    ^"NODE" ~ identifier ~ "(" ~ property_def_list ~ ")"
}
edge_type_def = {
    ^"EDGE" ~ identifier ~ ^"FROM" ~ identifier ~ ^"TO" ~ identifier
}

property_def_list = {
    property_def ~ ("," ~ property_def)*
}
property_def = { identifier ~ ":" ~ type_name }
type_name = {
    ^"STRING"
  | ^"INTEGER"
  | ^"FLOAT"
  | ^"BOOLEAN"
  | ^"VECTOR"
}

// DROP COLLECTION [IF EXISTS] <name>
if_exists_clause = { ^"IF" ~ ^"EXISTS" }
drop_collection_stmt = { ^"DROP" ~ ^"COLLECTION" ~ if_exists_clause? ~ identifier }

// INSERT EDGE INTO <collection> (source = N, target = N, label = 'L')
//   [WITH PROPERTIES (key = val, ...)]
insert_edge_stmt = {
    ^"INSERT" ~ ^"EDGE" ~ ^"INTO" ~ identifier
    ~ "(" ~ edge_field_list ~ ")"
    ~ edge_properties_clause?
}

// Comma-separated `name = value` fields.
edge_field_list = {
    edge_field ~ ("," ~ edge_field)*
}
edge_field = { identifier ~ "=" ~ value }

// Optional property bag; reuses the CREATE option list syntax.
edge_properties_clause = {
    ^"WITH" ~ ^"PROPERTIES" ~ "(" ~ create_option_list ~ ")"
}

// DELETE FROM <collection> WHERE <condition>
// The WHERE clause is required so that a full-collection delete cannot be
// written by accident.
delete_stmt = { ^"DELETE" ~ ^"FROM" ~ identifier ~ where_clause }

// DELETE EDGE <edge_id> FROM <collection>
delete_edge_stmt = { ^"DELETE" ~ ^"EDGE" ~ value ~ ^"FROM" ~ identifier }

// SELECT EDGES FROM <collection> [WHERE source=N / target=N / label='X'] [LIMIT n]
// Queries the edges of a graph collection, with optional filtering.
select_edges_stmt = {
    ^"SELECT" ~ ^"EDGES" ~ ^"FROM" ~ identifier
    ~ where_clause?
    ~ limit_clause?
}

// INSERT NODE INTO <collection> (id = N, payload = '{"key": "value"}')
// Inserts or updates a node payload in a graph collection.
// The field list reuses edge_field_list (`name = value` pairs).
insert_node_stmt = {
    ^"INSERT" ~ ^"NODE" ~ ^"INTO" ~ identifier
    ~ "(" ~ edge_field_list ~ ")"
}

// SELECT statement. After the mandatory `SELECT <list> FROM <source>` the
// clauses below are all optional and must appear in this order:
//   JOIN*, WHERE, GROUP BY, HAVING, ORDER BY, LIMIT, OFFSET, WITH, USING FUSION
// EPIC-052 US-001: DISTINCT support
// EPIC-052 US-003: FROM alias support for Self-JOIN
select_stmt = {
    ^"SELECT" ~ distinct_modifier? ~ select_list
    ~ ^"FROM" ~ from_clause
    ~ join_clause*
    ~ where_clause?
    ~ group_by_clause?
    ~ having_clause?
    ~ order_by_clause?
    ~ limit_clause?
    ~ offset_clause?
    ~ with_clause?
    ~ using_fusion_clause?
}

// DISTINCT modifier (EPIC-052 US-001)
distinct_modifier = { ^"DISTINCT" }

// FROM source with optional alias (EPIC-052 US-003: Self-JOIN support)
// Accepted: `FROM table` and `FROM table AS alias`.
// `FROM table alias` (no AS) is deliberately NOT accepted, to avoid ambiguity
// with JOIN keywords; write `FROM table AS alias` instead.
from_clause = { identifier ~ from_alias? }
from_alias = { ^"AS" ~ identifier }

// USING FUSION [(name = value, ...)] — hybrid search (EPIC-040 US-005)
// Option values are strings, floats or integers.
using_fusion_clause = {
    ^"USING" ~ ^"FUSION" ~ fusion_options?
}
fusion_options = { "(" ~ fusion_option_list ~ ")" }
fusion_option_list = {
    fusion_option ~ ("," ~ fusion_option)*
}
fusion_option = { identifier ~ "=" ~ fusion_value }
fusion_value = {
    string | float | integer
}

// GROUP BY clause (EPIC-017 US-003, EPIC-052 US-005: nested fields support)
group_by_clause = { ^"GROUP" ~ ^"BY" ~ group_by_list }
group_by_list = {
    group_by_column ~ ("," ~ group_by_column)*
}
// A grouping column is a plain (or quoted) identifier, optionally extended
// into a dotted nested path.
group_by_column = {
    identifier ~ ("." ~ identifier)*
}

// HAVING clause: filters groups (EPIC-017 US-006).
// Terms are `aggregate op value`, combined with AND / OR.
having_clause = { ^"HAVING" ~ having_condition }
having_condition = {
    having_term ~ (having_logical_op ~ having_term)*
}
having_term = { aggregate_function ~ compare_op ~ value }
// BUG-6 FIX: AND/OR live in a named rule so pest emits a token for each operator.
having_logical_op = { ^"AND" | ^"OR" }

// JOIN clause for cross-store queries (EPIC-031 US-004, extended EPIC-040 US-003)
// Shape: [LEFT|RIGHT|FULL [OUTER] | INNER] JOIN <table> [AS <alias>] (ON a.x = b.y | USING (col, ...))
join_clause = {
    join_type? ~ ^"JOIN" ~ identifier ~ alias_clause? ~ join_spec
}
join_type = {
    ^"LEFT" ~ ^"OUTER"?
  | ^"RIGHT" ~ ^"OUTER"?
  | ^"FULL" ~ ^"OUTER"?
  | ^"INNER"
}
alias_clause = { ^"AS" ~ identifier }

// Join specification: an ON equality or a USING column list.
join_spec = { on_clause | using_clause }
on_clause = { ^"ON" ~ join_condition }
using_clause = {
    ^"USING" ~ "(" ~ identifier ~ ("," ~ identifier)* ~ ")"
}

// ON condition: equality between two qualified columns.
join_condition = { column_ref ~ "=" ~ column_ref }
// Atomic `table.column` reference (no whitespace around the dot).
column_ref = @{ identifier ~ "." ~ identifier }

// ORDER BY clause (EPIC-040 US-002: columns, aggregates, similarity)
// EPIC-042: extended with arithmetic expressions for custom scoring
order_by_clause = {
    ^"ORDER" ~ ^"BY" ~ order_by_item ~ ("," ~ order_by_item)*
}
order_by_item = { order_by_expr ~ sort_direction? }
sort_direction = { ^"DESC" | ^"ASC" }

// Sort key, tried in order: aggregate call, alias.property, arithmetic expression.
order_by_expr = {
    aggregate_function
  | property_access
  | order_by_arithmetic
}

// similarity(field, vector) inside ORDER BY
order_by_similarity = {
    ^"similarity" ~ "(" ~ similarity_field ~ "," ~ vector_value ~ ")"
}
// Zero-argument similarity() inside ORDER BY: uses the pre-computed search score
order_by_similarity_bare = { ^"similarity" ~ "(" ~ ")" }

// Arithmetic expressions for ORDER BY (EPIC-042)
// Precedence: additive (+, -) binds looser than multiplicative (*, /).
// An atom is one of: float, integer, similarity(...), similarity(),
// a parenthesised expression, or an identifier.
order_by_arithmetic = { arithmetic_additive }
arithmetic_additive = {
    arithmetic_multiplicative ~ ((add_op | sub_op) ~ arithmetic_multiplicative)*
}
arithmetic_multiplicative = {
    arithmetic_atom ~ ((mul_op | div_op) ~ arithmetic_atom)*
}
arithmetic_atom = {
    float
  | integer
  | order_by_similarity
  | order_by_similarity_bare
  | "(" ~ arithmetic_additive ~ ")"
  | identifier
}

// Operators are named rules so that each one is emitted as its own token.
add_op = { "+" }
sub_op = { "-" }
mul_op = { "*" }
div_op = { "/" }

// WITH (name = value, ...) — query-time configuration overrides.
// Values may be strings, floats, integers, booleans or bare identifiers.
with_clause = {
    ^"WITH" ~ "(" ~ with_option_list ~ ")"
}
with_option_list = {
    with_option ~ ("," ~ with_option)*
}
with_option = { identifier ~ "=" ~ with_value }
with_value = {
    string | float | integer | boolean | identifier
}

// Select list: either "*" or a comma-separated list of items
// (columns and/or aggregations for GROUP BY).
select_list = { "*" | select_item_list }
select_item_list = {
    select_item ~ ("," ~ select_item)*
}

// One select item, tried in order: similarity(), window function,
// aggregation, qualified wildcard, plain column.
select_item = {
    similarity_select
  | window_item
  | aggregation_item
  | qualified_wildcard
  | column
}

// Zero-argument similarity() in SELECT: SELECT similarity() [AS alias]
similarity_select = {
    ^"similarity" ~ "(" ~ ")" ~ (^"AS" ~ identifier)?
}

// Qualified wildcard: SELECT alias.* (e.g., SELECT ctx.*)
qualified_wildcard = { identifier ~ "." ~ "*" }

// Aggregate functions: FIRST, COUNT, SUM, AVG, MIN, MAX
// An aggregation item is an aggregate call with an optional `AS alias`.
aggregation_item = { aggregate_function ~ (^"AS" ~ identifier)? }
aggregate_function = { aggregate_type ~ "(" ~ aggregate_arg ~ ")" }
aggregate_type = { ^"FIRST" | ^"COUNT" | ^"SUM" | ^"AVG" | ^"MIN" | ^"MAX" }

// Aggregate argument: "*", the case-insensitive keyword `score`, or a column name.
//
// The `score` keyword is guarded by a negative lookahead. Without it,
// ^"score" also matched the first five characters of any column whose name
// begins with "score" (`scores`, `score_total`, `score.value`). Because PEG
// ordered choice commits to the first alternative that succeeds, column_name
// was then never tried and e.g. `AVG(scores)` failed to parse.
// Rules referenced inside a lookahead emit no tokens, so the pairs produced
// for `AVG(score)` are unchanged.
aggregate_arg = { "*" | !score_word_prefix ~ ^"score" | column_name }

// Helper used only inside the lookahead above: "score" immediately followed
// by a character that would continue an identifier or a dotted path.
score_word_prefix = @{ ^"score" ~ (ASCII_ALPHANUMERIC | "_" | ".") }

// ──────────────────────────────────────────────────────────────
// Window functions (Issue #386 Phase 1)
// ──────────────────────────────────────────────────────────────

// Window function expression in a SELECT list, with an optional alias.
// Example: ROW_NUMBER() OVER (PARTITION BY source ORDER BY score DESC) AS rn
window_item = {
    window_function_call
    ~ ^"OVER" ~ "(" ~ over_clause ~ ")"
    ~ (^"AS" ~ identifier)?
}

// Zero-argument call; Phase 1 covers the ranking functions only.
window_function_call = { window_function_name ~ "(" ~ ")" }
window_function_name = {
    ^"ROW_NUMBER"
  | ^"DENSE_RANK"
  | ^"RANK"
}

// OVER (...) body: optional PARTITION BY, then optional ORDER BY.
over_clause = { partition_by_clause? ~ window_order_by_clause? }

// PARTITION BY: one or more columns
partition_by_clause = { ^"PARTITION" ~ ^"BY" ~ partition_by_list }
partition_by_list = {
    column_name ~ ("," ~ column_name)*
}

// ORDER BY inside OVER(): kept as separate rules to avoid ambiguity with the
// outer ORDER BY. Keys are a bare similarity() or a column name.
window_order_by_clause = {
    ^"ORDER" ~ ^"BY" ~ window_order_by_item ~ ("," ~ window_order_by_item)*
}
window_order_by_item = { window_order_by_expr ~ sort_direction? }
window_order_by_expr = { order_by_similarity_bare | column_name }

// Plain select column with an optional `AS alias`.
column = {
    column_name ~ (^"AS" ~ identifier)?
}
// EPIC-052 US-005: nested field paths such as metadata.source or
// profile.address.city. Atomic: no whitespace is permitted around the dots.
column_name = @{
    identifier ~ ("." ~ identifier)*
}

// WHERE clause
where_clause = { ^"WHERE" ~ or_expr }

// Boolean structure with precedence OR < AND < primary.
or_expr = {
    and_expr ~ (^"OR" ~ and_expr)*
}
and_expr = {
    primary_expr ~ (^"AND" ~ primary_expr)*
}

// Column reference used by predicates; may be a dotted nested path.
where_column = {
    identifier ~ ("." ~ identifier)*
}

// A single predicate. This is a PEG ordered choice: the first alternative
// that matches wins, so the order below must be preserved. The generic
// `column op value` comparison is deliberately last.
primary_expr = {
    "(" ~ or_expr ~ ")"
  | not_expr
  | graph_match_expr
  | similarity_expr
  | vector_fused_search
  | sparse_vector_search
  | vector_search
  | match_expr
  | in_expr
  | between_expr
  | like_expr
  | is_null_expr
  | contains_text_expr
  | contains_expr
  | geo_distance_expr
  | geo_bbox_expr
  | compare_expr
}

// NOT <expr> — logical negation of a primary expression.
//
// The leading negative lookahead enforces a word boundary after NOT.
// Without it, a predicate on a column whose name merely starts with "not"
// was mis-parsed instead of rejected: `note = 'x'` matched as NOT (e = 'x')
// and `not_deleted = true` as NOT (_deleted = true), because implicit
// whitespace between tokens may be empty. With the guard, not_expr fails on
// such input and the later alternatives of primary_expr (e.g. compare_expr)
// see the whole column name.
// Rules referenced inside a lookahead emit no tokens, so the inner pairs of
// not_expr are unchanged (a single primary_expr).
not_expr = { !not_word_prefix ~ ^"NOT" ~ primary_expr }

// Helper used only inside the lookahead above: "NOT" immediately followed by
// a character that would continue an identifier.
not_word_prefix = @{ ^"NOT" ~ (ASCII_ALPHANUMERIC | "_") }

// Graph predicate usable inside a SELECT's WHERE clause:
//   WHERE ... AND MATCH (a)-[:REL]->(b)
graph_match_expr = { ^"MATCH" ~ graph_pattern }

// Similarity predicate: similarity(field, vector) <op> threshold
// Used in hybrid graph-vector queries.
// The threshold may be a float (0.8) or an integer (1) for user convenience.
similarity_expr = {
    ^"similarity" ~ "(" ~ similarity_field ~ "," ~ vector_value ~ ")"
    ~ compare_op ~ numeric_threshold
}
numeric_threshold = { float | integer }
// Field name: starts with a letter or underscore; later characters may also
// be digits or dots (atomic).
similarity_field = @{
    (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_" | ".")*
}

// Sparse vector search: vector SPARSE_NEAR <sparse value> [USING 'index-name']
sparse_vector_search = {
    ^"vector" ~ ^"SPARSE_NEAR" ~ sparse_value
    ~ (^"USING" ~ string)?
}

// Sparse value: an inline literal such as {12: 0.8, 45: 0.3}, or a bind parameter ($sv)
sparse_value = { sparse_literal | parameter }
sparse_literal = {
    "{" ~ sparse_entry ~ ("," ~ sparse_entry)* ~ "}"
}
// One `index: weight` entry (integer index, float weight).
sparse_entry = { integer ~ ":" ~ float }

// Dense vector search: vector NEAR <vector value>
// Note: the distance metric is fixed at collection creation, not per query.
vector_search = { ^"vector" ~ ^"NEAR" ~ vector_value }

// Multi-vector fusion search:
//   vector NEAR_FUSED [v1, v2, ...] USING FUSION 'strategy' (params)
// The USING FUSION part is optional.
vector_fused_search = { ^"vector" ~ ^"NEAR_FUSED" ~ vector_array ~ fusion_clause? }
vector_array = {
    "[" ~ vector_value ~ ("," ~ vector_value)* ~ "]"
}
fusion_clause = {
    ^"USING" ~ ^"FUSION" ~ fusion_strategy ~ fusion_params?
}
fusion_strategy = { string }
fusion_params = { "(" ~ fusion_param_list ~ ")" }
fusion_param_list = {
    fusion_param ~ ("," ~ fusion_param)*
}
fusion_param = { identifier ~ "=" ~ fusion_param_value }
fusion_param_value = { float | integer }

// A vector is an inline numeric literal [c1, c2, ...] or a bind parameter.
vector_value = { vector_literal | parameter }
vector_literal = {
    "[" ~ vector_component ~ ("," ~ vector_component)* ~ "]"
}
vector_component = { float | integer }

// Full-text search: column MATCH 'query'
match_expr = { where_column ~ ^"MATCH" ~ string }

// IN / NOT IN: column [NOT] IN (value, ...)
in_expr = {
    where_column ~ (not_kw ~ ^"IN" | ^"IN")
    ~ "(" ~ value_list ~ ")"
}
value_list = {
    value ~ ("," ~ value)*
}

// BETWEEN: column BETWEEN value AND value
between_expr = {
    where_column ~ ^"BETWEEN" ~ value ~ ^"AND" ~ value
}

// LIKE / ILIKE: column LIKE 'pattern' or column ILIKE 'pattern'
// ILIKE is listed first so that its leading "I" is not left unmatched.
like_expr = { where_column ~ like_op ~ string }
like_op = { ^"ILIKE" | ^"LIKE" }

// CONTAINS_TEXT: strict text substring filter
contains_text_expr = { where_column ~ ^"CONTAINS_TEXT" ~ string }

// CONTAINS: column CONTAINS value | column CONTAINS ANY/ALL (values)
// The ALL and ANY list forms are tried before the single-value form.
contains_expr = {
    where_column ~ ^"CONTAINS" ~ ^"ALL" ~ "(" ~ value_list ~ ")"
  | where_column ~ ^"CONTAINS" ~ ^"ANY" ~ "(" ~ value_list ~ ")"
  | where_column ~ ^"CONTAINS" ~ value
}

// Numeric argument used by the geo predicates (float or integer).
geo_number = { float | integer }

// GEO_DISTANCE(column, lat, lng) <op> meters
geo_distance_expr = {
    ^"GEO_DISTANCE"
    ~ "(" ~ column_name ~ "," ~ geo_number ~ "," ~ geo_number ~ ")"
    ~ compare_op ~ geo_number
}

// GEO_BBOX(column, lat_min, lng_min, lat_max, lng_max)
geo_bbox_expr = {
    ^"GEO_BBOX"
    ~ "(" ~ column_name
    ~ "," ~ geo_number ~ "," ~ geo_number
    ~ "," ~ geo_number ~ "," ~ geo_number
    ~ ")"
}

// IS NULL / IS NOT NULL
// NOT is a named rule (not_kw) so that its presence shows up as a token.
not_kw = { ^"NOT" }
is_null_expr = {
    where_column ~ ^"IS" ~ not_kw? ~ ^"NULL"
}

// Generic comparison: column <op> value
compare_expr = { where_column ~ compare_op ~ value }
// Two-character operators are listed before their one-character prefixes.
compare_op = {
    ">=" | "<=" | "<>" | "!="
  | "=" | ">" | "<"
}

// LIMIT <integer> and OFFSET <integer>
limit_clause  = { ^"LIMIT" ~ integer }
offset_clause = { ^"OFFSET" ~ integer }

// Values - EPIC-038: Temporal, EPIC-039: Subquery
// Ordered choice: subquery and temporal forms first, then float before
// integer (so "1.5" is not cut short at "1"), then the remaining literals
// and bind parameters.
value = { subquery_expr | temporal_expr | float | integer | string | boolean | null_value | parameter }

// Scalar subquery expression (EPIC-039): a parenthesised SELECT with optional
// WHERE, GROUP BY, HAVING and LIMIT clauses.
subquery_expr = { "(" ~ ^"SELECT" ~ select_list ~ ^"FROM" ~ identifier ~ where_clause? ~ group_by_clause? ~ having_clause? ~ limit_clause? ~ ")" }

// Bind parameter: "$" immediately followed by an identifier (atomic).
parameter = @{ "$" ~ identifier }
null_value = { ^"NULL" }

// Boolean literal, with a word-boundary guard.
// Rules such as create_option_value and with_value try `boolean` before
// `identifier`. Without the lookahead, an identifier value that starts with
// "true" or "false" (e.g. `mode = true_recall`) had its prefix consumed as a
// boolean; ordered choice then never retried `identifier` and the statement
// failed to parse. The rule is atomic so the lookahead is checked directly
// after the keyword, with no implicit whitespace skipped first. The emitted
// pair and its span (just TRUE / FALSE) are unchanged.
boolean = @{ (^"TRUE" | ^"FALSE") ~ !(ASCII_ALPHANUMERIC | "_") }

// Temporal expressions (EPIC-038)
// The arithmetic form is tried first so that "NOW() - INTERVAL '...'" is not
// cut short after NOW().
temporal_expr = {
    temporal_arithmetic
  | now_function
  | interval_expr
}
// <NOW() | INTERVAL '...'> (+|-) <NOW() | INTERVAL '...'>
temporal_arithmetic = {
    (now_function | interval_expr)
    ~ temporal_op
    ~ (now_function | interval_expr)
}
temporal_op = { "+" | "-" }
now_function = { ^"NOW" ~ "(" ~ ")" }
interval_expr = { ^"INTERVAL" ~ string }

// Literals (all atomic: no implicit whitespace inside a literal)

// Single-quoted string; a doubled quote ('') inside stands for one quote character.
string = @{
    "'" ~ (!"'" ~ ANY | "''")* ~ "'"
}
// Optionally negative run of digits.
integer = @{
    "-"? ~ ASCII_DIGIT+
}
// Optionally negative decimal; digits are required on both sides of the point.
float = @{
    "-"? ~ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+
}

// Identifiers - EPIC-044 US-005: quoted identifiers allow reserved keywords as names.
// Accepted forms: regular identifiers, backtick-quoted (`select`), double-quoted ("from").
// The quoted forms are tried first.
identifier = { quoted_identifier | regular_identifier }

// Regular identifier: a letter or underscore, then letters, digits or underscores.
regular_identifier = @{
    (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")*
}

// Quoted identifiers, used to escape reserved keywords.
quoted_identifier = { backtick_identifier | doublequote_identifier }

// Backtick style: `select`, `from`, `order`
// The inner text is one or more characters other than a backtick.
backtick_identifier = @{ "`" ~ backtick_inner ~ "`" }
backtick_inner = @{
    (!"`" ~ ANY)+
}

// Double-quote style (SQL standard): "select", "from", "order"
// A doubled quote is an escape: "col""name" -> col"name
doublequote_identifier = @{ "\"" ~ doublequote_inner ~ "\"" }
doublequote_inner = @{
    (doublequote_escape | !"\"" ~ ANY)*
}
doublequote_escape = @{ "\"\"" }