minigdb 0.1.0

An embedded property-graph database in Rust with a GQL query language, RocksDB-backed ACID storage, graph algorithms, and Python bindings
Documentation
// GQL (ISO/IEC 39075) subset grammar for minigdb
// Resolved relative to src/ at compile time by pest_derive.

// ── Whitespace & comments ──────────────────────────────────────────────────
WHITESPACE = _{ " " | "\t" | "\r" | "\n" }
COMMENT    = _{ "//" ~ (!"\n" ~ ANY)* ~ ("\n" | EOI)
              | "/*" ~ (!"*/" ~ ANY)* ~ "*/" }

// ── Keywords (case-insensitive) ────────────────────────────────────────────
kw_match   = _{ ^"MATCH" }
kw_where   = _{ ^"WHERE" }
kw_return  = _{ ^"RETURN" }
kw_distinct= _{ ^"DISTINCT" }
distinct_flag = { ^"DISTINCT" }
kw_order   = _{ ^"ORDER" }
kw_by      = _{ ^"BY" }
kw_limit   = _{ ^"LIMIT" }
kw_offset  = _{ ^"OFFSET" }
kw_skip    = _{ ^"SKIP" }
kw_insert  = _{ ^"INSERT" }
kw_set     = _{ ^"SET" }
kw_remove  = _{ ^"REMOVE" }
kw_delete  = _{ ^"DELETE" }
kw_detach  = _{ ^"DETACH" }
// Logical/comparison keywords that appear in the parse tree (non-silent atomic).
// Being @{} (atomic) prevents implicit whitespace before the word-boundary guard,
// so !(ASCII_ALPHANUMERIC|"_") sees the char right after the keyword text.
kw_and    = @{ ^"AND"   ~ !(ASCII_ALPHANUMERIC | "_") }
kw_or     = @{ ^"OR"    ~ !(ASCII_ALPHANUMERIC | "_") }
kw_not    = @{ ^"NOT"   ~ !(ASCII_ALPHANUMERIC | "_") }
// kw_asc / kw_desc appear in the parse tree so the ORDER BY builder can detect direction.
kw_asc    = @{ ^"ASC" }
kw_desc   = @{ ^"DESC" }
// Path-mode keywords are atomic to keep the boundary check immediate (no consumed space).
kw_walk   = @{ ^"WALK"   ~ !(ASCII_ALPHA | "_") }
kw_trail  = @{ ^"TRAIL"  ~ !(ASCII_ALPHA | "_") }
kw_simple = @{ ^"SIMPLE" ~ !(ASCII_ALPHA | "_") }
// IS / NULL need to be silent (they must not appear in parse tree for is_null_suffix builder).
// Atomic delegates prevent the implicit-whitespace/lookahead bug while keeping the rules silent.
kw_is_atom   = @{ ^"IS"    ~ !(ASCII_ALPHANUMERIC | "_") }
kw_null_atom = @{ ^"NULL"  ~ !(ASCII_ALPHANUMERIC | "_") }
kw_is   = _{ kw_is_atom }
kw_null = _{ kw_null_atom }
// kw_as has the same implicit-whitespace/lookahead bug as kw_and — use atomic delegate.
kw_as_atom = @{ ^"AS" ~ !(ASCII_ALPHANUMERIC | "_") }
kw_as = _{ kw_as_atom }
// Remaining silent keywords (no word-boundary issues in their contexts).
kw_true  = _{ ^"TRUE"  ~ !(ASCII_ALPHANUMERIC | "_") }
kw_false = _{ ^"FALSE" ~ !(ASCII_ALPHANUMERIC | "_") }
kw_in    = @{ ^"IN"    ~ !(ASCII_ALPHANUMERIC | "_") }
kw_count   = _{ ^"COUNT" }
kw_labels  = _{ ^"LABELS" }
kw_type    = _{ ^"TYPE" }
kw_id      = _{ ^"ID" }
kw_create  = _{ ^"CREATE" }
kw_drop    = _{ ^"DROP" }
kw_index   = _{ ^"INDEX" }
kw_on      = _{ ^"ON" }
kw_show    = _{ ^"SHOW" }
kw_indexes = _{ ^"INDEXES" | ^"INDICES" }
// CALL / YIELD keywords (atomic for word-boundary safety).
kw_call    = @{ ^"CALL"     ~ !(ASCII_ALPHANUMERIC | "_") }
kw_yield   = @{ ^"YIELD"    ~ !(ASCII_ALPHANUMERIC | "_") }
// P5 keywords — all atomic for word-boundary safety.
kw_unwind   = @{ ^"UNWIND"   ~ !(ASCII_ALPHANUMERIC | "_") }
kw_optional = @{ ^"OPTIONAL" ~ !(ASCII_ALPHANUMERIC | "_") }
kw_union    = @{ ^"UNION"    ~ !(ASCII_ALPHANUMERIC | "_") }
kw_all      = @{ ^"ALL"      ~ !(ASCII_ALPHANUMERIC | "_") }
kw_with     = @{ ^"WITH"     ~ !(ASCII_ALPHANUMERIC | "_") }
// Constraint keywords — atomic for word-boundary safety.
kw_constraint  = @{ ^"CONSTRAINT"  ~ !(ASCII_ALPHANUMERIC | "_") }
kw_constraints = @{ ^"CONSTRAINTS" ~ !(ASCII_ALPHANUMERIC | "_") }
kw_unique      = @{ ^"UNIQUE"      ~ !(ASCII_ALPHANUMERIC | "_") }
kw_type_kw     = @{ ^"TYPE"        ~ !(ASCII_ALPHANUMERIC | "_") }
// CSV load keywords (simple silent rules; context provides sufficient disambiguation).
kw_truncate = _{ ^"TRUNCATE" }
kw_load  = _{ ^"LOAD" }
kw_csv   = _{ ^"CSV" }
kw_nodes = _{ ^"NODES" }
kw_edges = _{ ^"EDGES" }
kw_from  = _{ ^"FROM" }
kw_label = _{ ^"LABEL" }

path_mode  = { kw_walk | kw_trail | kw_simple }

// ── Identifiers ────────────────────────────────────────────────────────────
ident = @{ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* }

// ── Literals ───────────────────────────────────────────────────────────────
integer  = @{ "-"? ~ ASCII_DIGIT+ }
float    = @{ "-"? ~ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ ~ (^"e" ~ "-"? ~ ASCII_DIGIT+)? }
string   = @{ "\"" ~ (!"\"" ~ ANY)* ~ "\"" | "'" ~ (!"'" ~ ANY)* ~ "'" }
boolean  = @{ kw_true | kw_false }
null_lit = @{ kw_null }

literal = { float | integer | string | boolean | null_lit }

// ── Top-level statement ────────────────────────────────────────────────────
// A statement may be a UNION of multiple single statements.
// single_stmt alternatives are tried in order; match_with_stmt must precede
// match_stmt (both start with kw_match, but WITH contains an extra clause).
statement   = { SOI ~ single_stmt ~ (kw_union ~ kw_all? ~ single_stmt)* ~ ";"? ~ EOI }
single_stmt = {
    match_with_stmt
  | match_optional_match_stmt
  | optional_match_stmt
  | call_pipeline_stmt
  | match_stmt
  | match_insert_stmt
  | unwind_insert_stmt
  | unwind_stmt
  | insert_stmt
  | set_stmt
  | remove_stmt
  | delete_stmt
  | create_index_stmt
  | drop_index_stmt
  | show_indexes_stmt
  | call_stmt
  | load_csv_nodes_stmt
  | load_csv_edges_stmt
  | truncate_stmt
  | constraint_stmt
}

// ── Index management statements ─────────────────────────────────────────────
// CREATE INDEX ON :Label(property)
create_index_stmt = { kw_create ~ kw_index ~ kw_on ~ ":" ~ ident ~ "(" ~ ident ~ ")" }
// DROP INDEX ON :Label(property)
drop_index_stmt   = { kw_drop ~ kw_index ~ kw_on ~ ":" ~ ident ~ "(" ~ ident ~ ")" }
// SHOW INDEXES  (also SHOW INDICES)
show_indexes_stmt = { kw_show ~ kw_indexes }

// ── MATCH statement ────────────────────────────────────────────────────────
// graph_patterns allows comma-separated disconnected subpatterns (cross-joined).
match_stmt = { kw_match ~ path_mode? ~ graph_patterns ~ where_clause? ~ return_clause }

graph_pattern = { node_pattern ~ edge_chain* }

edge_chain = { edge_pattern ~ node_pattern }

node_pattern = { "(" ~ node_var? ~ (node_label)* ~ properties_inline? ~ ")" }
node_var     = { ident }
node_label   = { ":" ~ ident }

edge_pattern = {
    left_arrow ~ "[" ~ edge_inner? ~ "]" ~ "-"
  | "-" ~ "[" ~ edge_inner? ~ "]" ~ right_arrow
  | "-" ~ "[" ~ edge_inner? ~ "]" ~ "-"
  | left_arrow
  | right_arrow
  | "-"
}

left_arrow  = _{ "<-" }
right_arrow = _{ "->" }

edge_inner    = { edge_var? ~ edge_label? ~ properties_inline? ~ quantifier? }
edge_var      = { ident }
edge_label    = { ":" ~ ident }

// ── Path quantifiers ───────────────────────────────────────────────────────
// Examples: *, +, *2, *1..3, *2.., {3}, {1,3}, {1,}
// quant_range allows an open-ended upper bound: *2.. means "2 or more hops".
quantifier  = { star_quant | plus_quant | brace_quant }
star_quant  = { "*" ~ quant_range? }
plus_quant  = { "+" }
brace_quant = { "{" ~ nat_int ~ ("," ~ nat_int?)? ~ "}" }
quant_range = { nat_int ~ (".." ~ nat_int?)? }
nat_int     = @{ ASCII_DIGIT+ }

properties_inline = { "{" ~ prop_constraint_list? ~ "}" }
prop_constraint_list = { prop_constraint ~ ("," ~ prop_constraint)* }
prop_constraint = { ident ~ ":" ~ expr }

where_clause  = { kw_where ~ expr }

// ── RETURN clause ──────────────────────────────────────────────────────────
return_clause = { kw_return ~ distinct_flag? ~ return_items ~ order_by_clause? ~ (limit_clause | offset_clause)* }

return_items  = { return_item ~ ("," ~ return_item)* }
return_item   = { "*" | expr ~ (kw_as ~ ident)? }

order_by_clause = { kw_order ~ kw_by ~ order_item ~ ("," ~ order_item)* }
order_item      = { expr ~ (kw_asc | kw_desc)? }

limit_clause  = { kw_limit ~ expr }
offset_clause = { (kw_offset | kw_skip) ~ expr }

// ── OPTIONAL MATCH statement ───────────────────────────────────────────────
// Like MATCH but returns one null row per unmatched binding instead of nothing.
optional_match_stmt = { kw_optional ~ kw_match ~ path_mode? ~ graph_patterns ~ where_clause? ~ return_clause }

// ── MATCH … OPTIONAL MATCH … RETURN ─────────────────────────────────────────
// Chained MATCH + one-or-more OPTIONAL MATCH clauses, like a SQL LEFT JOIN.
// e.g. MATCH (p:Person) OPTIONAL MATCH (p)-[:WORKS_AT]->(c:Company) RETURN p.name, c.name
optional_match_clause     = { kw_optional ~ kw_match ~ path_mode? ~ graph_patterns ~ where_clause? }
match_optional_match_stmt = { kw_match ~ path_mode? ~ graph_patterns ~ where_clause? ~ optional_match_clause+ ~ return_clause }

// ── UNWIND statement ────────────────────────────────────────────────────────
// UNWIND expr AS var RETURN ...
// Evaluates expr to a List and produces one set of bindings per element.
unwind_stmt = { kw_unwind ~ expr ~ kw_as ~ ident ~ return_clause }

// ── WITH clause (pipeline projection) ──────────────────────────────────────
// MATCH pattern [WHERE] WITH projections [WHERE] RETURN ...
with_clause     = { kw_with ~ distinct_flag? ~ return_items ~ where_clause? }
match_with_stmt = { kw_match ~ path_mode? ~ graph_patterns ~ where_clause? ~ with_clause ~ return_clause }

// ── MATCH … INSERT statement ────────────────────────────────────────────────
// Matches existing nodes/edges and inserts new elements (typically edges)
// using the matched variables as endpoints.  Supports comma-separated
// disconnected graph patterns so two independent nodes can be matched.
graph_patterns     = { graph_pattern ~ ("," ~ graph_pattern)* }
match_insert_stmt  = { kw_match ~ graph_patterns ~ where_clause? ~ kw_insert ~ insert_elements }

// ── INSERT statement ───────────────────────────────────────────────────────
insert_stmt   = { kw_insert ~ insert_elements }
insert_elements = { insert_element ~ ("," ~ insert_element)* }
insert_element = { insert_edge | insert_node }

insert_node = { "(" ~ node_var? ~ (node_label)* ~ properties_inline? ~ ")" }

insert_edge = {
    "(" ~ ident ~ ")" ~ "-" ~ "[" ~ ident? ~ ":" ~ ident ~ properties_inline? ~ "]" ~ "->" ~ "(" ~ ident ~ ")"
  | "(" ~ ident ~ ")" ~ "<-" ~ "[" ~ ident? ~ ":" ~ ident ~ properties_inline? ~ "]" ~ "-" ~ "(" ~ ident ~ ")"
  | "(" ~ ident ~ ")" ~ "-" ~ "[" ~ ident? ~ ":" ~ ident ~ properties_inline? ~ "]" ~ "-" ~ "(" ~ ident ~ ")"
}

// ── SET statement ──────────────────────────────────────────────────────────
set_stmt  = { kw_match ~ graph_pattern ~ where_clause? ~ kw_set ~ set_items }
set_items = { set_item ~ ("," ~ set_item)* }
set_item  = { ident ~ ("." ~ ident ~ "=" ~ expr | ":" ~ ident) }

// ── REMOVE statement ───────────────────────────────────────────────────────
remove_stmt  = { kw_match ~ graph_pattern ~ where_clause? ~ kw_remove ~ remove_items }
remove_items = { remove_item ~ ("," ~ remove_item)* }
remove_item  = { ident ~ ("." ~ ident | ":" ~ ident) }

// ── CALL statement ─────────────────────────────────────────────────────────
// CALL algorithmName(key: expr, ...) YIELD col1, col2
call_stmt    = { kw_call ~ ident ~ "(" ~ call_args? ~ ")" ~ yield_clause? }

// CALL...YIELD piped into MATCH and RETURN (Cypher-style procedure pipeline).
// e.g. CALL pageRank() YIELD node, score MATCH (n) WHERE n = node RETURN n.name, score
call_pipeline_match = { kw_match ~ path_mode? ~ graph_patterns ~ where_clause? }
call_pipeline_stmt  = { kw_call ~ ident ~ "(" ~ call_args? ~ ")" ~ yield_clause ~ call_pipeline_match? ~ return_clause }
call_args    = { call_arg ~ ("," ~ call_arg)* }
call_arg     = { ident ~ ":" ~ expr }
yield_clause = { kw_yield ~ ident_list }

// ── DELETE statement ───────────────────────────────────────────────────────
// Supports both:
//   MATCH (n:P) WHERE ... DELETE n
//   MATCH (n:P) WHERE ... DETACH DELETE n
//   DELETE MATCH (n:P) WHERE ... n         (legacy form)
//   DETACH DELETE MATCH (n:P) WHERE ... n  (legacy form)
delete_stmt = {
    kw_match ~ graph_pattern ~ where_clause? ~ (kw_detach ~ kw_delete | kw_delete) ~ ident_list
  | (kw_detach ~ kw_delete | kw_delete) ~ kw_match? ~ graph_pattern? ~ where_clause? ~ ident_list
}
ident_list  = { ident ~ ("," ~ ident)* }

// ── Expressions (operator precedence via layered rules) ────────────────────
expr       = { or_expr }
or_expr    = { and_expr ~ (kw_or ~ and_expr)* }
and_expr   = { not_expr ~ (kw_and ~ not_expr)* }
not_expr   = { kw_not ~ not_expr | compare_expr }
not_flag     = { kw_not }   // non-silent so its presence is visible in the parse tree
is_null_suffix = { kw_is ~ not_flag? ~ kw_null }
compare_expr = { add_expr ~ (cmp_op ~ add_expr | kw_in ~ add_expr | is_null_suffix)? }
add_expr   = { mul_expr ~ ((add_op) ~ mul_expr)* }
mul_expr   = { unary_expr ~ ((mul_op) ~ unary_expr)* }
unary_expr = { "-" ~ unary_expr | primary }

cmp_op = { "<>" | "<=" | ">=" | "<" | ">" | "=" }
add_op = { "+" | "-" }
mul_op = { "*" | "/" | "%" }

// ── List literal ────────────────────────────────────────────────────────────
list_literal = { "[" ~ (expr ~ ("," ~ expr)*)? ~ "]" }

primary = {
    "(" ~ expr ~ ")"
  | func_call
  | list_literal
  | literal
  | param        // $name — query parameter
  | prop_access
  | var_ref
  | star
}

// ── Query parameter ─────────────────────────────────────────────────────────
// $ident — reference to a named parameter passed alongside the query.
param = @{ "$" ~ ident }

func_call   = { ident ~ "(" ~ (expr ~ ("," ~ expr)*)? ~ ")" }
prop_access = { ident ~ "." ~ ident }
var_ref     = { ident }
star        = { "*" }

// ── UNWIND … INSERT ──────────────────────────────────────────────────────────
// Like UNWIND … RETURN but inserts elements for each list row instead.
// Must appear before unwind_stmt in single_stmt so the parser tries it first.
unwind_insert_stmt = { kw_unwind ~ expr ~ kw_as ~ ident ~ kw_insert ~ insert_elements }

// ── LOAD CSV statements ──────────────────────────────────────────────────────
// LOAD CSV NODES FROM 'path/to/nodes.csv' [LABEL LabelName]
load_csv_nodes_stmt = { kw_load ~ kw_csv ~ kw_nodes ~ kw_from ~ string ~ (kw_label ~ ident)? }
// LOAD CSV EDGES FROM 'path/to/edges.csv' [LABEL EdgeLabel]
load_csv_edges_stmt = { kw_load ~ kw_csv ~ kw_edges ~ kw_from ~ string ~ (kw_label ~ ident)? }
// TRUNCATE — delete all nodes and edges in O(1) via RocksDB range tombstones.
truncate_stmt = { kw_truncate }

// ── CONSTRAINT statements ────────────────────────────────────────────────────
// CREATE CONSTRAINT UNIQUE ON :Label(property)
// CREATE CONSTRAINT TYPE IS INTEGER ON :Label(property)
// DROP   CONSTRAINT UNIQUE ON :Label(property)
// SHOW   CONSTRAINTS
constraint_stmt   = {
    (kw_create ~ kw_constraint ~ constraint_kind ~ kw_on ~ constraint_target)
  | (kw_drop   ~ kw_constraint ~ constraint_kind ~ kw_on ~ constraint_target)
  | (kw_show   ~ kw_constraints)
}
constraint_kind   = { kw_unique | (kw_type_kw ~ kw_is_atom ~ value_type) }
constraint_target = { ":" ~ ident ~ "(" ~ ident ~ ")" }
value_type        = { ^"INTEGER" | ^"FLOAT" | ^"STRING" | ^"BOOLEAN" }