meshdb_storage/engine.rs
1//! Backend-agnostic graph storage interface.
2//!
3//! Defines [`StorageEngine`], the object-safe trait that every concrete
4//! storage backend must implement, plus the neutral types that flow
5//! through the trait's API ([`PropertyIndexSpec`] and [`GraphMutation`]).
6//! All non-executor consumers (`meshdb-rpc`, `meshdb-server`) hold the engine
7//! as `Arc<dyn StorageEngine>` so swapping backends is a one-line change
8//! at the `open` site — everything else flows through the trait.
9//!
10//! The trait deliberately mirrors the public surface of the RocksDB
11//! backend 1:1 rather than inventing a more abstract vocabulary. Any
12//! would-be second backend should be able to implement it without
13//! fighting a RocksDB-shaped API, because the methods name graph
14//! operations, not rocksdb CFs.
15//!
16//! Not part of the trait:
17//! - `open(path)` — each backend takes different construction arguments.
18//! - Raw batch types (`WriteBatch`, etc.) — [`GraphMutation`] is the
19//! portable sequencing primitive instead.
20//!
21//! The concrete RocksDB impl lives in [`crate::rocksdb_engine`].
22
23use crate::{Error, Result};
24use meshdb_core::{Edge, EdgeId, Node, NodeId, Property};
25use std::path::Path;
26
27/// Declarative spec for a single-property equality index. An index is
28/// uniquely identified by its `(label, properties)` pair — users don't
29/// name them, which keeps DROP/SHOW behavior simple and matches the
30/// way the planner looks them up when deciding whether to emit
31/// `IndexSeek`.
32///
33/// `properties` is a `Vec<String>` so composite indexes (tuple keys
34/// over multiple properties, `CREATE INDEX FOR (n:Label) ON (n.a, n.b)`)
35/// fit the same shape as the single-property form. Single-property
36/// specs encode byte-identically to the pre-composite layout on
37/// disk, so existing data is readable after upgrade.
38#[derive(Debug, Clone, PartialEq, Eq, Hash)]
39pub struct PropertyIndexSpec {
40 pub label: String,
41 pub properties: Vec<String>,
42}
43
44/// Relationship-scope analogue of [`PropertyIndexSpec`]. Same
45/// `properties: Vec<String>` shape, same composite semantics, same
46/// byte-level on-disk compatibility for length-1 entries.
47#[derive(Debug, Clone, PartialEq, Eq, Hash)]
48pub struct EdgePropertyIndexSpec {
49 pub edge_type: String,
50 pub properties: Vec<String>,
51}
52
53/// Declarative spec for a point / spatial index on a single
54/// `Property::Point` property. A point index accelerates bounding-box
55/// containment and distance-radius queries via a Z-order (Morton)
56/// cell quantizer — points map to sortable u64 cell IDs, so bbox
57/// queries reduce to a cell-range scan plus a per-row precision
58/// filter.
59///
60/// Single-property for now; a future "composite point index" would
61/// have to pick a curve that extends cleanly to N spatial axes and
62/// so gets its own spec shape when it lands.
63#[derive(Debug, Clone, PartialEq, Eq, Hash)]
64pub struct PointIndexSpec {
65 pub label: String,
66 pub property: String,
67}
68
69/// Relationship-scope analogue of [`PointIndexSpec`]. Same
70/// single-property shape; the index lives under a separate CF so
71/// node and edge spatial entries can't alias and the two can be
72/// dropped independently.
73#[derive(Debug, Clone, PartialEq, Eq, Hash)]
74pub struct EdgePointIndexSpec {
75 pub edge_type: String,
76 pub property: String,
77}
78
79/// Kind of single-property constraint. `Unique` comes from
80/// `REQUIRE n.prop IS UNIQUE`, `NotNull` from `REQUIRE n.prop IS NOT
81/// NULL`, and `PropertyType(t)` from `REQUIRE n.prop IS :: <TYPE>`.
82/// `PropertyType` carries the target type so a single constraint-kind
83/// enum covers all four flavours without blowing up into one variant
84/// per type.
85#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
86pub enum PropertyConstraintKind {
87 Unique,
88 NotNull,
89 PropertyType(PropertyType),
90 /// `REQUIRE (n.a, n.b) IS NODE KEY` — composite uniqueness + NOT
91 /// NULL. The constraint holds across the tuple of values the
92 /// spec's `properties` list names; each property must be present
93 /// and the tuple must be unique across every node carrying the
94 /// constrained label.
95 NodeKey,
96}
97
98/// Property types recognised by `IS :: <TYPE>`. Covers the four
99/// primitive Cypher types used in practice; temporal / spatial / list
100/// / map type constraints are a follow-up.
101#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
102pub enum PropertyType {
103 String,
104 Integer,
105 Float,
106 Boolean,
107}
108
109impl PropertyType {
110 /// User-facing token used by `SHOW CONSTRAINTS` output.
111 pub fn as_str(&self) -> &'static str {
112 match self {
113 PropertyType::String => "STRING",
114 PropertyType::Integer => "INTEGER",
115 PropertyType::Float => "FLOAT",
116 PropertyType::Boolean => "BOOLEAN",
117 }
118 }
119
120 /// Lowercase tag used when generating a default constraint name —
121 /// must match the tag the cluster crate uses on the Raft-log side
122 /// so auto-names resolve identically across replicas.
123 pub fn name_tag(&self) -> &'static str {
124 match self {
125 PropertyType::String => "string",
126 PropertyType::Integer => "integer",
127 PropertyType::Float => "float",
128 PropertyType::Boolean => "boolean",
129 }
130 }
131}
132
133impl PropertyConstraintKind {
134 /// Short, human-facing token used in `SHOW CONSTRAINTS` output.
135 /// For `PropertyType` the string carries the target type inside
136 /// parentheses so operators reading the output can see what the
137 /// constraint enforces at a glance. Stable — do not rename
138 /// without a migration, because the auto-name the storage layer
139 /// derives from this tag is part of the user-visible DROP surface.
140 pub fn as_string(&self) -> String {
141 match self {
142 PropertyConstraintKind::Unique => "UNIQUE".into(),
143 PropertyConstraintKind::NotNull => "NOT NULL".into(),
144 PropertyConstraintKind::PropertyType(t) => format!("IS :: {}", t.as_str()),
145 PropertyConstraintKind::NodeKey => "NODE KEY".into(),
146 }
147 }
148
149 /// Lowercase tag used when generating a default constraint name.
150 /// `PropertyType(T)` contributes `type_<t>` so every distinct type
151 /// gets its own auto-name and two type constraints on the same
152 /// `(label, property)` can coexist if they ever need to.
153 pub fn name_tag(&self) -> String {
154 match self {
155 PropertyConstraintKind::Unique => "unique".into(),
156 PropertyConstraintKind::NotNull => "not_null".into(),
157 PropertyConstraintKind::PropertyType(t) => format!("type_{}", t.name_tag()),
158 PropertyConstraintKind::NodeKey => "node_key".into(),
159 }
160 }
161
162 /// True iff the kind accepts more than one property in its spec.
163 /// Used by the storage layer to validate the `properties` list
164 /// length at create time — single-property kinds must not receive
165 /// a multi-element list.
166 pub fn allows_multi_property(&self) -> bool {
167 matches!(self, PropertyConstraintKind::NodeKey)
168 }
169}
170
171/// Scope a constraint applies to. `Node(label)` covers every node
172/// carrying the label; `Relationship(edge_type)` covers every edge
173/// with the given type. Kept as an enum rather than a plain string
174/// so future scope extensions (e.g. specific pattern shapes) land
175/// additively.
176#[derive(Debug, Clone, PartialEq, Eq, Hash)]
177pub enum ConstraintScope {
178 Node(String),
179 Relationship(String),
180}
181
182impl ConstraintScope {
183 /// The scope's target string — label for node scope, edge type
184 /// for relationship scope. Handy when the caller has already
185 /// dispatched on the variant and just needs the name.
186 pub fn target(&self) -> &str {
187 match self {
188 ConstraintScope::Node(l) => l,
189 ConstraintScope::Relationship(t) => t,
190 }
191 }
192
193 /// Short tag used to disambiguate auto-generated constraint
194 /// names so a node and a relationship constraint on the same
195 /// target-and-property don't collide.
196 pub fn name_tag(&self) -> &'static str {
197 match self {
198 ConstraintScope::Node(_) => "node",
199 ConstraintScope::Relationship(_) => "rel",
200 }
201 }
202}
203
204/// Declarative spec for a property constraint. Unlike
205/// [`PropertyIndexSpec`], constraints carry a user-facing `name` —
206/// `DROP CONSTRAINT` takes a name, not a `(scope, properties, kind)`
207/// tuple, so the registry keys on name for lookup. When the user
208/// omits the name, [`StorageEngine::create_property_constraint`] fills
209/// in a deterministic one derived from the other fields.
210///
211/// `properties` is a list so that [`PropertyConstraintKind::NodeKey`]
212/// can carry its composite tuple here alongside single-property kinds;
213/// the single-property kinds (`Unique`, `NotNull`, `PropertyType`)
214/// enforce a length-one invariant at creation time.
215#[derive(Debug, Clone, PartialEq, Eq, Hash)]
216pub struct PropertyConstraintSpec {
217 pub name: String,
218 pub scope: ConstraintScope,
219 pub properties: Vec<String>,
220 pub kind: PropertyConstraintKind,
221}
222
223impl PropertyConstraintSpec {
224 /// The constraint's first (and for single-property kinds, only)
225 /// property name. Handy shorthand for call sites that already
226 /// know the spec is single-property.
227 pub fn primary_property(&self) -> &str {
228 self.properties.first().map(String::as_str).unwrap_or("")
229 }
230}
231
232/// A single mutation that can be combined with others into an atomic
233/// [`StorageEngine::apply_batch`] call. Backend-neutral: the enum only
234/// names what to do to the graph, not how the backend persists it.
235/// Lets callers commit a sequence of mutations as one atomic write —
236/// useful for giving multi-write Cypher queries crash-atomic local
237/// persistence.
238#[derive(Debug, Clone)]
239pub enum GraphMutation {
240 PutNode(Node),
241 PutEdge(Edge),
242 /// Idempotent within a batch: missing edges are skipped silently so a
243 /// log replay that re-applies a partially-committed batch is safe.
244 DeleteEdge(EdgeId),
245 /// Idempotent within a batch: missing nodes contribute no operations.
246 DetachDeleteNode(NodeId),
247}
248
249/// Object-safe interface for a graph storage backend. See module docs for
250/// design notes.
251pub trait StorageEngine: Send + Sync {
252 // --- Node CRUD ---
253
254 fn put_node(&self, node: &Node) -> Result<()>;
255 fn get_node(&self, id: NodeId) -> Result<Option<Node>>;
256 fn detach_delete_node(&self, id: NodeId) -> Result<()>;
257
258 // --- Edge CRUD ---
259
260 fn put_edge(&self, edge: &Edge) -> Result<()>;
261 fn get_edge(&self, id: EdgeId) -> Result<Option<Edge>>;
262 fn delete_edge(&self, id: EdgeId) -> Result<()>;
263
264 /// Apply a sequence of mutations atomically. Either every mutation
265 /// lands or none does. See [`GraphMutation`] for the variant set.
266 fn apply_batch(&self, mutations: &[GraphMutation]) -> Result<()>;
267
268 // --- Full-graph scans ---
269
270 fn all_nodes(&self) -> Result<Vec<Node>>;
271 fn all_edges(&self) -> Result<Vec<Edge>>;
272 fn all_node_ids(&self) -> Result<Vec<NodeId>>;
273
274 // --- Adjacency ---
275
276 fn outgoing(&self, source: NodeId) -> Result<Vec<(EdgeId, NodeId)>>;
277 fn incoming(&self, target: NodeId) -> Result<Vec<(EdgeId, NodeId)>>;
278
279 // --- Label / type / property indexes ---
280
281 fn nodes_by_label(&self, label: &str) -> Result<Vec<NodeId>>;
282 fn edges_by_type(&self, edge_type: &str) -> Result<Vec<EdgeId>>;
283 fn nodes_by_property(
284 &self,
285 label: &str,
286 property: &str,
287 value: &Property,
288 ) -> Result<Vec<NodeId>>;
289 /// Composite form of [`Self::nodes_by_property`]. `properties`
290 /// and `values` are parallel slices of equal length — the tuple
291 /// must match an equivalent stored index entry. Single-property
292 /// callers should use [`Self::nodes_by_property`] which delegates
293 /// here with length-1 slices.
294 fn nodes_by_properties(
295 &self,
296 label: &str,
297 properties: &[String],
298 values: &[Property],
299 ) -> Result<Vec<NodeId>>;
300
301 /// Equality lookup through an edge property index. Mirrors
302 /// [`StorageEngine::nodes_by_property`] for the relationship side.
303 /// The caller is responsible for verifying the `(edge_type, property)`
304 /// index exists before dispatching — a missing index returns an
305 /// empty result (no entries maintained) rather than erroring so
306 /// the planner's race with a concurrent DROP stays safe.
307 fn edges_by_property(
308 &self,
309 edge_type: &str,
310 property: &str,
311 value: &Property,
312 ) -> Result<Vec<EdgeId>>;
313
314 // --- Index DDL ---
315
316 fn create_property_index(&self, label: &str, property: &str) -> Result<()>;
317 fn drop_property_index(&self, label: &str, property: &str) -> Result<()>;
318 /// Composite form of [`Self::create_property_index`] — declares
319 /// a tuple index over `(label, properties...)`. Single-property
320 /// callers should use [`Self::create_property_index`] which
321 /// delegates here with a length-1 slice.
322 fn create_property_index_composite(&self, label: &str, properties: &[String]) -> Result<()>;
323 /// Composite form of [`Self::drop_property_index`].
324 fn drop_property_index_composite(&self, label: &str, properties: &[String]) -> Result<()>;
325 fn list_property_indexes(&self) -> Vec<PropertyIndexSpec>;
326
327 /// Declare a new `(edge_type, property)` single-property equality
328 /// index and backfill it from every edge currently carrying the
329 /// type. Idempotent: re-creating an already-registered index is a
330 /// no-op. Matches [`StorageEngine::create_property_index`] for
331 /// node scope.
332 fn create_edge_property_index(&self, edge_type: &str, property: &str) -> Result<()>;
333 /// Tear down an edge property index. Removes the meta entry and
334 /// every entry under the `(edge_type, prop)` prefix. Idempotent.
335 fn drop_edge_property_index(&self, edge_type: &str, property: &str) -> Result<()>;
336 /// Composite form of [`Self::create_edge_property_index`].
337 fn create_edge_property_index_composite(
338 &self,
339 edge_type: &str,
340 properties: &[String],
341 ) -> Result<()>;
342 /// Composite form of [`Self::drop_edge_property_index`].
343 fn drop_edge_property_index_composite(
344 &self,
345 edge_type: &str,
346 properties: &[String],
347 ) -> Result<()>;
348 /// Snapshot the currently-registered edge property indexes.
349 fn list_edge_property_indexes(&self) -> Vec<EdgePropertyIndexSpec>;
350
351 // --- Point / spatial index DDL ---
352
353 /// Declare a point index on `(label, property)` and backfill it
354 /// by scanning every node carrying `label`. Idempotent:
355 /// re-creating a registered index is a no-op. Single-property
356 /// and node-scope only — composite / relationship-scope spatial
357 /// indexes are follow-ups.
358 fn create_point_index(&self, label: &str, property: &str) -> Result<()>;
359
360 /// Tear down a point index. Removes the meta entry and sweeps
361 /// every stored entry under the `(label, property)` header — the
362 /// SRID-keyed sub-prefixes all fall inside that header, so one
363 /// range scan covers all coordinate systems. Idempotent.
364 fn drop_point_index(&self, label: &str, property: &str) -> Result<()>;
365
366 /// Snapshot the currently-registered point indexes.
367 fn list_point_indexes(&self) -> Vec<PointIndexSpec>;
368
369 /// Axis-aligned bounding-box range query over the point index
370 /// `(label, property)` under `srid`. Entries tagged with a
371 /// different SRID are scoped out by the index key prefix, so
372 /// cross-SRID rows never leak. A missing index returns empty
373 /// rather than erroring — matches the soft-fail contract of
374 /// [`Self::nodes_by_property`].
375 fn nodes_in_bbox(
376 &self,
377 label: &str,
378 property: &str,
379 srid: i32,
380 xlo: f64,
381 ylo: f64,
382 xhi: f64,
383 yhi: f64,
384 ) -> Result<Vec<NodeId>>;
385
386 // --- Edge point / spatial index DDL ---
387
388 /// Relationship-scope analogue of [`Self::create_point_index`].
389 /// Declares an edge point index on `(edge_type, property)` and
390 /// backfills by scanning every edge currently carrying
391 /// `edge_type`. Idempotent.
392 fn create_edge_point_index(&self, edge_type: &str, property: &str) -> Result<()>;
393
394 /// Tear down an edge point index. Idempotent. Mirrors
395 /// [`Self::drop_point_index`].
396 fn drop_edge_point_index(&self, edge_type: &str, property: &str) -> Result<()>;
397
398 /// Snapshot the currently-registered edge point indexes.
399 fn list_edge_point_indexes(&self) -> Vec<EdgePointIndexSpec>;
400
401 /// Relationship-scope analogue of [`Self::nodes_in_bbox`].
402 fn edges_in_bbox(
403 &self,
404 edge_type: &str,
405 property: &str,
406 srid: i32,
407 xlo: f64,
408 ylo: f64,
409 xhi: f64,
410 yhi: f64,
411 ) -> Result<Vec<EdgeId>>;
412
413 // --- Constraint DDL ---
414
415 /// Declare a new property constraint. If `name` is `None`, the
416 /// backend derives a deterministic name from the other fields so
417 /// `DROP CONSTRAINT` can still target it. When `if_not_exists` is
418 /// true, re-declaring a constraint with the same name is a no-op
419 /// (including when the existing constraint has different
420 /// label/property/kind — matches Neo4j's name-first semantics).
421 /// Returns the final [`PropertyConstraintSpec`] that was either
422 /// installed or already present.
423 ///
424 /// `UNIQUE` constraints implicitly require a backing property
425 /// index; implementations should ensure one exists on the relevant
426 /// `(label, property)` pair. The backing index is NOT torn down
427 /// when the constraint is dropped — users who want it gone must
428 /// issue a separate `DROP INDEX`.
429 fn create_property_constraint(
430 &self,
431 name: Option<&str>,
432 scope: &ConstraintScope,
433 properties: &[String],
434 kind: PropertyConstraintKind,
435 if_not_exists: bool,
436 ) -> Result<PropertyConstraintSpec>;
437
438 /// Tear down a constraint identified by `name`. When `if_exists`
439 /// is true, dropping a non-existent constraint is a no-op.
440 fn drop_property_constraint(&self, name: &str, if_exists: bool) -> Result<()>;
441
442 /// Snapshot every registered constraint. Order is insertion order
443 /// for deterministic `SHOW CONSTRAINTS` output across restarts.
444 fn list_property_constraints(&self) -> Vec<PropertyConstraintSpec>;
445
446 // --- apoc.trigger.* registry ---
447
448 /// Persist an `apoc.trigger.*` registration. The value is a
449 /// JSON-encoded blob owned by `meshdb-executor`'s
450 /// `TriggerSpec` — storage stays format-agnostic so the
451 /// schema can evolve without bumping the storage trait.
452 /// Default impl errors loudly so backends that haven't
453 /// wired triggers in yet surface the gap immediately.
454 fn put_trigger(&self, _name: &str, _value: &[u8]) -> Result<()> {
455 Err(Error::Unsupported(
456 "trigger registry is not supported by this backend".into(),
457 ))
458 }
459
460 /// Remove a registered trigger by name. Idempotent — dropping
461 /// a non-existent name is a no-op.
462 fn delete_trigger(&self, _name: &str) -> Result<()> {
463 Err(Error::Unsupported(
464 "trigger registry is not supported by this backend".into(),
465 ))
466 }
467
468 /// Snapshot every registered trigger as `(name, value)` pairs
469 /// in insertion-order by name. The value bytes are passed
470 /// straight back to the caller for format-side decoding.
471 fn list_triggers(&self) -> Result<Vec<(String, Vec<u8>)>> {
472 Ok(Vec::new())
473 }
474
475 /// Persist an in-doubt cross-partition transaction's staging
476 /// blob. Used by the multi-raft `PartitionGraphApplier` so
477 /// `pending_txs` survives a process restart — without
478 /// persistence, an applied `PreparedTx` would be invisible to
479 /// post-restart recovery (openraft replays only entries past
480 /// `last_applied`, and the staged commands live in memory).
481 ///
482 /// `key` is a partition-namespaced txid (e.g. `b"<u32 LE
483 /// partition><txid bytes>"`); `value` is the serde-encoded
484 /// `Vec<GraphMutation>` to apply on `CommitTx`. Default impl
485 /// errors so backends that don't host multi-raft surface the
486 /// gap immediately.
487 fn put_pending_tx(&self, _key: &[u8], _value: &[u8]) -> Result<()> {
488 Err(Error::Unsupported(
489 "pending-tx metadata is not supported by this backend".into(),
490 ))
491 }
492
493 /// Drop a pending-tx entry by key. Idempotent for unknown keys.
494 fn delete_pending_tx(&self, _key: &[u8]) -> Result<()> {
495 Err(Error::Unsupported(
496 "pending-tx metadata is not supported by this backend".into(),
497 ))
498 }
499
500 /// All pending-tx entries as `(key, value)` pairs. The applier
501 /// scans this on startup to rebuild its in-memory map.
502 fn list_pending_txs(&self) -> Result<Vec<(Vec<u8>, Vec<u8>)>> {
503 Ok(Vec::new())
504 }
505
506 // --- Snapshot / restore hooks ---
507
508 /// Persist a point-in-time copy of the backend's on-disk state into
509 /// `path`. The shape of what lands at `path` is backend-specific — the
510 /// caller is expected to package it in a backend-aware way (see
511 /// `meshdb-rpc::raft_applier` for the RocksDB path). For RocksDB this is
512 /// a `Checkpoint` directory of SST files; a different backend is free
513 /// to write a single file, a directory of segments, etc., as long as
514 /// its own `open(path)` can later rehydrate from the same layout.
515 fn create_checkpoint(&self, path: &Path) -> Result<()>;
516
517 /// Drop every entry from every part of the backend. Used by the Raft
518 /// snapshot-install path to wipe local state before applying the
519 /// leader's snapshot.
520 fn clear_all(&self) -> Result<()>;
521}