selene_core/changeset.rs
1//! WAL change payloads per spec 02 section 9.
2//!
3//! The principal/audit actor lives in the WAL entry header per D12; these
4//! payloads carry only the graph mutation itself. Diff payloads keep key lists
5//! in canonical lexicographic order by [`DbString::as_str`] both in memory and on
6//! the wire (the derived [`DbString`] `Ord` is lexicographic through the inner
7//! string). Serialize canonicalizes (sorts) the lists before emitting — a no-op
8//! for diffs built via the constructors, but load-bearing because the diff
9//! fields are public and can be set non-canonically. Deserialize then validates
10//! the canonical invariant and rejects a non-canonical or out-of-order payload
11//! as malformed rather than re-sorting it.
12
13use serde::{Deserialize, Deserializer, Serialize, Serializer};
14use smallvec::SmallVec;
15
16use crate::{
17 CoreError, CoreResult, DbString, EdgeId, EdgeTypeDef, EdgeTypeDefV1, GraphId, GraphType,
18 GraphTypeId, HnswIndexConfig, IvfIndexConfig, LabelSet, NodeId, NodeTypeDef, NodeTypeDefV1,
19 PropertyMap, RecordTypeDef, Value,
20};
21
22/// A graph or schema change carried by the WAL.
23#[allow(clippy::large_enum_variant)]
24#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
25// Invariant: serde+postcard tag stability - append new variants, never insert.
26// Reordering corrupts WAL files written under prior tag layouts.
27pub enum Change {
28 /// Node creation.
29 NodeCreated {
30 /// Created node ID.
31 id: NodeId,
32 /// Initial labels.
33 labels: LabelSet,
34 /// Initial properties.
35 properties: PropertyMap,
36 },
37 /// Node update.
38 NodeUpdated {
39 /// Updated node ID.
40 id: NodeId,
41 /// Label changes.
42 labels_diff: LabelDiff,
43 /// Property changes.
44 properties_diff: PropertyDiff,
45 },
46 /// Node deletion.
47 NodeDeleted {
48 /// Deleted node ID.
49 id: NodeId,
50 },
51 /// Edge creation.
52 EdgeCreated {
53 /// Created edge ID.
54 id: EdgeId,
55 /// Edge label.
56 label: DbString,
57 /// Source node ID.
58 source: NodeId,
59 /// Target node ID.
60 target: NodeId,
61 /// Initial properties.
62 properties: PropertyMap,
63 },
64 /// Edge update.
65 EdgeUpdated {
66 /// Updated edge ID.
67 id: EdgeId,
68 /// Property changes.
69 properties_diff: PropertyDiff,
70 },
71 /// Edge deletion.
72 EdgeDeleted {
73 /// Deleted edge ID.
74 id: EdgeId,
75 },
76 /// Schema mutation.
77 SchemaChanged {
78 /// Graph affected by the schema change.
79 graph: GraphId,
80 /// Schema change payload.
81 change: SchemaChange,
82 },
83 /// Node property removal.
84 NodePropertyRemoved {
85 /// Updated node ID.
86 id: NodeId,
87 /// Removed property key.
88 property: DbString,
89 },
90 /// Edge property removal.
91 EdgePropertyRemoved {
92 /// Updated edge ID.
93 id: EdgeId,
94 /// Removed property key.
95 property: DbString,
96 },
97 /// Node label removal.
98 NodeLabelRemoved {
99 /// Updated node ID.
100 id: NodeId,
101 /// Removed label.
102 label: DbString,
103 },
104 /// Bulk removal of every node carrying `label` plus all incident edges.
105 ///
106 /// This is the O(1)-WAL declarative truncate change (BRIEF-150, deletion-
107 /// reclamation audit Item 11). It carries **only** the label — never the
108 /// affected node/edge ids — so a `TRUNCATE NODE TYPE :L` of N nodes still
109 /// writes exactly one WAL change. Recovery re-derives the affected rows by
110 /// walking the recovered store ("replay walks store"), marking dead every
111 /// alive node with `label` and every alive edge incident to such a node, so
112 /// the recovered state is byte-identical to `MATCH (n:L) DETACH DELETE n`.
113 /// Live commit fan-out substitutes the change with staged per-row
114 /// `NodeDeleted`/`EdgeDeleted` tombstones when the mutator captured them
115 /// during execution. WAL/recovery replay carries this persisted declarative
116 /// variant, so provider-owned derived state must either handle it directly
117 /// or rebuild from the recovered graph snapshot before serving reads.
118 NodesOfTypeTruncated {
119 /// Node label whose instances (and incident edges) were removed.
120 label: DbString,
121 },
122 /// Bulk removal of every edge carrying `label`.
123 ///
124 /// The edge-type counterpart to [`Change::NodesOfTypeTruncated`]
125 /// (`TRUNCATE EDGE TYPE :L`). Carries only the label (O(1) WAL); recovery
126 /// re-derives the affected edges from the recovered store. Live commit
127 /// fan-out substitutes the change with staged per-row `EdgeDeleted`
128 /// tombstones when execution captured them; WAL/recovery replay carries
129 /// this persisted declarative variant, so providers must handle it directly
130 /// or rebuild before serving reads.
131 EdgesOfTypeTruncated {
132 /// Edge label whose instances were removed.
133 label: DbString,
134 },
135 /// Factory-reset of the entire graph: wipe **all** nodes and edges (every
136 /// label, including untyped/arbitrary-label rows) **and** reset the schema
137 /// to open (`bound_type` -> `None`), in one declarative O(1)-WAL change.
138 ///
139 /// This is the `DROP GRAPH` factory-reset change (BRIEF-152, deletion-
140 /// reclamation audit Item 10). Under D1 single-graph it targets the one
141 /// bound graph. It carries **nothing** — never the affected node/edge ids
142 /// nor any schema payload — so a reset of a graph with N rows still writes
143 /// exactly one WAL change. Recovery re-derives every affected row by walking
144 /// the recovered store ("replay walks store"), marking dead every alive node
145 /// and edge, and forces the recovered `bound_type` to `None`, so the
146 /// recovered state is byte-identical to `MATCH (n) DETACH DELETE n` followed
147 /// by a full schema drop. Live commit fan-out substitutes the change with
148 /// staged per-row `NodeDeleted`/`EdgeDeleted` tombstones when execution
149 /// captured them. WAL/recovery replay carries this persisted declarative
150 /// variant, so providers must handle it directly or rebuild before serving
151 /// reads. The MANIFEST epoch and WAL archive lineage are untouched: a
152 /// factory-reset is one committed WAL entry on top of the existing snapshot,
153 /// not a file-level wipe.
154 GraphReset {},
155}
156
157/// Label set difference.
158#[derive(Clone, Debug, PartialEq)]
159pub struct LabelDiff {
160 /// Labels added by the mutation.
161 pub added: SmallVec<[DbString; 2]>,
162 /// Labels removed by the mutation.
163 pub removed: SmallVec<[DbString; 2]>,
164}
165
166impl LabelDiff {
167 /// Construct a sorted, deduplicated label diff.
168 ///
169 /// # Errors
170 ///
171 /// Returns [`CoreError::OverlappingDiff`] when a label appears in both
172 /// `added` and `removed`. Contradictory diffs would make WAL replay
173 /// order-dependent, so the constructor refuses to build them.
174 pub fn new(
175 added: impl IntoIterator<Item = DbString>,
176 removed: impl IntoIterator<Item = DbString>,
177 ) -> CoreResult<Self> {
178 let added = sorted_deduped(added);
179 let removed = sorted_deduped(removed);
180 ensure_disjoint("label", &added, &removed)?;
181 Ok(Self { added, removed })
182 }
183
184 /// Return true if no labels changed.
185 #[must_use]
186 pub fn is_empty(&self) -> bool {
187 self.added.is_empty() && self.removed.is_empty()
188 }
189}
190
191#[derive(Deserialize, Serialize)]
192struct LabelDiffWire {
193 added: SmallVec<[DbString; 2]>,
194 removed: SmallVec<[DbString; 2]>,
195}
196
197impl Serialize for LabelDiff {
198 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
199 where
200 S: Serializer,
201 {
202 // Canonicalize on serialize. `LabelDiff::new` already sorts, so this is
203 // a no-op (byte-identical) for constructed diffs — but `added`/`removed`
204 // are public fields, so a caller can build a non-canonical diff directly;
205 // sorting here guarantees the wire is canonical and round-trips through
206 // the strict (validate, no-resort) deserializer below.
207 let mut added = self.added.clone();
208 let mut removed = self.removed.clone();
209 added.sort_by(|lhs, rhs| lhs.as_str().cmp(rhs.as_str()));
210 removed.sort_by(|lhs, rhs| lhs.as_str().cmp(rhs.as_str()));
211 LabelDiffWire { added, removed }.serialize(serializer)
212 }
213}
214
215impl<'de> Deserialize<'de> for LabelDiff {
216 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
217 where
218 D: Deserializer<'de>,
219 {
220 // Validate the canonical (strictly-ascending, dedup'd, disjoint)
221 // invariant rather than re-sorting; a non-canonical payload is
222 // rejected as malformed.
223 let wire = LabelDiffWire::deserialize(deserializer)?;
224 validate_sorted_unique(&wire.added, "LabelDiff.added")?;
225 validate_sorted_unique(&wire.removed, "LabelDiff.removed")?;
226 validate_disjoint(&wire.added, &wire.removed, "label")?;
227 Ok(Self {
228 added: wire.added,
229 removed: wire.removed,
230 })
231 }
232}
233
234/// Property map difference.
235#[derive(Clone, Debug, PartialEq)]
236pub struct PropertyDiff {
237 /// Keys set to a new value. Use [`Value::Null`] for an explicit null set.
238 pub set: SmallVec<[(DbString, Value); 4]>,
239 /// Keys whose entries are removed entirely.
240 pub removed: SmallVec<[DbString; 2]>,
241}
242
243impl PropertyDiff {
244 /// Construct a sorted, deduplicated property diff.
245 ///
246 /// # Errors
247 ///
248 /// Returns [`CoreError::OverlappingDiff`] when a key appears in both `set`
249 /// and `removed`. Contradictory diffs would make WAL replay
250 /// order-dependent, so the constructor refuses to build them.
251 pub fn new(
252 set: impl IntoIterator<Item = (DbString, Value)>,
253 removed: impl IntoIterator<Item = DbString>,
254 ) -> CoreResult<Self> {
255 let mut set: Vec<_> = set.into_iter().collect();
256 set.sort_by(|(lhs, _), (rhs, _)| lhs.cmp(rhs));
257 set.dedup_by(|(lhs_key, lhs_value), (rhs_key, rhs_value)| {
258 if lhs_key == rhs_key {
259 *lhs_value = rhs_value.clone();
260 true
261 } else {
262 false
263 }
264 });
265 let set: SmallVec<[(DbString, Value); 4]> = set.into_iter().collect();
266 let removed = sorted_deduped(removed);
267 for (key, _) in set.iter() {
268 if removed.binary_search(key).is_ok() {
269 return Err(CoreError::OverlappingDiff {
270 kind: "property",
271 key: key.clone(),
272 });
273 }
274 }
275 Ok(Self { set, removed })
276 }
277
278 /// Return true if no properties changed.
279 #[must_use]
280 pub fn is_empty(&self) -> bool {
281 self.set.is_empty() && self.removed.is_empty()
282 }
283}
284
285#[derive(Deserialize, Serialize)]
286struct PropertyDiffWire {
287 set: SmallVec<[(DbString, Value); 4]>,
288 removed: SmallVec<[DbString; 2]>,
289}
290
291impl Serialize for PropertyDiff {
292 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
293 where
294 S: Serializer,
295 {
296 // Canonicalize on serialize. `PropertyDiff::new` already sorts, so this
297 // is a no-op (byte-identical) for constructed diffs — but `set`/`removed`
298 // are public fields, so a caller can build a non-canonical diff directly;
299 // sorting here guarantees the wire is canonical and round-trips through
300 // the strict (validate, no-resort) deserializer below.
301 let mut set = self.set.clone();
302 let mut removed = self.removed.clone();
303 set.sort_by(|(lhs, _), (rhs, _)| lhs.as_str().cmp(rhs.as_str()));
304 removed.sort_by(|lhs, rhs| lhs.as_str().cmp(rhs.as_str()));
305 PropertyDiffWire { set, removed }.serialize(serializer)
306 }
307}
308
309impl<'de> Deserialize<'de> for PropertyDiff {
310 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
311 where
312 D: Deserializer<'de>,
313 {
314 // Validate the canonical invariant (strictly-ascending set keys,
315 // strictly-ascending removed, disjoint) rather than re-sorting; a
316 // non-canonical payload is rejected as malformed.
317 let wire = PropertyDiffWire::deserialize(deserializer)?;
318 for window in wire.set.windows(2) {
319 if window[0].0 >= window[1].0 {
320 return Err(serde::de::Error::custom(
321 "PropertyDiff.set entries must be sorted by DbString order with no duplicate keys",
322 ));
323 }
324 }
325 validate_sorted_unique(&wire.removed, "PropertyDiff.removed")?;
326 for (key, _) in wire.set.iter() {
327 if wire.removed.binary_search(key).is_ok() {
328 return Err(serde::de::Error::custom(format!(
329 "PropertyDiff: key {key} appears in both set and removed",
330 )));
331 }
332 }
333 Ok(Self {
334 set: wire.set,
335 removed: wire.removed,
336 })
337 }
338}
339
340/// Schema change payload.
341#[allow(clippy::large_enum_variant)]
342#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
343pub enum SchemaChange {
344 /// Graph creation.
345 GraphCreated {
346 /// Created graph ID.
347 id: GraphId,
348 /// Graph name.
349 name: DbString,
350 /// Optional graph type assigned at creation.
351 graph_type: Option<GraphTypeId>,
352 },
353 /// Graph deletion.
354 GraphDropped {
355 /// Dropped graph ID.
356 id: GraphId,
357 },
358 /// Graph type creation.
359 GraphTypeCreated {
360 /// Created graph type definition.
361 graph_type: GraphType,
362 },
363 /// Graph type deletion.
364 GraphTypeDropped {
365 /// Dropped graph type ID.
366 id: GraphTypeId,
367 },
368 /// Node type addition.
369 NodeTypeAdded {
370 /// Owning graph type.
371 graph_type: GraphTypeId,
372 /// Node type label.
373 label: DbString,
374 /// Legacy node type definition.
375 def: NodeTypeDefV1,
376 },
377 /// Edge type addition.
378 EdgeTypeAdded {
379 /// Owning graph type.
380 graph_type: GraphTypeId,
381 /// Edge type label.
382 label: DbString,
383 /// Legacy edge type definition.
384 def: EdgeTypeDefV1,
385 },
386 /// Node type deletion.
387 NodeTypeDropped {
388 /// Owning graph type.
389 graph_type: GraphTypeId,
390 /// Dropped node type name.
391 name: DbString,
392 },
393 /// Edge type deletion.
394 EdgeTypeDropped {
395 /// Owning graph type.
396 graph_type: GraphTypeId,
397 /// Dropped edge type name.
398 name: DbString,
399 },
400 /// Record type addition.
401 RecordTypeAdded {
402 /// Owning graph type.
403 graph_type: GraphTypeId,
404 /// Record type definition.
405 def: RecordTypeDef,
406 },
407 /// Property index creation.
408 PropertyIndexCreated {
409 /// Indexed node label.
410 label: DbString,
411 /// Indexed property key.
412 property: DbString,
413 /// Declared index value kind.
414 kind: SchemaPropertyIndexKind,
415 },
416 /// Property index deletion.
417 PropertyIndexDropped {
418 /// Indexed node label.
419 label: DbString,
420 /// Indexed property key.
421 property: DbString,
422 },
423 /// Property index creation with optional explicit catalog name.
424 ///
425 /// Declared after every existing v1.1 variant so the `postcard`
426 /// discriminants of all earlier variants remain stable. Old WALs continue
427 /// to decode through [`SchemaChange::PropertyIndexCreated`].
428 PropertyIndexCreatedNamed {
429 /// Indexed node label.
430 label: DbString,
431 /// Indexed property key.
432 property: DbString,
433 /// Declared index value kind.
434 kind: SchemaPropertyIndexKind,
435 /// Optional explicit catalog name.
436 name: Option<DbString>,
437 },
438 /// Node type addition carrying v2 type-model fields.
439 ///
440 /// Declared after every existing v1.1 variant so the `postcard`
441 /// discriminants of all earlier variants remain stable. New code emits this
442 /// variant; old WALs continue to decode through [`SchemaChange::NodeTypeAdded`].
443 NodeTypeAddedV2 {
444 /// Owning graph type.
445 graph_type: GraphTypeId,
446 /// Node type label.
447 label: DbString,
448 /// Node type definition.
449 def: NodeTypeDef,
450 },
451 /// Edge type addition carrying v2 type-model fields.
452 ///
453 /// Declared after every existing v1.1 variant so the `postcard`
454 /// discriminants of all earlier variants remain stable. New code emits this
455 /// variant; old WALs continue to decode through [`SchemaChange::EdgeTypeAdded`].
456 EdgeTypeAddedV2 {
457 /// Owning graph type.
458 graph_type: GraphTypeId,
459 /// Edge type label.
460 label: DbString,
461 /// Edge type definition.
462 def: EdgeTypeDef,
463 },
464 /// Composite property index creation with optional explicit catalog name.
465 ///
466 /// Declared after every existing v1.1 variant so the `postcard`
467 /// discriminants of all earlier variants remain stable.
468 CompositePropertyIndexCreated {
469 /// Indexed node label.
470 label: DbString,
471 /// Indexed property keys in declaration order.
472 properties: SmallVec<[DbString; 4]>,
473 /// Declared index value kinds in declaration order.
474 kinds: SmallVec<[SchemaPropertyIndexKind; 4]>,
475 /// Optional explicit catalog name.
476 name: Option<DbString>,
477 },
478 /// Composite property index deletion.
479 ///
480 /// Declared after every existing v1.1 variant so the `postcard`
481 /// discriminants of all earlier variants remain stable.
482 CompositePropertyIndexDropped {
483 /// Indexed node label.
484 label: DbString,
485 /// Indexed property keys in declaration order.
486 properties: SmallVec<[DbString; 4]>,
487 },
488 /// Vector property index creation with optional explicit catalog name.
489 ///
490 /// Declared after every existing v1.1 variant so the `postcard`
491 /// discriminants of all earlier variants remain stable.
492 VectorIndexCreated {
493 /// Indexed node label.
494 label: DbString,
495 /// Indexed vector property key.
496 property: DbString,
497 /// Declared vector index algorithm.
498 kind: SchemaVectorIndexKind,
499 /// Required vector dimensionality for indexed rows.
500 dimension: u32,
501 /// Optional explicit catalog name.
502 name: Option<DbString>,
503 /// Optional HNSW construction parameters.
504 hnsw_config: Option<HnswIndexConfig>,
505 /// Optional IVF construction parameters.
506 ivf_config: Option<IvfIndexConfig>,
507 },
508 /// Vector property index deletion.
509 ///
510 /// Declared after every existing v1.1 variant so the `postcard`
511 /// discriminants of all earlier variants remain stable.
512 VectorIndexDropped {
513 /// Indexed node label.
514 label: DbString,
515 /// Indexed vector property key.
516 property: DbString,
517 },
518 /// Text property index creation with optional explicit catalog name.
519 ///
520 /// Declared after every existing v1.1 variant so the `postcard`
521 /// discriminants of all earlier variants remain stable.
522 TextIndexCreated {
523 /// Indexed node label.
524 label: DbString,
525 /// Indexed string property key.
526 property: DbString,
527 /// Optional explicit catalog name.
528 name: Option<DbString>,
529 },
530 /// Text property index deletion.
531 ///
532 /// Declared after every existing v1.1 variant so the `postcard`
533 /// discriminants of all earlier variants remain stable.
534 TextIndexDropped {
535 /// Indexed node label.
536 label: DbString,
537 /// Indexed string property key.
538 property: DbString,
539 },
540}
541
542/// Schema-level vector index algorithm kind.
543///
544/// This mirrors storage-level vector index algorithm selection without making
545/// `selene-core` depend on graph storage internals.
546#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
547pub enum SchemaVectorIndexKind {
548 /// Exact in-memory row-set accelerator. ANN algorithms can be added as new
549 /// variants without changing the `(label, property)` catalog identity.
550 Flat,
551 /// Approximate HNSW index using squared Euclidean distance.
552 HnswSquaredEuclidean,
553 /// Approximate HNSW index using cosine distance.
554 HnswCosine,
555 /// Approximate HNSW index using negative inner product distance.
556 HnswNegativeInnerProduct,
557 /// Approximate IVF index using squared Euclidean distance.
558 IvfSquaredEuclidean,
559 /// Approximate IVF index using cosine distance.
560 IvfCosine,
561 /// Approximate IVF index using negative inner product distance.
562 IvfNegativeInnerProduct,
563 /// Compressed TurboQuant candidate index using cosine distance.
564 TurboQuantCosine,
565}
566
567/// Schema-level property index value kind.
568///
569/// This mirrors `selene_graph::TypedIndexKind` without making `selene-core`
570/// depend on graph storage internals.
571#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
572pub enum SchemaPropertyIndexKind {
573 /// Boolean value.
574 Bool,
575 /// Signed 64-bit integer.
576 I64,
577 /// Unsigned 64-bit integer.
578 U64,
579 /// Signed 128-bit integer.
580 I128,
581 /// Unsigned 128-bit integer.
582 U128,
583 /// Fixed-precision decimal value.
584 Decimal,
585 /// Finite 32-bit floating-point value.
586 F32,
587 /// Finite 64-bit floating-point value.
588 F64,
589 /// Database string.
590 String,
591 /// Civil date.
592 Date,
593 /// Civil local date-time.
594 LocalDateTime,
595 /// Zoned date-time.
596 ZonedDateTime,
597 /// Civil local time.
598 LocalTime,
599 /// Zoned time.
600 ZonedTime,
601 /// Duration.
602 Duration,
603 /// UUID.
604 Uuid,
605}
606
607fn sorted_deduped(values: impl IntoIterator<Item = DbString>) -> SmallVec<[DbString; 2]> {
608 let mut values: SmallVec<[DbString; 2]> = values.into_iter().collect();
609 values.sort();
610 values.dedup();
611 values
612}
613
614fn ensure_disjoint(
615 kind: &'static str,
616 added: &SmallVec<[DbString; 2]>,
617 removed: &SmallVec<[DbString; 2]>,
618) -> CoreResult<()> {
619 for label in added.iter() {
620 if removed.binary_search(label).is_ok() {
621 return Err(CoreError::OverlappingDiff {
622 kind,
623 key: label.clone(),
624 });
625 }
626 }
627 Ok(())
628}
629
630fn validate_sorted_unique<E: serde::de::Error>(
631 values: &SmallVec<[DbString; 2]>,
632 label: &'static str,
633) -> Result<(), E> {
634 for window in values.windows(2) {
635 if window[0] >= window[1] {
636 return Err(E::custom(format!(
637 "{label} must be sorted by DbString order with no duplicates"
638 )));
639 }
640 }
641 Ok(())
642}
643
644fn validate_disjoint<E: serde::de::Error>(
645 added: &SmallVec<[DbString; 2]>,
646 removed: &SmallVec<[DbString; 2]>,
647 kind: &'static str,
648) -> Result<(), E> {
649 for label in added.iter() {
650 if removed.binary_search(label).is_ok() {
651 return Err(E::custom(format!(
652 "overlapping {kind} diff: {label} appears in both add/set and remove",
653 )));
654 }
655 }
656 Ok(())
657}
658
659#[cfg(test)]
660#[path = "changeset/tests.rs"]
661mod tests;
662
663#[cfg(test)]
664#[path = "changeset/proptests.rs"]
665mod proptests;