Skip to main content

selene_core/
changeset.rs

1//! WAL change payloads per spec 02 section 9.
2//!
3//! The principal/audit actor lives in the WAL entry header per D12; these
4//! payloads carry only the graph mutation itself. Diff payloads keep key lists
5//! in canonical lexicographic order by [`DbString::as_str`] both in memory and on
6//! the wire (the derived [`DbString`] `Ord` is lexicographic through the inner
7//! string). Serialize canonicalizes (sorts) the lists before emitting — a no-op
8//! for diffs built via the constructors, but load-bearing because the diff
9//! fields are public and can be set non-canonically. Deserialize then validates
10//! the canonical invariant and rejects a non-canonical or out-of-order payload
11//! as malformed rather than re-sorting it.
12
13use serde::{Deserialize, Deserializer, Serialize, Serializer};
14use smallvec::SmallVec;
15
16use crate::{
17    CoreError, CoreResult, DbString, EdgeId, EdgeTypeDef, EdgeTypeDefV1, GraphId, GraphType,
18    GraphTypeId, HnswIndexConfig, IvfIndexConfig, LabelSet, NodeId, NodeTypeDef, NodeTypeDefV1,
19    PropertyMap, RecordTypeDef, Value,
20};
21
22/// A graph or schema change carried by the WAL.
23#[allow(clippy::large_enum_variant)]
24#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
25// Invariant: serde+postcard tag stability - append new variants, never insert.
26// Reordering corrupts WAL files written under prior tag layouts.
27pub enum Change {
28    /// Node creation.
29    NodeCreated {
30        /// Created node ID.
31        id: NodeId,
32        /// Initial labels.
33        labels: LabelSet,
34        /// Initial properties.
35        properties: PropertyMap,
36    },
37    /// Node update.
38    NodeUpdated {
39        /// Updated node ID.
40        id: NodeId,
41        /// Label changes.
42        labels_diff: LabelDiff,
43        /// Property changes.
44        properties_diff: PropertyDiff,
45    },
46    /// Node deletion.
47    NodeDeleted {
48        /// Deleted node ID.
49        id: NodeId,
50    },
51    /// Edge creation.
52    EdgeCreated {
53        /// Created edge ID.
54        id: EdgeId,
55        /// Edge label.
56        label: DbString,
57        /// Source node ID.
58        source: NodeId,
59        /// Target node ID.
60        target: NodeId,
61        /// Initial properties.
62        properties: PropertyMap,
63    },
64    /// Edge update.
65    EdgeUpdated {
66        /// Updated edge ID.
67        id: EdgeId,
68        /// Property changes.
69        properties_diff: PropertyDiff,
70    },
71    /// Edge deletion.
72    EdgeDeleted {
73        /// Deleted edge ID.
74        id: EdgeId,
75    },
76    /// Schema mutation.
77    SchemaChanged {
78        /// Graph affected by the schema change.
79        graph: GraphId,
80        /// Schema change payload.
81        change: SchemaChange,
82    },
83    /// Node property removal.
84    NodePropertyRemoved {
85        /// Updated node ID.
86        id: NodeId,
87        /// Removed property key.
88        property: DbString,
89    },
90    /// Edge property removal.
91    EdgePropertyRemoved {
92        /// Updated edge ID.
93        id: EdgeId,
94        /// Removed property key.
95        property: DbString,
96    },
97    /// Node label removal.
98    NodeLabelRemoved {
99        /// Updated node ID.
100        id: NodeId,
101        /// Removed label.
102        label: DbString,
103    },
104    /// Bulk removal of every node carrying `label` plus all incident edges.
105    ///
106    /// This is the O(1)-WAL declarative truncate change (BRIEF-150, deletion-
107    /// reclamation audit Item 11). It carries **only** the label — never the
108    /// affected node/edge ids — so a `TRUNCATE NODE TYPE :L` of N nodes still
109    /// writes exactly one WAL change. Recovery re-derives the affected rows by
110    /// walking the recovered store ("replay walks store"), marking dead every
111    /// alive node with `label` and every alive edge incident to such a node, so
112    /// the recovered state is byte-identical to `MATCH (n:L) DETACH DELETE n`.
113    /// Live commit fan-out substitutes the change with staged per-row
114    /// `NodeDeleted`/`EdgeDeleted` tombstones when the mutator captured them
115    /// during execution. WAL/recovery replay carries this persisted declarative
116    /// variant, so provider-owned derived state must either handle it directly
117    /// or rebuild from the recovered graph snapshot before serving reads.
118    NodesOfTypeTruncated {
119        /// Node label whose instances (and incident edges) were removed.
120        label: DbString,
121    },
122    /// Bulk removal of every edge carrying `label`.
123    ///
124    /// The edge-type counterpart to [`Change::NodesOfTypeTruncated`]
125    /// (`TRUNCATE EDGE TYPE :L`). Carries only the label (O(1) WAL); recovery
126    /// re-derives the affected edges from the recovered store. Live commit
127    /// fan-out substitutes the change with staged per-row `EdgeDeleted`
128    /// tombstones when execution captured them; WAL/recovery replay carries
129    /// this persisted declarative variant, so providers must handle it directly
130    /// or rebuild before serving reads.
131    EdgesOfTypeTruncated {
132        /// Edge label whose instances were removed.
133        label: DbString,
134    },
135    /// Factory-reset of the entire graph: wipe **all** nodes and edges (every
136    /// label, including untyped/arbitrary-label rows) **and** reset the schema
137    /// to open (`bound_type` -> `None`), in one declarative O(1)-WAL change.
138    ///
139    /// This is the `DROP GRAPH` factory-reset change (BRIEF-152, deletion-
140    /// reclamation audit Item 10). Under D1 single-graph it targets the one
141    /// bound graph. It carries **nothing** — never the affected node/edge ids
142    /// nor any schema payload — so a reset of a graph with N rows still writes
143    /// exactly one WAL change. Recovery re-derives every affected row by walking
144    /// the recovered store ("replay walks store"), marking dead every alive node
145    /// and edge, and forces the recovered `bound_type` to `None`, so the
146    /// recovered state is byte-identical to `MATCH (n) DETACH DELETE n` followed
147    /// by a full schema drop. Live commit fan-out substitutes the change with
148    /// staged per-row `NodeDeleted`/`EdgeDeleted` tombstones when execution
149    /// captured them. WAL/recovery replay carries this persisted declarative
150    /// variant, so providers must handle it directly or rebuild before serving
151    /// reads. The MANIFEST epoch and WAL archive lineage are untouched: a
152    /// factory-reset is one committed WAL entry on top of the existing snapshot,
153    /// not a file-level wipe.
154    GraphReset {},
155}
156
157/// Label set difference.
158#[derive(Clone, Debug, PartialEq)]
159pub struct LabelDiff {
160    /// Labels added by the mutation.
161    pub added: SmallVec<[DbString; 2]>,
162    /// Labels removed by the mutation.
163    pub removed: SmallVec<[DbString; 2]>,
164}
165
166impl LabelDiff {
167    /// Construct a sorted, deduplicated label diff.
168    ///
169    /// # Errors
170    ///
171    /// Returns [`CoreError::OverlappingDiff`] when a label appears in both
172    /// `added` and `removed`. Contradictory diffs would make WAL replay
173    /// order-dependent, so the constructor refuses to build them.
174    pub fn new(
175        added: impl IntoIterator<Item = DbString>,
176        removed: impl IntoIterator<Item = DbString>,
177    ) -> CoreResult<Self> {
178        let added = sorted_deduped(added);
179        let removed = sorted_deduped(removed);
180        ensure_disjoint("label", &added, &removed)?;
181        Ok(Self { added, removed })
182    }
183
184    /// Return true if no labels changed.
185    #[must_use]
186    pub fn is_empty(&self) -> bool {
187        self.added.is_empty() && self.removed.is_empty()
188    }
189}
190
191#[derive(Deserialize, Serialize)]
192struct LabelDiffWire {
193    added: SmallVec<[DbString; 2]>,
194    removed: SmallVec<[DbString; 2]>,
195}
196
197impl Serialize for LabelDiff {
198    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
199    where
200        S: Serializer,
201    {
202        // Canonicalize on serialize. `LabelDiff::new` already sorts, so this is
203        // a no-op (byte-identical) for constructed diffs — but `added`/`removed`
204        // are public fields, so a caller can build a non-canonical diff directly;
205        // sorting here guarantees the wire is canonical and round-trips through
206        // the strict (validate, no-resort) deserializer below.
207        let mut added = self.added.clone();
208        let mut removed = self.removed.clone();
209        added.sort_by(|lhs, rhs| lhs.as_str().cmp(rhs.as_str()));
210        removed.sort_by(|lhs, rhs| lhs.as_str().cmp(rhs.as_str()));
211        LabelDiffWire { added, removed }.serialize(serializer)
212    }
213}
214
215impl<'de> Deserialize<'de> for LabelDiff {
216    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
217    where
218        D: Deserializer<'de>,
219    {
220        // Validate the canonical (strictly-ascending, dedup'd, disjoint)
221        // invariant rather than re-sorting; a non-canonical payload is
222        // rejected as malformed.
223        let wire = LabelDiffWire::deserialize(deserializer)?;
224        validate_sorted_unique(&wire.added, "LabelDiff.added")?;
225        validate_sorted_unique(&wire.removed, "LabelDiff.removed")?;
226        validate_disjoint(&wire.added, &wire.removed, "label")?;
227        Ok(Self {
228            added: wire.added,
229            removed: wire.removed,
230        })
231    }
232}
233
234/// Property map difference.
235#[derive(Clone, Debug, PartialEq)]
236pub struct PropertyDiff {
237    /// Keys set to a new value. Use [`Value::Null`] for an explicit null set.
238    pub set: SmallVec<[(DbString, Value); 4]>,
239    /// Keys whose entries are removed entirely.
240    pub removed: SmallVec<[DbString; 2]>,
241}
242
243impl PropertyDiff {
244    /// Construct a sorted, deduplicated property diff.
245    ///
246    /// # Errors
247    ///
248    /// Returns [`CoreError::OverlappingDiff`] when a key appears in both `set`
249    /// and `removed`. Contradictory diffs would make WAL replay
250    /// order-dependent, so the constructor refuses to build them.
251    pub fn new(
252        set: impl IntoIterator<Item = (DbString, Value)>,
253        removed: impl IntoIterator<Item = DbString>,
254    ) -> CoreResult<Self> {
255        let mut set: Vec<_> = set.into_iter().collect();
256        set.sort_by(|(lhs, _), (rhs, _)| lhs.cmp(rhs));
257        set.dedup_by(|(lhs_key, lhs_value), (rhs_key, rhs_value)| {
258            if lhs_key == rhs_key {
259                *lhs_value = rhs_value.clone();
260                true
261            } else {
262                false
263            }
264        });
265        let set: SmallVec<[(DbString, Value); 4]> = set.into_iter().collect();
266        let removed = sorted_deduped(removed);
267        for (key, _) in set.iter() {
268            if removed.binary_search(key).is_ok() {
269                return Err(CoreError::OverlappingDiff {
270                    kind: "property",
271                    key: key.clone(),
272                });
273            }
274        }
275        Ok(Self { set, removed })
276    }
277
278    /// Return true if no properties changed.
279    #[must_use]
280    pub fn is_empty(&self) -> bool {
281        self.set.is_empty() && self.removed.is_empty()
282    }
283}
284
285#[derive(Deserialize, Serialize)]
286struct PropertyDiffWire {
287    set: SmallVec<[(DbString, Value); 4]>,
288    removed: SmallVec<[DbString; 2]>,
289}
290
291impl Serialize for PropertyDiff {
292    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
293    where
294        S: Serializer,
295    {
296        // Canonicalize on serialize. `PropertyDiff::new` already sorts, so this
297        // is a no-op (byte-identical) for constructed diffs — but `set`/`removed`
298        // are public fields, so a caller can build a non-canonical diff directly;
299        // sorting here guarantees the wire is canonical and round-trips through
300        // the strict (validate, no-resort) deserializer below.
301        let mut set = self.set.clone();
302        let mut removed = self.removed.clone();
303        set.sort_by(|(lhs, _), (rhs, _)| lhs.as_str().cmp(rhs.as_str()));
304        removed.sort_by(|lhs, rhs| lhs.as_str().cmp(rhs.as_str()));
305        PropertyDiffWire { set, removed }.serialize(serializer)
306    }
307}
308
309impl<'de> Deserialize<'de> for PropertyDiff {
310    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
311    where
312        D: Deserializer<'de>,
313    {
314        // Validate the canonical invariant (strictly-ascending set keys,
315        // strictly-ascending removed, disjoint) rather than re-sorting; a
316        // non-canonical payload is rejected as malformed.
317        let wire = PropertyDiffWire::deserialize(deserializer)?;
318        for window in wire.set.windows(2) {
319            if window[0].0 >= window[1].0 {
320                return Err(serde::de::Error::custom(
321                    "PropertyDiff.set entries must be sorted by DbString order with no duplicate keys",
322                ));
323            }
324        }
325        validate_sorted_unique(&wire.removed, "PropertyDiff.removed")?;
326        for (key, _) in wire.set.iter() {
327            if wire.removed.binary_search(key).is_ok() {
328                return Err(serde::de::Error::custom(format!(
329                    "PropertyDiff: key {key} appears in both set and removed",
330                )));
331            }
332        }
333        Ok(Self {
334            set: wire.set,
335            removed: wire.removed,
336        })
337    }
338}
339
340/// Schema change payload.
341#[allow(clippy::large_enum_variant)]
342#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
343pub enum SchemaChange {
344    /// Graph creation.
345    GraphCreated {
346        /// Created graph ID.
347        id: GraphId,
348        /// Graph name.
349        name: DbString,
350        /// Optional graph type assigned at creation.
351        graph_type: Option<GraphTypeId>,
352    },
353    /// Graph deletion.
354    GraphDropped {
355        /// Dropped graph ID.
356        id: GraphId,
357    },
358    /// Graph type creation.
359    GraphTypeCreated {
360        /// Created graph type definition.
361        graph_type: GraphType,
362    },
363    /// Graph type deletion.
364    GraphTypeDropped {
365        /// Dropped graph type ID.
366        id: GraphTypeId,
367    },
368    /// Node type addition.
369    NodeTypeAdded {
370        /// Owning graph type.
371        graph_type: GraphTypeId,
372        /// Node type label.
373        label: DbString,
374        /// Legacy node type definition.
375        def: NodeTypeDefV1,
376    },
377    /// Edge type addition.
378    EdgeTypeAdded {
379        /// Owning graph type.
380        graph_type: GraphTypeId,
381        /// Edge type label.
382        label: DbString,
383        /// Legacy edge type definition.
384        def: EdgeTypeDefV1,
385    },
386    /// Node type deletion.
387    NodeTypeDropped {
388        /// Owning graph type.
389        graph_type: GraphTypeId,
390        /// Dropped node type name.
391        name: DbString,
392    },
393    /// Edge type deletion.
394    EdgeTypeDropped {
395        /// Owning graph type.
396        graph_type: GraphTypeId,
397        /// Dropped edge type name.
398        name: DbString,
399    },
400    /// Record type addition.
401    RecordTypeAdded {
402        /// Owning graph type.
403        graph_type: GraphTypeId,
404        /// Record type definition.
405        def: RecordTypeDef,
406    },
407    /// Property index creation.
408    PropertyIndexCreated {
409        /// Indexed node label.
410        label: DbString,
411        /// Indexed property key.
412        property: DbString,
413        /// Declared index value kind.
414        kind: SchemaPropertyIndexKind,
415    },
416    /// Property index deletion.
417    PropertyIndexDropped {
418        /// Indexed node label.
419        label: DbString,
420        /// Indexed property key.
421        property: DbString,
422    },
423    /// Property index creation with optional explicit catalog name.
424    ///
425    /// Declared after every existing v1.1 variant so the `postcard`
426    /// discriminants of all earlier variants remain stable. Old WALs continue
427    /// to decode through [`SchemaChange::PropertyIndexCreated`].
428    PropertyIndexCreatedNamed {
429        /// Indexed node label.
430        label: DbString,
431        /// Indexed property key.
432        property: DbString,
433        /// Declared index value kind.
434        kind: SchemaPropertyIndexKind,
435        /// Optional explicit catalog name.
436        name: Option<DbString>,
437    },
438    /// Node type addition carrying v2 type-model fields.
439    ///
440    /// Declared after every existing v1.1 variant so the `postcard`
441    /// discriminants of all earlier variants remain stable. New code emits this
442    /// variant; old WALs continue to decode through [`SchemaChange::NodeTypeAdded`].
443    NodeTypeAddedV2 {
444        /// Owning graph type.
445        graph_type: GraphTypeId,
446        /// Node type label.
447        label: DbString,
448        /// Node type definition.
449        def: NodeTypeDef,
450    },
451    /// Edge type addition carrying v2 type-model fields.
452    ///
453    /// Declared after every existing v1.1 variant so the `postcard`
454    /// discriminants of all earlier variants remain stable. New code emits this
455    /// variant; old WALs continue to decode through [`SchemaChange::EdgeTypeAdded`].
456    EdgeTypeAddedV2 {
457        /// Owning graph type.
458        graph_type: GraphTypeId,
459        /// Edge type label.
460        label: DbString,
461        /// Edge type definition.
462        def: EdgeTypeDef,
463    },
464    /// Composite property index creation with optional explicit catalog name.
465    ///
466    /// Declared after every existing v1.1 variant so the `postcard`
467    /// discriminants of all earlier variants remain stable.
468    CompositePropertyIndexCreated {
469        /// Indexed node label.
470        label: DbString,
471        /// Indexed property keys in declaration order.
472        properties: SmallVec<[DbString; 4]>,
473        /// Declared index value kinds in declaration order.
474        kinds: SmallVec<[SchemaPropertyIndexKind; 4]>,
475        /// Optional explicit catalog name.
476        name: Option<DbString>,
477    },
478    /// Composite property index deletion.
479    ///
480    /// Declared after every existing v1.1 variant so the `postcard`
481    /// discriminants of all earlier variants remain stable.
482    CompositePropertyIndexDropped {
483        /// Indexed node label.
484        label: DbString,
485        /// Indexed property keys in declaration order.
486        properties: SmallVec<[DbString; 4]>,
487    },
488    /// Vector property index creation with optional explicit catalog name.
489    ///
490    /// Declared after every existing v1.1 variant so the `postcard`
491    /// discriminants of all earlier variants remain stable.
492    VectorIndexCreated {
493        /// Indexed node label.
494        label: DbString,
495        /// Indexed vector property key.
496        property: DbString,
497        /// Declared vector index algorithm.
498        kind: SchemaVectorIndexKind,
499        /// Required vector dimensionality for indexed rows.
500        dimension: u32,
501        /// Optional explicit catalog name.
502        name: Option<DbString>,
503        /// Optional HNSW construction parameters.
504        hnsw_config: Option<HnswIndexConfig>,
505        /// Optional IVF construction parameters.
506        ivf_config: Option<IvfIndexConfig>,
507    },
508    /// Vector property index deletion.
509    ///
510    /// Declared after every existing v1.1 variant so the `postcard`
511    /// discriminants of all earlier variants remain stable.
512    VectorIndexDropped {
513        /// Indexed node label.
514        label: DbString,
515        /// Indexed vector property key.
516        property: DbString,
517    },
518    /// Text property index creation with optional explicit catalog name.
519    ///
520    /// Declared after every existing v1.1 variant so the `postcard`
521    /// discriminants of all earlier variants remain stable.
522    TextIndexCreated {
523        /// Indexed node label.
524        label: DbString,
525        /// Indexed string property key.
526        property: DbString,
527        /// Optional explicit catalog name.
528        name: Option<DbString>,
529    },
530    /// Text property index deletion.
531    ///
532    /// Declared after every existing v1.1 variant so the `postcard`
533    /// discriminants of all earlier variants remain stable.
534    TextIndexDropped {
535        /// Indexed node label.
536        label: DbString,
537        /// Indexed string property key.
538        property: DbString,
539    },
540}
541
542/// Schema-level vector index algorithm kind.
543///
544/// This mirrors storage-level vector index algorithm selection without making
545/// `selene-core` depend on graph storage internals.
546#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
547pub enum SchemaVectorIndexKind {
548    /// Exact in-memory row-set accelerator. ANN algorithms can be added as new
549    /// variants without changing the `(label, property)` catalog identity.
550    Flat,
551    /// Approximate HNSW index using squared Euclidean distance.
552    HnswSquaredEuclidean,
553    /// Approximate HNSW index using cosine distance.
554    HnswCosine,
555    /// Approximate HNSW index using negative inner product distance.
556    HnswNegativeInnerProduct,
557    /// Approximate IVF index using squared Euclidean distance.
558    IvfSquaredEuclidean,
559    /// Approximate IVF index using cosine distance.
560    IvfCosine,
561    /// Approximate IVF index using negative inner product distance.
562    IvfNegativeInnerProduct,
563    /// Compressed TurboQuant candidate index using cosine distance.
564    TurboQuantCosine,
565}
566
567/// Schema-level property index value kind.
568///
569/// This mirrors `selene_graph::TypedIndexKind` without making `selene-core`
570/// depend on graph storage internals.
571#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
572pub enum SchemaPropertyIndexKind {
573    /// Boolean value.
574    Bool,
575    /// Signed 64-bit integer.
576    I64,
577    /// Unsigned 64-bit integer.
578    U64,
579    /// Signed 128-bit integer.
580    I128,
581    /// Unsigned 128-bit integer.
582    U128,
583    /// Fixed-precision decimal value.
584    Decimal,
585    /// Finite 32-bit floating-point value.
586    F32,
587    /// Finite 64-bit floating-point value.
588    F64,
589    /// Database string.
590    String,
591    /// Civil date.
592    Date,
593    /// Civil local date-time.
594    LocalDateTime,
595    /// Zoned date-time.
596    ZonedDateTime,
597    /// Civil local time.
598    LocalTime,
599    /// Zoned time.
600    ZonedTime,
601    /// Duration.
602    Duration,
603    /// UUID.
604    Uuid,
605}
606
607fn sorted_deduped(values: impl IntoIterator<Item = DbString>) -> SmallVec<[DbString; 2]> {
608    let mut values: SmallVec<[DbString; 2]> = values.into_iter().collect();
609    values.sort();
610    values.dedup();
611    values
612}
613
614fn ensure_disjoint(
615    kind: &'static str,
616    added: &SmallVec<[DbString; 2]>,
617    removed: &SmallVec<[DbString; 2]>,
618) -> CoreResult<()> {
619    for label in added.iter() {
620        if removed.binary_search(label).is_ok() {
621            return Err(CoreError::OverlappingDiff {
622                kind,
623                key: label.clone(),
624            });
625        }
626    }
627    Ok(())
628}
629
630fn validate_sorted_unique<E: serde::de::Error>(
631    values: &SmallVec<[DbString; 2]>,
632    label: &'static str,
633) -> Result<(), E> {
634    for window in values.windows(2) {
635        if window[0] >= window[1] {
636            return Err(E::custom(format!(
637                "{label} must be sorted by DbString order with no duplicates"
638            )));
639        }
640    }
641    Ok(())
642}
643
644fn validate_disjoint<E: serde::de::Error>(
645    added: &SmallVec<[DbString; 2]>,
646    removed: &SmallVec<[DbString; 2]>,
647    kind: &'static str,
648) -> Result<(), E> {
649    for label in added.iter() {
650        if removed.binary_search(label).is_ok() {
651            return Err(E::custom(format!(
652                "overlapping {kind} diff: {label} appears in both add/set and remove",
653            )));
654        }
655    }
656    Ok(())
657}
658
659#[cfg(test)]
660#[path = "changeset/tests.rs"]
661mod tests;
662
663#[cfg(test)]
664#[path = "changeset/proptests.rs"]
665mod proptests;