Skip to main content

uni_fork/
types.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4//! Phase 6 — Fork diff & promote types.
5//!
6//! `ForkDiff` describes the structural delta between two fork views
7//! (or a fork and primary). The convention is *forward*: `diff(a, b)`
8//! is the delta that, if applied to `a`, would produce `b`. So
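//! `added` rows exist in `b` only and `deleted` rows exist in `a` only.
//! `changed` is reserved for per-row before/after pairs on rows with
//! matching identity; because identity is content-addressed it is
//! currently always empty, and a property edit surfaces as a paired
//! `deleted` + `added` instead.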
11//!
//! **Identity** is the content-addressed `UniId` for both vertices and
//! edges: the vertex UID is SHA3-256 of `(label, ext_id, properties)`,
//! and the edge UID is computed over
//! `(src_uid, dst_uid, edge_type, sorted_properties)` — endpoint UIDs,
//! edge type, and the edge's property bag. Neither depends on a VID, so
//! the diff is correct across two
16//! unrelated forks that happen to have rolled the same VIDs. The
17//! per-side VID is preserved on `DiffVertex` as informational; pairing
18//! never depends on it.
19//!
20//! Phase 6a (the initial MVP) keyed diffs by VID. Phase 6b lifted
21//! identity to UID so siblings-off-a-shared-parent and totally
22//! unrelated forks compare correctly.
23//!
24//! `PromotePattern` is the spec for what to scan on a fork during
25//! `Uni::promote_from_fork`. Phase 6 supports the most common shape
26//! (label + optional Cypher WHERE clause); future phases may grow
27//! relationship-aware patterns.
28
29use std::collections::{HashMap, HashSet};
30use std::fmt;
31
32use uni_common::Properties;
33use uni_common::Value;
34use uni_common::core::id::{UniId, Vid};
35
36/// The full delta from one fork view to another.
37#[derive(Debug, Clone, Default)]
38pub struct ForkDiff {
39    /// Per-label vertex deltas.
40    pub vertices: VertexDiff,
41    /// Per-edge-type edge deltas.
42    pub edges: EdgeDiff,
43}
44
45impl ForkDiff {
46    /// Returns `true` when there are no vertex or edge differences.
47    pub fn is_empty(&self) -> bool {
48        self.vertices.is_empty() && self.edges.is_empty()
49    }
50
51    /// Total rows in this diff across vertices and edges.
52    pub fn total_rows(&self) -> usize {
53        self.vertices.total_rows() + self.edges.total_rows()
54    }
55
56    /// Return the inverse: swap added/deleted and swap before/after in
57    /// every property change. By construction
58    /// `diff(a,b).invert() == diff(b,a)`.
59    pub fn invert(mut self) -> Self {
60        self.vertices = self.vertices.invert();
61        self.edges = self.edges.invert();
62        self
63    }
64}
65
66/// Vertex-side of [`ForkDiff`].
67#[derive(Debug, Clone, Default)]
68pub struct VertexDiff {
69    /// Rows present in `b` but not `a`.
70    pub added: Vec<DiffVertex>,
71    /// Rows present in `a` but not `b`.
72    pub deleted: Vec<DiffVertex>,
73    /// **Reserved; always empty.** Identity is content-addressed (the UID
74    /// folds in the properties), so two rows with matching identity have
75    /// identical properties by construction — a property change surfaces as
76    /// a paired `deleted` + `added` instead. Kept for forward compatibility
77    /// with a future stable-id diff. (L12)
78    pub changed: Vec<VertexPropertyChange>,
79}
80
81impl VertexDiff {
82    /// Returns `true` when added, deleted, and changed are all empty.
83    pub fn is_empty(&self) -> bool {
84        self.added.is_empty() && self.deleted.is_empty() && self.changed.is_empty()
85    }
86
87    /// Sum of added + deleted + changed counts.
88    pub fn total_rows(&self) -> usize {
89        self.added.len() + self.deleted.len() + self.changed.len()
90    }
91
92    fn invert(self) -> Self {
93        Self {
94            added: self.deleted,
95            deleted: self.added,
96            changed: self
97                .changed
98                .into_iter()
99                .map(VertexPropertyChange::invert)
100                .collect(),
101        }
102    }
103}
104
105/// Edge-side of [`ForkDiff`].
106#[derive(Debug, Clone, Default)]
107pub struct EdgeDiff {
108    /// Edges present in `b` but not `a`.
109    pub added: Vec<DiffEdge>,
110    /// Edges present in `a` but not `b`.
111    pub deleted: Vec<DiffEdge>,
112    /// **Reserved; always empty.** The edge UID folds in the properties, so
113    /// a property change surfaces as a paired `deleted` + `added`, not here.
114    /// Kept for forward compatibility. (L12)
115    pub changed: Vec<EdgePropertyChange>,
116}
117
118impl EdgeDiff {
119    /// Returns `true` when added, deleted, and changed are all empty.
120    pub fn is_empty(&self) -> bool {
121        self.added.is_empty() && self.deleted.is_empty() && self.changed.is_empty()
122    }
123
124    /// Sum of added + deleted + changed counts.
125    pub fn total_rows(&self) -> usize {
126        self.added.len() + self.deleted.len() + self.changed.len()
127    }
128
129    fn invert(self) -> Self {
130        Self {
131            added: self.deleted,
132            deleted: self.added,
133            changed: self
134                .changed
135                .into_iter()
136                .map(EdgePropertyChange::invert)
137                .collect(),
138        }
139    }
140}
141
142/// A vertex row from one side of a diff.
143#[derive(Debug, Clone)]
144pub struct DiffVertex {
145    /// The vertex's label.
146    pub label: String,
147    /// Content-addressed identity (`compute_vertex_uid(label, None,
148    /// properties)`). This is the bucketing key during diff.
149    pub uid: UniId,
150    /// Informational: which VID this row carried on the side it was
151    /// scanned from. `None` if the per-side scan returned a node
152    /// without a VID, which should not happen in practice.
153    pub vid: Option<Vid>,
154    /// Property bag for the vertex (user properties only).
155    pub properties: Properties,
156}
157
158/// A change to one vertex's properties.
159#[derive(Debug, Clone)]
160pub struct VertexPropertyChange {
161    /// The vertex's label.
162    pub label: String,
163    /// UID of the vertex — the pairing key across sides.
164    pub uid: UniId,
165    /// One entry per property whose value differs between sides.
166    pub changes: Vec<PropertyChange>,
167}
168
169impl VertexPropertyChange {
170    fn invert(self) -> Self {
171        Self {
172            label: self.label,
173            uid: self.uid,
174            changes: self
175                .changes
176                .into_iter()
177                .map(PropertyChange::invert)
178                .collect(),
179        }
180    }
181}
182
183/// An edge row from one side of a diff.
184#[derive(Debug, Clone)]
185pub struct DiffEdge {
186    /// The edge type.
187    pub edge_type: String,
188    /// Content-addressed edge UID (computed via
189    /// `MainEdgeDataset::compute_edge_uid` over
190    /// `(src_uid, dst_uid, edge_type, sorted_properties)`). Two
191    /// parallel edges between the same endpoints with different
192    /// property bags have different `edge_uid`s — that's how the
193    /// diff distinguishes them.
194    pub edge_uid: UniId,
195    /// Source vertex UID (content-addressed).
196    pub src_uid: UniId,
197    /// Destination vertex UID (content-addressed).
198    pub dst_uid: UniId,
199    /// Property bag for the edge.
200    pub properties: Properties,
201}
202
203/// A change to one edge's properties.
204#[derive(Debug, Clone)]
205pub struct EdgePropertyChange {
206    /// The edge type.
207    pub edge_type: String,
208    /// Source vertex UID.
209    pub src_uid: UniId,
210    /// Destination vertex UID.
211    pub dst_uid: UniId,
212    /// One entry per property whose value differs between sides.
213    pub changes: Vec<PropertyChange>,
214}
215
216impl EdgePropertyChange {
217    fn invert(self) -> Self {
218        Self {
219            edge_type: self.edge_type,
220            src_uid: self.src_uid,
221            dst_uid: self.dst_uid,
222            changes: self
223                .changes
224                .into_iter()
225                .map(PropertyChange::invert)
226                .collect(),
227        }
228    }
229}
230
231/// A single property's before/after pair.
232#[derive(Debug, Clone)]
233pub struct PropertyChange {
234    /// Property key.
235    pub key: String,
236    /// Value on the `a` side, or `None` if absent.
237    pub before: Option<Value>,
238    /// Value on the `b` side, or `None` if absent.
239    pub after: Option<Value>,
240}
241
242impl PropertyChange {
243    fn invert(self) -> Self {
244        Self {
245            key: self.key,
246            before: self.after,
247            after: self.before,
248        }
249    }
250}
251
/// Selector for `Uni::promote_from_fork`.
///
/// Two shapes:
/// - [`PromotePattern::label`] — match every vertex with this label;
///   bulk-inserted on primary, deduplicated by content-derived UID.
/// - [`PromotePattern::edge_type`] — match every edge of this type
///   whose endpoints already exist on primary; the edge is inserted
///   between the resolved primary endpoints, deduplicated by
///   `(src_uid, dst_uid, edge_type)`.
///
/// Both variants accept an optional Cypher `WHERE` clause, interpolated
/// verbatim into the fork-side scan. Callers are responsible for
/// quoting and parameter safety.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum PromotePattern {
    /// Promote vertices.
    Vertex {
        /// Vertex label.
        label: String,
        /// Optional `WHERE` predicate on the fork-side scan.
        where_clause: Option<String>,
    },
    /// Promote edges. Endpoints must already exist on primary (by UID);
    /// fork-only endpoints are skipped and counted in
    /// [`PromoteReport::edges_skipped_no_endpoint`].
    Edge {
        /// Edge type.
        edge_type: String,
        /// Optional `WHERE` predicate on the fork-side scan. The bound
        /// names are `a` (source), `r` (edge), `b` (destination).
        where_clause: Option<String>,
    },
}

impl PromotePattern {
    /// Match every vertex with this label. No `WHERE` predicate is set.
    #[must_use]
    pub fn label(label: impl Into<String>) -> Self {
        Self::Vertex {
            label: label.into(),
            where_clause: None,
        }
    }

    /// Match every edge with this type. No `WHERE` predicate is set.
    ///
    /// Endpoints must already exist on primary (resolved by UID);
    /// fork-only endpoints are counted and skipped — they need to be
    /// promoted first via a vertex pattern.
    #[must_use]
    pub fn edge_type(edge_type: impl Into<String>) -> Self {
        Self::Edge {
            edge_type: edge_type.into(),
            where_clause: None,
        }
    }

    /// Restrict the scan to rows matching this Cypher predicate,
    /// replacing any predicate set earlier.
    ///
    /// Verbatim interpolation — caller owns quoting.
    ///
    /// This is a consuming builder: the restricted pattern is the return
    /// value. Dropping it leaves no filtered pattern behind, hence
    /// `#[must_use]`.
    #[must_use]
    pub fn where_clause(mut self, expr: impl Into<String>) -> Self {
        // Both variants carry the predicate in a field of the same name.
        let slot = match &mut self {
            Self::Vertex { where_clause, .. } | Self::Edge { where_clause, .. } => where_clause,
        };
        *slot = Some(expr.into());
        self
    }

    /// Vertex label for vertex patterns. Empty string for edge patterns.
    #[must_use]
    pub fn label_name(&self) -> &str {
        match self {
            Self::Vertex { label, .. } => label,
            Self::Edge { .. } => "",
        }
    }

    /// Edge type for edge patterns. Empty string for vertex patterns.
    #[must_use]
    pub fn edge_type_name(&self) -> &str {
        match self {
            Self::Edge { edge_type, .. } => edge_type,
            Self::Vertex { .. } => "",
        }
    }

    /// The optional `WHERE` predicate, or `None` when the pattern is
    /// unrestricted.
    #[must_use]
    pub fn where_expr(&self) -> Option<&str> {
        match self {
            Self::Vertex { where_clause, .. } | Self::Edge { where_clause, .. } => {
                where_clause.as_deref()
            }
        }
    }

    /// `true` if this pattern targets edges.
    #[must_use]
    pub fn is_edge(&self) -> bool {
        matches!(self, Self::Edge { .. })
    }
}

/// Renders vertex patterns as `(:Label)` / `(:Label WHERE pred)` and edge
/// patterns as `[:TYPE]` / `[:TYPE WHERE pred]`.
impl fmt::Display for PromotePattern {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Only the bracket pair and the name differ between the variants.
        let (open, name, close) = match self {
            Self::Vertex { label, .. } => ('(', label.as_str(), ')'),
            Self::Edge { edge_type, .. } => ('[', edge_type.as_str(), ']'),
        };
        match self.where_expr() {
            Some(predicate) => write!(f, "{}:{} WHERE {}{}", open, name, predicate, close),
            None => write!(f, "{}:{}{}", open, name, close),
        }
    }
}
372
/// Knobs for the merge behaviour of `Uni::promote_from_fork`.
///
/// The struct is additive and `#[non_exhaustive]`. The legacy
/// `promote_from_fork` entry point runs with `PromoteOptions::default()`
/// (insert-only), so existing callers see no change.
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct PromoteOptions {
    /// When `true`, a fork edit to a vertex that already exists on primary
    /// (matched by `(label, ext_id)`) becomes an in-place property update
    /// rather than the insertion of a twin. Vertices that have no `ext_id`
    /// keep the legacy content-UID insert-or-skip behavior. The default,
    /// `false`, preserves the historical insert-only contract.
    pub upsert: bool,

    /// When `true`, once the pattern loop has finished, primary vertices
    /// that were present at the fork point but have been removed on the
    /// fork are deleted from primary. Deletion is ext_id-keyed; rows
    /// without an `ext_id` are reported, never deleted. A fork-point
    /// baseline is required; the host builds it and passes it to
    /// [`crate::run_promote`]. Off by default because it removes primary
    /// rows.
    pub delete_promotion: bool,

    /// Resolution for a vertex that diverged on BOTH primary and the fork
    /// since the fork point (a concurrent edit). Consulted only in the
    /// baseline-aware merge, i.e. when a baseline is supplied.
    pub on_conflict: ConflictPolicy,
}

/// What to do with a concurrent divergent edit during a baseline-aware
/// merge.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
#[non_exhaustive]
pub enum ConflictPolicy {
    /// Keep primary's value as is and count the row in
    /// `vertices_conflicting`. This is the safe default.
    #[default]
    Skip,
    /// Write the fork's value over primary's and count the row in both
    /// `vertices_conflicting` and `vertices_updated`.
    Overwrite,
}

impl PromoteOptions {
    /// The legacy insert-only options; identical to the default value.
    #[must_use]
    pub fn insert_only() -> Self {
        Default::default()
    }

    /// Options with ext_id-keyed upsert of existing primary vertices
    /// turned on (fork-wins, no baseline).
    #[must_use]
    pub fn with_upsert() -> Self {
        let base = Self::default();
        Self {
            upsert: true,
            ..base
        }
    }

    /// Options for the baseline-aware merge: ext_id upsert, conflict
    /// detection, and delete-promotion. The host supplies a
    /// [`PromoteBaseline`].
    #[must_use]
    pub fn with_merge() -> Self {
        Self {
            delete_promotion: true,
            on_conflict: ConflictPolicy::Skip,
            ..Self::with_upsert()
        }
    }

    /// Builder step that replaces the conflict policy.
    #[must_use]
    pub fn on_conflict(self, policy: ConflictPolicy) -> Self {
        Self {
            on_conflict: policy,
            ..self
        }
    }
}
449
450/// Fork-point snapshot of primary, keyed for the merge/delete passes.
451///
452/// Built by reading primary pinned at the fork's `parent_snapshot_id`.
453/// `ext` keys rows by their stable `(label, ext_id)`; `no_ext` tracks
454/// `ext_id`-less rows by content-UID so they can be reported as
455/// un-promotable on delete, never deleted.
456#[derive(Debug, Clone, Default)]
457pub struct PromoteBaseline {
458    /// label → (ext_id → properties at the fork point).
459    pub ext: HashMap<String, HashMap<String, Properties>>,
460    /// label → content-UIDs of fork-point rows that had no `ext_id`.
461    pub no_ext: HashMap<String, HashSet<UniId>>,
462}
463
464impl PromoteBaseline {
465    /// `true` when the baseline holds no rows.
466    #[must_use]
467    pub fn is_empty(&self) -> bool {
468        self.ext.is_empty() && self.no_ext.is_empty()
469    }
470}
471
/// Counters describing what a `Uni::promote_from_fork` call did.
#[derive(Clone, Debug, Default)]
pub struct PromoteReport {
    /// Vertices inserted into primary.
    pub vertices_inserted: usize,
    /// Existing primary vertices updated in place (upsert by
    /// `(label, ext_id)`). Only populated when `PromoteOptions::upsert`
    /// is set.
    pub vertices_updated: usize,
    /// Fork rows that matched an existing primary vertex by
    /// `(label, ext_id)` and had identical properties, so nothing was
    /// written.
    pub vertices_skipped_no_op: usize,
    /// Vertices inserted although their presence on primary could NOT be
    /// confirmed: a transient resolve failure degraded to
    /// "absent → insert". When this is non-zero, some of
    /// `vertices_inserted` may be duplicates; see the warning logged at
    /// promote time. (M5)
    pub vertices_inserted_unverified: usize,
    /// Fork rows skipped because primary already holds the same UID.
    pub vertices_skipped_uid_conflict: usize,
    /// Primary vertices deleted because they existed at the fork point
    /// and the fork has since removed them (delete-promotion). Only
    /// populated when `PromoteOptions::delete_promotion` is set.
    pub vertices_deleted: usize,
    /// Fork-point rows without an `ext_id` that vanished from the fork.
    /// They cannot be safely resolved on primary for deletion, so they
    /// are counted here instead of being deleted.
    pub vertices_skipped_no_ext_id_for_delete: usize,
    /// ext_id targets for which both primary and the fork diverged from
    /// the fork-point baseline (a concurrent divergent edit). Each one is
    /// resolved per `PromoteOptions::on_conflict`.
    pub vertices_conflicting: usize,
    /// Edges inserted into primary.
    pub edges_inserted: usize,
    /// Fork edges skipped because primary already has an edge of the
    /// same type between the resolved endpoints.
    pub edges_skipped_duplicate: usize,
    /// Fork edges skipped because at least one endpoint had no UID match
    /// on primary. To get these edges inserted, first promote the
    /// missing vertices via a vertex pattern, then re-run.
    pub edges_skipped_no_endpoint: usize,
    /// Edges that touched a promoted vertex without being promoted
    /// themselves (no edge pattern in the call). The Phase 6 MVP
    /// silently skipped them with a warning. Phase 6b adds explicit edge
    /// patterns; when none is given, this counter still surfaces the
    /// incidental edges for visibility.
    pub edges_skipped: usize,
    /// Row counts per pattern, so callers can see which pattern matched
    /// what. The index is the pattern's position in the input slice.
    pub per_pattern_inserted: Vec<usize>,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Inverting a single property change swaps its before/after values.
    #[test]
    fn property_change_inverts_before_after() {
        let pc = PropertyChange {
            key: "age".into(),
            before: Some(Value::Int(30)),
            after: Some(Value::Int(31)),
        };
        let inv = pc.clone().invert();
        assert_eq!(inv.key, pc.key);
        assert_eq!(inv.before, pc.after);
        assert_eq!(inv.after, pc.before);
    }

    /// Inverting a vertex diff moves the added row to `deleted` and the
    /// deleted row to `added`.
    #[test]
    fn vertex_diff_invert_swaps_added_deleted() {
        // Distinct labels make the two rows distinguishable: with one row
        // on each side, a length check alone would also pass for an
        // `invert` that swaps nothing.
        let v_a = DiffVertex {
            label: "Person".into(),
            uid: UniId::from_bytes([1; 32]),
            vid: Some(Vid::new(1)),
            properties: Default::default(),
        };
        let v_b = DiffVertex {
            label: "Company".into(),
            uid: UniId::from_bytes([2; 32]),
            vid: Some(Vid::new(2)),
            properties: Default::default(),
        };
        let d = VertexDiff {
            added: vec![v_a],
            deleted: vec![v_b],
            changed: vec![],
        };
        let inv = d.invert();
        assert_eq!(inv.added.len(), 1);
        assert_eq!(inv.deleted.len(), 1);
        assert_eq!(inv.added[0].label, "Company");
        assert_eq!(inv.deleted[0].label, "Person");
        assert!(inv.changed.is_empty());
    }

    /// A default diff is empty and reports zero rows.
    #[test]
    fn fork_diff_default_is_empty() {
        let d = ForkDiff::default();
        assert!(d.is_empty());
        assert_eq!(d.total_rows(), 0);
    }

    /// A diff with one added vertex is non-empty, counts one row, and
    /// reports that row as deleted after inversion.
    #[test]
    fn fork_diff_counts_and_inverts_rows() {
        let v = DiffVertex {
            label: "Person".into(),
            uid: UniId::from_bytes([3; 32]),
            vid: Some(Vid::new(3)),
            properties: Default::default(),
        };
        let d = ForkDiff {
            vertices: VertexDiff {
                added: vec![v],
                deleted: vec![],
                changed: vec![],
            },
            edges: EdgeDiff::default(),
        };
        assert!(!d.is_empty());
        assert_eq!(d.total_rows(), 1);

        let inv = d.invert();
        assert!(inv.vertices.added.is_empty());
        assert_eq!(inv.vertices.deleted.len(), 1);
        assert!(inv.edges.is_empty());
        assert_eq!(inv.total_rows(), 1);
    }

    /// Display output for vertex and edge patterns, with and without a
    /// `WHERE` predicate.
    #[test]
    fn promote_pattern_display() {
        let p = PromotePattern::label("Person");
        assert_eq!(format!("{}", p), "(:Person)");
        let p2 = PromotePattern::label("Person").where_clause("n.age > 30");
        assert_eq!(format!("{}", p2), "(:Person WHERE n.age > 30)");

        let e = PromotePattern::edge_type("KNOWS");
        assert_eq!(format!("{}", e), "[:KNOWS]");
        let e2 = PromotePattern::edge_type("KNOWS").where_clause("r.since > 2020");
        assert_eq!(format!("{}", e2), "[:KNOWS WHERE r.since > 2020]");
    }
}