Skip to main content

tiptap_rusty_parser/
diff.rs

1//! Structural diff: compute a path-addressed [`Change`] list between two
2//! [`Node`] trees, and [`apply`] it to reproduce the target.
3//!
4//! ```
5//! use tiptap_rusty_parser::Document;
6//!
7//! let a = Document::from_json_str(
8//!     r#"{"type":"doc","content":[{"type":"paragraph","content":[{"type":"text","text":"hi"}]}]}"#,
9//! ).unwrap();
10//! let b = Document::from_json_str(
11//!     r#"{"type":"doc","content":[{"type":"paragraph","content":[{"type":"text","text":"bye"}]}]}"#,
12//! ).unwrap();
13//!
14//! let changes = a.diff(&b);          // -> Vec<Change>
15//! let mut c = a.clone();
16//! c.apply(&changes).unwrap();        // reproduce `b`
17//! assert_eq!(c, b);
18//! ```
19//!
20//! ## Path convention
21//! Node-local changes ([`Change::SetAttr`], `RemoveAttr`, `SetText`, `SetMarks`,
22//! `SetExtra`, `RemoveExtra`, `Replace`) address the **target node** by its
23//! index path. Child-list changes ([`Change::Insert`], [`Change::Remove`])
24//! address the **parent** node, with `index` selecting the child — mirroring
25//! [`Node::insert_child`] / [`Node::remove_child`].
26//!
27//! ## Apply contract
28//! [`apply`] executes changes strictly in order; child `index` values are
29//! interpreted against the *live* (already-partially-mutated) list. A list
30//! produced by [`diff`] always reproduces the target exactly:
31//! `apply(&mut a, &diff(a, b))` yields `b`.
32//!
33//! Empty-vs-absent container shapes (e.g. `"content":[]` vs no `content`) are
34//! preserved: when the field/child ops can't express the difference, the node
35//! is replaced wholesale so the round-trip stays exact.
36//!
37//! ## Move detection
38//! A child relocated within a list is emitted as a single [`Change::Move`]
39//! (no subtree clone) instead of a remove+insert: after LCS alignment, leftover
40//! deletions and insertions that are *equal by value* are paired as moves, and
41//! their live indices are derived by simulating the op stream — so the result
42//! reproduces the target exactly regardless of how moves interleave with plain
43//! inserts/removes. Pairing is greedy/FIFO, so duplicate-equal children give a
44//! correct (if not always minimal) result. [`invert`] needs no special handling:
45//! it re-diffs the reverse direction, re-deriving the inverse moves.
46//!
47//! ## v1 limitations
48//! - Matching is LCS-by-equality; modifies are paired positionally within the
49//!   gaps between matched anchors (still correct, just not always minimal).
50
51use crate::node::{Mark, Node};
52use serde::{Deserialize, Serialize};
53use serde_json::{Map, Value};
54use std::collections::{HashMap, HashSet};
55use std::fmt;
56
57/// A single structural change between two [`Node`] trees.
58///
59/// Serializes as a tagged object, e.g. `{"op":"setText","path":[0,0],"text":"hi"}`.
60#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
61#[serde(tag = "op", rename_all = "camelCase")]
62pub enum Change {
63    /// Set (insert or overwrite) attribute `key` on the node at `path`.
64    SetAttr {
65        /// Index path of the target node.
66        path: Vec<usize>,
67        /// Attribute key.
68        key: String,
69        /// New attribute value.
70        value: Value,
71    },
72    /// Remove attribute `key` from the node at `path`.
73    RemoveAttr {
74        /// Index path of the target node.
75        path: Vec<usize>,
76        /// Attribute key.
77        key: String,
78    },
79    /// Set the text payload of the node at `path` (`None` clears it).
80    SetText {
81        /// Index path of the target node.
82        path: Vec<usize>,
83        /// New text payload, or `None` to clear.
84        text: Option<String>,
85    },
86    /// Replace the whole mark list of the node at `path` (`None` clears it).
87    SetMarks {
88        /// Index path of the target node.
89        path: Vec<usize>,
90        /// New mark list, or `None` to clear.
91        marks: Option<Vec<Mark>>,
92    },
93    /// Set (insert or overwrite) unknown top-level field `key` on the node at `path`.
94    SetExtra {
95        /// Index path of the target node.
96        path: Vec<usize>,
97        /// Field key.
98        key: String,
99        /// New field value.
100        value: Value,
101    },
102    /// Remove unknown top-level field `key` from the node at `path`.
103    RemoveExtra {
104        /// Index path of the target node.
105        path: Vec<usize>,
106        /// Field key.
107        key: String,
108    },
109    /// Insert `node` as a child of the node at `path` (the parent), at `index`.
110    Insert {
111        /// Index path of the **parent** node.
112        path: Vec<usize>,
113        /// Child position to insert at.
114        index: usize,
115        /// The node to insert.
116        node: Node,
117    },
118    /// Remove the child at `index` of the node at `path` (the parent).
119    Remove {
120        /// Index path of the **parent** node.
121        path: Vec<usize>,
122        /// Child position to remove.
123        index: usize,
124    },
125    /// Replace the node at `path` wholesale (used when its `type` changes).
126    Replace {
127        /// Index path of the target node (empty = root).
128        path: Vec<usize>,
129        /// The replacement node.
130        node: Node,
131    },
132    /// Relocate a child within the same parent's list, without cloning its
133    /// subtree. `from`/`to` are interpreted against the *live* list: the child
134    /// at `from` is removed first, then re-inserted so it lands at index `to`
135    /// in the post-removal list — observationally `Remove{from}` + `Insert{to}`,
136    /// so it composes with the same live-index cursor model as the other ops.
137    Move {
138        /// Index path of the **parent** node.
139        path: Vec<usize>,
140        /// Live index of the child to move (before this op).
141        from: usize,
142        /// Target index in the list after the child is removed.
143        to: usize,
144    },
145}
146
147/// Error from [`apply`] when a change can't be located (no node at the path, or
148/// a child index out of range). Lists produced by [`diff`] never fail.
149#[derive(Debug, Clone, PartialEq, Eq)]
150pub struct ApplyError {
151    /// The path that could not be resolved.
152    pub path: Vec<usize>,
153}
154
155impl fmt::Display for ApplyError {
156    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
157        write!(f, "apply: no node at path {:?}", self.path)
158    }
159}
160
161impl std::error::Error for ApplyError {}
162
163impl Node {
164    /// Structural diff from `self` to `other`: a [`Change`] list that, when
165    /// [`applied`](apply) to a clone of `self`, reproduces `other`.
166    pub fn diff(&self, other: &Node) -> Vec<Change> {
167        let mut out = Vec::new();
168        let mut path = Vec::new();
169        diff_node(self, other, &mut path, &mut out);
170        out
171    }
172
173    /// Apply `changes` to `self` in order. See [`apply`].
174    pub fn apply(&mut self, changes: &[Change]) -> std::result::Result<(), ApplyError> {
175        apply(self, changes)
176    }
177
178    /// Invert `changes` relative to `self` (the pre-image). See [`invert`].
179    pub fn invert(&self, changes: &[Change]) -> std::result::Result<Vec<Change>, ApplyError> {
180        invert(self, changes)
181    }
182}
183
184/// Structural diff between two nodes. Free-function form of [`Node::diff`].
185pub fn diff(a: &Node, b: &Node) -> Vec<Change> {
186    a.diff(b)
187}
188
189/// Apply a [`Change`] list to `root` in order, mutating it in place.
190///
191/// Applying `diff(a, b)` to a clone of `a` reproduces `b` exactly. Returns
192/// [`ApplyError`] only for externally-authored lists whose paths/indices don't
193/// resolve.
194pub fn apply(root: &mut Node, changes: &[Change]) -> std::result::Result<(), ApplyError> {
195    for change in changes {
196        apply_one(root, change)?;
197    }
198    Ok(())
199}
200
201/// Invert a change list: produce the reverse changes that, applied to the
202/// result of `apply(base, changes)`, restore `base` — the basis for undo.
203///
204/// Computed as `diff(apply(base, changes), base)`: replay the forward changes,
205/// then diff back to `base`. This reuses the diff round-trip guarantee, so it
206/// handles every value/shape edge exactly — subject to the same non-minimality
207/// caveats as [`diff`] (e.g. no move detection). Errors only if `changes`
208/// itself doesn't apply to `base`.
209///
210/// ```
211/// use tiptap_rusty_parser::Document;
212/// let a = Document::from_json_str(r#"{"type":"doc","content":[{"type":"paragraph"}]}"#).unwrap();
213/// let b = Document::from_json_str(r#"{"type":"doc","content":[{"type":"heading"}]}"#).unwrap();
214/// let forward = a.diff(&b);
215/// let undo = a.invert(&forward).unwrap();
216/// let mut c = b.clone();
217/// c.apply(&undo).unwrap();
218/// assert_eq!(c, a);
219/// ```
220pub fn invert(base: &Node, changes: &[Change]) -> std::result::Result<Vec<Change>, ApplyError> {
221    let mut result = base.clone();
222    apply(&mut result, changes)?;
223    Ok(result.diff(base))
224}
225
226// ---- diff internals -----------------------------------------------------
227
228fn diff_node(a: &Node, b: &Node, path: &mut Vec<usize>, out: &mut Vec<Change>) {
229    if a == b {
230        return; // prune identical subtrees (the main perf lever)
231    }
232    if a.node_type != b.node_type
233        || empty_shape_mismatch(
234            a.attrs.as_ref().map(Map::is_empty),
235            b.attrs.as_ref().map(Map::is_empty),
236        )
237        || empty_shape_mismatch(
238            a.content.as_ref().map(Vec::is_empty),
239            b.content.as_ref().map(Vec::is_empty),
240        )
241    {
242        // Type change, or an empty-vs-None container shape the field/child ops
243        // can't express (e.g. `"content":[]` -> absent) -> wholesale replace.
244        out.push(Change::Replace {
245            path: path.clone(),
246            node: b.clone(),
247        });
248        return;
249    }
250    diff_attrs(a.attrs.as_ref(), b.attrs.as_ref(), path, out);
251    if a.text != b.text {
252        out.push(Change::SetText {
253            path: path.clone(),
254            text: b.text.clone(),
255        });
256    }
257    if a.marks != b.marks {
258        out.push(Change::SetMarks {
259            path: path.clone(),
260            marks: b.marks.clone(),
261        });
262    }
263    diff_extra(&a.extra, &b.extra, path, out);
264    diff_children(a.children(), b.children(), path, out);
265}
266
267/// Whether an `Option<container>` shape difference (where the arg is
268/// `Some(is_empty)` / `None`) can't be reconciled by the key/child ops, which
269/// normalize emptied containers to `None`. A present-but-empty container
270/// (`Some(true)`, e.g. parsed from `[]`/`{}`) needs an exact match on the other
271/// side; otherwise the node must be replaced wholesale to round-trip exactly.
272fn empty_shape_mismatch(a_is_empty: Option<bool>, b_is_empty: Option<bool>) -> bool {
273    match b_is_empty {
274        Some(true) => a_is_empty != Some(true),
275        None => a_is_empty == Some(true),
276        Some(false) => false,
277    }
278}
279
280fn diff_attrs(
281    a: Option<&Map<String, Value>>,
282    b: Option<&Map<String, Value>>,
283    path: &mut [usize],
284    out: &mut Vec<Change>,
285) {
286    let empty = Map::new();
287    let am = a.unwrap_or(&empty);
288    let bm = b.unwrap_or(&empty);
289    for (k, v) in bm {
290        if am.get(k) != Some(v) {
291            out.push(Change::SetAttr {
292                path: path.to_vec(),
293                key: k.clone(),
294                value: v.clone(),
295            });
296        }
297    }
298    for k in am.keys() {
299        if !bm.contains_key(k) {
300            out.push(Change::RemoveAttr {
301                path: path.to_vec(),
302                key: k.clone(),
303            });
304        }
305    }
306}
307
308fn diff_extra(
309    am: &Map<String, Value>,
310    bm: &Map<String, Value>,
311    path: &mut [usize],
312    out: &mut Vec<Change>,
313) {
314    for (k, v) in bm {
315        if am.get(k) != Some(v) {
316            out.push(Change::SetExtra {
317                path: path.to_vec(),
318                key: k.clone(),
319                value: v.clone(),
320            });
321        }
322    }
323    for k in am.keys() {
324        if !bm.contains_key(k) {
325            out.push(Change::RemoveExtra {
326                path: path.to_vec(),
327                key: k.clone(),
328            });
329        }
330    }
331}
332
333/// One LCS-alignment step over the (trimmed) middle child slices.
334enum Step {
335    Match,
336    Del(usize),
337    Ins(usize),
338}
339
340/// A live-list element identity used by the move simulation: either an original
341/// `a`-child (reused/relocated/modified in place) or a freshly inserted node.
342#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
343enum Key {
344    Orig(usize),
345    New(usize),
346}
347
348/// Positional pairing of a leftover deletion with a leftover insertion in the
349/// same gap: a same-type modify (recurse) or a different-type wholesale replace.
350enum Role {
351    Modify(usize),
352    Replace(usize),
353}
354
355fn diff_children(a: &[Node], b: &[Node], path: &mut Vec<usize>, out: &mut Vec<Change>) {
356    // Trim common prefix/suffix (cheap; shrinks the LCS DP and handles the
357    // common append/prepend cases in linear time).
358    let mut start = 0;
359    while start < a.len() && start < b.len() && a[start] == b[start] {
360        start += 1;
361    }
362    let mut ea = a.len();
363    let mut eb = b.len();
364    while ea > start && eb > start && a[ea - 1] == b[eb - 1] {
365        ea -= 1;
366        eb -= 1;
367    }
368
369    let am = &a[start..ea];
370    let bm = &b[start..eb];
371    if am.is_empty() && bm.is_empty() {
372        return;
373    }
374
375    let steps = lcs_align(am, bm);
376
377    // Reconstruct the alignment: which bm index each am index matched (kept in
378    // place), plus the per-gap unmatched deletions/insertions between anchors.
379    let mut gaps: Vec<(Vec<usize>, Vec<usize>)> = Vec::new();
380    let mut cur_dels: Vec<usize> = Vec::new();
381    let mut cur_inss: Vec<usize> = Vec::new();
382    let mut bm_match_src: HashMap<usize, usize> = HashMap::new(); // bm_j -> am_i
383    let (mut ai, mut bj) = (0usize, 0usize);
384    for step in &steps {
385        match step {
386            Step::Match => {
387                gaps.push((std::mem::take(&mut cur_dels), std::mem::take(&mut cur_inss)));
388                bm_match_src.insert(bj, ai);
389                ai += 1;
390                bj += 1;
391            }
392            Step::Del(i) => {
393                cur_dels.push(*i);
394                ai += 1;
395            }
396            Step::Ins(j) => {
397                cur_inss.push(*j);
398                bj += 1;
399            }
400        }
401    }
402    gaps.push((cur_dels, cur_inss));
403
404    // Move detection: pair value-equal nodes that LCS split into a deletion and
405    // an insertion. Greedy/FIFO over equal values -> stable on duplicates.
406    let all_dels: Vec<usize> = gaps.iter().flat_map(|(d, _)| d.iter().copied()).collect();
407    let mut del_used = vec![false; all_dels.len()];
408    let mut bm_move_src: HashMap<usize, usize> = HashMap::new(); // bm_j -> am_d
409    let mut am_moved: HashSet<usize> = HashSet::new();
410    for (_, inss) in &gaps {
411        for &j in inss {
412            if let Some(slot) = all_dels
413                .iter()
414                .enumerate()
415                .find(|(slot, &d)| !del_used[*slot] && am[d] == bm[j])
416                .map(|(slot, _)| slot)
417            {
418                del_used[slot] = true;
419                bm_move_src.insert(j, all_dels[slot]);
420                am_moved.insert(all_dels[slot]);
421            }
422        }
423    }
424
425    // Per-gap modify/replace pairing on the remaining (non-moved) leftovers.
426    let mut bm_role: HashMap<usize, Role> = HashMap::new();
427    let mut removed_am: Vec<usize> = Vec::new();
428    for (dels, inss) in &gaps {
429        let rem_dels: Vec<usize> = dels
430            .iter()
431            .copied()
432            .filter(|d| !am_moved.contains(d))
433            .collect();
434        let rem_inss: Vec<usize> = inss
435            .iter()
436            .copied()
437            .filter(|j| !bm_move_src.contains_key(j))
438            .collect();
439        let pairs = rem_dels.len().min(rem_inss.len());
440        for k in 0..pairs {
441            let (d, j) = (rem_dels[k], rem_inss[k]);
442            if am[d].node_type == bm[j].node_type {
443                bm_role.insert(j, Role::Modify(d)); // same type -> recurse in place
444            } else {
445                bm_role.insert(j, Role::Replace(d)); // type change -> wholesale
446            }
447        }
448        removed_am.extend_from_slice(&rem_dels[pairs..]);
449    }
450
451    // Target key sequence for the middle (what the live list must become), plus
452    // a key -> target-index lookup used to place relocated nodes minimally.
453    let mut target: Vec<Key> = Vec::with_capacity(bm.len());
454    for j in 0..bm.len() {
455        let key = if let Some(&i) = bm_match_src.get(&j) {
456            Key::Orig(i)
457        } else if let Some(&d) = bm_move_src.get(&j) {
458            Key::Orig(d)
459        } else {
460            match bm_role.get(&j) {
461                Some(Role::Modify(d) | Role::Replace(d)) => Key::Orig(*d),
462                None => Key::New(j),
463            }
464        };
465        target.push(key);
466    }
467    let target_pos: HashMap<Key, usize> = target.iter().enumerate().map(|(i, &k)| (k, i)).collect();
468    let is_moved = |k: Key| matches!(k, Key::Orig(x) if am_moved.contains(&x));
469
470    // Simulate the live list and emit ops whose indices are valid by
471    // construction. Absolute index = start + position in the middle.
472    let mut live: Vec<Key> = (0..am.len()).map(Key::Orig).collect();
473
474    // 1. Plain removes (indices computed against the shrinking live list).
475    for &d in &removed_am {
476        let idx = live.iter().position(|&k| k == Key::Orig(d)).unwrap();
477        out.push(Change::Remove {
478            path: path.clone(),
479            index: start + idx,
480        });
481        live.remove(idx);
482    }
483
484    // 2. Settle each target position left to right, maintaining the invariant
485    //    `live[..p] == target[..p]`. New nodes are inserted; otherwise we
486    //    relocate exactly one genuinely-moved node (the displaced occupant, or
487    //    the wanted node if it's the one that moved) to its sorted destination —
488    //    one Move per moved node, never disturbing the matched skeleton.
489    let mut p = 0;
490    while p < target.len() {
491        let want = target[p];
492        if live.get(p).copied() == Some(want) {
493            p += 1;
494            continue;
495        }
496        if let Key::New(j) = want {
497            out.push(Change::Insert {
498                path: path.clone(),
499                index: start + p,
500                node: bm[j].clone(),
501            });
502            live.insert(p, want);
503            p += 1;
504            continue;
505        }
506        // `want` is an original node not yet at `p`. Move the displaced occupant
507        // if it is the relocated one, else pull `want` (itself relocated) here.
508        let cur = live[p];
509        let mover = if is_moved(cur) { cur } else { want };
510        let from = live.iter().position(|&k| k == mover).unwrap();
511        let tmover = target_pos[&mover];
512        // Destination = number of currently-present elements that precede it in
513        // the target order (excluding the mover itself).
514        let dest = live
515            .iter()
516            .filter(|&&k| k != mover && target_pos[&k] < tmover)
517            .count();
518        out.push(Change::Move {
519            path: path.clone(),
520            from: start + from,
521            to: start + dest,
522        });
523        let k = live.remove(from);
524        live.insert(dest, k);
525        // Re-check `p` (the move may or may not have settled this slot).
526    }
527    debug_assert_eq!(live, target, "diff move simulation diverged from target");
528
529    // 3. Value fixes for modify/replace pairs, at their final positions (the
530    //    structural layout is now settled, so index = start + bm index).
531    for (j, target_node) in bm.iter().enumerate() {
532        match bm_role.get(&j) {
533            Some(Role::Modify(d)) => {
534                path.push(start + j);
535                diff_node(&am[*d], target_node, path, out);
536                path.pop();
537            }
538            Some(Role::Replace(_)) => {
539                let mut child = path.clone();
540                child.push(start + j);
541                out.push(Change::Replace {
542                    path: child,
543                    node: target_node.clone(),
544                });
545            }
546            None => {}
547        }
548    }
549}
550
551/// Longest-common-subsequence alignment of two child slices, by node equality.
552fn lcs_align(a: &[Node], b: &[Node]) -> Vec<Step> {
553    let (m, n) = (a.len(), b.len());
554    if m == 0 {
555        return (0..n).map(Step::Ins).collect();
556    }
557    if n == 0 {
558        return (0..m).map(Step::Del).collect();
559    }
560    // dp[i][j] = LCS length of a[i..] vs b[j..].
561    let mut dp = vec![vec![0u32; n + 1]; m + 1];
562    for i in (0..m).rev() {
563        for j in (0..n).rev() {
564            dp[i][j] = if a[i] == b[j] {
565                dp[i + 1][j + 1] + 1
566            } else {
567                dp[i + 1][j].max(dp[i][j + 1])
568            };
569        }
570    }
571    let mut steps = Vec::with_capacity(m.max(n));
572    let (mut i, mut j) = (0usize, 0usize);
573    while i < m && j < n {
574        if a[i] == b[j] {
575            steps.push(Step::Match);
576            i += 1;
577            j += 1;
578        } else if dp[i + 1][j] >= dp[i][j + 1] {
579            steps.push(Step::Del(i));
580            i += 1;
581        } else {
582            steps.push(Step::Ins(j));
583            j += 1;
584        }
585    }
586    while i < m {
587        steps.push(Step::Del(i));
588        i += 1;
589    }
590    while j < n {
591        steps.push(Step::Ins(j));
592        j += 1;
593    }
594    steps
595}
596
597// ---- apply internals ----------------------------------------------------
598
599fn node_at_mut<'a>(
600    root: &'a mut Node,
601    path: &[usize],
602) -> std::result::Result<&'a mut Node, ApplyError> {
603    root.node_at_mut(path).ok_or_else(|| ApplyError {
604        path: path.to_vec(),
605    })
606}
607
608fn apply_one(root: &mut Node, change: &Change) -> std::result::Result<(), ApplyError> {
609    match change {
610        Change::SetAttr { path, key, value } => {
611            node_at_mut(root, path)?.set_attr(key.clone(), value.clone());
612        }
613        Change::RemoveAttr { path, key } => {
614            node_at_mut(root, path)?.remove_attr(key);
615        }
616        Change::SetText { path, text } => {
617            node_at_mut(root, path)?.text = text.clone();
618        }
619        Change::SetMarks { path, marks } => {
620            node_at_mut(root, path)?.marks = marks.clone();
621        }
622        Change::SetExtra { path, key, value } => {
623            node_at_mut(root, path)?
624                .extra
625                .insert(key.clone(), value.clone());
626        }
627        Change::RemoveExtra { path, key } => {
628            node_at_mut(root, path)?.extra.remove(key);
629        }
630        Change::Insert { path, index, node } => {
631            let parent = node_at_mut(root, path)?;
632            if *index > parent.child_count() {
633                let mut p = path.clone();
634                p.push(*index);
635                return Err(ApplyError { path: p });
636            }
637            parent.insert_child(*index, node.clone());
638        }
639        Change::Remove { path, index } => {
640            if node_at_mut(root, path)?.remove_child(*index).is_none() {
641                let mut p = path.clone();
642                p.push(*index);
643                return Err(ApplyError { path: p });
644            }
645        }
646        Change::Replace { path, node } => {
647            if path.is_empty() {
648                *root = node.clone();
649            } else {
650                let (parent_path, last) = path.split_at(path.len() - 1);
651                let parent = node_at_mut(root, parent_path)?;
652                if parent.replace_child(last[0], node.clone()).is_none() {
653                    return Err(ApplyError { path: path.clone() });
654                }
655            }
656        }
657        Change::Move { path, from, to } => {
658            let parent = node_at_mut(root, path)?;
659            let len = parent.child_count();
660            // `from` indexes the live list; `to` indexes the list after removal,
661            // so its valid range is 0..=len-1.
662            if *from >= len || *to >= len {
663                let mut p = path.clone();
664                p.push(if *from >= len { *from } else { *to });
665                return Err(ApplyError { path: p });
666            }
667            let node = parent.remove_child(*from).expect("from bounds-checked");
668            parent.insert_child(*to, node);
669        }
670    }
671    Ok(())
672}