tiptap_rusty_parser/diff.rs
1//! Structural diff: compute a path-addressed [`Change`] list between two
2//! [`Node`] trees, and [`apply`] it to reproduce the target.
3//!
4//! ```
5//! use tiptap_rusty_parser::Document;
6//!
7//! let a = Document::from_json_str(
8//! r#"{"type":"doc","content":[{"type":"paragraph","content":[{"type":"text","text":"hi"}]}]}"#,
9//! ).unwrap();
10//! let b = Document::from_json_str(
11//! r#"{"type":"doc","content":[{"type":"paragraph","content":[{"type":"text","text":"bye"}]}]}"#,
12//! ).unwrap();
13//!
14//! let changes = a.diff(&b); // -> Vec<Change>
15//! let mut c = a.clone();
16//! c.apply(&changes).unwrap(); // reproduce `b`
17//! assert_eq!(c, b);
18//! ```
19//!
20//! ## Path convention
21//! Node-local changes ([`Change::SetAttr`], `RemoveAttr`, `SetText`, `SetMarks`,
22//! `SetExtra`, `RemoveExtra`, `Replace`) address the **target node** by its
23//! index path. Child-list changes ([`Change::Insert`], [`Change::Remove`])
24//! address the **parent** node, with `index` selecting the child — mirroring
25//! [`Node::insert_child`] / [`Node::remove_child`].
26//!
27//! ## Apply contract
28//! [`apply`] executes changes strictly in order; child `index` values are
29//! interpreted against the *live* (already-partially-mutated) list. A list
30//! produced by [`diff`] always reproduces the target exactly:
31//! `apply(&mut a, &diff(a, b))` yields `b`.
32//!
33//! Empty-vs-absent container shapes (e.g. `"content":[]` vs no `content`) are
34//! preserved: when the field/child ops can't express the difference, the node
35//! is replaced wholesale so the round-trip stays exact.
36//!
37//! ## Move detection
38//! A child relocated within a list is emitted as a single [`Change::Move`]
39//! (no subtree clone) instead of a remove+insert: after LCS alignment, leftover
40//! deletions and insertions that are *equal by value* are paired as moves, and
41//! their live indices are derived by simulating the op stream — so the result
42//! reproduces the target exactly regardless of how moves interleave with plain
43//! inserts/removes. Pairing is greedy/FIFO, so duplicate-equal children give a
44//! correct (if not always minimal) result. [`invert`] needs no special handling:
45//! it re-diffs the reverse direction, re-deriving the inverse moves.
46//!
47//! ## v1 limitations
48//! - Matching is LCS-by-equality; modifies are paired positionally within the
49//! gaps between matched anchors (still correct, just not always minimal).
50
51use crate::node::{Mark, Node};
52use serde::{Deserialize, Serialize};
53use serde_json::{Map, Value};
54use std::collections::{HashMap, HashSet};
55use std::fmt;
56
57/// A single structural change between two [`Node`] trees.
58///
59/// Serializes as a tagged object, e.g. `{"op":"setText","path":[0,0],"text":"hi"}`.
60#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
61#[serde(tag = "op", rename_all = "camelCase")]
62pub enum Change {
63 /// Set (insert or overwrite) attribute `key` on the node at `path`.
64 SetAttr {
65 /// Index path of the target node.
66 path: Vec<usize>,
67 /// Attribute key.
68 key: String,
69 /// New attribute value.
70 value: Value,
71 },
72 /// Remove attribute `key` from the node at `path`.
73 RemoveAttr {
74 /// Index path of the target node.
75 path: Vec<usize>,
76 /// Attribute key.
77 key: String,
78 },
79 /// Set the text payload of the node at `path` (`None` clears it).
80 SetText {
81 /// Index path of the target node.
82 path: Vec<usize>,
83 /// New text payload, or `None` to clear.
84 text: Option<String>,
85 },
86 /// Replace the whole mark list of the node at `path` (`None` clears it).
87 SetMarks {
88 /// Index path of the target node.
89 path: Vec<usize>,
90 /// New mark list, or `None` to clear.
91 marks: Option<Vec<Mark>>,
92 },
93 /// Set (insert or overwrite) unknown top-level field `key` on the node at `path`.
94 SetExtra {
95 /// Index path of the target node.
96 path: Vec<usize>,
97 /// Field key.
98 key: String,
99 /// New field value.
100 value: Value,
101 },
102 /// Remove unknown top-level field `key` from the node at `path`.
103 RemoveExtra {
104 /// Index path of the target node.
105 path: Vec<usize>,
106 /// Field key.
107 key: String,
108 },
109 /// Insert `node` as a child of the node at `path` (the parent), at `index`.
110 Insert {
111 /// Index path of the **parent** node.
112 path: Vec<usize>,
113 /// Child position to insert at.
114 index: usize,
115 /// The node to insert.
116 node: Node,
117 },
118 /// Remove the child at `index` of the node at `path` (the parent).
119 Remove {
120 /// Index path of the **parent** node.
121 path: Vec<usize>,
122 /// Child position to remove.
123 index: usize,
124 },
125 /// Replace the node at `path` wholesale (used when its `type` changes).
126 Replace {
127 /// Index path of the target node (empty = root).
128 path: Vec<usize>,
129 /// The replacement node.
130 node: Node,
131 },
132 /// Relocate a child within the same parent's list, without cloning its
133 /// subtree. `from`/`to` are interpreted against the *live* list: the child
134 /// at `from` is removed first, then re-inserted so it lands at index `to`
135 /// in the post-removal list — observationally `Remove{from}` + `Insert{to}`,
136 /// so it composes with the same live-index cursor model as the other ops.
137 Move {
138 /// Index path of the **parent** node.
139 path: Vec<usize>,
140 /// Live index of the child to move (before this op).
141 from: usize,
142 /// Target index in the list after the child is removed.
143 to: usize,
144 },
145}
146
147/// Error from [`apply`] when a change can't be located (no node at the path, or
148/// a child index out of range). Lists produced by [`diff`] never fail.
149#[derive(Debug, Clone, PartialEq, Eq)]
150pub struct ApplyError {
151 /// The path that could not be resolved.
152 pub path: Vec<usize>,
153}
154
155impl fmt::Display for ApplyError {
156 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
157 write!(f, "apply: no node at path {:?}", self.path)
158 }
159}
160
161impl std::error::Error for ApplyError {}
162
163impl Node {
164 /// Structural diff from `self` to `other`: a [`Change`] list that, when
165 /// [`applied`](apply) to a clone of `self`, reproduces `other`.
166 pub fn diff(&self, other: &Node) -> Vec<Change> {
167 let mut out = Vec::new();
168 let mut path = Vec::new();
169 diff_node(self, other, &mut path, &mut out);
170 out
171 }
172
173 /// Apply `changes` to `self` in order. See [`apply`].
174 pub fn apply(&mut self, changes: &[Change]) -> std::result::Result<(), ApplyError> {
175 apply(self, changes)
176 }
177
178 /// Invert `changes` relative to `self` (the pre-image). See [`invert`].
179 pub fn invert(&self, changes: &[Change]) -> std::result::Result<Vec<Change>, ApplyError> {
180 invert(self, changes)
181 }
182}
183
184/// Structural diff between two nodes. Free-function form of [`Node::diff`].
185pub fn diff(a: &Node, b: &Node) -> Vec<Change> {
186 a.diff(b)
187}
188
189/// Apply a [`Change`] list to `root` in order, mutating it in place.
190///
191/// Applying `diff(a, b)` to a clone of `a` reproduces `b` exactly. Returns
192/// [`ApplyError`] only for externally-authored lists whose paths/indices don't
193/// resolve.
194pub fn apply(root: &mut Node, changes: &[Change]) -> std::result::Result<(), ApplyError> {
195 for change in changes {
196 apply_one(root, change)?;
197 }
198 Ok(())
199}
200
201/// Invert a change list: produce the reverse changes that, applied to the
202/// result of `apply(base, changes)`, restore `base` — the basis for undo.
203///
204/// Computed as `diff(apply(base, changes), base)`: replay the forward changes,
205/// then diff back to `base`. This reuses the diff round-trip guarantee, so it
206/// handles every value/shape edge exactly — subject to the same non-minimality
207/// caveats as [`diff`] (e.g. no move detection). Errors only if `changes`
208/// itself doesn't apply to `base`.
209///
210/// ```
211/// use tiptap_rusty_parser::Document;
212/// let a = Document::from_json_str(r#"{"type":"doc","content":[{"type":"paragraph"}]}"#).unwrap();
213/// let b = Document::from_json_str(r#"{"type":"doc","content":[{"type":"heading"}]}"#).unwrap();
214/// let forward = a.diff(&b);
215/// let undo = a.invert(&forward).unwrap();
216/// let mut c = b.clone();
217/// c.apply(&undo).unwrap();
218/// assert_eq!(c, a);
219/// ```
220pub fn invert(base: &Node, changes: &[Change]) -> std::result::Result<Vec<Change>, ApplyError> {
221 let mut result = base.clone();
222 apply(&mut result, changes)?;
223 Ok(result.diff(base))
224}
225
226// ---- diff internals -----------------------------------------------------
227
228fn diff_node(a: &Node, b: &Node, path: &mut Vec<usize>, out: &mut Vec<Change>) {
229 if a == b {
230 return; // prune identical subtrees (the main perf lever)
231 }
232 if a.node_type != b.node_type
233 || empty_shape_mismatch(
234 a.attrs.as_ref().map(Map::is_empty),
235 b.attrs.as_ref().map(Map::is_empty),
236 )
237 || empty_shape_mismatch(
238 a.content.as_ref().map(Vec::is_empty),
239 b.content.as_ref().map(Vec::is_empty),
240 )
241 {
242 // Type change, or an empty-vs-None container shape the field/child ops
243 // can't express (e.g. `"content":[]` -> absent) -> wholesale replace.
244 out.push(Change::Replace {
245 path: path.clone(),
246 node: b.clone(),
247 });
248 return;
249 }
250 diff_attrs(a.attrs.as_ref(), b.attrs.as_ref(), path, out);
251 if a.text != b.text {
252 out.push(Change::SetText {
253 path: path.clone(),
254 text: b.text.clone(),
255 });
256 }
257 if a.marks != b.marks {
258 out.push(Change::SetMarks {
259 path: path.clone(),
260 marks: b.marks.clone(),
261 });
262 }
263 diff_extra(&a.extra, &b.extra, path, out);
264 diff_children(a.children(), b.children(), path, out);
265}
266
267/// Whether an `Option<container>` shape difference (where the arg is
268/// `Some(is_empty)` / `None`) can't be reconciled by the key/child ops, which
269/// normalize emptied containers to `None`. A present-but-empty container
270/// (`Some(true)`, e.g. parsed from `[]`/`{}`) needs an exact match on the other
271/// side; otherwise the node must be replaced wholesale to round-trip exactly.
272fn empty_shape_mismatch(a_is_empty: Option<bool>, b_is_empty: Option<bool>) -> bool {
273 match b_is_empty {
274 Some(true) => a_is_empty != Some(true),
275 None => a_is_empty == Some(true),
276 Some(false) => false,
277 }
278}
279
280fn diff_attrs(
281 a: Option<&Map<String, Value>>,
282 b: Option<&Map<String, Value>>,
283 path: &mut [usize],
284 out: &mut Vec<Change>,
285) {
286 let empty = Map::new();
287 let am = a.unwrap_or(&empty);
288 let bm = b.unwrap_or(&empty);
289 for (k, v) in bm {
290 if am.get(k) != Some(v) {
291 out.push(Change::SetAttr {
292 path: path.to_vec(),
293 key: k.clone(),
294 value: v.clone(),
295 });
296 }
297 }
298 for k in am.keys() {
299 if !bm.contains_key(k) {
300 out.push(Change::RemoveAttr {
301 path: path.to_vec(),
302 key: k.clone(),
303 });
304 }
305 }
306}
307
308fn diff_extra(
309 am: &Map<String, Value>,
310 bm: &Map<String, Value>,
311 path: &mut [usize],
312 out: &mut Vec<Change>,
313) {
314 for (k, v) in bm {
315 if am.get(k) != Some(v) {
316 out.push(Change::SetExtra {
317 path: path.to_vec(),
318 key: k.clone(),
319 value: v.clone(),
320 });
321 }
322 }
323 for k in am.keys() {
324 if !bm.contains_key(k) {
325 out.push(Change::RemoveExtra {
326 path: path.to_vec(),
327 key: k.clone(),
328 });
329 }
330 }
331}
332
333/// One LCS-alignment step over the (trimmed) middle child slices.
334enum Step {
335 Match,
336 Del(usize),
337 Ins(usize),
338}
339
340/// A live-list element identity used by the move simulation: either an original
341/// `a`-child (reused/relocated/modified in place) or a freshly inserted node.
342#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
343enum Key {
344 Orig(usize),
345 New(usize),
346}
347
348/// Positional pairing of a leftover deletion with a leftover insertion in the
349/// same gap: a same-type modify (recurse) or a different-type wholesale replace.
350enum Role {
351 Modify(usize),
352 Replace(usize),
353}
354
355fn diff_children(a: &[Node], b: &[Node], path: &mut Vec<usize>, out: &mut Vec<Change>) {
356 // Trim common prefix/suffix (cheap; shrinks the LCS DP and handles the
357 // common append/prepend cases in linear time).
358 let mut start = 0;
359 while start < a.len() && start < b.len() && a[start] == b[start] {
360 start += 1;
361 }
362 let mut ea = a.len();
363 let mut eb = b.len();
364 while ea > start && eb > start && a[ea - 1] == b[eb - 1] {
365 ea -= 1;
366 eb -= 1;
367 }
368
369 let am = &a[start..ea];
370 let bm = &b[start..eb];
371 if am.is_empty() && bm.is_empty() {
372 return;
373 }
374
375 let steps = lcs_align(am, bm);
376
377 // Reconstruct the alignment: which bm index each am index matched (kept in
378 // place), plus the per-gap unmatched deletions/insertions between anchors.
379 let mut gaps: Vec<(Vec<usize>, Vec<usize>)> = Vec::new();
380 let mut cur_dels: Vec<usize> = Vec::new();
381 let mut cur_inss: Vec<usize> = Vec::new();
382 let mut bm_match_src: HashMap<usize, usize> = HashMap::new(); // bm_j -> am_i
383 let (mut ai, mut bj) = (0usize, 0usize);
384 for step in &steps {
385 match step {
386 Step::Match => {
387 gaps.push((std::mem::take(&mut cur_dels), std::mem::take(&mut cur_inss)));
388 bm_match_src.insert(bj, ai);
389 ai += 1;
390 bj += 1;
391 }
392 Step::Del(i) => {
393 cur_dels.push(*i);
394 ai += 1;
395 }
396 Step::Ins(j) => {
397 cur_inss.push(*j);
398 bj += 1;
399 }
400 }
401 }
402 gaps.push((cur_dels, cur_inss));
403
404 // Move detection: pair value-equal nodes that LCS split into a deletion and
405 // an insertion. Greedy/FIFO over equal values -> stable on duplicates.
406 let all_dels: Vec<usize> = gaps.iter().flat_map(|(d, _)| d.iter().copied()).collect();
407 let mut del_used = vec![false; all_dels.len()];
408 let mut bm_move_src: HashMap<usize, usize> = HashMap::new(); // bm_j -> am_d
409 let mut am_moved: HashSet<usize> = HashSet::new();
410 for (_, inss) in &gaps {
411 for &j in inss {
412 if let Some(slot) = all_dels
413 .iter()
414 .enumerate()
415 .find(|(slot, &d)| !del_used[*slot] && am[d] == bm[j])
416 .map(|(slot, _)| slot)
417 {
418 del_used[slot] = true;
419 bm_move_src.insert(j, all_dels[slot]);
420 am_moved.insert(all_dels[slot]);
421 }
422 }
423 }
424
425 // Per-gap modify/replace pairing on the remaining (non-moved) leftovers.
426 let mut bm_role: HashMap<usize, Role> = HashMap::new();
427 let mut removed_am: Vec<usize> = Vec::new();
428 for (dels, inss) in &gaps {
429 let rem_dels: Vec<usize> = dels
430 .iter()
431 .copied()
432 .filter(|d| !am_moved.contains(d))
433 .collect();
434 let rem_inss: Vec<usize> = inss
435 .iter()
436 .copied()
437 .filter(|j| !bm_move_src.contains_key(j))
438 .collect();
439 let pairs = rem_dels.len().min(rem_inss.len());
440 for k in 0..pairs {
441 let (d, j) = (rem_dels[k], rem_inss[k]);
442 if am[d].node_type == bm[j].node_type {
443 bm_role.insert(j, Role::Modify(d)); // same type -> recurse in place
444 } else {
445 bm_role.insert(j, Role::Replace(d)); // type change -> wholesale
446 }
447 }
448 removed_am.extend_from_slice(&rem_dels[pairs..]);
449 }
450
451 // Target key sequence for the middle (what the live list must become), plus
452 // a key -> target-index lookup used to place relocated nodes minimally.
453 let mut target: Vec<Key> = Vec::with_capacity(bm.len());
454 for j in 0..bm.len() {
455 let key = if let Some(&i) = bm_match_src.get(&j) {
456 Key::Orig(i)
457 } else if let Some(&d) = bm_move_src.get(&j) {
458 Key::Orig(d)
459 } else {
460 match bm_role.get(&j) {
461 Some(Role::Modify(d) | Role::Replace(d)) => Key::Orig(*d),
462 None => Key::New(j),
463 }
464 };
465 target.push(key);
466 }
467 let target_pos: HashMap<Key, usize> = target.iter().enumerate().map(|(i, &k)| (k, i)).collect();
468 let is_moved = |k: Key| matches!(k, Key::Orig(x) if am_moved.contains(&x));
469
470 // Simulate the live list and emit ops whose indices are valid by
471 // construction. Absolute index = start + position in the middle.
472 let mut live: Vec<Key> = (0..am.len()).map(Key::Orig).collect();
473
474 // 1. Plain removes (indices computed against the shrinking live list).
475 for &d in &removed_am {
476 let idx = live.iter().position(|&k| k == Key::Orig(d)).unwrap();
477 out.push(Change::Remove {
478 path: path.clone(),
479 index: start + idx,
480 });
481 live.remove(idx);
482 }
483
484 // 2. Settle each target position left to right, maintaining the invariant
485 // `live[..p] == target[..p]`. New nodes are inserted; otherwise we
486 // relocate exactly one genuinely-moved node (the displaced occupant, or
487 // the wanted node if it's the one that moved) to its sorted destination —
488 // one Move per moved node, never disturbing the matched skeleton.
489 let mut p = 0;
490 while p < target.len() {
491 let want = target[p];
492 if live.get(p).copied() == Some(want) {
493 p += 1;
494 continue;
495 }
496 if let Key::New(j) = want {
497 out.push(Change::Insert {
498 path: path.clone(),
499 index: start + p,
500 node: bm[j].clone(),
501 });
502 live.insert(p, want);
503 p += 1;
504 continue;
505 }
506 // `want` is an original node not yet at `p`. Move the displaced occupant
507 // if it is the relocated one, else pull `want` (itself relocated) here.
508 let cur = live[p];
509 let mover = if is_moved(cur) { cur } else { want };
510 let from = live.iter().position(|&k| k == mover).unwrap();
511 let tmover = target_pos[&mover];
512 // Destination = number of currently-present elements that precede it in
513 // the target order (excluding the mover itself).
514 let dest = live
515 .iter()
516 .filter(|&&k| k != mover && target_pos[&k] < tmover)
517 .count();
518 out.push(Change::Move {
519 path: path.clone(),
520 from: start + from,
521 to: start + dest,
522 });
523 let k = live.remove(from);
524 live.insert(dest, k);
525 // Re-check `p` (the move may or may not have settled this slot).
526 }
527 debug_assert_eq!(live, target, "diff move simulation diverged from target");
528
529 // 3. Value fixes for modify/replace pairs, at their final positions (the
530 // structural layout is now settled, so index = start + bm index).
531 for (j, target_node) in bm.iter().enumerate() {
532 match bm_role.get(&j) {
533 Some(Role::Modify(d)) => {
534 path.push(start + j);
535 diff_node(&am[*d], target_node, path, out);
536 path.pop();
537 }
538 Some(Role::Replace(_)) => {
539 let mut child = path.clone();
540 child.push(start + j);
541 out.push(Change::Replace {
542 path: child,
543 node: target_node.clone(),
544 });
545 }
546 None => {}
547 }
548 }
549}
550
551/// Longest-common-subsequence alignment of two child slices, by node equality.
552fn lcs_align(a: &[Node], b: &[Node]) -> Vec<Step> {
553 let (m, n) = (a.len(), b.len());
554 if m == 0 {
555 return (0..n).map(Step::Ins).collect();
556 }
557 if n == 0 {
558 return (0..m).map(Step::Del).collect();
559 }
560 // dp[i][j] = LCS length of a[i..] vs b[j..].
561 let mut dp = vec![vec![0u32; n + 1]; m + 1];
562 for i in (0..m).rev() {
563 for j in (0..n).rev() {
564 dp[i][j] = if a[i] == b[j] {
565 dp[i + 1][j + 1] + 1
566 } else {
567 dp[i + 1][j].max(dp[i][j + 1])
568 };
569 }
570 }
571 let mut steps = Vec::with_capacity(m.max(n));
572 let (mut i, mut j) = (0usize, 0usize);
573 while i < m && j < n {
574 if a[i] == b[j] {
575 steps.push(Step::Match);
576 i += 1;
577 j += 1;
578 } else if dp[i + 1][j] >= dp[i][j + 1] {
579 steps.push(Step::Del(i));
580 i += 1;
581 } else {
582 steps.push(Step::Ins(j));
583 j += 1;
584 }
585 }
586 while i < m {
587 steps.push(Step::Del(i));
588 i += 1;
589 }
590 while j < n {
591 steps.push(Step::Ins(j));
592 j += 1;
593 }
594 steps
595}
596
597// ---- apply internals ----------------------------------------------------
598
599fn node_at_mut<'a>(
600 root: &'a mut Node,
601 path: &[usize],
602) -> std::result::Result<&'a mut Node, ApplyError> {
603 root.node_at_mut(path).ok_or_else(|| ApplyError {
604 path: path.to_vec(),
605 })
606}
607
608fn apply_one(root: &mut Node, change: &Change) -> std::result::Result<(), ApplyError> {
609 match change {
610 Change::SetAttr { path, key, value } => {
611 node_at_mut(root, path)?.set_attr(key.clone(), value.clone());
612 }
613 Change::RemoveAttr { path, key } => {
614 node_at_mut(root, path)?.remove_attr(key);
615 }
616 Change::SetText { path, text } => {
617 node_at_mut(root, path)?.text = text.clone();
618 }
619 Change::SetMarks { path, marks } => {
620 node_at_mut(root, path)?.marks = marks.clone();
621 }
622 Change::SetExtra { path, key, value } => {
623 node_at_mut(root, path)?
624 .extra
625 .insert(key.clone(), value.clone());
626 }
627 Change::RemoveExtra { path, key } => {
628 node_at_mut(root, path)?.extra.remove(key);
629 }
630 Change::Insert { path, index, node } => {
631 let parent = node_at_mut(root, path)?;
632 if *index > parent.child_count() {
633 let mut p = path.clone();
634 p.push(*index);
635 return Err(ApplyError { path: p });
636 }
637 parent.insert_child(*index, node.clone());
638 }
639 Change::Remove { path, index } => {
640 if node_at_mut(root, path)?.remove_child(*index).is_none() {
641 let mut p = path.clone();
642 p.push(*index);
643 return Err(ApplyError { path: p });
644 }
645 }
646 Change::Replace { path, node } => {
647 if path.is_empty() {
648 *root = node.clone();
649 } else {
650 let (parent_path, last) = path.split_at(path.len() - 1);
651 let parent = node_at_mut(root, parent_path)?;
652 if parent.replace_child(last[0], node.clone()).is_none() {
653 return Err(ApplyError { path: path.clone() });
654 }
655 }
656 }
657 Change::Move { path, from, to } => {
658 let parent = node_at_mut(root, path)?;
659 let len = parent.child_count();
660 // `from` indexes the live list; `to` indexes the list after removal,
661 // so its valid range is 0..=len-1.
662 if *from >= len || *to >= len {
663 let mut p = path.clone();
664 p.push(if *from >= len { *from } else { *to });
665 return Err(ApplyError { path: p });
666 }
667 let node = parent.remove_child(*from).expect("from bounds-checked");
668 parent.insert_child(*to, node);
669 }
670 }
671 Ok(())
672}