tiptap_rusty_parser/diff.rs
1//! Structural diff: compute a path-addressed [`Change`] list between two
2//! [`Node`] trees, and [`apply`] it to reproduce the target.
3//!
4//! ```
5//! use tiptap_rusty_parser::Document;
6//!
7//! let a = Document::from_json_str(
8//! r#"{"type":"doc","content":[{"type":"paragraph","content":[{"type":"text","text":"hi"}]}]}"#,
9//! ).unwrap();
10//! let b = Document::from_json_str(
11//! r#"{"type":"doc","content":[{"type":"paragraph","content":[{"type":"text","text":"bye"}]}]}"#,
12//! ).unwrap();
13//!
14//! let changes = a.diff(&b); // -> Vec<Change>
15//! let mut c = a.clone();
16//! c.apply(&changes).unwrap(); // reproduce `b`
17//! assert_eq!(c, b);
18//! ```
19//!
20//! ## Path convention
21//! Node-local changes ([`Change::SetAttr`], `RemoveAttr`, `SetText`, `SetMarks`,
22//! `SetExtra`, `RemoveExtra`, `Replace`) address the **target node** by its
23//! index path. Child-list changes ([`Change::Insert`], [`Change::Remove`])
24//! address the **parent** node, with `index` selecting the child — mirroring
25//! [`Node::insert_child`] / [`Node::remove_child`].
26//!
27//! ## Apply contract
28//! [`apply`] executes changes strictly in order; child `index` values are
29//! interpreted against the *live* (already-partially-mutated) list. A list
30//! produced by [`diff`] always reproduces the target exactly:
31//! `apply(&mut a, &diff(a, b))` yields `b`.
32//!
33//! Empty-vs-absent container shapes (e.g. `"content":[]` vs no `content`) are
34//! preserved: when the field/child ops can't express the difference, the node
35//! is replaced wholesale so the round-trip stays exact.
36//!
37//! ## v1 limitations
38//! - **No move detection**: a child relocated within a list is emitted as a
39//! [`Change::Remove`] + [`Change::Insert`] (its subtree is cloned).
40//! - Child matching is LCS-by-equality; pathological reorders degrade to
41//! remove+insert (still correct, just not minimal).
42
43use crate::node::{Mark, Node};
44use serde::{Deserialize, Serialize};
45use serde_json::{Map, Value};
46use std::fmt;
47
48/// A single structural change between two [`Node`] trees.
49///
50/// Serializes as a tagged object, e.g. `{"op":"setText","path":[0,0],"text":"hi"}`.
51#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
52#[serde(tag = "op", rename_all = "camelCase")]
53pub enum Change {
54 /// Set (insert or overwrite) attribute `key` on the node at `path`.
55 SetAttr {
56 /// Index path of the target node.
57 path: Vec<usize>,
58 /// Attribute key.
59 key: String,
60 /// New attribute value.
61 value: Value,
62 },
63 /// Remove attribute `key` from the node at `path`.
64 RemoveAttr {
65 /// Index path of the target node.
66 path: Vec<usize>,
67 /// Attribute key.
68 key: String,
69 },
70 /// Set the text payload of the node at `path` (`None` clears it).
71 SetText {
72 /// Index path of the target node.
73 path: Vec<usize>,
74 /// New text payload, or `None` to clear.
75 text: Option<String>,
76 },
77 /// Replace the whole mark list of the node at `path` (`None` clears it).
78 SetMarks {
79 /// Index path of the target node.
80 path: Vec<usize>,
81 /// New mark list, or `None` to clear.
82 marks: Option<Vec<Mark>>,
83 },
84 /// Set (insert or overwrite) unknown top-level field `key` on the node at `path`.
85 SetExtra {
86 /// Index path of the target node.
87 path: Vec<usize>,
88 /// Field key.
89 key: String,
90 /// New field value.
91 value: Value,
92 },
93 /// Remove unknown top-level field `key` from the node at `path`.
94 RemoveExtra {
95 /// Index path of the target node.
96 path: Vec<usize>,
97 /// Field key.
98 key: String,
99 },
100 /// Insert `node` as a child of the node at `path` (the parent), at `index`.
101 Insert {
102 /// Index path of the **parent** node.
103 path: Vec<usize>,
104 /// Child position to insert at.
105 index: usize,
106 /// The node to insert.
107 node: Node,
108 },
109 /// Remove the child at `index` of the node at `path` (the parent).
110 Remove {
111 /// Index path of the **parent** node.
112 path: Vec<usize>,
113 /// Child position to remove.
114 index: usize,
115 },
116 /// Replace the node at `path` wholesale (used when its `type` changes).
117 Replace {
118 /// Index path of the target node (empty = root).
119 path: Vec<usize>,
120 /// The replacement node.
121 node: Node,
122 },
123}
124
125/// Error from [`apply`] when a change can't be located (no node at the path, or
126/// a child index out of range). Lists produced by [`diff`] never fail.
127#[derive(Debug, Clone, PartialEq, Eq)]
128pub struct ApplyError {
129 /// The path that could not be resolved.
130 pub path: Vec<usize>,
131}
132
133impl fmt::Display for ApplyError {
134 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
135 write!(f, "apply: no node at path {:?}", self.path)
136 }
137}
138
139impl std::error::Error for ApplyError {}
140
141impl Node {
142 /// Structural diff from `self` to `other`: a [`Change`] list that, when
143 /// [`applied`](apply) to a clone of `self`, reproduces `other`.
144 pub fn diff(&self, other: &Node) -> Vec<Change> {
145 let mut out = Vec::new();
146 let mut path = Vec::new();
147 diff_node(self, other, &mut path, &mut out);
148 out
149 }
150
151 /// Apply `changes` to `self` in order. See [`apply`].
152 pub fn apply(&mut self, changes: &[Change]) -> std::result::Result<(), ApplyError> {
153 apply(self, changes)
154 }
155
156 /// Invert `changes` relative to `self` (the pre-image). See [`invert`].
157 pub fn invert(&self, changes: &[Change]) -> std::result::Result<Vec<Change>, ApplyError> {
158 invert(self, changes)
159 }
160}
161
162/// Structural diff between two nodes. Free-function form of [`Node::diff`].
163pub fn diff(a: &Node, b: &Node) -> Vec<Change> {
164 a.diff(b)
165}
166
167/// Apply a [`Change`] list to `root` in order, mutating it in place.
168///
169/// Applying `diff(a, b)` to a clone of `a` reproduces `b` exactly. Returns
170/// [`ApplyError`] only for externally-authored lists whose paths/indices don't
171/// resolve.
172pub fn apply(root: &mut Node, changes: &[Change]) -> std::result::Result<(), ApplyError> {
173 for change in changes {
174 apply_one(root, change)?;
175 }
176 Ok(())
177}
178
179/// Invert a change list: produce the reverse changes that, applied to the
180/// result of `apply(base, changes)`, restore `base` — the basis for undo.
181///
182/// Computed as `diff(apply(base, changes), base)`: replay the forward changes,
183/// then diff back to `base`. This reuses the diff round-trip guarantee, so it
184/// handles every value/shape edge exactly — subject to the same non-minimality
185/// caveats as [`diff`] (e.g. no move detection). Errors only if `changes`
186/// itself doesn't apply to `base`.
187///
188/// ```
189/// use tiptap_rusty_parser::Document;
190/// let a = Document::from_json_str(r#"{"type":"doc","content":[{"type":"paragraph"}]}"#).unwrap();
191/// let b = Document::from_json_str(r#"{"type":"doc","content":[{"type":"heading"}]}"#).unwrap();
192/// let forward = a.diff(&b);
193/// let undo = a.invert(&forward).unwrap();
194/// let mut c = b.clone();
195/// c.apply(&undo).unwrap();
196/// assert_eq!(c, a);
197/// ```
198pub fn invert(base: &Node, changes: &[Change]) -> std::result::Result<Vec<Change>, ApplyError> {
199 let mut result = base.clone();
200 apply(&mut result, changes)?;
201 Ok(result.diff(base))
202}
203
204// ---- diff internals -----------------------------------------------------
205
206fn diff_node(a: &Node, b: &Node, path: &mut Vec<usize>, out: &mut Vec<Change>) {
207 if a == b {
208 return; // prune identical subtrees (the main perf lever)
209 }
210 if a.node_type != b.node_type
211 || empty_shape_mismatch(
212 a.attrs.as_ref().map(Map::is_empty),
213 b.attrs.as_ref().map(Map::is_empty),
214 )
215 || empty_shape_mismatch(
216 a.content.as_ref().map(Vec::is_empty),
217 b.content.as_ref().map(Vec::is_empty),
218 )
219 {
220 // Type change, or an empty-vs-None container shape the field/child ops
221 // can't express (e.g. `"content":[]` -> absent) -> wholesale replace.
222 out.push(Change::Replace {
223 path: path.clone(),
224 node: b.clone(),
225 });
226 return;
227 }
228 diff_attrs(a.attrs.as_ref(), b.attrs.as_ref(), path, out);
229 if a.text != b.text {
230 out.push(Change::SetText {
231 path: path.clone(),
232 text: b.text.clone(),
233 });
234 }
235 if a.marks != b.marks {
236 out.push(Change::SetMarks {
237 path: path.clone(),
238 marks: b.marks.clone(),
239 });
240 }
241 diff_extra(&a.extra, &b.extra, path, out);
242 diff_children(a.children(), b.children(), path, out);
243}
244
245/// Whether an `Option<container>` shape difference (where the arg is
246/// `Some(is_empty)` / `None`) can't be reconciled by the key/child ops, which
247/// normalize emptied containers to `None`. A present-but-empty container
248/// (`Some(true)`, e.g. parsed from `[]`/`{}`) needs an exact match on the other
249/// side; otherwise the node must be replaced wholesale to round-trip exactly.
250fn empty_shape_mismatch(a_is_empty: Option<bool>, b_is_empty: Option<bool>) -> bool {
251 match b_is_empty {
252 Some(true) => a_is_empty != Some(true),
253 None => a_is_empty == Some(true),
254 Some(false) => false,
255 }
256}
257
258fn diff_attrs(
259 a: Option<&Map<String, Value>>,
260 b: Option<&Map<String, Value>>,
261 path: &mut [usize],
262 out: &mut Vec<Change>,
263) {
264 let empty = Map::new();
265 let am = a.unwrap_or(&empty);
266 let bm = b.unwrap_or(&empty);
267 for (k, v) in bm {
268 if am.get(k) != Some(v) {
269 out.push(Change::SetAttr {
270 path: path.to_vec(),
271 key: k.clone(),
272 value: v.clone(),
273 });
274 }
275 }
276 for k in am.keys() {
277 if !bm.contains_key(k) {
278 out.push(Change::RemoveAttr {
279 path: path.to_vec(),
280 key: k.clone(),
281 });
282 }
283 }
284}
285
286fn diff_extra(
287 am: &Map<String, Value>,
288 bm: &Map<String, Value>,
289 path: &mut [usize],
290 out: &mut Vec<Change>,
291) {
292 for (k, v) in bm {
293 if am.get(k) != Some(v) {
294 out.push(Change::SetExtra {
295 path: path.to_vec(),
296 key: k.clone(),
297 value: v.clone(),
298 });
299 }
300 }
301 for k in am.keys() {
302 if !bm.contains_key(k) {
303 out.push(Change::RemoveExtra {
304 path: path.to_vec(),
305 key: k.clone(),
306 });
307 }
308 }
309}
310
311/// One LCS-alignment step over the (trimmed) middle child slices.
312enum Step {
313 Match,
314 Del(usize),
315 Ins(usize),
316}
317
318fn diff_children(a: &[Node], b: &[Node], path: &mut Vec<usize>, out: &mut Vec<Change>) {
319 // Trim common prefix/suffix (cheap; shrinks the LCS DP and handles the
320 // common append/prepend cases in linear time).
321 let mut start = 0;
322 while start < a.len() && start < b.len() && a[start] == b[start] {
323 start += 1;
324 }
325 let mut ea = a.len();
326 let mut eb = b.len();
327 while ea > start && eb > start && a[ea - 1] == b[eb - 1] {
328 ea -= 1;
329 eb -= 1;
330 }
331
332 let am = &a[start..ea];
333 let bm = &b[start..eb];
334 if am.is_empty() && bm.is_empty() {
335 return;
336 }
337
338 let steps = lcs_align(am, bm);
339 let mut cursor = start; // position in the live list (prefix kept at 0..start)
340 let mut dels: Vec<usize> = Vec::new();
341 let mut inss: Vec<usize> = Vec::new();
342 for step in steps {
343 match step {
344 Step::Match => {
345 flush_gap(am, bm, &dels, &inss, path, &mut cursor, out);
346 dels.clear();
347 inss.clear();
348 cursor += 1; // matched child kept in place
349 }
350 Step::Del(i) => dels.push(i),
351 Step::Ins(j) => inss.push(j),
352 }
353 }
354 flush_gap(am, bm, &dels, &inss, path, &mut cursor, out);
355}
356
357/// Emit ops for a gap of unmatched children. Same-type del/ins pairs recurse
358/// (a modify-in-place); otherwise they become remove+insert. Indices are
359/// against the live list (see module docs).
360fn flush_gap(
361 am: &[Node],
362 bm: &[Node],
363 dels: &[usize],
364 inss: &[usize],
365 path: &mut Vec<usize>,
366 cursor: &mut usize,
367 out: &mut Vec<Change>,
368) {
369 let pairs = dels.len().min(inss.len());
370 for k in 0..pairs {
371 let (ai, bj) = (dels[k], inss[k]);
372 if am[ai].node_type == bm[bj].node_type {
373 // same type: recurse for a minimal in-place modify
374 path.push(*cursor);
375 diff_node(&am[ai], &bm[bj], path, out);
376 path.pop();
377 } else {
378 // type changed: replace the child wholesale (1 op, vs remove+insert)
379 let mut child = path.clone();
380 child.push(*cursor);
381 out.push(Change::Replace {
382 path: child,
383 node: bm[bj].clone(),
384 });
385 }
386 *cursor += 1;
387 }
388 for _ in &dels[pairs..] {
389 out.push(Change::Remove {
390 path: path.clone(),
391 index: *cursor,
392 }); // no cursor advance: the next child shifts into this slot
393 }
394 for &bj in &inss[pairs..] {
395 out.push(Change::Insert {
396 path: path.clone(),
397 index: *cursor,
398 node: bm[bj].clone(),
399 });
400 *cursor += 1;
401 }
402}
403
404/// Longest-common-subsequence alignment of two child slices, by node equality.
405fn lcs_align(a: &[Node], b: &[Node]) -> Vec<Step> {
406 let (m, n) = (a.len(), b.len());
407 if m == 0 {
408 return (0..n).map(Step::Ins).collect();
409 }
410 if n == 0 {
411 return (0..m).map(Step::Del).collect();
412 }
413 // dp[i][j] = LCS length of a[i..] vs b[j..].
414 let mut dp = vec![vec![0u32; n + 1]; m + 1];
415 for i in (0..m).rev() {
416 for j in (0..n).rev() {
417 dp[i][j] = if a[i] == b[j] {
418 dp[i + 1][j + 1] + 1
419 } else {
420 dp[i + 1][j].max(dp[i][j + 1])
421 };
422 }
423 }
424 let mut steps = Vec::with_capacity(m.max(n));
425 let (mut i, mut j) = (0usize, 0usize);
426 while i < m && j < n {
427 if a[i] == b[j] {
428 steps.push(Step::Match);
429 i += 1;
430 j += 1;
431 } else if dp[i + 1][j] >= dp[i][j + 1] {
432 steps.push(Step::Del(i));
433 i += 1;
434 } else {
435 steps.push(Step::Ins(j));
436 j += 1;
437 }
438 }
439 while i < m {
440 steps.push(Step::Del(i));
441 i += 1;
442 }
443 while j < n {
444 steps.push(Step::Ins(j));
445 j += 1;
446 }
447 steps
448}
449
450// ---- apply internals ----------------------------------------------------
451
452fn node_at_mut<'a>(
453 root: &'a mut Node,
454 path: &[usize],
455) -> std::result::Result<&'a mut Node, ApplyError> {
456 root.node_at_mut(path).ok_or_else(|| ApplyError {
457 path: path.to_vec(),
458 })
459}
460
461fn apply_one(root: &mut Node, change: &Change) -> std::result::Result<(), ApplyError> {
462 match change {
463 Change::SetAttr { path, key, value } => {
464 node_at_mut(root, path)?.set_attr(key.clone(), value.clone());
465 }
466 Change::RemoveAttr { path, key } => {
467 node_at_mut(root, path)?.remove_attr(key);
468 }
469 Change::SetText { path, text } => {
470 node_at_mut(root, path)?.text = text.clone();
471 }
472 Change::SetMarks { path, marks } => {
473 node_at_mut(root, path)?.marks = marks.clone();
474 }
475 Change::SetExtra { path, key, value } => {
476 node_at_mut(root, path)?
477 .extra
478 .insert(key.clone(), value.clone());
479 }
480 Change::RemoveExtra { path, key } => {
481 node_at_mut(root, path)?.extra.remove(key);
482 }
483 Change::Insert { path, index, node } => {
484 let parent = node_at_mut(root, path)?;
485 if *index > parent.child_count() {
486 let mut p = path.clone();
487 p.push(*index);
488 return Err(ApplyError { path: p });
489 }
490 parent.insert_child(*index, node.clone());
491 }
492 Change::Remove { path, index } => {
493 if node_at_mut(root, path)?.remove_child(*index).is_none() {
494 let mut p = path.clone();
495 p.push(*index);
496 return Err(ApplyError { path: p });
497 }
498 }
499 Change::Replace { path, node } => {
500 if path.is_empty() {
501 *root = node.clone();
502 } else {
503 let (parent_path, last) = path.split_at(path.len() - 1);
504 let parent = node_at_mut(root, parent_path)?;
505 if parent.replace_child(last[0], node.clone()).is_none() {
506 return Err(ApplyError { path: path.clone() });
507 }
508 }
509 }
510 }
511 Ok(())
512}