Skip to main content

tiptap_rusty_parser/
diff.rs

1//! Structural diff: compute a path-addressed [`Change`] list between two
2//! [`Node`] trees, and [`apply`] it to reproduce the target.
3//!
4//! ```
5//! use tiptap_rusty_parser::Document;
6//!
7//! let a = Document::from_json_str(
8//!     r#"{"type":"doc","content":[{"type":"paragraph","content":[{"type":"text","text":"hi"}]}]}"#,
9//! ).unwrap();
10//! let b = Document::from_json_str(
11//!     r#"{"type":"doc","content":[{"type":"paragraph","content":[{"type":"text","text":"bye"}]}]}"#,
12//! ).unwrap();
13//!
14//! let changes = a.diff(&b);          // -> Vec<Change>
15//! let mut c = a.clone();
16//! c.apply(&changes).unwrap();        // reproduce `b`
17//! assert_eq!(c, b);
18//! ```
19//!
20//! ## Path convention
21//! Node-local changes ([`Change::SetAttr`], `RemoveAttr`, `SetText`, `SetMarks`,
22//! `SetExtra`, `RemoveExtra`, `Replace`) address the **target node** by its
23//! index path. Child-list changes ([`Change::Insert`], [`Change::Remove`])
24//! address the **parent** node, with `index` selecting the child — mirroring
25//! [`Node::insert_child`] / [`Node::remove_child`].
26//!
27//! ## Apply contract
28//! [`apply`] executes changes strictly in order; child `index` values are
29//! interpreted against the *live* (already-partially-mutated) list. A list
30//! produced by [`diff`] always reproduces the target exactly:
31//! `apply(&mut a, &diff(a, b))` yields `b`.
32//!
33//! Empty-vs-absent container shapes (e.g. `"content":[]` vs no `content`) are
34//! preserved: when the field/child ops can't express the difference, the node
35//! is replaced wholesale so the round-trip stays exact.
36//!
37//! ## v1 limitations
38//! - **No move detection**: a child relocated within a list is emitted as a
39//!   [`Change::Remove`] + [`Change::Insert`] (its subtree is cloned).
40//! - Child matching is LCS-by-equality; pathological reorders degrade to
41//!   remove+insert (still correct, just not minimal).
42
43use crate::node::{Mark, Node};
44use serde::{Deserialize, Serialize};
45use serde_json::{Map, Value};
46use std::fmt;
47
48/// A single structural change between two [`Node`] trees.
49///
50/// Serializes as a tagged object, e.g. `{"op":"setText","path":[0,0],"text":"hi"}`.
51#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
52#[serde(tag = "op", rename_all = "camelCase")]
53pub enum Change {
54    /// Set (insert or overwrite) attribute `key` on the node at `path`.
55    SetAttr {
56        /// Index path of the target node.
57        path: Vec<usize>,
58        /// Attribute key.
59        key: String,
60        /// New attribute value.
61        value: Value,
62    },
63    /// Remove attribute `key` from the node at `path`.
64    RemoveAttr {
65        /// Index path of the target node.
66        path: Vec<usize>,
67        /// Attribute key.
68        key: String,
69    },
70    /// Set the text payload of the node at `path` (`None` clears it).
71    SetText {
72        /// Index path of the target node.
73        path: Vec<usize>,
74        /// New text payload, or `None` to clear.
75        text: Option<String>,
76    },
77    /// Replace the whole mark list of the node at `path` (`None` clears it).
78    SetMarks {
79        /// Index path of the target node.
80        path: Vec<usize>,
81        /// New mark list, or `None` to clear.
82        marks: Option<Vec<Mark>>,
83    },
84    /// Set (insert or overwrite) unknown top-level field `key` on the node at `path`.
85    SetExtra {
86        /// Index path of the target node.
87        path: Vec<usize>,
88        /// Field key.
89        key: String,
90        /// New field value.
91        value: Value,
92    },
93    /// Remove unknown top-level field `key` from the node at `path`.
94    RemoveExtra {
95        /// Index path of the target node.
96        path: Vec<usize>,
97        /// Field key.
98        key: String,
99    },
100    /// Insert `node` as a child of the node at `path` (the parent), at `index`.
101    Insert {
102        /// Index path of the **parent** node.
103        path: Vec<usize>,
104        /// Child position to insert at.
105        index: usize,
106        /// The node to insert.
107        node: Node,
108    },
109    /// Remove the child at `index` of the node at `path` (the parent).
110    Remove {
111        /// Index path of the **parent** node.
112        path: Vec<usize>,
113        /// Child position to remove.
114        index: usize,
115    },
116    /// Replace the node at `path` wholesale (used when its `type` changes).
117    Replace {
118        /// Index path of the target node (empty = root).
119        path: Vec<usize>,
120        /// The replacement node.
121        node: Node,
122    },
123}
124
125/// Error from [`apply`] when a change can't be located (no node at the path, or
126/// a child index out of range). Lists produced by [`diff`] never fail.
127#[derive(Debug, Clone, PartialEq, Eq)]
128pub struct ApplyError {
129    /// The path that could not be resolved.
130    pub path: Vec<usize>,
131}
132
133impl fmt::Display for ApplyError {
134    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
135        write!(f, "apply: no node at path {:?}", self.path)
136    }
137}
138
139impl std::error::Error for ApplyError {}
140
141impl Node {
142    /// Structural diff from `self` to `other`: a [`Change`] list that, when
143    /// [`applied`](apply) to a clone of `self`, reproduces `other`.
144    pub fn diff(&self, other: &Node) -> Vec<Change> {
145        let mut out = Vec::new();
146        let mut path = Vec::new();
147        diff_node(self, other, &mut path, &mut out);
148        out
149    }
150
151    /// Apply `changes` to `self` in order. See [`apply`].
152    pub fn apply(&mut self, changes: &[Change]) -> std::result::Result<(), ApplyError> {
153        apply(self, changes)
154    }
155
156    /// Invert `changes` relative to `self` (the pre-image). See [`invert`].
157    pub fn invert(&self, changes: &[Change]) -> std::result::Result<Vec<Change>, ApplyError> {
158        invert(self, changes)
159    }
160}
161
162/// Structural diff between two nodes. Free-function form of [`Node::diff`].
163pub fn diff(a: &Node, b: &Node) -> Vec<Change> {
164    a.diff(b)
165}
166
167/// Apply a [`Change`] list to `root` in order, mutating it in place.
168///
169/// Applying `diff(a, b)` to a clone of `a` reproduces `b` exactly. Returns
170/// [`ApplyError`] only for externally-authored lists whose paths/indices don't
171/// resolve.
172pub fn apply(root: &mut Node, changes: &[Change]) -> std::result::Result<(), ApplyError> {
173    for change in changes {
174        apply_one(root, change)?;
175    }
176    Ok(())
177}
178
179/// Invert a change list: produce the reverse changes that, applied to the
180/// result of `apply(base, changes)`, restore `base` — the basis for undo.
181///
182/// Computed as `diff(apply(base, changes), base)`: replay the forward changes,
183/// then diff back to `base`. This reuses the diff round-trip guarantee, so it
184/// handles every value/shape edge exactly — subject to the same non-minimality
185/// caveats as [`diff`] (e.g. no move detection). Errors only if `changes`
186/// itself doesn't apply to `base`.
187///
188/// ```
189/// use tiptap_rusty_parser::Document;
190/// let a = Document::from_json_str(r#"{"type":"doc","content":[{"type":"paragraph"}]}"#).unwrap();
191/// let b = Document::from_json_str(r#"{"type":"doc","content":[{"type":"heading"}]}"#).unwrap();
192/// let forward = a.diff(&b);
193/// let undo = a.invert(&forward).unwrap();
194/// let mut c = b.clone();
195/// c.apply(&undo).unwrap();
196/// assert_eq!(c, a);
197/// ```
198pub fn invert(base: &Node, changes: &[Change]) -> std::result::Result<Vec<Change>, ApplyError> {
199    let mut result = base.clone();
200    apply(&mut result, changes)?;
201    Ok(result.diff(base))
202}
203
204// ---- diff internals -----------------------------------------------------
205
206fn diff_node(a: &Node, b: &Node, path: &mut Vec<usize>, out: &mut Vec<Change>) {
207    if a == b {
208        return; // prune identical subtrees (the main perf lever)
209    }
210    if a.node_type != b.node_type
211        || empty_shape_mismatch(
212            a.attrs.as_ref().map(Map::is_empty),
213            b.attrs.as_ref().map(Map::is_empty),
214        )
215        || empty_shape_mismatch(
216            a.content.as_ref().map(Vec::is_empty),
217            b.content.as_ref().map(Vec::is_empty),
218        )
219    {
220        // Type change, or an empty-vs-None container shape the field/child ops
221        // can't express (e.g. `"content":[]` -> absent) -> wholesale replace.
222        out.push(Change::Replace {
223            path: path.clone(),
224            node: b.clone(),
225        });
226        return;
227    }
228    diff_attrs(a.attrs.as_ref(), b.attrs.as_ref(), path, out);
229    if a.text != b.text {
230        out.push(Change::SetText {
231            path: path.clone(),
232            text: b.text.clone(),
233        });
234    }
235    if a.marks != b.marks {
236        out.push(Change::SetMarks {
237            path: path.clone(),
238            marks: b.marks.clone(),
239        });
240    }
241    diff_extra(&a.extra, &b.extra, path, out);
242    diff_children(a.children(), b.children(), path, out);
243}
244
245/// Whether an `Option<container>` shape difference (where the arg is
246/// `Some(is_empty)` / `None`) can't be reconciled by the key/child ops, which
247/// normalize emptied containers to `None`. A present-but-empty container
248/// (`Some(true)`, e.g. parsed from `[]`/`{}`) needs an exact match on the other
249/// side; otherwise the node must be replaced wholesale to round-trip exactly.
250fn empty_shape_mismatch(a_is_empty: Option<bool>, b_is_empty: Option<bool>) -> bool {
251    match b_is_empty {
252        Some(true) => a_is_empty != Some(true),
253        None => a_is_empty == Some(true),
254        Some(false) => false,
255    }
256}
257
258fn diff_attrs(
259    a: Option<&Map<String, Value>>,
260    b: Option<&Map<String, Value>>,
261    path: &mut [usize],
262    out: &mut Vec<Change>,
263) {
264    let empty = Map::new();
265    let am = a.unwrap_or(&empty);
266    let bm = b.unwrap_or(&empty);
267    for (k, v) in bm {
268        if am.get(k) != Some(v) {
269            out.push(Change::SetAttr {
270                path: path.to_vec(),
271                key: k.clone(),
272                value: v.clone(),
273            });
274        }
275    }
276    for k in am.keys() {
277        if !bm.contains_key(k) {
278            out.push(Change::RemoveAttr {
279                path: path.to_vec(),
280                key: k.clone(),
281            });
282        }
283    }
284}
285
286fn diff_extra(
287    am: &Map<String, Value>,
288    bm: &Map<String, Value>,
289    path: &mut [usize],
290    out: &mut Vec<Change>,
291) {
292    for (k, v) in bm {
293        if am.get(k) != Some(v) {
294            out.push(Change::SetExtra {
295                path: path.to_vec(),
296                key: k.clone(),
297                value: v.clone(),
298            });
299        }
300    }
301    for k in am.keys() {
302        if !bm.contains_key(k) {
303            out.push(Change::RemoveExtra {
304                path: path.to_vec(),
305                key: k.clone(),
306            });
307        }
308    }
309}
310
311/// One LCS-alignment step over the (trimmed) middle child slices.
312enum Step {
313    Match,
314    Del(usize),
315    Ins(usize),
316}
317
318fn diff_children(a: &[Node], b: &[Node], path: &mut Vec<usize>, out: &mut Vec<Change>) {
319    // Trim common prefix/suffix (cheap; shrinks the LCS DP and handles the
320    // common append/prepend cases in linear time).
321    let mut start = 0;
322    while start < a.len() && start < b.len() && a[start] == b[start] {
323        start += 1;
324    }
325    let mut ea = a.len();
326    let mut eb = b.len();
327    while ea > start && eb > start && a[ea - 1] == b[eb - 1] {
328        ea -= 1;
329        eb -= 1;
330    }
331
332    let am = &a[start..ea];
333    let bm = &b[start..eb];
334    if am.is_empty() && bm.is_empty() {
335        return;
336    }
337
338    let steps = lcs_align(am, bm);
339    let mut cursor = start; // position in the live list (prefix kept at 0..start)
340    let mut dels: Vec<usize> = Vec::new();
341    let mut inss: Vec<usize> = Vec::new();
342    for step in steps {
343        match step {
344            Step::Match => {
345                flush_gap(am, bm, &dels, &inss, path, &mut cursor, out);
346                dels.clear();
347                inss.clear();
348                cursor += 1; // matched child kept in place
349            }
350            Step::Del(i) => dels.push(i),
351            Step::Ins(j) => inss.push(j),
352        }
353    }
354    flush_gap(am, bm, &dels, &inss, path, &mut cursor, out);
355}
356
357/// Emit ops for a gap of unmatched children. Same-type del/ins pairs recurse
358/// (a modify-in-place); otherwise they become remove+insert. Indices are
359/// against the live list (see module docs).
360fn flush_gap(
361    am: &[Node],
362    bm: &[Node],
363    dels: &[usize],
364    inss: &[usize],
365    path: &mut Vec<usize>,
366    cursor: &mut usize,
367    out: &mut Vec<Change>,
368) {
369    let pairs = dels.len().min(inss.len());
370    for k in 0..pairs {
371        let (ai, bj) = (dels[k], inss[k]);
372        if am[ai].node_type == bm[bj].node_type {
373            // same type: recurse for a minimal in-place modify
374            path.push(*cursor);
375            diff_node(&am[ai], &bm[bj], path, out);
376            path.pop();
377        } else {
378            // type changed: replace the child wholesale (1 op, vs remove+insert)
379            let mut child = path.clone();
380            child.push(*cursor);
381            out.push(Change::Replace {
382                path: child,
383                node: bm[bj].clone(),
384            });
385        }
386        *cursor += 1;
387    }
388    for _ in &dels[pairs..] {
389        out.push(Change::Remove {
390            path: path.clone(),
391            index: *cursor,
392        }); // no cursor advance: the next child shifts into this slot
393    }
394    for &bj in &inss[pairs..] {
395        out.push(Change::Insert {
396            path: path.clone(),
397            index: *cursor,
398            node: bm[bj].clone(),
399        });
400        *cursor += 1;
401    }
402}
403
404/// Longest-common-subsequence alignment of two child slices, by node equality.
405fn lcs_align(a: &[Node], b: &[Node]) -> Vec<Step> {
406    let (m, n) = (a.len(), b.len());
407    if m == 0 {
408        return (0..n).map(Step::Ins).collect();
409    }
410    if n == 0 {
411        return (0..m).map(Step::Del).collect();
412    }
413    // dp[i][j] = LCS length of a[i..] vs b[j..].
414    let mut dp = vec![vec![0u32; n + 1]; m + 1];
415    for i in (0..m).rev() {
416        for j in (0..n).rev() {
417            dp[i][j] = if a[i] == b[j] {
418                dp[i + 1][j + 1] + 1
419            } else {
420                dp[i + 1][j].max(dp[i][j + 1])
421            };
422        }
423    }
424    let mut steps = Vec::with_capacity(m.max(n));
425    let (mut i, mut j) = (0usize, 0usize);
426    while i < m && j < n {
427        if a[i] == b[j] {
428            steps.push(Step::Match);
429            i += 1;
430            j += 1;
431        } else if dp[i + 1][j] >= dp[i][j + 1] {
432            steps.push(Step::Del(i));
433            i += 1;
434        } else {
435            steps.push(Step::Ins(j));
436            j += 1;
437        }
438    }
439    while i < m {
440        steps.push(Step::Del(i));
441        i += 1;
442    }
443    while j < n {
444        steps.push(Step::Ins(j));
445        j += 1;
446    }
447    steps
448}
449
450// ---- apply internals ----------------------------------------------------
451
452fn node_at_mut<'a>(
453    root: &'a mut Node,
454    path: &[usize],
455) -> std::result::Result<&'a mut Node, ApplyError> {
456    root.node_at_mut(path).ok_or_else(|| ApplyError {
457        path: path.to_vec(),
458    })
459}
460
461fn apply_one(root: &mut Node, change: &Change) -> std::result::Result<(), ApplyError> {
462    match change {
463        Change::SetAttr { path, key, value } => {
464            node_at_mut(root, path)?.set_attr(key.clone(), value.clone());
465        }
466        Change::RemoveAttr { path, key } => {
467            node_at_mut(root, path)?.remove_attr(key);
468        }
469        Change::SetText { path, text } => {
470            node_at_mut(root, path)?.text = text.clone();
471        }
472        Change::SetMarks { path, marks } => {
473            node_at_mut(root, path)?.marks = marks.clone();
474        }
475        Change::SetExtra { path, key, value } => {
476            node_at_mut(root, path)?
477                .extra
478                .insert(key.clone(), value.clone());
479        }
480        Change::RemoveExtra { path, key } => {
481            node_at_mut(root, path)?.extra.remove(key);
482        }
483        Change::Insert { path, index, node } => {
484            let parent = node_at_mut(root, path)?;
485            if *index > parent.child_count() {
486                let mut p = path.clone();
487                p.push(*index);
488                return Err(ApplyError { path: p });
489            }
490            parent.insert_child(*index, node.clone());
491        }
492        Change::Remove { path, index } => {
493            if node_at_mut(root, path)?.remove_child(*index).is_none() {
494                let mut p = path.clone();
495                p.push(*index);
496                return Err(ApplyError { path: p });
497            }
498        }
499        Change::Replace { path, node } => {
500            if path.is_empty() {
501                *root = node.clone();
502            } else {
503                let (parent_path, last) = path.split_at(path.len() - 1);
504                let parent = node_at_mut(root, parent_path)?;
505                if parent.replace_child(last[0], node.clone()).is_none() {
506                    return Err(ApplyError { path: path.clone() });
507                }
508            }
509        }
510    }
511    Ok(())
512}