Skip to main content

tiptap_rusty_parser/
diff.rs

1//! Structural diff: compute a path-addressed [`Change`] list between two
2//! [`Node`] trees, and [`apply`] it to reproduce the target.
3//!
4//! ```
5//! use tiptap_rusty_parser::Document;
6//!
7//! let a = Document::from_json_str(
8//!     r#"{"type":"doc","content":[{"type":"paragraph","content":[{"type":"text","text":"hi"}]}]}"#,
9//! ).unwrap();
10//! let b = Document::from_json_str(
11//!     r#"{"type":"doc","content":[{"type":"paragraph","content":[{"type":"text","text":"bye"}]}]}"#,
12//! ).unwrap();
13//!
14//! let changes = a.diff(&b);          // -> Vec<Change>
15//! let mut c = a.clone();
16//! c.apply(&changes).unwrap();        // reproduce `b`
17//! assert_eq!(c, b);
18//! ```
19//!
20//! ## Path convention
21//! Node-local changes ([`Change::SetAttr`], `RemoveAttr`, `SetText`, `SetMarks`,
22//! `SetExtra`, `RemoveExtra`, `Replace`) address the **target node** by its
23//! index path. Child-list changes ([`Change::Insert`], [`Change::Remove`])
24//! address the **parent** node, with `index` selecting the child — mirroring
25//! [`Node::insert_child`] / [`Node::remove_child`].
26//!
27//! ## Apply contract
28//! [`apply`] executes changes strictly in order; child `index` values are
29//! interpreted against the *live* (already-partially-mutated) list. A list
30//! produced by [`diff`] always reproduces the target exactly:
31//! `apply(&mut a, &diff(a, b))` yields `b`.
32//!
33//! Empty-vs-absent container shapes (e.g. `"content":[]` vs no `content`) are
34//! preserved: when the field/child ops can't express the difference, the node
35//! is replaced wholesale so the round-trip stays exact.
36//!
37//! ## v1 limitations
38//! - **No move detection**: a child relocated within a list is emitted as a
39//!   [`Change::Remove`] + [`Change::Insert`] (its subtree is cloned).
40//! - Child matching is LCS-by-equality; pathological reorders degrade to
41//!   remove+insert (still correct, just not minimal).
42
43use crate::node::{Mark, Node};
44use serde::{Deserialize, Serialize};
45use serde_json::{Map, Value};
46use std::fmt;
47
48/// A single structural change between two [`Node`] trees.
49///
50/// Serializes as a tagged object, e.g. `{"op":"setText","path":[0,0],"text":"hi"}`.
51#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
52#[serde(tag = "op", rename_all = "camelCase")]
53pub enum Change {
54    /// Set (insert or overwrite) attribute `key` on the node at `path`.
55    SetAttr {
56        /// Index path of the target node.
57        path: Vec<usize>,
58        /// Attribute key.
59        key: String,
60        /// New attribute value.
61        value: Value,
62    },
63    /// Remove attribute `key` from the node at `path`.
64    RemoveAttr {
65        /// Index path of the target node.
66        path: Vec<usize>,
67        /// Attribute key.
68        key: String,
69    },
70    /// Set the text payload of the node at `path` (`None` clears it).
71    SetText {
72        /// Index path of the target node.
73        path: Vec<usize>,
74        /// New text payload, or `None` to clear.
75        text: Option<String>,
76    },
77    /// Replace the whole mark list of the node at `path` (`None` clears it).
78    SetMarks {
79        /// Index path of the target node.
80        path: Vec<usize>,
81        /// New mark list, or `None` to clear.
82        marks: Option<Vec<Mark>>,
83    },
84    /// Set (insert or overwrite) unknown top-level field `key` on the node at `path`.
85    SetExtra {
86        /// Index path of the target node.
87        path: Vec<usize>,
88        /// Field key.
89        key: String,
90        /// New field value.
91        value: Value,
92    },
93    /// Remove unknown top-level field `key` from the node at `path`.
94    RemoveExtra {
95        /// Index path of the target node.
96        path: Vec<usize>,
97        /// Field key.
98        key: String,
99    },
100    /// Insert `node` as a child of the node at `path` (the parent), at `index`.
101    Insert {
102        /// Index path of the **parent** node.
103        path: Vec<usize>,
104        /// Child position to insert at.
105        index: usize,
106        /// The node to insert.
107        node: Node,
108    },
109    /// Remove the child at `index` of the node at `path` (the parent).
110    Remove {
111        /// Index path of the **parent** node.
112        path: Vec<usize>,
113        /// Child position to remove.
114        index: usize,
115    },
116    /// Replace the node at `path` wholesale (used when its `type` changes).
117    Replace {
118        /// Index path of the target node (empty = root).
119        path: Vec<usize>,
120        /// The replacement node.
121        node: Node,
122    },
123}
124
125/// Error from [`apply`] when a change can't be located (no node at the path, or
126/// a child index out of range). Lists produced by [`diff`] never fail.
127#[derive(Debug, Clone, PartialEq, Eq)]
128pub struct ApplyError {
129    /// The path that could not be resolved.
130    pub path: Vec<usize>,
131}
132
133impl fmt::Display for ApplyError {
134    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
135        write!(f, "apply: no node at path {:?}", self.path)
136    }
137}
138
139impl std::error::Error for ApplyError {}
140
141impl Node {
142    /// Structural diff from `self` to `other`: a [`Change`] list that, when
143    /// [`applied`](apply) to a clone of `self`, reproduces `other`.
144    pub fn diff(&self, other: &Node) -> Vec<Change> {
145        let mut out = Vec::new();
146        let mut path = Vec::new();
147        diff_node(self, other, &mut path, &mut out);
148        out
149    }
150
151    /// Apply `changes` to `self` in order. See [`apply`].
152    pub fn apply(&mut self, changes: &[Change]) -> std::result::Result<(), ApplyError> {
153        apply(self, changes)
154    }
155}
156
157/// Structural diff between two nodes. Free-function form of [`Node::diff`].
158pub fn diff(a: &Node, b: &Node) -> Vec<Change> {
159    a.diff(b)
160}
161
162/// Apply a [`Change`] list to `root` in order, mutating it in place.
163///
164/// Applying `diff(a, b)` to a clone of `a` reproduces `b` exactly. Returns
165/// [`ApplyError`] only for externally-authored lists whose paths/indices don't
166/// resolve.
167pub fn apply(root: &mut Node, changes: &[Change]) -> std::result::Result<(), ApplyError> {
168    for change in changes {
169        apply_one(root, change)?;
170    }
171    Ok(())
172}
173
174// ---- diff internals -----------------------------------------------------
175
176fn diff_node(a: &Node, b: &Node, path: &mut Vec<usize>, out: &mut Vec<Change>) {
177    if a == b {
178        return; // prune identical subtrees (the main perf lever)
179    }
180    if a.node_type != b.node_type
181        || empty_shape_mismatch(
182            a.attrs.as_ref().map(Map::is_empty),
183            b.attrs.as_ref().map(Map::is_empty),
184        )
185        || empty_shape_mismatch(
186            a.content.as_ref().map(Vec::is_empty),
187            b.content.as_ref().map(Vec::is_empty),
188        )
189    {
190        // Type change, or an empty-vs-None container shape the field/child ops
191        // can't express (e.g. `"content":[]` -> absent) -> wholesale replace.
192        out.push(Change::Replace {
193            path: path.clone(),
194            node: b.clone(),
195        });
196        return;
197    }
198    diff_attrs(a.attrs.as_ref(), b.attrs.as_ref(), path, out);
199    if a.text != b.text {
200        out.push(Change::SetText {
201            path: path.clone(),
202            text: b.text.clone(),
203        });
204    }
205    if a.marks != b.marks {
206        out.push(Change::SetMarks {
207            path: path.clone(),
208            marks: b.marks.clone(),
209        });
210    }
211    diff_extra(&a.extra, &b.extra, path, out);
212    diff_children(a.children(), b.children(), path, out);
213}
214
215/// Whether an `Option<container>` shape difference (where the arg is
216/// `Some(is_empty)` / `None`) can't be reconciled by the key/child ops, which
217/// normalize emptied containers to `None`. A present-but-empty container
218/// (`Some(true)`, e.g. parsed from `[]`/`{}`) needs an exact match on the other
219/// side; otherwise the node must be replaced wholesale to round-trip exactly.
220fn empty_shape_mismatch(a_is_empty: Option<bool>, b_is_empty: Option<bool>) -> bool {
221    match b_is_empty {
222        Some(true) => a_is_empty != Some(true),
223        None => a_is_empty == Some(true),
224        Some(false) => false,
225    }
226}
227
228fn diff_attrs(
229    a: Option<&Map<String, Value>>,
230    b: Option<&Map<String, Value>>,
231    path: &mut [usize],
232    out: &mut Vec<Change>,
233) {
234    let empty = Map::new();
235    let am = a.unwrap_or(&empty);
236    let bm = b.unwrap_or(&empty);
237    for (k, v) in bm {
238        if am.get(k) != Some(v) {
239            out.push(Change::SetAttr {
240                path: path.to_vec(),
241                key: k.clone(),
242                value: v.clone(),
243            });
244        }
245    }
246    for k in am.keys() {
247        if !bm.contains_key(k) {
248            out.push(Change::RemoveAttr {
249                path: path.to_vec(),
250                key: k.clone(),
251            });
252        }
253    }
254}
255
256fn diff_extra(
257    am: &Map<String, Value>,
258    bm: &Map<String, Value>,
259    path: &mut [usize],
260    out: &mut Vec<Change>,
261) {
262    for (k, v) in bm {
263        if am.get(k) != Some(v) {
264            out.push(Change::SetExtra {
265                path: path.to_vec(),
266                key: k.clone(),
267                value: v.clone(),
268            });
269        }
270    }
271    for k in am.keys() {
272        if !bm.contains_key(k) {
273            out.push(Change::RemoveExtra {
274                path: path.to_vec(),
275                key: k.clone(),
276            });
277        }
278    }
279}
280
281/// One LCS-alignment step over the (trimmed) middle child slices.
282enum Step {
283    Match,
284    Del(usize),
285    Ins(usize),
286}
287
288fn diff_children(a: &[Node], b: &[Node], path: &mut Vec<usize>, out: &mut Vec<Change>) {
289    // Trim common prefix/suffix (cheap; shrinks the LCS DP and handles the
290    // common append/prepend cases in linear time).
291    let mut start = 0;
292    while start < a.len() && start < b.len() && a[start] == b[start] {
293        start += 1;
294    }
295    let mut ea = a.len();
296    let mut eb = b.len();
297    while ea > start && eb > start && a[ea - 1] == b[eb - 1] {
298        ea -= 1;
299        eb -= 1;
300    }
301
302    let am = &a[start..ea];
303    let bm = &b[start..eb];
304    if am.is_empty() && bm.is_empty() {
305        return;
306    }
307
308    let steps = lcs_align(am, bm);
309    let mut cursor = start; // position in the live list (prefix kept at 0..start)
310    let mut dels: Vec<usize> = Vec::new();
311    let mut inss: Vec<usize> = Vec::new();
312    for step in steps {
313        match step {
314            Step::Match => {
315                flush_gap(am, bm, &dels, &inss, path, &mut cursor, out);
316                dels.clear();
317                inss.clear();
318                cursor += 1; // matched child kept in place
319            }
320            Step::Del(i) => dels.push(i),
321            Step::Ins(j) => inss.push(j),
322        }
323    }
324    flush_gap(am, bm, &dels, &inss, path, &mut cursor, out);
325}
326
327/// Emit ops for a gap of unmatched children. Same-type del/ins pairs recurse
328/// (a modify-in-place); otherwise they become remove+insert. Indices are
329/// against the live list (see module docs).
330fn flush_gap(
331    am: &[Node],
332    bm: &[Node],
333    dels: &[usize],
334    inss: &[usize],
335    path: &mut Vec<usize>,
336    cursor: &mut usize,
337    out: &mut Vec<Change>,
338) {
339    let pairs = dels.len().min(inss.len());
340    for k in 0..pairs {
341        let (ai, bj) = (dels[k], inss[k]);
342        if am[ai].node_type == bm[bj].node_type {
343            // same type: recurse for a minimal in-place modify
344            path.push(*cursor);
345            diff_node(&am[ai], &bm[bj], path, out);
346            path.pop();
347        } else {
348            // type changed: replace the child wholesale (1 op, vs remove+insert)
349            let mut child = path.clone();
350            child.push(*cursor);
351            out.push(Change::Replace {
352                path: child,
353                node: bm[bj].clone(),
354            });
355        }
356        *cursor += 1;
357    }
358    for _ in &dels[pairs..] {
359        out.push(Change::Remove {
360            path: path.clone(),
361            index: *cursor,
362        }); // no cursor advance: the next child shifts into this slot
363    }
364    for &bj in &inss[pairs..] {
365        out.push(Change::Insert {
366            path: path.clone(),
367            index: *cursor,
368            node: bm[bj].clone(),
369        });
370        *cursor += 1;
371    }
372}
373
374/// Longest-common-subsequence alignment of two child slices, by node equality.
375fn lcs_align(a: &[Node], b: &[Node]) -> Vec<Step> {
376    let (m, n) = (a.len(), b.len());
377    if m == 0 {
378        return (0..n).map(Step::Ins).collect();
379    }
380    if n == 0 {
381        return (0..m).map(Step::Del).collect();
382    }
383    // dp[i][j] = LCS length of a[i..] vs b[j..].
384    let mut dp = vec![vec![0u32; n + 1]; m + 1];
385    for i in (0..m).rev() {
386        for j in (0..n).rev() {
387            dp[i][j] = if a[i] == b[j] {
388                dp[i + 1][j + 1] + 1
389            } else {
390                dp[i + 1][j].max(dp[i][j + 1])
391            };
392        }
393    }
394    let mut steps = Vec::with_capacity(m.max(n));
395    let (mut i, mut j) = (0usize, 0usize);
396    while i < m && j < n {
397        if a[i] == b[j] {
398            steps.push(Step::Match);
399            i += 1;
400            j += 1;
401        } else if dp[i + 1][j] >= dp[i][j + 1] {
402            steps.push(Step::Del(i));
403            i += 1;
404        } else {
405            steps.push(Step::Ins(j));
406            j += 1;
407        }
408    }
409    while i < m {
410        steps.push(Step::Del(i));
411        i += 1;
412    }
413    while j < n {
414        steps.push(Step::Ins(j));
415        j += 1;
416    }
417    steps
418}
419
420// ---- apply internals ----------------------------------------------------
421
422fn node_at_mut<'a>(
423    root: &'a mut Node,
424    path: &[usize],
425) -> std::result::Result<&'a mut Node, ApplyError> {
426    root.node_at_mut(path).ok_or_else(|| ApplyError {
427        path: path.to_vec(),
428    })
429}
430
431fn apply_one(root: &mut Node, change: &Change) -> std::result::Result<(), ApplyError> {
432    match change {
433        Change::SetAttr { path, key, value } => {
434            node_at_mut(root, path)?.set_attr(key.clone(), value.clone());
435        }
436        Change::RemoveAttr { path, key } => {
437            node_at_mut(root, path)?.remove_attr(key);
438        }
439        Change::SetText { path, text } => {
440            node_at_mut(root, path)?.text = text.clone();
441        }
442        Change::SetMarks { path, marks } => {
443            node_at_mut(root, path)?.marks = marks.clone();
444        }
445        Change::SetExtra { path, key, value } => {
446            node_at_mut(root, path)?
447                .extra
448                .insert(key.clone(), value.clone());
449        }
450        Change::RemoveExtra { path, key } => {
451            node_at_mut(root, path)?.extra.remove(key);
452        }
453        Change::Insert { path, index, node } => {
454            let parent = node_at_mut(root, path)?;
455            if *index > parent.child_count() {
456                let mut p = path.clone();
457                p.push(*index);
458                return Err(ApplyError { path: p });
459            }
460            parent.insert_child(*index, node.clone());
461        }
462        Change::Remove { path, index } => {
463            if node_at_mut(root, path)?.remove_child(*index).is_none() {
464                let mut p = path.clone();
465                p.push(*index);
466                return Err(ApplyError { path: p });
467            }
468        }
469        Change::Replace { path, node } => {
470            if path.is_empty() {
471                *root = node.clone();
472            } else {
473                let (parent_path, last) = path.split_at(path.len() - 1);
474                let parent = node_at_mut(root, parent_path)?;
475                if parent.replace_child(last[0], node.clone()).is_none() {
476                    return Err(ApplyError { path: path.clone() });
477                }
478            }
479        }
480    }
481    Ok(())
482}