Skip to main content

tiptap_rusty_parser/
text_diff.rs

1//! Character-level (inline) text diffing, and the granularity knob for
2//! [`Node::diff_with`](crate::Node::diff_with).
3//!
4//! The structural [`diff`](crate::diff) replaces a changed text node wholesale
5//! ([`Change::SetText`](crate::Change::SetText)). For AI/review flows you often
6//! want *character-level* edits instead — [`Change::SpliceText`](crate::Change::SpliceText)
7//! islands — so suggestions read as "inserted X / deleted Y" rather than
8//! "replaced the whole paragraph". The islands are minimal for typical edits
9//! (a single delete+insert fallback bounds cost on very large changed runs —
10//! see [`diff_text`]). [`diff_text`] computes the segments; [`DiffGranularity`]
11//! selects block vs inline vs a smart blend.
12
13use crate::diff::Change;
14use serde::{Deserialize, Serialize};
15
16/// Cap on the LCS table size (`|a| * |b|`); past it, a changed run is emitted as
17/// a single delete+insert instead of a minimal character diff (bounds cost).
18const LCS_GUARD: usize = 1 << 20;
19
20/// One segment of a character-level text diff.
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
22#[serde(rename_all = "camelCase")]
23pub enum SegKind {
24    /// Text common to both sides.
25    Keep,
26    /// Text present only in the new string.
27    Insert,
28    /// Text present only in the old string.
29    Delete,
30}
31
32/// A run of characters classified by a text diff.
33#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
34pub struct TextSegment {
35    /// Whether this run was kept, inserted, or deleted.
36    pub kind: SegKind,
37    /// The run's text.
38    pub text: String,
39}
40
41/// How [`Node::diff_with`](crate::Node::diff_with) treats changed text nodes.
42#[derive(Debug, Clone, Copy, PartialEq, Default, Serialize, Deserialize)]
43#[serde(rename_all = "camelCase", rename_all_fields = "camelCase")]
44pub enum DiffGranularity {
45    /// Whole-node text replacement (`SetText`) — the default [`diff`](crate::diff) behavior.
46    #[default]
47    Block,
48    /// Character-level `SpliceText` islands.
49    Inline,
50    /// Inline, but fall back to a whole `SetText` once the changed fraction
51    /// exceeds `replace_threshold` (0.0–1.0).
52    Smart {
53        /// Changed-scalar fraction above which a whole replacement is preferred.
54        replace_threshold: f32,
55    },
56}
57
58/// Options for [`Node::diff_with`](crate::Node::diff_with).
59#[derive(Debug, Clone, Copy, PartialEq, Default, Serialize, Deserialize)]
60#[serde(rename_all = "camelCase", default)]
61pub struct DiffOptions {
62    /// Text-node diff granularity.
63    pub text: DiffGranularity,
64}
65
66/// Character-level (Unicode-scalar) diff of two strings.
67///
68/// Trims the common prefix/suffix, then runs an LCS over the changed middle to
69/// produce minimal segments. For very large changed runs (when the middle's
70/// `|a| * |b|` exceeds [`LCS_GUARD`]) it falls back to a single delete + insert
71/// to bound cost — correct, but not minimal for those pathological inputs.
72///
73/// ```
74/// use tiptap_rusty_parser::{diff_text, SegKind};
75/// let segs = diff_text("kitten", "sitting");
76/// // The common run "itt" survives as a Keep segment.
77/// assert!(segs.iter().any(|s| s.kind == SegKind::Keep && s.text.contains("itt")));
78/// ```
79pub fn diff_text(a: &str, b: &str) -> Vec<TextSegment> {
80    let ac: Vec<char> = a.chars().collect();
81    let bc: Vec<char> = b.chars().collect();
82    let mut raw: Vec<(SegKind, String)> = Vec::new();
83
84    // Common prefix / suffix (linear; handles the common "small middle edit").
85    let mut p = 0;
86    while p < ac.len() && p < bc.len() && ac[p] == bc[p] {
87        p += 1;
88    }
89    let mut ea = ac.len();
90    let mut eb = bc.len();
91    while ea > p && eb > p && ac[ea - 1] == bc[eb - 1] {
92        ea -= 1;
93        eb -= 1;
94    }
95
96    if p > 0 {
97        raw.push((SegKind::Keep, ac[..p].iter().collect()));
98    }
99    let (ma, mb) = (&ac[p..ea], &bc[p..eb]);
100    if ma.is_empty() && mb.is_empty() {
101        // unchanged middle
102    } else if ma.is_empty() {
103        raw.push((SegKind::Insert, mb.iter().collect()));
104    } else if mb.is_empty() {
105        raw.push((SegKind::Delete, ma.iter().collect()));
106    } else if ma.len().saturating_mul(mb.len()) > LCS_GUARD {
107        raw.push((SegKind::Delete, ma.iter().collect()));
108        raw.push((SegKind::Insert, mb.iter().collect()));
109    } else {
110        lcs_segments(ma, mb, &mut raw);
111    }
112    if ea < ac.len() {
113        raw.push((SegKind::Keep, ac[ea..].iter().collect()));
114    }
115
116    coalesce(raw)
117}
118
119/// LCS-backtrack of two char slices, pushing per-char `(kind, char-as-string)`
120/// runs (merged by [`coalesce`]).
121fn lcs_segments(a: &[char], b: &[char], out: &mut Vec<(SegKind, String)>) {
122    let (m, n) = (a.len(), b.len());
123    let mut dp = vec![vec![0u32; n + 1]; m + 1];
124    for i in (0..m).rev() {
125        for j in (0..n).rev() {
126            dp[i][j] = if a[i] == b[j] {
127                dp[i + 1][j + 1] + 1
128            } else {
129                dp[i + 1][j].max(dp[i][j + 1])
130            };
131        }
132    }
133    let (mut i, mut j) = (0usize, 0usize);
134    let push = |out: &mut Vec<(SegKind, String)>, k: SegKind, c: char| {
135        out.push((k, c.to_string()));
136    };
137    while i < m && j < n {
138        if a[i] == b[j] {
139            push(out, SegKind::Keep, a[i]);
140            i += 1;
141            j += 1;
142        } else if dp[i + 1][j] >= dp[i][j + 1] {
143            push(out, SegKind::Delete, a[i]);
144            i += 1;
145        } else {
146            push(out, SegKind::Insert, b[j]);
147            j += 1;
148        }
149    }
150    while i < m {
151        push(out, SegKind::Delete, a[i]);
152        i += 1;
153    }
154    while j < n {
155        push(out, SegKind::Insert, b[j]);
156        j += 1;
157    }
158}
159
160/// Merge adjacent same-kind runs, dropping empties.
161fn coalesce(raw: Vec<(SegKind, String)>) -> Vec<TextSegment> {
162    let mut out: Vec<TextSegment> = Vec::new();
163    for (kind, text) in raw {
164        if text.is_empty() {
165            continue;
166        }
167        match out.last_mut() {
168            Some(last) if last.kind == kind => last.text.push_str(&text),
169            _ => out.push(TextSegment { kind, text }),
170        }
171    }
172    out
173}
174
175/// Total scalars touched (inserted + deleted) by a segment list.
176pub(crate) fn changed_scalars(segs: &[TextSegment]) -> usize {
177    segs.iter()
178        .filter(|s| s.kind != SegKind::Keep)
179        .map(|s| s.text.chars().count())
180        .sum()
181}
182
183/// Convert a segment list (a→b) into minimal `SpliceText` changes on `path`.
184/// Each maximal non-`Keep` island between kept runs becomes one splice.
185///
186/// Splices carry **original-string** scalar offsets, so they're emitted
187/// **highest-offset first**: applied in that order, each edit leaves every
188/// lower offset untouched, so the un-rebased offsets stay valid.
189pub(crate) fn splices_from_segments(path: &[usize], segs: &[TextSegment], out: &mut Vec<Change>) {
190    let mut islands: Vec<Change> = Vec::new();
191    let mut cursor = 0usize; // scalar offset in the original (old) string
192    let mut i = 0;
193    while i < segs.len() {
194        if segs[i].kind == SegKind::Keep {
195            cursor += segs[i].text.chars().count();
196            i += 1;
197            continue;
198        }
199        let from = cursor;
200        let mut len_del = 0usize;
201        let mut insert = String::new();
202        while i < segs.len() && segs[i].kind != SegKind::Keep {
203            match segs[i].kind {
204                SegKind::Delete => {
205                    let n = segs[i].text.chars().count();
206                    len_del += n;
207                    cursor += n;
208                }
209                SegKind::Insert => insert.push_str(&segs[i].text),
210                SegKind::Keep => unreachable!(),
211            }
212            i += 1;
213        }
214        islands.push(Change::SpliceText {
215            path: path.to_vec(),
216            from,
217            len_del,
218            insert,
219        });
220    }
221    out.extend(islands.into_iter().rev());
222}