tiptap_rusty_parser/normalize.rs
1//! Canonicalize a [`Node`] tree: merge adjacent equal-mark text, drop empty
2//! text nodes, and (opt-in) prune empty non-text nodes.
3//!
4//! Normalization produces a stable canonical form: smaller [`diff`](crate::diff)
5//! output, cleaner roundtrips, and a single representation for trees that are
6//! semantically identical but split differently (e.g. `"foo"` as one text node
7//! vs `"fo"`+`"o"` with the same marks). Tuned with [`NormalizeOptions`] — a
8//! plain data struct (no closures), so the surface works over WASM/FFI.
9//!
10//! `normalize` is idempotent: `normalize(normalize(x)) == normalize(x)`.
11//!
12//! ```
13//! use tiptap_rusty_parser::Document;
14//! let mut doc = Document::from_json_str(
15//! r#"{"type":"doc","content":[{"type":"paragraph","content":[
16//! {"type":"text","text":"foo"},
17//! {"type":"text","text":"bar"},
18//! {"type":"text","text":""}
19//! ]}]}"#,
20//! ).unwrap();
21//! doc.normalize();
22//! assert_eq!(doc.text_content(), "foobar");
23//! // The three text nodes collapsed into one.
24//! assert_eq!(doc.children()[0].child_count(), 1);
25//! ```
26
27use crate::node::Node;
28use serde::{Deserialize, Serialize};
29
30/// Options controlling [`Node::normalize_with`]. [`Default`] is the sensible
31/// hygiene pass: merge adjacent text and drop empty text, but keep empty nodes
32/// (an empty paragraph is structurally valid).
33#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
34#[serde(rename_all = "camelCase", default)]
35pub struct NormalizeOptions {
36 /// Merge consecutive text nodes that are identical except for `text`
37 /// (same `marks`, `attrs`, `extra`) by concatenating their `text`.
38 pub merge_adjacent_text: bool,
39 /// Drop text nodes whose `text` is empty (`""` or absent).
40 pub remove_empty_text: bool,
41 /// Drop non-text nodes whose `content` is an empty list (`Some([])`).
42 /// Off by default: an empty paragraph is valid. Absent (`None`) content is
43 /// always left untouched, preserving the empty-vs-missing distinction.
44 pub remove_empty_nodes: bool,
45}
46
47impl Default for NormalizeOptions {
48 fn default() -> Self {
49 Self {
50 merge_adjacent_text: true,
51 remove_empty_text: true,
52 remove_empty_nodes: false,
53 }
54 }
55}
56
57impl Node {
58 /// Normalize this subtree in place with default [`NormalizeOptions`].
59 pub fn normalize(&mut self) {
60 self.normalize_with(&NormalizeOptions::default());
61 }
62
63 /// Normalize this subtree in place with custom [`NormalizeOptions`].
64 pub fn normalize_with(&mut self, opts: &NormalizeOptions) {
65 if let Some(children) = self.content.as_mut() {
66 // Bottom-up: normalize each child first, so empties/merges that
67 // surface deeper propagate before this level compacts.
68 for child in children.iter_mut() {
69 child.normalize_with(opts);
70 }
71 normalize_children(children, opts);
72 }
73 }
74}
75
76#[inline]
77fn is_text(n: &Node) -> bool {
78 n.node_type.as_deref() == Some("text")
79}
80
81#[inline]
82fn is_empty_text(n: &Node) -> bool {
83 is_text(n) && n.text.as_deref().unwrap_or("").is_empty()
84}
85
86/// True if two text nodes differ only in their `text` payload.
87#[inline]
88fn mergeable(a: &Node, b: &Node) -> bool {
89 is_text(a) && is_text(b) && a.marks == b.marks && a.attrs == b.attrs && a.extra == b.extra
90}
91
92/// Canonicalize one parent's child list in place. Shared building block reused
93/// by inline range editing (merge-after-edit).
94pub(crate) fn normalize_children(children: &mut Vec<Node>, opts: &NormalizeOptions) {
95 if opts.remove_empty_text {
96 children.retain(|c| !is_empty_text(c));
97 }
98 if opts.merge_adjacent_text && !children.is_empty() {
99 // Single in-place pass: keep a write cursor `w` at the last retained
100 // node; fold each mergeable successor's text into it. O(n) (vs repeated
101 // Vec::remove), so merging a long run of text nodes stays linear.
102 let mut w = 0;
103 for r in 1..children.len() {
104 if mergeable(&children[w], &children[r]) {
105 if let Some(t) = children[r].text.take() {
106 children[w]
107 .text
108 .get_or_insert_with(String::new)
109 .push_str(&t);
110 }
111 } else {
112 w += 1;
113 if w != r {
114 children.swap(w, r);
115 }
116 }
117 }
118 children.truncate(w + 1);
119 }
120 if opts.remove_empty_nodes {
121 children.retain(|c| is_text(c) || !c.content.as_ref().is_some_and(Vec::is_empty));
122 }
123}