Skip to main content

tiptap_rusty_parser/
normalize.rs

1//! Canonicalize a [`Node`] tree: merge adjacent equal-mark text, drop empty
2//! text nodes, and (opt-in) prune empty non-text nodes.
3//!
4//! Normalization produces a stable canonical form: smaller [`diff`](crate::diff)
5//! output, cleaner roundtrips, and a single representation for trees that are
6//! semantically identical but split differently (e.g. `"foo"` as one text node
7//! vs `"fo"`+`"o"` with the same marks). Tuned with [`NormalizeOptions`] — a
8//! plain data struct (no closures), so the surface works over WASM/FFI.
9//!
10//! `normalize` is idempotent: `normalize(normalize(x)) == normalize(x)`.
11//!
12//! ```
13//! use tiptap_rusty_parser::Document;
14//! let mut doc = Document::from_json_str(
15//!     r#"{"type":"doc","content":[{"type":"paragraph","content":[
16//!         {"type":"text","text":"foo"},
17//!         {"type":"text","text":"bar"},
18//!         {"type":"text","text":""}
19//!     ]}]}"#,
20//! ).unwrap();
21//! doc.normalize();
22//! assert_eq!(doc.text_content(), "foobar");
23//! // The three text nodes collapsed into one.
24//! assert_eq!(doc.children()[0].child_count(), 1);
25//! ```
26
27use crate::node::Node;
28use serde::{Deserialize, Serialize};
29
30/// Options controlling [`Node::normalize_with`]. [`Default`] is the sensible
31/// hygiene pass: merge adjacent text and drop empty text, but keep empty nodes
32/// (an empty paragraph is structurally valid).
33#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
34#[serde(rename_all = "camelCase", default)]
35pub struct NormalizeOptions {
36    /// Merge consecutive text nodes that are identical except for `text`
37    /// (same `marks`, `attrs`, `extra`) by concatenating their `text`.
38    pub merge_adjacent_text: bool,
39    /// Drop text nodes whose `text` is empty (`""` or absent).
40    pub remove_empty_text: bool,
41    /// Drop non-text nodes whose `content` is an empty list (`Some([])`).
42    /// Off by default: an empty paragraph is valid. Absent (`None`) content is
43    /// always left untouched, preserving the empty-vs-missing distinction.
44    pub remove_empty_nodes: bool,
45}
46
47impl Default for NormalizeOptions {
48    fn default() -> Self {
49        Self {
50            merge_adjacent_text: true,
51            remove_empty_text: true,
52            remove_empty_nodes: false,
53        }
54    }
55}
56
57impl Node {
58    /// Normalize this subtree in place with default [`NormalizeOptions`].
59    pub fn normalize(&mut self) {
60        self.normalize_with(&NormalizeOptions::default());
61    }
62
63    /// Normalize this subtree in place with custom [`NormalizeOptions`].
64    pub fn normalize_with(&mut self, opts: &NormalizeOptions) {
65        if let Some(children) = self.content.as_mut() {
66            // Bottom-up: normalize each child first, so empties/merges that
67            // surface deeper propagate before this level compacts.
68            for child in children.iter_mut() {
69                child.normalize_with(opts);
70            }
71            normalize_children(children, opts);
72        }
73    }
74}
75
76#[inline]
77fn is_text(n: &Node) -> bool {
78    n.node_type.as_deref() == Some("text")
79}
80
81#[inline]
82fn is_empty_text(n: &Node) -> bool {
83    is_text(n) && n.text.as_deref().unwrap_or("").is_empty()
84}
85
86/// True if two text nodes differ only in their `text` payload.
87#[inline]
88fn mergeable(a: &Node, b: &Node) -> bool {
89    is_text(a) && is_text(b) && a.marks == b.marks && a.attrs == b.attrs && a.extra == b.extra
90}
91
92/// Canonicalize one parent's child list in place. Shared building block reused
93/// by inline range editing (merge-after-edit).
94pub(crate) fn normalize_children(children: &mut Vec<Node>, opts: &NormalizeOptions) {
95    if opts.remove_empty_text {
96        children.retain(|c| !is_empty_text(c));
97    }
98    if opts.merge_adjacent_text && !children.is_empty() {
99        // Single in-place pass: keep a write cursor `w` at the last retained
100        // node; fold each mergeable successor's text into it. O(n) (vs repeated
101        // Vec::remove), so merging a long run of text nodes stays linear.
102        let mut w = 0;
103        for r in 1..children.len() {
104            if mergeable(&children[w], &children[r]) {
105                if let Some(t) = children[r].text.take() {
106                    children[w]
107                        .text
108                        .get_or_insert_with(String::new)
109                        .push_str(&t);
110                }
111            } else {
112                w += 1;
113                if w != r {
114                    children.swap(w, r);
115                }
116            }
117        }
118        children.truncate(w + 1);
119    }
120    if opts.remove_empty_nodes {
121        children.retain(|c| is_text(c) || !c.content.as_ref().is_some_and(Vec::is_empty));
122    }
123}