Skip to main content

tiptap_rusty_parser/
html.rs

1//! Render [`Node`] trees to an HTML string (`to_html`).
2//!
3//! Schema-agnostic with Tiptap-sensible defaults: `paragraph`→`<p>`,
4//! `heading`→`<h1>`..`<h6>`, marks like `bold`→`<strong>`, etc. Output is
5//! compact; **text content and attribute values are HTML-escaped** (the element
6//! tags themselves are emitted as markup). Behavior is tuned with [`HtmlOptions`]
7//! — a plain data struct (no closures), so the same surface works over WASM/FFI.
8//!
9//! Escaping is not sanitization: URLs (e.g. a `link` `href`) are emitted as-is,
10//! so a `javascript:` href survives. Sanitize output from untrusted documents.
11//!
12//! ```
13//! use tiptap_rusty_parser::Document;
14//! let doc = Document::from_json_str(
15//!     r#"{"type":"doc","content":[{"type":"paragraph","content":[
16//!         {"type":"text","text":"hi","marks":[{"type":"bold"}]}]}]}"#,
17//! ).unwrap();
18//! assert_eq!(doc.to_html(), "<p><strong>hi</strong></p>");
19//! ```
20
21use crate::node::{Mark, Node};
22use serde::{Deserialize, Serialize};
23use serde_json::Value;
24use std::borrow::Cow;
25use std::collections::HashMap;
26
/// How to render a node whose type has no built-in or configured mapping.
///
/// With `rename_all = "camelCase"` the variants (de)serialize as
/// `"transparent"`, `"dataTypeDiv"` and `"skip"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum UnknownNodePolicy {
    /// Render the children with no wrapping element (default).
    #[default]
    Transparent,
    /// Wrap the children in `<div data-type="…">`; the attribute value is the
    /// node's type name, attribute-escaped.
    DataTypeDiv,
    /// Emit nothing (drop the subtree).
    Skip,
}
39
/// How to render a mark whose type has no built-in or configured mapping.
///
/// With `rename_all = "camelCase"` the variants (de)serialize as
/// `"transparent"`, `"dataMarkSpan"` and `"skip"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum UnknownMarkPolicy {
    /// Render the text without the mark's element (default).
    #[default]
    Transparent,
    /// Wrap the text in `<span data-mark="…">`; the attribute value is the
    /// mark's type name, attribute-escaped.
    DataMarkSpan,
    /// Drop the marked text entirely. One unknown mark on a text node is enough
    /// to drop that node, even if its other marks are known.
    Skip,
}
52
/// Whether void elements self-close (`<br/>`) or use the HTML5 form (`<br>`).
///
/// With `rename_all = "camelCase"` the variants (de)serialize as `"html5"`
/// and `"xhtml"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum SelfClosingStyle {
    /// `<br>`, `<hr>`, `<img …>` (default).
    #[default]
    Html5,
    /// `<br/>`, `<hr/>`, `<img …/>`.
    Xhtml,
}
63
/// Options controlling HTML rendering. [`Default`] matches Tiptap conventions.
///
/// Serialized with camelCase field names (`nodeTags`, `spreadAttrs`, …). Because
/// of the container-level `default`, any field missing from the input takes its
/// value from [`HtmlOptions::default`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", default)]
pub struct HtmlOptions {
    /// Override/extend the node→tag map with simple wrappers (e.g. `callout`→`aside`).
    ///
    /// An entry here wins over the built-in mapping for the same node type and
    /// replaces its special attribute handling: only the bare tag (plus spread
    /// attrs, when enabled) is emitted. The tag string is written verbatim.
    pub node_tags: HashMap<String, String>,
    /// Override/extend the mark→tag map with simple wrappers (e.g. `highlight`→`mark`).
    ///
    /// An entry here wins over the built-in mapping and is emitted as a bare
    /// tag with no attributes. The tag string is written verbatim.
    pub mark_tags: HashMap<String, String>,
    /// How to render unknown node types.
    pub unknown_node: UnknownNodePolicy,
    /// How to render unknown mark types.
    pub unknown_mark: UnknownMarkPolicy,
    /// Void-element style.
    pub self_closing: SelfClosingStyle,
    /// Emit a node's remaining (non-structural) `attrs` as HTML attributes.
    /// Off by default — arbitrary attribute names are an injection footgun.
    pub spread_attrs: bool,
    /// Emit `style="text-align:…"` for `paragraph`/`heading` `textAlign` attrs.
    /// On by default.
    pub text_align: bool,
}
84
85impl Default for HtmlOptions {
86    fn default() -> Self {
87        Self {
88            node_tags: HashMap::new(),
89            mark_tags: HashMap::new(),
90            unknown_node: UnknownNodePolicy::default(),
91            unknown_mark: UnknownMarkPolicy::default(),
92            self_closing: SelfClosingStyle::default(),
93            spread_attrs: false,
94            text_align: true,
95        }
96    }
97}
98
99impl Node {
100    /// Render this node (and its subtree) to an HTML string with default options.
101    pub fn to_html(&self) -> String {
102        self.to_html_with(&HtmlOptions::default())
103    }
104
105    /// Render to HTML with custom [`HtmlOptions`].
106    pub fn to_html_with(&self, opts: &HtmlOptions) -> String {
107        let mut out = String::with_capacity(256);
108        render_node(self, opts, &mut out);
109        out
110    }
111}
112
113/// Render a node to an HTML string. Free-function form of [`Node::to_html`].
114pub fn to_html(node: &Node) -> String {
115    node.to_html()
116}
117
118// ---- rendering ----------------------------------------------------------
119
120fn render_node(n: &Node, opts: &HtmlOptions, out: &mut String) {
121    let ty = match n.node_type.as_deref() {
122        None => return render_children(n, opts, out),
123        Some(t) => t,
124    };
125    match ty {
126        "text" => return render_text(n, opts, out),
127        "doc" => return render_children(n, opts, out),
128        _ => {}
129    }
130    // configured simple-wrapper override takes precedence over built-ins
131    if let Some(tag) = opts.node_tags.get(ty) {
132        out.push('<');
133        out.push_str(tag);
134        if opts.spread_attrs {
135            spread(n, &[], out);
136        }
137        out.push('>');
138        render_children(n, opts, out);
139        out.push_str("</");
140        out.push_str(tag);
141        out.push('>');
142        return;
143    }
144    match ty {
145        "paragraph" => wrap(n, opts, out, "p", true, &["textAlign"]),
146        "heading" => {
147            let level = n
148                .attr("level")
149                .and_then(Value::as_u64)
150                .unwrap_or(1)
151                .clamp(1, 6) as u8;
152            let digit = (b'0' + level) as char;
153            out.push_str("<h");
154            out.push(digit);
155            write_text_align(n, opts, out);
156            if opts.spread_attrs {
157                spread(n, &["level", "textAlign"], out);
158            }
159            out.push('>');
160            render_children(n, opts, out);
161            out.push_str("</h");
162            out.push(digit);
163            out.push('>');
164        }
165        "blockquote" => wrap(n, opts, out, "blockquote", false, &[]),
166        "bulletList" => wrap(n, opts, out, "ul", false, &[]),
167        "listItem" => wrap(n, opts, out, "li", false, &[]),
168        "orderedList" => {
169            out.push_str("<ol");
170            if let Some(start) = n.attr("start").and_then(Value::as_u64) {
171                if start != 1 {
172                    out.push_str(" start=\"");
173                    out.push_str(&start.to_string());
174                    out.push('"');
175                }
176            }
177            if opts.spread_attrs {
178                spread(n, &["start"], out);
179            }
180            out.push('>');
181            render_children(n, opts, out);
182            out.push_str("</ol>");
183        }
184        "codeBlock" => {
185            out.push_str("<pre><code");
186            if let Some(Value::String(lang)) = n.attr("language") {
187                if !lang.is_empty() {
188                    out.push_str(" class=\"language-");
189                    escape_attr(lang, out);
190                    out.push('"');
191                }
192            }
193            if opts.spread_attrs {
194                spread(n, &["language"], out);
195            }
196            out.push('>');
197            render_code_text(n, out); // raw text, marks ignored
198            out.push_str("</code></pre>");
199        }
200        "hardBreak" => void(out, "br", opts),
201        "horizontalRule" => void(out, "hr", opts),
202        "image" => {
203            out.push_str("<img");
204            for key in ["src", "alt", "title"] {
205                if let Some(v) = n.attr(key).and_then(attr_string) {
206                    write_attr(key, &v, out);
207                }
208            }
209            if opts.spread_attrs {
210                spread(n, &["src", "alt", "title"], out);
211            }
212            void_close(out, opts);
213        }
214        other => render_unknown(n, opts, out, other),
215    }
216}
217
218fn render_children(n: &Node, opts: &HtmlOptions, out: &mut String) {
219    if let Some(children) = &n.content {
220        for c in children {
221            render_node(c, opts, out);
222        }
223    }
224}
225
226fn wrap(n: &Node, opts: &HtmlOptions, out: &mut String, tag: &str, align: bool, consumed: &[&str]) {
227    out.push('<');
228    out.push_str(tag);
229    if align {
230        write_text_align(n, opts, out);
231    }
232    if opts.spread_attrs {
233        spread(n, consumed, out);
234    }
235    out.push('>');
236    render_children(n, opts, out);
237    out.push_str("</");
238    out.push_str(tag);
239    out.push('>');
240}
241
242fn render_unknown(n: &Node, opts: &HtmlOptions, out: &mut String, ty: &str) {
243    match opts.unknown_node {
244        UnknownNodePolicy::Transparent => render_children(n, opts, out),
245        UnknownNodePolicy::Skip => {}
246        UnknownNodePolicy::DataTypeDiv => {
247            out.push_str("<div data-type=\"");
248            escape_attr(ty, out);
249            out.push('"');
250            if opts.spread_attrs {
251                spread(n, &[], out);
252            }
253            out.push('>');
254            render_children(n, opts, out);
255            out.push_str("</div>");
256        }
257    }
258}
259
260fn render_text(n: &Node, opts: &HtmlOptions, out: &mut String) {
261    let text = n.text.as_deref().unwrap_or("");
262    let marks = n.marks.as_deref().unwrap_or(&[]);
263    // `Skip` policy drops text carrying an unknown mark entirely.
264    if opts.unknown_mark == UnknownMarkPolicy::Skip
265        && marks.iter().any(|m| is_unknown_mark(m, opts))
266    {
267        return;
268    }
269    for m in marks {
270        open_mark(m, opts, out);
271    }
272    escape_text(text, out);
273    for m in marks.iter().rev() {
274        close_mark(m, opts, out);
275    }
276}
277
278/// Concatenate descendant text (escaped), ignoring marks — for `codeBlock`.
279fn render_code_text(n: &Node, out: &mut String) {
280    if let Some(t) = &n.text {
281        escape_text(t, out);
282    }
283    if let Some(children) = &n.content {
284        for c in children {
285            render_code_text(c, out);
286        }
287    }
288}
289
/// Look up the built-in HTML tag for a mark type, or `None` when the mark has
/// no built-in mapping. Matching is exact and case-sensitive.
fn builtin_mark_tag(mark_type: &str) -> Option<&'static str> {
    // (mark type, HTML tag) pairs.
    const BUILTIN_MARKS: [(&str, &str); 8] = [
        ("bold", "strong"),
        ("italic", "em"),
        ("strike", "s"),
        ("code", "code"),
        ("underline", "u"),
        ("subscript", "sub"),
        ("superscript", "sup"),
        ("link", "a"),
    ];
    BUILTIN_MARKS
        .iter()
        .find(|(name, _)| *name == mark_type)
        .map(|&(_, tag)| tag)
}
303
304fn is_unknown_mark(m: &Mark, opts: &HtmlOptions) -> bool {
305    !opts.mark_tags.contains_key(&m.mark_type) && builtin_mark_tag(&m.mark_type).is_none()
306}
307
308fn open_mark(m: &Mark, opts: &HtmlOptions, out: &mut String) {
309    if let Some(tag) = opts.mark_tags.get(&m.mark_type) {
310        out.push('<');
311        out.push_str(tag);
312        out.push('>');
313        return;
314    }
315    match m.mark_type.as_str() {
316        "link" => {
317            out.push_str("<a");
318            for key in ["href", "target", "rel"] {
319                if let Some(v) = m
320                    .attrs
321                    .as_ref()
322                    .and_then(|a| a.get(key))
323                    .and_then(attr_string)
324                {
325                    write_attr(key, &v, out);
326                }
327            }
328            out.push('>');
329        }
330        other => match builtin_mark_tag(other) {
331            Some(tag) => {
332                out.push('<');
333                out.push_str(tag);
334                out.push('>');
335            }
336            None => {
337                if opts.unknown_mark == UnknownMarkPolicy::DataMarkSpan {
338                    out.push_str("<span data-mark=\"");
339                    escape_attr(other, out);
340                    out.push_str("\">");
341                }
342            }
343        },
344    }
345}
346
347fn close_mark(m: &Mark, opts: &HtmlOptions, out: &mut String) {
348    if let Some(tag) = opts.mark_tags.get(&m.mark_type) {
349        out.push_str("</");
350        out.push_str(tag);
351        out.push('>');
352        return;
353    }
354    let tag = match builtin_mark_tag(&m.mark_type) {
355        Some(tag) => tag,
356        None => match opts.unknown_mark {
357            UnknownMarkPolicy::DataMarkSpan => "span",
358            _ => return,
359        },
360    };
361    out.push_str("</");
362    out.push_str(tag);
363    out.push('>');
364}
365
366// ---- attribute / escape helpers -----------------------------------------
367
368fn write_text_align(n: &Node, opts: &HtmlOptions, out: &mut String) {
369    if !opts.text_align {
370        return;
371    }
372    if let Some(Value::String(a)) = n.attr("textAlign") {
373        // Whitelist known keywords: emitting an arbitrary value into a `style`
374        // attribute would allow CSS injection (`;color:red`) despite escaping.
375        if matches!(
376            a.as_str(),
377            "left" | "right" | "center" | "justify" | "start" | "end"
378        ) {
379            out.push_str(" style=\"text-align:");
380            out.push_str(a);
381            out.push('"');
382        }
383    }
384}
385
386fn void(out: &mut String, tag: &str, opts: &HtmlOptions) {
387    out.push('<');
388    out.push_str(tag);
389    void_close(out, opts);
390}
391
392fn void_close(out: &mut String, opts: &HtmlOptions) {
393    match opts.self_closing {
394        SelfClosingStyle::Html5 => out.push('>'),
395        SelfClosingStyle::Xhtml => out.push_str("/>"),
396    }
397}
398
399/// Render a JSON scalar as an attribute value; `None` for null/array/object.
400fn attr_string(v: &Value) -> Option<Cow<'_, str>> {
401    match v {
402        Value::String(s) => Some(Cow::Borrowed(s)),
403        Value::Bool(b) => Some(Cow::Owned(b.to_string())),
404        Value::Number(n) => Some(Cow::Owned(n.to_string())),
405        _ => None,
406    }
407}
408
/// Whether `name` may be emitted as an HTML attribute name: non-empty and made
/// up solely of ASCII letters, ASCII digits, `-`, `_` and `:`.
fn valid_attr_name(name: &str) -> bool {
    if name.is_empty() {
        return false;
    }
    name.chars()
        .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == ':')
}
415
416/// Emit a node's remaining attrs (skipping `consumed` keys and invalid names).
417fn spread(n: &Node, consumed: &[&str], out: &mut String) {
418    if let Some(attrs) = &n.attrs {
419        for (k, v) in attrs {
420            if consumed.contains(&k.as_str()) || !valid_attr_name(k) {
421                continue;
422            }
423            if let Some(s) = attr_string(v) {
424                write_attr(k, &s, out);
425            }
426        }
427    }
428}
429
430fn write_attr(name: &str, value: &str, out: &mut String) {
431    out.push(' ');
432    out.push_str(name);
433    out.push_str("=\"");
434    escape_attr(value, out);
435    out.push('"');
436}
437
438fn escape_text(s: &str, out: &mut String) {
439    escape_into(s, out, false);
440}
441
442fn escape_attr(s: &str, out: &mut String) {
443    escape_into(s, out, true);
444}
445
/// Append `s` to `out` with `&`, `<` and `>` replaced by entities; `"` is
/// replaced as well when `quote` is set. Unaffected stretches are copied in
/// one piece rather than character by character.
fn escape_into(s: &str, out: &mut String, quote: bool) {
    let mut rest = s;
    while let Some(at) = rest.find(|c: char| matches!(c, '&' | '<' | '>') || (quote && c == '"')) {
        // Everything before the hit is clean and can be copied wholesale.
        out.push_str(&rest[..at]);
        // The hit is always a single ASCII byte, so byte indexing is safe here
        // and `at + 1` below lands on a character boundary.
        let entity = match rest.as_bytes()[at] {
            b'&' => "&amp;",
            b'<' => "&lt;",
            b'>' => "&gt;",
            _ => "&quot;",
        };
        out.push_str(entity);
        rest = &rest[at + 1..];
    }
    out.push_str(rest);
}
462}