Skip to main content

devup_editor_html/
export.rs

1//! `Document` / `CopiedBlocks` → HTML serialization.
2//!
3//! Two entry points:
4//! - [`Html::export`] — `DocumentExport` trait impl. Serialises a
5//!   [`Document`] using its flat `root_block_ids()` iteration order.
6//! - [`blocks_to_html`] — clipboard-oriented serialiser that accepts
7//!   the `CopiedBlocks` subtree shape (with explicit `children` IDs
8//!   for tables / toggle descendants / etc.). This is the one the
9//!   React clipboard path calls through WASM.
10//!
//! Both entry points funnel into the same `serialize_roots` walker, so
//! they produce identical HTML for the same blocks — including the
//! `data-devup-props` marker (lossless devup→devup table round-trip)
//! and Notion-compatible toggle nesting.
14
15use std::collections::HashMap;
16
17use devup_editor_core::{
18    Block, BlockId, Document, DocumentExport, DocumentImport, IdGenerator, Mark, TextSpan,
19};
20use serde_json::Value;
21
22use crate::HtmlError;
23use crate::clipboard::{CopiedBlocks, DEVUP_PROPS_ATTR, encode_props};
24use crate::import::parse_html;
25
/// Marker type carrying the [`DocumentExport`] / [`DocumentImport`]
/// impls for HTML.
///
/// A field-less unit struct: both trait impls in this file expose
/// associated functions that take no `self`, so the type is only ever
/// named (`Html::export`, `Html::import`), never instantiated here.
pub struct Html;
29
30impl DocumentExport for Html {
31    type Output = String;
32    type Error = HtmlError;
33
34    fn export(doc: &Document) -> Result<String, HtmlError> {
35        // Promote the flat Document into a children-keyed map so the
36        // clipboard-aware walker can use it uniformly.
37        let copied = document_to_copied_blocks(doc);
38        Ok(serialize_roots(&copied.roots, &copied.by_id))
39    }
40}
41
42impl DocumentImport for Html {
43    type Input = String;
44    type Error = HtmlError;
45
46    fn import(input: String, id_gen: &mut dyn IdGenerator) -> Result<Document, HtmlError> {
47        let copied = parse_html(&input, id_gen);
48        // Promote the clipboard shape into a Document. We only keep
49        // roots at Document level; call sites that need the full tree
50        // (tables, toggle children) should use `html_to_copied_blocks`.
51        let mut doc = Document::new();
52        for root in copied.roots {
53            doc.push_root_block(root);
54        }
55        Ok(doc)
56    }
57}
58
59/// Recursively copy a block and every descendant reachable via
60/// `children` into `by_id`. Used when promoting a `Document` into the
61/// clipboard shape for serialisation.
62///
63/// Guards against cycles (a block listing itself / an ancestor as a
64/// child) by short-circuiting when the id is already in `by_id` —
65/// the recursion has already covered that subtree.
66fn populate_map(doc: &Document, block: &Block, by_id: &mut HashMap<BlockId, Block>) {
67    if by_id.contains_key(&block.id) {
68        return;
69    }
70    by_id.insert(block.id.clone(), block.clone());
71    for child_id in &block.children {
72        if let Some(child) = doc.get_block(child_id) {
73            populate_map(doc, child, by_id);
74        }
75    }
76}
77
78/// Promote a flat [`Document`] into the clipboard `{roots, byId}`
79/// shape. Descends `block.children` so tables, toggle descendants, and
80/// any future parent→child references survive the conversion.
81///
82/// Single source of truth for the "Document → `CopiedBlocks`" shape
83/// transform — the WASM layer re-exports it verbatim instead of
84/// maintaining a near-identical copy.
85#[must_use]
86#[allow(clippy::implicit_hasher)]
87pub fn document_to_copied_blocks(doc: &Document) -> CopiedBlocks {
88    let mut by_id: HashMap<BlockId, Block> = HashMap::new();
89    let roots: Vec<Block> = doc
90        .root_block_ids()
91        .iter()
92        .filter_map(|id| doc.get_block(id).cloned())
93        .collect();
94    for block in &roots {
95        populate_map(doc, block, &mut by_id);
96    }
97    CopiedBlocks { roots, by_id }
98}
99
100/// Clipboard-oriented serialiser. Mirrors the React `blocksToHtml`:
101/// emits toggle blocks in Notion's canonical nested format and consumes
102/// indent-based siblings as toggle children.
103///
104/// The `by_id` map uses the default hasher — this is deliberate. We
105/// don't generalize over `BuildHasher` because callers always build
106/// this map themselves from editor state (never from a custom-hasher
107/// container) and the generic bound would only add noise to the
108/// public API.
109#[must_use]
110#[allow(clippy::implicit_hasher)]
111pub fn blocks_to_html(roots: &[Block], by_id: &HashMap<BlockId, Block>) -> String {
112    serialize_roots(roots, by_id)
113}
114
115/// Convenience for the WASM boundary: accepts [`CopiedBlocks`] directly.
116#[must_use]
117pub fn copied_blocks_to_html(copied: &CopiedBlocks) -> String {
118    blocks_to_html(&copied.roots, &copied.by_id)
119}
120
121fn serialize_roots(roots: &[Block], by_id: &HashMap<BlockId, Block>) -> String {
122    let mut cursor = 0usize;
123    let mut out = String::new();
124    emit_siblings(roots, by_id, &mut cursor, 0, &mut out);
125    out
126}
127
128/// Emit roots from `cursor` forward while their indent is ≥
129/// `stop_indent`. On encountering a toggle block, recursively consume
130/// subsequent higher-indent blocks as its children (Notion clipboard
131/// format).
132fn emit_siblings(
133    roots: &[Block],
134    by_id: &HashMap<BlockId, Block>,
135    cursor: &mut usize,
136    stop_indent: i64,
137    out: &mut String,
138) {
139    while *cursor < roots.len() {
140        let block = &roots[*cursor];
141        let indent = block.indent_level().max(0);
142        if indent < stop_indent {
143            break;
144        }
145        *cursor += 1;
146
147        if block.ty == "toggle" {
148            let title = render_inline(&block.content);
149            out.push_str(r#"<ul class="toggle"><li><details open=""><summary>"#);
150            out.push_str(&title);
151            out.push_str("</summary>");
152            emit_siblings(roots, by_id, cursor, indent + 1, out);
153            out.push_str("</details></li></ul>");
154        } else {
155            write_block_html(block, by_id, out);
156        }
157    }
158}
159
160// ── Single-block HTML emission ────────────────────────────────────
161
162fn write_block_html(block: &Block, by_id: &HashMap<BlockId, Block>, out: &mut String) {
163    match block.ty.as_str() {
164        "heading" => {
165            let level = block
166                .props
167                .get("level")
168                .and_then(Value::as_u64)
169                .unwrap_or(1)
170                .clamp(1, 6);
171            out.push('<');
172            out.push('h');
173            out.push(digit(level));
174            out.push('>');
175            out.push_str(&render_inline(&block.content));
176            out.push_str("</h");
177            out.push(digit(level));
178            out.push('>');
179        }
180        "quote" => {
181            out.push_str("<blockquote>");
182            out.push_str(&render_inline(&block.content));
183            out.push_str("</blockquote>");
184        }
185        "todo" => {
186            let checked = block
187                .props
188                .get("checked")
189                .and_then(Value::as_bool)
190                .unwrap_or(false);
191            out.push_str("<p data-type=\"todo\" data-checked=\"");
192            out.push_str(if checked { "true" } else { "false" });
193            out.push_str("\">");
194            out.push_str(&render_inline(&block.content));
195            out.push_str("</p>");
196        }
197        "list" => {
198            let style = block
199                .props
200                .get("style")
201                .and_then(Value::as_str)
202                .unwrap_or("unordered");
203            let tag = if style.starts_with("ordered") {
204                "ol"
205            } else {
206                "ul"
207            };
208            out.push('<');
209            out.push_str(tag);
210            out.push_str("><li>");
211            out.push_str(&render_inline(&block.content));
212            out.push_str("</li></");
213            out.push_str(tag);
214            out.push('>');
215        }
216        "code" => {
217            let lang = block
218                .props
219                .get("language")
220                .and_then(Value::as_str)
221                .unwrap_or("");
222            let plain = block.plain_text();
223            out.push_str("<pre><code");
224            if !lang.is_empty() {
225                out.push_str(" class=\"language-");
226                out.push_str(&escape_attr(lang));
227                out.push('"');
228            }
229            out.push('>');
230            out.push_str(&escape_text(&plain));
231            out.push_str("</code></pre>");
232        }
233        "divider" => {
234            out.push_str("<hr>");
235        }
236        "table" => {
237            write_table(block, by_id, out);
238        }
239        _ => {
240            out.push_str("<p>");
241            out.push_str(&render_inline(&block.content));
242            out.push_str("</p>");
243        }
244    }
245}
246
/// Map a heading level to its ASCII digit.
///
/// `1..=5` map to `'1'..='5'`; every other value — including 0 and
/// anything above 6 — maps to `'6'`.
fn digit(n: u64) -> char {
    const LOW_LEVELS: [char; 5] = ['1', '2', '3', '4', '5'];
    n.checked_sub(1)
        .and_then(|offset| usize::try_from(offset).ok())
        .and_then(|index| LOW_LEVELS.get(index).copied())
        .unwrap_or('6')
}
257
258// ── Table emission ────────────────────────────────────────────────
259
260fn write_table(table: &Block, by_id: &HashMap<BlockId, Block>, out: &mut String) {
261    let mut rows_html = String::new();
262    for row_id in &table.children {
263        let Some(row) = by_id.get(row_id) else {
264            continue;
265        };
266        let mut cells_html = String::new();
267        for cell_id in &row.children {
268            let Some(cell) = by_id.get(cell_id) else {
269                continue;
270            };
271            write_cell(cell, &mut cells_html);
272        }
273        write_row(row, &cells_html, &mut rows_html);
274    }
275    let colgroup = serialize_colgroup(table);
276
277    let mut attrs = TableAttrs::new();
278    attrs.push_style(&inline_cell_style(&table.props));
279    attrs.push_marker(&encode_props(Some(&table.props)));
280    out.push_str("<table");
281    attrs.write_into(out);
282    out.push('>');
283    out.push_str(&colgroup);
284    out.push_str("<tbody>");
285    out.push_str(&rows_html);
286    out.push_str("</tbody></table>");
287}
288
289fn write_row(row: &Block, inner_cells: &str, out: &mut String) {
290    let mut attrs = TableAttrs::new();
291    let height = match row.props.get("height") {
292        Some(Value::Number(n)) => n.as_f64().map(format_px),
293        Some(Value::String(s)) => Some(s.clone()),
294        _ => None,
295    };
296    if let Some(h) = height {
297        attrs.push_style(&format!("height:{h}"));
298    }
299    attrs.push_marker(&encode_props(Some(&row.props)));
300
301    out.push_str("<tr");
302    attrs.write_into(out);
303    out.push('>');
304    out.push_str(inner_cells);
305    out.push_str("</tr>");
306}
307
308fn write_cell(cell: &Block, out: &mut String) {
309    let mut attrs = TableAttrs::new();
310    if let Some(n) = cell.props.get("colspan").and_then(Value::as_u64)
311        && n > 1
312    {
313        attrs.push_raw(&format!("colspan=\"{n}\""));
314    }
315    if let Some(n) = cell.props.get("rowspan").and_then(Value::as_u64)
316        && n > 1
317    {
318        attrs.push_raw(&format!("rowspan=\"{n}\""));
319    }
320    attrs.push_style(&inline_cell_style(&cell.props));
321    attrs.push_marker(&encode_props(Some(&cell.props)));
322
323    out.push_str("<td");
324    attrs.write_into(out);
325    out.push('>');
326    out.push_str(&render_inline(&cell.content));
327    out.push_str("</td>");
328}
329
330fn serialize_colgroup(table: &Block) -> String {
331    let Some(Value::Array(cols)) = table.props.get("columns") else {
332        return String::new();
333    };
334    if cols.is_empty() {
335        return String::new();
336    }
337    let mut s = String::from("<colgroup>");
338    for col in cols {
339        let width = col.get("width").and_then(|v| match v {
340            Value::Number(n) => n.as_f64().map(format_px),
341            Value::String(raw) => Some(raw.clone()),
342            _ => None,
343        });
344        match width {
345            Some(w) => {
346                s.push_str("<col style=\"width:");
347                s.push_str(&escape_attr(&w));
348                s.push_str("\">");
349            }
350            None => s.push_str("<col>"),
351        }
352    }
353    s.push_str("</colgroup>");
354    s
355}
356
357/// Helper that accumulates `style="…"` declarations AND the
358/// `data-devup-props` marker AND any raw `colspan`/`rowspan` attributes,
359/// then emits a single leading-space attribute string.
360struct TableAttrs {
361    parts: Vec<String>,
362    styles: Vec<String>,
363    marker: String,
364}
365
366impl TableAttrs {
367    fn new() -> Self {
368        Self {
369            parts: Vec::new(),
370            styles: Vec::new(),
371            marker: String::new(),
372        }
373    }
374
375    fn push_style(&mut self, s: &str) {
376        if !s.is_empty() {
377            self.styles.push(s.to_string());
378        }
379    }
380
381    fn push_marker(&mut self, marker: &str) {
382        if !marker.is_empty() {
383            self.marker = marker.to_string();
384        }
385    }
386
387    fn push_raw(&mut self, raw: &str) {
388        self.parts.push(raw.to_string());
389    }
390
391    fn write_into(&self, out: &mut String) {
392        for p in &self.parts {
393            out.push(' ');
394            out.push_str(p);
395        }
396        if !self.styles.is_empty() {
397            out.push_str(" style=\"");
398            out.push_str(&escape_attr(&self.styles.join(";")));
399            out.push('"');
400        }
401        if !self.marker.is_empty() {
402            out.push(' ');
403            out.push_str(DEVUP_PROPS_ATTR);
404            out.push_str("=\"");
405            out.push_str(&escape_attr(&self.marker));
406            out.push('"');
407        }
408    }
409}
410
411fn inline_cell_style(props: &serde_json::Map<String, Value>) -> String {
412    let mut parts: Vec<String> = Vec::new();
413    if let Some(v) = props.get("backgroundColor").and_then(Value::as_str) {
414        parts.push(format!("background-color:{v}"));
415    }
416    if let Some(v) = props.get("borderColor").and_then(Value::as_str) {
417        parts.push(format!("border-color:{v}"));
418    }
419    if let Some(v) = props.get("borderWidth").and_then(Value::as_str) {
420        parts.push(format!("border-width:{v}"));
421    }
422    if let Some(v) = props.get("borderStyle").and_then(Value::as_str) {
423        parts.push(format!("border-style:{v}"));
424    }
425    if let Some(v) = props.get("verticalAlign").and_then(Value::as_str) {
426        parts.push(format!("vertical-align:{v}"));
427    }
428    if let Some(v) = props.get("padding") {
429        let as_str = match v {
430            Value::String(s) => Some(s.clone()),
431            Value::Number(n) => n.as_f64().map(format_px),
432            _ => None,
433        };
434        if let Some(s) = as_str {
435            parts.push(format!("padding:{s}"));
436        }
437    }
438    parts.join(";")
439}
440
/// Format a pixel dimension as `<number>px`.
///
/// Whole values that fit in an `i64` are printed through an integer so
/// the digits are exact and there is no trailing `.0`, matching the TS
/// ``${n}px`` template on integer inputs (`-0.0` comes out as `0px`).
/// Everything else — fractional values, integral values outside the
/// `i64` range, and non-finite values — is printed with `f64`'s
/// `Display`. Previously an out-of-range integral value was pushed
/// through `as i64`, which saturates and silently emitted
/// `9223372036854775807px` instead of the real magnitude.
///
/// See the matching `extract_row_props` helper in `import.rs` for the
/// background on the two clippy opt-outs: `float_cmp` because an exact
/// integral check is wanted (not an epsilon window), and
/// `cast_possible_truncation` because the cast is only reached after
/// the explicit range check below. Both `#[allow]`s are scoped to the
/// single expression that needs them, so code added later is still
/// linted.
fn format_px(v: f64) -> String {
    /// 2^63: the first value past `i64::MAX`, exactly representable
    /// as an `f64`. Its negation is exactly `i64::MIN`.
    const I64_LIMIT: f64 = 9_223_372_036_854_775_808.0;

    #[allow(clippy::float_cmp)]
    let is_integral = v == v.trunc();
    // Half-open range: `i64::MIN` is included, 2^63 is not. NaN fails
    // both comparisons and therefore takes the `Display` branch.
    if is_integral && (-I64_LIMIT..I64_LIMIT).contains(&v) {
        #[allow(clippy::cast_possible_truncation)]
        let as_int = v as i64;
        format!("{as_int}px")
    } else {
        format!("{v}px")
    }
}
465
466// ── Inline (spans + marks) serialisation ─────────────────────────
467
468fn render_inline(spans: &[TextSpan]) -> String {
469    let mut out = String::new();
470    for span in spans {
471        out.push_str(&apply_marks(&span.text, &span.marks));
472    }
473    out
474}
475
476fn apply_marks(text: &str, marks: &[Mark]) -> String {
477    // Text is escaped first, then `\n` becomes `<br>` — mirrors
478    // `spanHtml()` ordering. Escaping before br-replace ensures any
479    // `<` in the text doesn't collide with the inserted tag.
480    let escaped = escape_text(text).replace('\n', "<br>");
481    let mut out = escaped;
482
483    let has = |t: &str| marks.iter().any(|m| m.ty == t);
484
485    if has("code") {
486        out = format!("<code>{out}</code>");
487    }
488    if has("strike") {
489        out = format!("<s>{out}</s>");
490    }
491    if has("underline") {
492        out = format!("<u>{out}</u>");
493    }
494    if has("italic") {
495        out = format!("<em>{out}</em>");
496    }
497    if has("bold") {
498        out = format!("<strong>{out}</strong>");
499    }
500
501    // Color / highlight → single span wrapper.
502    let mut style_parts: Vec<String> = Vec::new();
503    if let Some(c) = style_value(marks, "color", "color") {
504        style_parts.push(format!("color:{}", sanitize_css(c)));
505    }
506    if let Some(bg) = style_value(marks, "highlight", "backgroundColor") {
507        style_parts.push(format!("background-color:{}", sanitize_css(bg)));
508    }
509    if !style_parts.is_empty() {
510        out = format!(
511            "<span style=\"{}\">{out}</span>",
512            escape_attr(&style_parts.join(";"))
513        );
514    }
515
516    if let Some(href) = link_href(marks) {
517        out = format!(
518            "<a href=\"{}\" rel=\"noopener noreferrer\">{out}</a>",
519            escape_attr(href)
520        );
521    }
522
523    // Unknown marks — surface them via `<span data-mark="type">` so
524    // text is never silently dropped on copy.
525    for mark in marks {
526        if !is_known_mark(&mark.ty) {
527            out = format!("<span data-mark=\"{}\">{out}</span>", escape_attr(&mark.ty));
528        }
529    }
530
531    out
532}
533
/// Whether `ty` is one of the mark types `apply_marks` renders
/// natively. The comparison is exact and case-sensitive.
fn is_known_mark(ty: &str) -> bool {
    const KNOWN_MARKS: [&str; 8] = [
        "bold",
        "italic",
        "underline",
        "strike",
        "code",
        "link",
        "color",
        "highlight",
    ];
    KNOWN_MARKS.contains(&ty)
}
540
541fn style_value<'a>(marks: &'a [Mark], mark_type: &str, key: &str) -> Option<&'a str> {
542    marks.iter().find(|m| m.ty == mark_type).and_then(|mark| {
543        mark.style()
544            .and_then(|style| style.get(key))
545            .and_then(Value::as_str)
546    })
547}
548
549fn link_href(marks: &[Mark]) -> Option<&str> {
550    marks.iter().find(|m| m.ty == "link").and_then(|mark| {
551        mark.attrs
552            .get("href")
553            .and_then(Value::as_str)
554            .filter(|href| is_safe_href(href))
555    })
556}
557
/// Reject hrefs that could trigger script execution when opened
/// directly (`javascript:`, `vbscript:`), or would leak local-file or
/// arbitrary-content URIs (`file:`, most `data:`). `data:image/*` is
/// permitted because inline-image previews are legitimate. Empty or
/// whitespace-only hrefs are rejected too.
///
/// The scheme is checked on a normalised copy of `href` that imitates
/// what a browser's URL parser does before it looks at the scheme:
/// leading/trailing whitespace and C0 control characters are stripped,
/// ASCII tab / LF / CR are removed from anywhere in the string, and
/// ASCII letters are lowercased. Without this, `java\tscript:…` or
/// `\x01javascript:…` would slip past a plain prefix check while still
/// being treated as `javascript:` by the browser.
///
/// **Must stay in sync with `isSafeLinkHref` in
/// `packages/react/src/utils/spansToHtml.ts`** — any policy change
/// here must be mirrored there (and vice versa) so the React render
/// path and the clipboard export path reject identical URLs. A parity
/// test in `tests/href_parity.rs` enforces the common cases.
/// NOTE(review): the tab/newline/control-character normalisation above
/// needs the same treatment on the TS side — confirm and mirror.
pub(crate) fn is_safe_href(href: &str) -> bool {
    const BLOCKED_SCHEMES: [&str; 3] = ["javascript:", "vbscript:", "file:"];

    let normalized: String = href
        .trim_matches(|c: char| c.is_whitespace() || c <= ' ')
        .chars()
        .filter(|c| !matches!(c, '\t' | '\n' | '\r'))
        .map(|c| c.to_ascii_lowercase())
        .collect();

    if normalized.is_empty() {
        return false;
    }
    if BLOCKED_SCHEMES
        .iter()
        .any(|scheme| normalized.starts_with(scheme))
    {
        return false;
    }
    // Only `data:image/*` is permitted; other data URIs can smuggle
    // HTML / scripts into third-party viewers.
    !normalized.starts_with("data:") || normalized.starts_with("data:image/")
}
586
/// Strip the characters that could terminate or escape out of a
/// quoted CSS value: `"`, `\`, LF and CR. Everything else is kept.
///
/// NOTE(review): `;` is not removed, so a value can still carry extra
/// declarations into the `style` attribute — confirm this matches the
/// intended policy on the TS side.
fn sanitize_css(s: &str) -> String {
    let mut cleaned = String::with_capacity(s.len());
    for ch in s.chars() {
        if !matches!(ch, '"' | '\\' | '\n' | '\r') {
            cleaned.push(ch);
        }
    }
    cleaned
}
592
/// Escape `s` for use as HTML text content: `&`, `<` and `>` become
/// `&amp;`, `&lt;` and `&gt;`. Quotes are left alone — use
/// `escape_attr` for attribute values.
fn escape_text(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    let mut rest = s;
    // Copy the untouched stretch before each special character in one
    // go, then emit the entity for that character.
    while let Some(position) = rest.find(['&', '<', '>']) {
        let (plain, tail) = rest.split_at(position);
        escaped.push_str(plain);
        escaped.push_str(match tail.as_bytes()[0] {
            b'&' => "&amp;",
            b'<' => "&lt;",
            _ => "&gt;",
        });
        // All three specials are single-byte ASCII.
        rest = &tail[1..];
    }
    escaped.push_str(rest);
    escaped
}
605
/// Escape `s` for use inside a double-quoted HTML attribute value:
/// `&`, `<`, `>` and `"` become `&amp;`, `&lt;`, `&gt;` and `&quot;`.
/// Single quotes are left as-is.
fn escape_attr(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    // Scratch space for re-encoding ordinary characters as `&str`.
    let mut utf8 = [0u8; 4];
    for ch in s.chars() {
        let piece: &str = match ch {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            '"' => "&quot;",
            other => &*other.encode_utf8(&mut utf8),
        };
        escaped.push_str(piece);
    }
    escaped
}
619
#[cfg(test)]
mod tests {
    use super::*;
    use devup_editor_core::{TextSpan, model::block::Block};

    /// The flat-document path: `root_block_ids` becomes `roots` and
    /// `by_id` contains every root. No children traversal needed.
    #[test]
    fn document_to_copied_blocks_flat_document() {
        let mut doc = Document::new();
        let mut p1 = Block::new_paragraph(BlockId::new("p1"));
        p1.content = vec![TextSpan::plain("first")];
        let mut p2 = Block::new_paragraph(BlockId::new("p2"));
        p2.content = vec![TextSpan::plain("second")];
        doc.push_root_block(p1);
        doc.push_root_block(p2);

        let copied = document_to_copied_blocks(&doc);
        assert_eq!(copied.roots.len(), 2);
        assert_eq!(copied.by_id.len(), 2);
        assert!(copied.by_id.contains_key(&BlockId::new("p1")));
        assert!(copied.by_id.contains_key(&BlockId::new("p2")));
    }

    /// The table-subtree path: a table's rows and cells are linked
    /// through `block.children`, and all of them MUST show up in
    /// `by_id` or the clipboard loses the table structure.
    ///
    /// NOTE(review): the fixture below also pushes the row and the
    /// cell as roots, and every root is inserted into `by_id` on its
    /// own, so these assertions would presumably still pass without
    /// any `children` traversal. Confirm whether `Document` offers a
    /// way to register non-root blocks and tighten the fixture if so.
    #[test]
    fn document_to_copied_blocks_preserves_table_children() {
        let mut doc = Document::new();
        let cell_id = BlockId::new("c1");
        let row_id = BlockId::new("r1");
        let table_id = BlockId::new("t1");

        let mut cell = Block::new(cell_id.clone(), "table_cell");
        cell.content = vec![TextSpan::plain("hi")];
        cell.parent = Some(row_id.clone());

        let mut row = Block::new(row_id.clone(), "table_row");
        row.children = vec![cell_id.clone()];
        row.parent = Some(table_id.clone());

        let mut table = Block::new(table_id.clone(), "table");
        table.children = vec![row_id.clone()];

        // Push all blocks as roots (Document doesn't model children
        // hierarchy separately — the parent/children fields on Block
        // are the tree representation). For this test the important
        // thing is that `document_to_copied_blocks` follows
        // `block.children` and NOT the root list.
        doc.push_root_block(table);
        doc.push_root_block(row);
        doc.push_root_block(cell);

        let copied = document_to_copied_blocks(&doc);
        // Every descendant reachable via children is in `by_id`.
        assert!(copied.by_id.contains_key(&table_id));
        assert!(copied.by_id.contains_key(&row_id));
        assert!(copied.by_id.contains_key(&cell_id));
    }

    /// An empty document produces an empty `CopiedBlocks`.
    #[test]
    fn document_to_copied_blocks_empty() {
        let doc = Document::new();
        let copied = document_to_copied_blocks(&doc);
        assert!(copied.roots.is_empty());
        assert!(copied.by_id.is_empty());
    }

    /// Self-reference / cycle guard: a block listing itself as a
    /// child must not cause unbounded traversal. `populate_map` skips
    /// any id that is already present in `by_id`, so the self-edge is
    /// visited at most once and the map ends up with a single entry.
    #[test]
    fn document_to_copied_blocks_cycle_safe() {
        let mut doc = Document::new();
        let mut b = Block::new(BlockId::new("x"), "paragraph");
        b.children = vec![BlockId::new("x")];
        doc.push_root_block(b);
        let copied = document_to_copied_blocks(&doc);
        assert_eq!(copied.by_id.len(), 1);
    }
}