Skip to main content

devup_editor_html/import/
mod.rs

//! HTML → [`CopiedBlocks`] parsing.
//!
//! Mirrors the behaviour of the React `htmlToBlocks` implementation
//! byte-for-byte on the inputs our editor and external apps (Word,
//! Notion, Google Docs) produce. The parser is tolerant: malformed
//! input is coerced through `html5ever` into a best-effort tree rather
//! than rejected.
8
9use std::collections::HashMap;
10
11use devup_editor_core::{Block, BlockId, IdGenerator, TextSpan, normalize_spans};
12use html5ever::driver::{ParseOpts, parse_document};
13use html5ever::tendril::TendrilSink;
14use markup5ever_rcdom::{Handle, NodeData, RcDom};
15use serde_json::{Map, Value};
16
17use crate::clipboard::{CopiedBlocks, clean_html};
18
19mod dom;
20use dom::{
21    MarkSet, attr_value, attrs_contains, build_synthetic_parent, clone_node_without_checkboxes,
22    collect_inline_into, collect_raw_text, collect_table_rows, decode_props_from_element,
23    detect_any_checkbox, detect_direct_checkbox, direct_children_of_tag_any, element_attrs,
24    element_tag, extract_cell_props, extract_colgroup_widths, extract_row_props, extract_spans,
25    extract_spans_from_li, find_body, find_descendant_with_any_class, has_class,
26    has_descendant_with_class, is_all_whitespace, is_notion_v3_toggle, parse_inline_style,
27    strip_nested_blocks,
28};
29
30// ── Public entry points ──────────────────────────────────────────
31
/// Parse an arbitrary HTML string (typically from a clipboard paste or
/// external rich-text export) into [`CopiedBlocks`].
///
/// Crate-internal alias: forwards `input` and `id_gen` unchanged to
/// [`html_to_copied_blocks`] and returns its result.
// NOTE(review): earlier docs described the result as a flat `Document`,
// but the signature returns `CopiedBlocks` — confirm whether a
// Document-returning variant was intended here.
pub(crate) fn parse_html(input: &str, id_gen: &mut dyn IdGenerator) -> CopiedBlocks {
    html_to_copied_blocks(input, id_gen)
}
40
41/// Parse HTML into the clipboard-shaped [`CopiedBlocks`] subtree used
42/// by the React paste flow. Block IDs come from `id_gen` so the caller
43/// controls determinism.
44pub fn html_to_copied_blocks(input: &str, id_gen: &mut dyn IdGenerator) -> CopiedBlocks {
45    let cleaned = clean_html(input.trim());
46    if cleaned.is_empty() {
47        return CopiedBlocks {
48            roots: Vec::new(),
49            by_id: HashMap::new(),
50        };
51    }
52
53    let dom = parse_document(RcDom::default(), ParseOpts::default()).one(cleaned);
54    let body = find_body(&dom.document).unwrap_or_else(|| dom.document.clone());
55
56    let mut ctx = Context::new(id_gen);
57    ctx.process_children_with_indent(&body, 0);
58    let roots = ctx.finalize_roots();
59    CopiedBlocks {
60        roots,
61        by_id: ctx.by_id,
62    }
63}
64
65// ── Parse context ────────────────────────────────────────────────
66
/// Mutable state shared by the recursive parser. Collects root blocks
/// (in document order) and every descendant block, keyed by id, so the
/// caller can rebuild the tree on paste without additional lookups.
struct Context<'a> {
    /// Source of fresh block ids; consulted by `next_id`.
    id_gen: &'a mut dyn IdGenerator,
    /// Ids of the root blocks, in the order `push_root` received them.
    roots_order: Vec<BlockId>,
    /// Every block stored so far (roots via `push_root`, descendants via
    /// `insert`), keyed by its id.
    by_id: HashMap<BlockId, Block>,
}
75
76impl<'a> Context<'a> {
77    fn new(id_gen: &'a mut dyn IdGenerator) -> Self {
78        Self {
79            id_gen,
80            roots_order: Vec::new(),
81            by_id: HashMap::new(),
82        }
83    }
84
    /// Return the next block id produced by the wrapped id generator.
    fn next_id(&mut self) -> BlockId {
        self.id_gen.next_id()
    }
88
89    /// Insert a block into `by_id` and mark it as a root in document
90    /// order.
91    fn push_root(&mut self, block: Block) {
92        self.roots_order.push(block.id.clone());
93        self.by_id.insert(block.id.clone(), block);
94    }
95
96    /// Insert a descendant block (cell, row, toggle child…). Returns
97    /// the id so the caller can reference it from its parent's
98    /// `children` field.
99    fn insert(&mut self, block: Block) -> BlockId {
100        let id = block.id.clone();
101        self.by_id.insert(id.clone(), block);
102        id
103    }
104
105    fn finalize_roots(&mut self) -> Vec<Block> {
106        let mut out = Vec::with_capacity(self.roots_order.len());
107        for id in &self.roots_order {
108            if let Some(b) = self.by_id.get(id) {
109                out.push(b.clone());
110            }
111        }
112        out
113    }
114
115    // ── Block-level recursion ────────────────────────────────────
116
117    /// Walk `node`'s children, emitting root-level blocks and carrying
118    /// `indent` through nested lists / toggles.
119    fn process_children_with_indent(&mut self, node: &Handle, indent: i64) {
120        // Buffer of inline DOM children that haven't yet hit a
121        // block-level boundary — flushed as a `<p>` when we do.
122        let mut inline_buf: Vec<Handle> = Vec::new();
123
124        for child in node.children.borrow().iter() {
125            match &child.data {
126                NodeData::Text { contents } => {
127                    if !contents.borrow().trim().is_empty() {
128                        inline_buf.push(child.clone());
129                    }
130                    continue;
131                }
132                NodeData::Comment { .. } | NodeData::Doctype { .. } => continue,
133                NodeData::Element { .. } => {}
134                _ => continue,
135            }
136
137            let tag = element_tag(child).unwrap_or_default();
138
139            if !BLOCK_TAGS.contains(&tag.as_str()) && tag != "details" {
140                inline_buf.push(child.clone());
141                continue;
142            }
143
144            self.flush_inline(&mut inline_buf, indent);
145
146            if matches!(tag.as_str(), "h1" | "h2" | "h3" | "h4" | "h5" | "h6") {
147                let level_digit = tag.chars().nth(1).and_then(|c| c.to_digit(10)).unwrap_or(1);
148                let level = u64::from(level_digit);
149                let spans = extract_spans(child);
150                if !is_all_whitespace(&spans) {
151                    let mut props = Map::new();
152                    props.insert("level".into(), Value::from(level));
153                    if indent > 0 {
154                        props.insert("indent".into(), Value::from(indent));
155                    }
156                    let id = self.next_id();
157                    let mut b = Block::with_props(id, "heading", props);
158                    b.content = spans;
159                    self.push_root(b);
160                }
161                continue;
162            }
163
164            if tag == "blockquote" {
165                let spans = extract_spans(child);
166                if !is_all_whitespace(&spans) {
167                    let id = self.next_id();
168                    let mut b = new_block(id, "quote", indent);
169                    b.content = spans;
170                    self.push_root(b);
171                }
172                continue;
173            }
174
175            if tag == "pre" {
176                // Detect fenced code via inner `<code class="language-xxx">`.
177                // Falls back to paragraph when no language class is present
178                // (matches TS clipboard behaviour for generic `<pre>`).
179                let code_child = child.children.borrow().iter().find_map(|c| {
180                    if element_tag(c).as_deref() == Some("code") {
181                        Some(c.clone())
182                    } else {
183                        None
184                    }
185                });
186                let language = code_child
187                    .as_ref()
188                    .and_then(|code| attr_value(code, "class"))
189                    .and_then(|cls| {
190                        cls.split_whitespace()
191                            .find_map(|c| c.strip_prefix("language-").map(String::from))
192                    });
193                let text = match code_child.as_ref() {
194                    Some(code) => collect_raw_text(code),
195                    None => collect_raw_text(child),
196                };
197                let text = text.strip_prefix('\n').unwrap_or(&text).to_string();
198                if !text.is_empty() {
199                    let id = self.next_id();
200                    let ty = if language.is_some() {
201                        "code"
202                    } else {
203                        "paragraph"
204                    };
205                    let mut props = Map::new();
206                    if indent > 0 {
207                        props.insert("indent".into(), Value::from(indent));
208                    }
209                    if let Some(lang) = language {
210                        props.insert("language".into(), Value::String(lang));
211                    }
212                    let mut b = if props.is_empty() {
213                        Block::new(id, ty)
214                    } else {
215                        Block::with_props(id, ty, props)
216                    };
217                    b.content = vec![TextSpan::plain(text)];
218                    self.push_root(b);
219                }
220                continue;
221            }
222
223            if tag == "details" {
224                self.process_toggle_details(child, indent);
225                continue;
226            }
227
228            if tag == "ul" || tag == "ol" {
229                let attrs = element_attrs(child);
230                // data-type="todo" list marker
231                if attrs_contains(&attrs, "data-devup-type", "todo")
232                    || has_class(&attrs, "to-do-list")
233                {
234                    self.process_notion_todo_list(child);
235                    continue;
236                }
237                if tag == "ul" && has_class(&attrs, "toggle") {
238                    self.process_notion_toggle_list(child, indent);
239                    continue;
240                }
241                self.process_list(child, tag == "ol", indent);
242                continue;
243            }
244
245            if tag == "table" {
246                self.process_table(child, indent);
247                continue;
248            }
249
250            if tag == "hr" {
251                let id = self.next_id();
252                self.push_root(new_block(id, "divider", indent));
253                continue;
254            }
255
256            if tag == "li" {
257                // Bare <li> outside a list — salvage as paragraph so
258                // text survives.
259                let spans = extract_spans(child);
260                if !is_all_whitespace(&spans) {
261                    let id = self.next_id();
262                    let mut b = new_block(id, "paragraph", indent);
263                    b.content = spans;
264                    self.push_root(b);
265                }
266                continue;
267            }
268
269            if tag == "p" {
270                // Detect the `<p data-type="todo" data-checked="…">`
271                // serialisation emitted by `write_block_html`. Parsed as
272                // a todo block so copy→paste within devup survives.
273                let p_attrs = element_attrs(child);
274                if attrs_contains(&p_attrs, "data-type", "todo") {
275                    let checked = attrs_contains(&p_attrs, "data-checked", "true");
276                    let spans = extract_spans(child);
277                    if !is_all_whitespace(&spans) {
278                        let id = self.next_id();
279                        let mut props = Map::new();
280                        props.insert("checked".into(), Value::Bool(checked));
281                        if indent > 0 {
282                            props.insert("indent".into(), Value::from(indent));
283                        }
284                        let mut b = Block::with_props(id, "todo", props);
285                        b.content = spans;
286                        self.push_root(b);
287                    }
288                    continue;
289                }
290                let spans = extract_spans(child);
291                if !is_all_whitespace(&spans) {
292                    let id = self.next_id();
293                    let mut b = new_block(id, "paragraph", indent);
294                    b.content = spans;
295                    self.push_root(b);
296                }
297                continue;
298            }
299
300            // Generic block wrapper (div/section/article/main/header/
301            // footer/nav/aside/figure…) — descend if it has block
302            // children, otherwise flatten to a paragraph.
303            let has_block_child = child.children.borrow().iter().any(|c| {
304                if let Some(t) = element_tag(c) {
305                    BLOCK_TAGS.contains(&t.as_str()) || t == "details"
306                } else {
307                    false
308                }
309            });
310            if has_block_child {
311                self.process_children_with_indent(child, indent);
312            } else {
313                let spans = extract_spans(child);
314                if !is_all_whitespace(&spans) {
315                    let id = self.next_id();
316                    let mut b = new_block(id, "paragraph", indent);
317                    b.content = spans;
318                    self.push_root(b);
319                }
320            }
321        }
322
323        self.flush_inline(&mut inline_buf, indent);
324    }
325
326    fn flush_inline(&mut self, buf: &mut Vec<Handle>, indent: i64) {
327        if buf.is_empty() {
328            return;
329        }
330        let mut spans: Vec<TextSpan> = Vec::new();
331        for n in buf.iter() {
332            let mark_set = MarkSet::empty();
333            collect_inline_into(n, &mut spans, &mark_set);
334        }
335        buf.clear();
336        normalize_spans(&mut spans);
337        if is_all_whitespace(&spans) {
338            return;
339        }
340        let id = self.next_id();
341        let mut b = new_block(id, "paragraph", indent);
342        b.content = spans;
343        self.push_root(b);
344    }
345
346    // ── List / toggle / todo handlers ────────────────────────────
347
348    fn process_toggle_details(&mut self, details: &Handle, indent: i64) {
349        // Summary inline spans → toggle title.
350        let summary_node = details.children.borrow().iter().find_map(|c| {
351            if element_tag(c).as_deref() == Some("summary") {
352                Some(c.clone())
353            } else {
354                None
355            }
356        });
357        let title_spans = summary_node.as_ref().map(extract_spans).unwrap_or_default();
358
359        let id = self.next_id();
360        let mut props = Map::new();
361        props.insert("collapsed".into(), Value::Bool(false));
362        if indent > 0 {
363            props.insert("indent".into(), Value::from(indent));
364        }
365        let mut toggle = Block::with_props(id, "toggle", props);
366        toggle.content = title_spans;
367        self.push_root(toggle);
368
369        // Everything after <summary> becomes child blocks at indent+1.
370        // We temporarily rehome them onto a synthetic container by
371        // iterating the children list directly.
372        let child_handle = details.clone();
373        let original_children: Vec<Handle> = child_handle
374            .children
375            .borrow()
376            .iter()
377            .filter(|c| element_tag(c).as_deref() != Some("summary"))
378            .cloned()
379            .collect();
380        self.process_handles_with_indent(&original_children, indent + 1);
381    }
382
383    fn process_notion_toggle_list(&mut self, ul: &Handle, indent: i64) {
384        for li in ul.children.borrow().iter() {
385            if element_tag(li).as_deref() != Some("li") {
386                continue;
387            }
388            let details = li.children.borrow().iter().find_map(|c| {
389                if element_tag(c).as_deref() == Some("details") {
390                    Some(c.clone())
391                } else {
392                    None
393                }
394            });
395            if let Some(det) = details {
396                self.process_toggle_details(&det, indent);
397            } else {
398                let spans = extract_spans_from_li(li);
399                if !is_all_whitespace(&spans) {
400                    let id = self.next_id();
401                    let mut props = Map::new();
402                    props.insert("style".into(), Value::String("unordered".into()));
403                    if indent > 0 {
404                        props.insert("indent".into(), Value::from(indent));
405                    }
406                    let mut b = Block::with_props(id, "list", props);
407                    b.content = spans;
408                    self.push_root(b);
409                }
410            }
411        }
412    }
413
414    fn process_list(&mut self, list_el: &Handle, ordered: bool, indent: i64) {
415        let style = if ordered { "ordered" } else { "unordered" };
416        for li in list_el.children.borrow().iter() {
417            if element_tag(li).as_deref() != Some("li") {
418                continue;
419            }
420
421            // Generic checkbox heuristic: `<li>[<div>]<input type=checkbox>…</li>`
422            if let Some(checked) = detect_direct_checkbox(li) {
423                let clone_without_cb = clone_node_without_checkboxes(li);
424                let spans = extract_spans(&clone_without_cb);
425                if !is_all_whitespace(&spans) {
426                    let id = self.next_id();
427                    let mut props = Map::new();
428                    props.insert("checked".into(), Value::Bool(checked));
429                    if indent > 0 {
430                        props.insert("indent".into(), Value::from(indent));
431                    }
432                    let mut b = Block::with_props(id, "todo", props);
433                    b.content = spans;
434                    self.push_root(b);
435                }
436                // Recurse nested lists regardless.
437                self.recurse_nested_lists(li, indent);
438                continue;
439            }
440
441            // Notion v3 toggle heuristic
442            if !ordered && is_notion_v3_toggle(li) {
443                let block_children: Vec<Handle> = li
444                    .children
445                    .borrow()
446                    .iter()
447                    .filter(|c| {
448                        if let Some(t) = element_tag(c) {
449                            matches!(
450                                t.as_str(),
451                                "p" | "div"
452                                    | "ul"
453                                    | "ol"
454                                    | "blockquote"
455                                    | "pre"
456                                    | "table"
457                                    | "details"
458                            ) || is_heading_tag(&t)
459                        } else {
460                            false
461                        }
462                    })
463                    .cloned()
464                    .collect();
465                let title_el = &block_children[0];
466                let title_spans = extract_spans(title_el);
467                let id = self.next_id();
468                let mut props = Map::new();
469                props.insert("collapsed".into(), Value::Bool(false));
470                if indent > 0 {
471                    props.insert("indent".into(), Value::from(indent));
472                }
473                let mut toggle = Block::with_props(id, "toggle", props);
474                toggle.content = title_spans;
475                self.push_root(toggle);
476                self.process_handles_with_indent(&block_children[1..], indent + 1);
477                continue;
478            }
479
480            // Plain list item
481            let spans = extract_spans_from_li(li);
482            if !is_all_whitespace(&spans) {
483                let id = self.next_id();
484                let mut props = Map::new();
485                props.insert("style".into(), Value::String(style.into()));
486                if indent > 0 {
487                    props.insert("indent".into(), Value::from(indent));
488                }
489                let mut b = Block::with_props(id, "list", props);
490                b.content = spans;
491                self.push_root(b);
492            }
493
494            self.recurse_nested_lists(li, indent);
495        }
496    }
497
498    fn recurse_nested_lists(&mut self, li: &Handle, indent: i64) {
499        for nested in li.children.borrow().iter() {
500            let Some(t) = element_tag(nested) else {
501                continue;
502            };
503            if t == "ul" {
504                let attrs = element_attrs(nested);
505                if has_class(&attrs, "toggle") {
506                    self.process_notion_toggle_list(nested, indent + 1);
507                } else {
508                    self.process_list(nested, false, indent + 1);
509                }
510            } else if t == "ol" {
511                self.process_list(nested, true, indent + 1);
512            } else if t == "details" {
513                self.process_toggle_details(nested, indent + 1);
514            }
515        }
516    }
517
518    fn process_notion_todo_list(&mut self, ul: &Handle) {
519        for li in ul.children.borrow().iter() {
520            if element_tag(li).as_deref() != Some("li") {
521                continue;
522            }
523
524            // `<ul data-devup-type="todo">` items often carry a label +
525            // checkbox inside — detect and reuse the direct-checkbox
526            // path.
527            let attrs = element_attrs(li);
528            let marker_checked = attrs
529                .iter()
530                .find(|a| a.name.local.as_ref() == "data-checked")
531                .map(|a| a.value.as_ref().eq_ignore_ascii_case("true"));
532            let checkbox = detect_any_checkbox(li);
533            let notion_checked = has_descendant_with_class(li, "checkbox-on");
534
535            let checked = marker_checked
536                .or(checkbox)
537                .or(Some(notion_checked))
538                .unwrap_or(false);
539
540            // Text extraction: prefer Notion's wrappers when present,
541            // fall back to the whole <li> minus checkboxes + nested
542            // lists.
543            let notion_wrapper = find_descendant_with_any_class(
544                li,
545                &["to-do-children-checked", "to-do-children-unchecked"],
546            );
547            let spans = if let Some(w) = notion_wrapper {
548                extract_spans(&w)
549            } else {
550                let clone = clone_node_without_checkboxes(li);
551                let clone = strip_nested_blocks(&clone);
552                extract_spans(&clone)
553            };
554            if !is_all_whitespace(&spans) {
555                let id = self.next_id();
556                let mut props = Map::new();
557                props.insert("checked".into(), Value::Bool(checked));
558                let mut b = Block::with_props(id, "todo", props);
559                b.content = spans;
560                self.push_root(b);
561            }
562        }
563    }
564
565    // ── Toggle children (from either <details> or v3 heuristic) ──
566
567    fn process_handles_with_indent(&mut self, handles: &[Handle], indent: i64) {
568        let synthetic = build_synthetic_parent(handles);
569        self.process_children_with_indent(&synthetic, indent);
570    }
571
572    // ── Table handler ────────────────────────────────────────────
573
574    fn process_table(&mut self, table_el: &Handle, indent: i64) {
575        // Flatten rows from tbody / thead / tfoot / bare <tr>.
576        let row_els: Vec<Handle> = collect_table_rows(table_el);
577        if row_els.is_empty() {
578            return;
579        }
580        let max_cols = row_els
581            .iter()
582            .map(|r| direct_children_of_tag_any(r, &["td", "th"]).len())
583            .max()
584            .unwrap_or(0);
585        if max_cols == 0 {
586            return;
587        }
588
589        // Build row & cell descendants first.
590        let mut row_ids: Vec<BlockId> = Vec::new();
591        for tr in &row_els {
592            let cells = direct_children_of_tag_any(tr, &["td", "th"]);
593            let mut cell_ids: Vec<BlockId> = Vec::with_capacity(max_cols);
594            for c in 0..max_cols {
595                let spans = cells.get(c).map(extract_spans).unwrap_or_default();
596                let mut props = cells
597                    .get(c)
598                    .and_then(extract_cell_props)
599                    .unwrap_or_default();
600                // Remove colspan/rowspan redundancy if cell only has
601                // them and no other props? Keep everything — mirror TS.
602                let cell_id = self.next_id();
603                let mut cell_block = if props.is_empty() {
604                    Block::new(cell_id.clone(), "table_cell")
605                } else {
606                    // Normalize colspan/rowspan number types to u64.
607                    normalize_span_numbers(&mut props);
608                    Block::with_props(cell_id.clone(), "table_cell", props)
609                };
610                cell_block.content = spans;
611                cell_ids.push(self.insert(cell_block));
612            }
613            let row_props = extract_row_props(tr).unwrap_or_default();
614            let row_id = self.next_id();
615            let mut row_block = if row_props.is_empty() {
616                Block::new(row_id.clone(), "table_row")
617            } else {
618                Block::with_props(row_id.clone(), "table_row", row_props)
619            };
620            row_block.children.clone_from(&cell_ids);
621            // Backfill `parent`
622            for cid in &cell_ids {
623                if let Some(c) = self.by_id.get_mut(cid) {
624                    c.parent = Some(row_id.clone());
625                }
626            }
627            row_ids.push(self.insert(row_block));
628        }
629
630        // Table props
631        let mut table_props = decode_props_from_element(table_el).unwrap_or_default();
632        if let Some(style_attr) = attr_value(table_el, "style") {
633            let decl = parse_inline_style(&style_attr);
634            if !table_props.contains_key("backgroundColor")
635                && let Some(v) = decl.background_color
636            {
637                table_props.insert("backgroundColor".into(), Value::String(v));
638            }
639            if !table_props.contains_key("borderColor")
640                && let Some(v) = decl.border_color
641            {
642                table_props.insert("borderColor".into(), Value::String(v));
643            }
644            if !table_props.contains_key("borderWidth")
645                && let Some(v) = decl.border_width
646            {
647                table_props.insert("borderWidth".into(), Value::String(v));
648            }
649            if !table_props.contains_key("borderStyle")
650                && let Some(v) = decl.border_style
651            {
652                table_props.insert("borderStyle".into(), Value::String(v));
653            }
654            if !table_props.contains_key("verticalAlign")
655                && let Some(v) = decl.vertical_align
656            {
657                table_props.insert("verticalAlign".into(), Value::String(v));
658            }
659            if !table_props.contains_key("padding")
660                && let Some(v) = decl.padding
661            {
662                table_props.insert("padding".into(), Value::String(v));
663            }
664        }
665        // Columns
666        if !table_props.contains_key("columns") {
667            let widths = extract_colgroup_widths(table_el, max_cols);
668            let cols: Vec<Value> = match widths {
669                Some(ws) => ws
670                    .into_iter()
671                    .map(|w| {
672                        let mut m = Map::new();
673                        m.insert("width".into(), Value::from(w));
674                        Value::Object(m)
675                    })
676                    .collect(),
677                None => (0..max_cols)
678                    .map(|_| {
679                        let mut m = Map::new();
680                        m.insert("width".into(), Value::from(120u64));
681                        Value::Object(m)
682                    })
683                    .collect(),
684            };
685            table_props.insert("columns".into(), Value::Array(cols));
686        }
687        if indent > 0 {
688            table_props.insert("indent".into(), Value::from(indent));
689        }
690        let table_id = self.next_id();
691        let mut table_block = Block::with_props(table_id.clone(), "table", table_props);
692        table_block.children.clone_from(&row_ids);
693        // Backfill parent on rows
694        for rid in &row_ids {
695            if let Some(r) = self.by_id.get_mut(rid) {
696                r.parent = Some(table_id.clone());
697            }
698        }
699        self.push_root(table_block);
700    }
701}
702
703fn normalize_span_numbers(props: &mut Map<String, Value>) {
704    for key in ["colspan", "rowspan"] {
705        if let Some(v) = props.get(key) {
706            let n = match v {
707                Value::Number(n) => n.as_u64(),
708                Value::String(s) => s.parse::<u64>().ok(),
709                _ => None,
710            };
711            if let Some(n) = n {
712                props.insert(key.into(), Value::from(n));
713            }
714        }
715    }
716}
717
718fn new_block(id: BlockId, ty: &str, indent: i64) -> Block {
719    if indent > 0 {
720        let mut props = Map::new();
721        props.insert("indent".into(), Value::from(indent));
722        Block::with_props(id, ty, props)
723    } else {
724        Block::new(id, ty)
725    }
726}
727
728// ── Helpers: tag recognition ──────────────────────────────────────
729
730pub(super) fn is_heading_tag(tag: &str) -> bool {
731    matches!(tag, "h1" | "h2" | "h3" | "h4" | "h5" | "h6")
732}
733
/// Lowercase tag names that the block walker treats as block-level.
///
/// `process_children_with_indent` buffers any element whose tag is not
/// in this list (and is not `details`) as inline content, and uses the
/// same test to decide whether a generic wrapper contains block
/// children. `details` is deliberately absent; callers check for it
/// separately.
static BLOCK_TAGS: &[&str] = &[
    "address",
    "article",
    "aside",
    "blockquote",
    "div",
    "dd",
    "dl",
    "dt",
    "figcaption",
    "figure",
    "footer",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "header",
    "hr",
    "li",
    "main",
    "nav",
    "ol",
    "p",
    "pre",
    "section",
    "table",
    "tbody",
    "td",
    "th",
    "thead",
    "tfoot",
    "tr",
    "ul",
];
770
/// Lowercase tag names of the elements that make up an HTML table's
/// structure (the table itself, its sections, rows, cells, column
/// definitions and caption).
// NOTE(review): not referenced anywhere in this file; presumably consumed
// by another module — confirm against its users.
pub(super) static TABLE_STRUCTURE_TAGS: &[&str] = &[
    "table", "thead", "tbody", "tfoot", "tr", "td", "th", "col", "colgroup", "caption",
];