Skip to main content

damascene_html/
transform.rs

1//! DOM walker that maps the tier-1 HTML tag set onto Damascene `El` widgets.
2//!
3//! The walker mirrors `damascene-markdown::Walker` in spirit: a small flat
4//! `InlineState` carries italic / bold / strike / underline / mono /
5//! link / inline-color through nested inline tags, and a context split
6//! (`walk_block_children` vs `walk_inline_children`) decides whether
7//! each child is rendered as a block-level Damascene widget or appended to
8//! an inline run buffer.
9//!
10//! Unknown tags fall back to a context-sensitive pass-through —
11//! block-context: recurse as block; inline-context: recurse as inline.
12//! That matches what browsers do with tag-soup HTML and keeps the
13//! transformer total: every input produces an El, even if some content
14//! gets flattened.
15
16use damascene_core::prelude::*;
17// `namespace_url` is the trait the `ns!()` macro consumes via fully-
18// qualified path. Without it in scope the macro expands to an empty
19// atom and every `name.ns != ns!(html)` comparison spuriously rejects
20// HTML elements.
21#[allow(unused_imports)]
22use html5ever::namespace_url;
23use html5ever::ns;
24use markup5ever_rcdom::{Handle, NodeData};
25
26use crate::css::{ComputedStyle, read_inline_style};
27use crate::lints::{Finding, FindingKind, Lints};
28use crate::options::HtmlOptions;
29use crate::parser::{parse_document_dom, parse_fragment_dom};
30use crate::sanitize::{is_blocked_attr, is_blocked_tag, is_safe_url};
31use crate::selectors::Stylesheet;
32
/// Walker-wide context — read-only options bag, the cascaded
/// stylesheet collected from `<style>` blocks at entry, and the
/// lint collector that the per-element apply / margin-reconciliation
/// passes push findings into. Threaded through every walker function
/// so the tag dispatchers can read all three without separate
/// parameter plumbing.
///
/// All three fields are shared borrows: the entry points own the
/// values and build one `WalkCx` per walk.
struct WalkCx<'a> {
    /// Caller-supplied options (e.g. `sanitize_styles`).
    opts: &'a HtmlOptions,
    /// Rules gathered from the document's `<style>` blocks; consulted
    /// by `cascade_style` for every element.
    stylesheet: &'a Stylesheet,
    /// Finding sink. Pushed to through a shared reference, so the
    /// walkers never need `&mut` access to the context.
    lints: &'a Lints,
}
44
45/// Render an HTML document as an Damascene `El`. Returns a `column([...])`
46/// of block-level Damascene widgets — the same shape an author would have
47/// hand-written, and the same shape `damascene_markdown::md` returns.
48pub fn html(input: &str) -> El {
49    html_with_options(input, HtmlOptions::default())
50}
51
52/// Render an HTML document with explicit options.
53pub fn html_with_options(input: &str, opts: HtmlOptions) -> El {
54    html_with_lints(input, opts).0
55}
56
57/// Like [`html_with_options`] but also returns the lint findings
58/// gathered during the walk. Use this when you need to surface
59/// dropped declarations, unsupported selectors, or margin-asymmetry
60/// reconciliations to the author or downstream tools.
61pub fn html_with_lints(input: &str, opts: HtmlOptions) -> (El, Vec<Finding>) {
62    // `document` must stay in scope for the duration of the walk.
63    // `markup5ever_rcdom::Node::Drop` iteratively `mem::take`s the
64    // children of every descendant to avoid stack overflow on deep
65    // trees; if the document handle drops while we still hold a body
66    // sub-handle, the body's `children` Vec is silently emptied.
67    let document = parse_document_dom(input);
68    let lints = Lints::default();
69    let stylesheet = collect_stylesheets(&document, &opts, &lints);
70    let body = find_body(&document).unwrap_or_else(|| document.clone());
71    let cx = WalkCx {
72        opts: &opts,
73        stylesheet: &stylesheet,
74        lints: &lints,
75    };
76    let state = InlineState::default();
77    let seq = walk_block_children(&body, &state, &cx);
78    let gap = seq.gap.unwrap_or(tokens::SPACE_4);
79    let leading_pad = seq.leading_pad;
80    let trailing_pad = seq.trailing_pad;
81    let mut el = column(seq.blocks)
82        .gap(gap)
83        .width(Size::Fill(1.0))
84        .height(Size::Hug);
85    if let Some(top) = leading_pad {
86        el = el.pt(top);
87    }
88    if let Some(bottom) = trailing_pad {
89        el = el.pb(bottom);
90    }
91    (el, lints.into_vec())
92}
93
94/// Like [`html_with_options`] but returns the block-level Els
95/// directly instead of wrapping them in a `column`. Intended for
96/// callers (e.g. `damascene-markdown`'s block-HTML event handler) that
97/// already have a containing block frame and just want the produced
98/// children appended.
99pub fn html_blocks(input: &str, opts: HtmlOptions) -> Vec<El> {
100    html_blocks_with_lints(input, opts).0
101}
102
103/// Lints-returning sibling of [`html_blocks`]. The reconciled `gap`
104/// is not surfaced — the calling block frame already owns its rhythm
105/// — but margin-asymmetry findings still land in the result.
106pub fn html_blocks_with_lints(input: &str, opts: HtmlOptions) -> (Vec<El>, Vec<Finding>) {
107    let document = parse_document_dom(input);
108    let lints = Lints::default();
109    let stylesheet = collect_stylesheets(&document, &opts, &lints);
110    let body = find_body(&document).unwrap_or_else(|| document.clone());
111    let cx = WalkCx {
112        opts: &opts,
113        stylesheet: &stylesheet,
114        lints: &lints,
115    };
116    let state = InlineState::default();
117    let seq = walk_block_children(&body, &state, &cx);
118    (seq.blocks, lints.into_vec())
119}
120
121/// Inline-only entry point: parse `input` as an HTML fragment and
122/// return the inline runs it produces. The intended caller is
123/// `damascene-markdown`'s `Event::InlineHtml` handler, which buffers
124/// consecutive inline-HTML events into one string, hands the buffer
125/// here, and appends the produced runs to the open paragraph /
126/// heading / link / table cell.
127///
128/// Block-level tags appearing in the fragment are flattened: their
129/// children render inline in source order rather than terminating the
130/// paragraph.
131pub fn html_fragment_inline(input: &str, opts: HtmlOptions) -> Vec<El> {
132    html_fragment_inline_with_lints(input, opts).0
133}
134
135/// Lints-returning sibling of [`html_fragment_inline`].
136pub fn html_fragment_inline_with_lints(input: &str, opts: HtmlOptions) -> (Vec<El>, Vec<Finding>) {
137    // See `html_with_options` for the rcdom drop trap — keep
138    // `document` alive for the whole walk.
139    let document = parse_fragment_dom(input);
140    let lints = Lints::default();
141    let stylesheet = collect_stylesheets(&document, &opts, &lints);
142    let root = find_fragment_root(&document).unwrap_or_else(|| document.clone());
143    let cx = WalkCx {
144        opts: &opts,
145        stylesheet: &stylesheet,
146        lints: &lints,
147    };
148    let state = InlineState::default();
149    let mut runs = Vec::new();
150    for child in root.children.borrow().iter() {
151        walk_inline_node(child, &state, &mut runs, &cx);
152    }
153    (runs, lints.into_vec())
154}
155
156/// Walk the DOM collecting the text contents of every `<style>`
157/// element and parse them into a single [`Stylesheet`]. Source order
158/// is preserved across blocks. `<head>` and other sanitize-blocked
159/// ancestors don't gate this walk — the cascade is the only way to
160/// reach `<style>` inside `<head>`, so we descend everywhere except
161/// `<script>` / `<iframe>` / friends.
162fn collect_stylesheets(root: &Handle, opts: &HtmlOptions, lints: &Lints) -> Stylesheet {
163    let mut bodies: Vec<String> = Vec::new();
164    walk_for_style_blocks(root, &mut bodies);
165    if opts.sanitize_styles {
166        if !bodies.is_empty() {
167            lints.push(
168                FindingKind::SanitizedStyle,
169                format!(
170                    "{} <style> block(s) dropped by sanitize_styles",
171                    bodies.len()
172                ),
173            );
174        }
175        return Stylesheet::default();
176    }
177    Stylesheet::from_blocks(bodies.iter().map(|s| s.as_str()), lints)
178}
179
180fn walk_for_style_blocks(node: &Handle, out: &mut Vec<String>) {
181    if let NodeData::Element { name, .. } = &node.data {
182        let local = name.local.as_ref().to_ascii_lowercase();
183        // Always recurse into structural elements (html/head/body)
184        // even though `is_blocked_tag` blocks them from rendering.
185        // Bail out of execution-context tags so we don't walk into
186        // script/style payloads we shouldn't.
187        if matches!(local.as_str(), "script" | "iframe" | "noscript") {
188            return;
189        }
190        if local == "style" {
191            let mut body = String::new();
192            collect_text_recursive(node, &mut body);
193            if !body.trim().is_empty() {
194                out.push(body);
195            }
196            // `<style>` contents aren't recursed for nested style
197            // elements (none in valid HTML).
198            return;
199        }
200    }
201    for child in node.children.borrow().iter() {
202        walk_for_style_blocks(child, out);
203    }
204}
205
206// ---------- DOM helpers ----------
207
208/// Walk the document tree to find the `<body>` element html5ever
209/// always synthesises for full-document parses.
210fn find_body(node: &Handle) -> Option<Handle> {
211    if let NodeData::Element { name, .. } = &node.data
212        && name.local.as_ref() == "body"
213    {
214        return Some(node.clone());
215    }
216    for child in node.children.borrow().iter() {
217        if let Some(found) = find_body(child) {
218            return Some(found);
219        }
220    }
221    None
222}
223
224/// `parse_fragment` wraps the input in a synthetic `<html>` element
225/// whose children are the fragment's top-level nodes. Find it so the
226/// caller iterates the fragment's siblings rather than the wrapper.
227fn find_fragment_root(node: &Handle) -> Option<Handle> {
228    if let NodeData::Element { name, .. } = &node.data
229        && name.local.as_ref() == "html"
230    {
231        return Some(node.clone());
232    }
233    for child in node.children.borrow().iter() {
234        if let Some(found) = find_fragment_root(child) {
235            return Some(found);
236        }
237    }
238    None
239}
240
241fn element_tag(node: &Handle) -> Option<String> {
242    if let NodeData::Element { name, .. } = &node.data {
243        if name.ns != ns!(html) {
244            return None;
245        }
246        Some(name.local.as_ref().to_ascii_lowercase())
247    } else {
248        None
249    }
250}
251
252fn element_attr(node: &Handle, attr: &str) -> Option<String> {
253    let NodeData::Element { attrs, .. } = &node.data else {
254        return None;
255    };
256    for a in attrs.borrow().iter() {
257        if a.name.local.as_ref().eq_ignore_ascii_case(attr)
258            && !is_blocked_attr(a.name.local.as_ref())
259        {
260            return Some(a.value.to_string());
261        }
262    }
263    None
264}
265
266/// Split an element's `class` attribute on whitespace.
267fn element_classes(node: &Handle) -> Vec<String> {
268    element_attr(node, "class")
269        .map(|s| {
270            s.split_ascii_whitespace()
271                .map(String::from)
272                .collect::<Vec<_>>()
273        })
274        .unwrap_or_default()
275}
276
277/// Read the cascaded style for `node`: matching `<style>` block rules
278/// (sorted by specificity then source order) flattened, then the
279/// element's inline `style="..."` declarations layered on top so
280/// inline always wins over the cascade — matches CSS's
281/// "author-stylesheet inline beats non-inline" rule and is the
282/// expected mental model for embedded scrap authors.
283fn cascade_style(node: &Handle, cx: &WalkCx<'_>) -> ComputedStyle {
284    let mut style = if cx.stylesheet.is_empty() {
285        ComputedStyle::default()
286    } else {
287        let tag = element_tag(node).unwrap_or_default();
288        let classes = element_classes(node);
289        let class_refs: Vec<&str> = classes.iter().map(String::as_str).collect();
290        let id = element_attr(node, "id");
291        cx.stylesheet.cascade(&tag, &class_refs, id.as_deref())
292    };
293    let inline = read_inline_style(node, cx.lints, cx.opts.sanitize_styles);
294    style.merge(&inline);
295    style
296}
297
298// ---------- Inline state ----------
299
/// Inline styling currently in effect for new text runs. Mirrors
/// `damascene-markdown::InlineState` but extends it to the HTML-specific
/// tags (`<u>`, `<kbd>`, `<mark>`, `<code>` as an inline run), to
/// `<a href>` which carries a value rather than just a flag, and to
/// per-element `style="..."` overrides.
///
/// The state is cloned for each element's children rather than
/// mutated in place, so changes made for one subtree never leak into
/// its siblings.
#[derive(Default, Clone)]
struct InlineState {
    // Tag-derived depth counters. Bumped on entry into a styled tag
    // (`<strong>`, `<em>`, …) and consulted by `apply` to decide which
    // boolean / role modifier to layer onto each text leaf. A counter
    // greater than zero means "on"; CSS cancellations
    // (`font-style: normal`, `text-decoration: none`) reset the
    // italic / underline / strike counters to zero.
    italic_depth: u32,
    bold_depth: u32,
    strike_depth: u32,
    underline_depth: u32,
    // `code_depth` takes precedence over `mono_depth` in `apply`:
    // `.code()` is applied instead of `.mono()` when both are set.
    code_depth: u32,
    mono_depth: u32,

    // Value overrides (innermost wins). Sourced from semantic tags
    // (`<mark>` sets `text_bg`, `<a>` sets `link`) and from per-
    // element `style="..."`. CSS declarations beat tag defaults
    // because the dispatcher applies style overrides after tag updates.
    text_color: Option<Color>,
    text_bg: Option<Color>,
    font_size: Option<f32>,
    // When set, `apply` uses this weight and ignores `bold_depth`.
    font_weight: Option<FontWeight>,
    /// Most-recent open `<a href="...">`. Inline tags inside an `<a>`
    /// inherit the same href so the painter groups them as one link.
    link: Option<String>,
}
329
330impl InlineState {
331    fn apply(&self, mut el: El) -> El {
332        // Explicit weight override wins over `<strong>`-derived bold.
333        if let Some(w) = self.font_weight {
334            el = el.font_weight(w);
335        } else if self.bold_depth > 0 {
336            el = el.bold();
337        }
338        if self.italic_depth > 0 {
339            el = el.italic();
340        }
341        if self.strike_depth > 0 {
342            el = el.strikethrough();
343        }
344        if self.underline_depth > 0 {
345            el = el.underline();
346        }
347        if self.code_depth > 0 {
348            el = el.code();
349        } else if self.mono_depth > 0 {
350            // `<kbd>` etc. use mono without the inline-code surface
351            // role; `<code>`'s `.code()` already implies mono.
352            el = el.mono();
353        }
354        if let Some(c) = self.text_color {
355            el = el.text_color(c);
356        }
357        if let Some(c) = self.text_bg {
358            el = el.background(c);
359        }
360        if let Some(s) = self.font_size {
361            el = el.font_size(s);
362        }
363        if let Some(href) = &self.link {
364            el = el.link(href.clone());
365        }
366        el
367    }
368
369    /// Fold CSS-shaped value overrides into the state in place. Called
370    /// after a tag update so explicit `style="..."` declarations beat
371    /// the tag's default (e.g. `<mark style="background: blue">` uses
372    /// blue, not the WARNING-yellow `<mark>` default).
373    fn merge_style_overrides(&mut self, style: &ComputedStyle) {
374        if let Some(c) = style.text_color {
375            self.text_color = Some(c);
376        }
377        if let Some(c) = style.background {
378            self.text_bg = Some(c);
379        }
380        if let Some(s) = style.font_size {
381            self.font_size = Some(s);
382        }
383        if let Some(w) = style.font_weight {
384            self.font_weight = Some(w);
385        }
386        // `Some(false)` is a cancellation override (`font-style:
387        // normal`, `text-decoration: none`): it zeroes the inherited
388        // depth so `<em><span style="font-style: normal">x</span></em>`
389        // renders upright. The state is cloned per child, so the
390        // siblings outside the span keep their depth.
391        match style.italic {
392            Some(true) => self.italic_depth += 1,
393            Some(false) => self.italic_depth = 0,
394            None => {}
395        }
396        match style.underline {
397            Some(true) => self.underline_depth += 1,
398            Some(false) => self.underline_depth = 0,
399            None => {}
400        }
401        match style.strikethrough {
402            Some(true) => self.strike_depth += 1,
403            Some(false) => self.strike_depth = 0,
404            None => {}
405        }
406        if let Some(true) = style.font_mono {
407            // `font-family: monospace` (and friends) bumps the mono
408            // counter the same way `<kbd>` does, so the apply path
409            // already handles it — no new branch needed.
410            self.mono_depth += 1;
411        }
412    }
413}
414
415// ---------- Tag classification ----------
416
/// Tags whose semantic is purely inline. Block tags appearing inside
/// an inline buffer get coerced to their inline-equivalent flattening.
///
/// `tag` must already be lower-cased; the comparison is exact.
///
/// `ins` is listed alongside its counterpart `del`, and `label`
/// alongside the form controls it wraps (`button`, `input`). Both are
/// phrasing content in HTML; leaving them out made the block walker
/// treat them as block containers, which split the surrounding text
/// into separate anonymous paragraphs.
fn is_inline_tag(tag: &str) -> bool {
    matches!(
        tag,
        "a" | "abbr"
            | "b"
            | "bdi"
            | "bdo"
            | "br"
            | "button"
            | "cite"
            | "code"
            | "data"
            | "del"
            | "dfn"
            | "em"
            | "i"
            | "img"
            | "input"
            | "ins"
            | "kbd"
            | "label"
            | "mark"
            | "q"
            | "s"
            | "samp"
            | "small"
            | "span"
            | "strike"
            | "strong"
            | "sub"
            | "sup"
            | "time"
            | "u"
            | "var"
            | "wbr"
    )
}
454
455/// Whether a DOM node — element or text — is "inline" for block-context
456/// flow. Comments and whitespace-only text count as inline so they can
457/// be absorbed into a pending paragraph buffer rather than triggering
458/// an anonymous paragraph flush.
459fn is_inline_node(node: &Handle) -> bool {
460    match &node.data {
461        NodeData::Text { .. } | NodeData::Comment { .. } => true,
462        NodeData::Element { name, .. } => {
463            if name.ns != ns!(html) {
464                return true;
465            }
466            let tag = name.local.as_ref().to_ascii_lowercase();
467            if is_blocked_tag(&tag) {
468                return true;
469            }
470            is_inline_tag(&tag)
471        }
472        _ => true,
473    }
474}
475
476// ---------- Block walker ----------
477
/// Reconciled output of [`walk_block_children`]: the produced block
/// Els plus the parent-side rhythm derived from sibling margins.
///
/// `gap` is `Some(px)` when at least one block child declared a
/// margin and the per-sibling-pair `max(prev.margin_bottom,
/// next.margin_top)` values agreed. Asymmetric pairs collapse to the
/// largest value seen and emit a [`FindingKind::MarginAsymmetryFlattened`].
/// It is `None` when there are fewer than two blocks or when the
/// agreed value is not positive.
///
/// `leading_pad` / `trailing_pad` are the first child's `margin-top`
/// / last child's `margin-bottom` lifted as caller-facing hints —
/// `html_with_lints` folds them into the outer column's padding when
/// no padding is otherwise declared. Container builders that own
/// their own padding (`blockquote`, `figure`) ignore these fields.
pub(crate) struct BlockSequence {
    /// Block-level elements in source order, margins already stripped.
    pub blocks: Vec<El>,
    /// Uniform spacing the parent should put between `blocks`.
    pub gap: Option<f32>,
    /// First block's top margin, only when greater than zero.
    pub leading_pad: Option<f32>,
    /// Last block's bottom margin, only when greater than zero.
    pub trailing_pad: Option<f32>,
}
497
498fn walk_block_children(parent: &Handle, state: &InlineState, cx: &WalkCx<'_>) -> BlockSequence {
499    let mut produced: Vec<(El, Option<Sides>)> = Vec::new();
500    let mut inline_buf: Vec<El> = Vec::new();
501    for child in parent.children.borrow().iter() {
502        if is_inline_node(child) {
503            walk_inline_node(child, state, &mut inline_buf, cx);
504        } else {
505            flush_inline_buf(&mut inline_buf, &mut produced);
506            walk_block_node(child, state, &mut produced, cx);
507        }
508    }
509    flush_inline_buf(&mut inline_buf, &mut produced);
510    reconcile_margins(produced, cx)
511}
512
513/// Fold an accumulated inline-run buffer into an anonymous paragraph
514/// block. Drops a buffer that contains only whitespace runs.
515/// Anonymous paragraphs contribute no margin to the parent's
516/// reconciliation — they're a synthetic construct, not an authored
517/// element.
518fn flush_inline_buf(inline_buf: &mut Vec<El>, blocks: &mut Vec<(El, Option<Sides>)>) {
519    if inline_buf.is_empty() {
520        return;
521    }
522    let runs = normalize_inline_runs(std::mem::take(inline_buf));
523    if runs.is_empty() || runs_are_blank(&runs) {
524        return;
525    }
526    blocks.push((build_paragraph(runs), None));
527}
528
529/// Collapse adjacent sibling margins into a single parent `gap`,
530/// matching CSS's `max(prev.margin_bottom, next.margin_top)` rule.
531/// When pair values agree, the gap is exact and lossless. When they
532/// disagree we pick the largest and emit a lint so the author knows
533/// the rhythm collapsed.
534fn reconcile_margins(produced: Vec<(El, Option<Sides>)>, cx: &WalkCx<'_>) -> BlockSequence {
535    let leading_pad = produced
536        .first()
537        .and_then(|(_, m)| m.map(|s| s.top))
538        .filter(|v| *v > 0.0);
539    let trailing_pad = produced
540        .last()
541        .and_then(|(_, m)| m.map(|s| s.bottom))
542        .filter(|v| *v > 0.0);
543
544    let mut pair_gaps: Vec<f32> = Vec::with_capacity(produced.len().saturating_sub(1));
545    for pair in produced.windows(2) {
546        let prev_bottom = pair[0].1.map(|s| s.bottom).unwrap_or(0.0);
547        let next_top = pair[1].1.map(|s| s.top).unwrap_or(0.0);
548        pair_gaps.push(prev_bottom.max(next_top));
549    }
550
551    let gap = if pair_gaps.is_empty() {
552        None
553    } else if pair_gaps.iter().all(|&g| g == pair_gaps[0]) {
554        if pair_gaps[0] > 0.0 {
555            Some(pair_gaps[0])
556        } else {
557            None
558        }
559    } else {
560        let max = pair_gaps.iter().cloned().fold(0.0_f32, f32::max);
561        let min = pair_gaps.iter().cloned().fold(f32::INFINITY, f32::min);
562        cx.lints.push(
563            FindingKind::MarginAsymmetryFlattened,
564            format!(
565                "sibling pair margins ranged {min}..{max}px; flattened to {max}px gap on parent"
566            ),
567        );
568        Some(max)
569    };
570
571    let blocks: Vec<El> = produced.into_iter().map(|(el, _)| el).collect();
572    BlockSequence {
573        blocks,
574        gap,
575        leading_pad,
576        trailing_pad,
577    }
578}
579
580fn build_paragraph(runs: Vec<El>) -> El {
581    if let Some(plain) = single_plain_text(&runs) {
582        paragraph(plain)
583    } else {
584        text_runs(runs)
585            .wrap_text()
586            .width(Size::Fill(1.0))
587            .height(Size::Hug)
588    }
589}
590
591fn walk_block_node(
592    node: &Handle,
593    state: &InlineState,
594    blocks: &mut Vec<(El, Option<Sides>)>,
595    cx: &WalkCx<'_>,
596) {
597    let Some(tag) = element_tag(node) else {
598        return;
599    };
600    if is_blocked_tag(&tag) {
601        return;
602    }
603    if is_unsupported_block_tag(&tag) {
604        cx.lints.push(
605            FindingKind::UnsupportedTag,
606            format!("<{tag}> has no Damascene equivalent; contents flattened"),
607        );
608        // Fall through to the unknown-tag arm below — flatten content
609        // so authored text isn't lost.
610    }
611    let style = cascade_style(node, cx);
612    let margin = style.margin;
613    match tag.as_str() {
614        "p" => {
615            let runs = collect_inline_runs(node, state, cx);
616            if !runs_are_blank(&runs) {
617                blocks.push((style.apply_to_block(build_paragraph(runs)), margin));
618            }
619        }
620        "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => {
621            let runs = collect_inline_runs(node, state, cx);
622            blocks.push((style.apply_to_block(build_heading(&tag, runs)), margin));
623        }
624        "br" => {
625            blocks.push((style.apply_to_block(paragraph("")), margin));
626        }
627        "hr" => blocks.push((style.apply_to_block(divider()), margin)),
628        "ul" => blocks.push((
629            style.apply_to_block(build_unordered_list(node, state, cx)),
630            margin,
631        )),
632        "ol" => blocks.push((
633            style.apply_to_block(build_ordered_list(node, state, cx)),
634            margin,
635        )),
636        "dl" => blocks.push((
637            style.apply_to_block(build_definition_list(node, state, cx)),
638            margin,
639        )),
640        "blockquote" => {
641            let inner = walk_block_children(node, state, cx);
642            blocks.push((style.apply_to_block(blockquote(inner.blocks)), margin));
643        }
644        "pre" => blocks.push((style.apply_to_block(build_pre(node)), margin)),
645        "table" => blocks.push((style.apply_to_block(build_table(node, state, cx)), margin)),
646        "img" => {
647            if let Some(placeholder) = build_image_placeholder(node) {
648                blocks.push((style.apply_to_block(placeholder), margin));
649            }
650        }
651        "details" => blocks.push((style.apply_to_block(build_details(node, state, cx)), margin)),
652        "figure" => blocks.push((style.apply_to_block(build_figure(node, state, cx)), margin)),
653        // Generic block containers — pass through to children unless
654        // the element carries CSS, in which case wrap the children in
655        // a styled column so the layout / visual properties have a
656        // surface to land on. Untouched containers stay flat so a
657        // `<section>` wrapping a single `<h2>` doesn't gain a useless
658        // extra level of nesting.
659        "div" | "section" | "article" | "main" | "header" | "footer" | "nav" | "aside"
660        | "summary" | "figcaption" | "form" | "fieldset" | "legend" | "body" | "html" => {
661            push_generic_container(node, state, &style, margin, blocks, cx);
662        }
663        _ => {
664            push_generic_container(node, state, &style, margin, blocks, cx);
665        }
666    }
667}
668
/// Tags we don't render but want to call out so authors see what's
/// dropped. These slip past `is_blocked_tag` (which is the security
/// filter) and reach the walker as unknown content. We still recurse
/// into their children to preserve any text, but emit a finding so
/// the author knows the wrapping element was lost.
///
/// `tag` must already be lower-cased; the comparison is exact.
fn is_unsupported_block_tag(tag: &str) -> bool {
    const UNSUPPORTED: [&str; 8] = [
        "video", "audio", "canvas", "dialog", "menu", "marquee", "applet", "bgsound",
    ];
    UNSUPPORTED.iter().any(|&known| known == tag)
}
680
681/// Wrap a generic-container element's children in a styled `column`
682/// (when CSS calls for it) or pass them through flat. Honors the
683/// child-margin reconciliation by folding the inner sequence's `gap`
684/// onto the wrapper, and applies any container-layout overrides
685/// (display:flex / align-items / justify-content / overflow) before
686/// the wrap-in-scroll step.
687fn push_generic_container(
688    node: &Handle,
689    state: &InlineState,
690    style: &ComputedStyle,
691    margin: Option<Sides>,
692    blocks: &mut Vec<(El, Option<Sides>)>,
693    cx: &WalkCx<'_>,
694) {
695    let inner = walk_block_children(node, state, cx);
696    let needs_wrap = !style.is_empty();
697    if !needs_wrap {
698        // Flat pass-through. The child sequence's reconciliation
699        // results are dropped here — the surrounding parent's
700        // reconciliation already collapses across all flattened
701        // descendants in source order via the same pass.
702        blocks.extend(inner.blocks.into_iter().map(|el| (el, None)));
703        return;
704    }
705    let gap = inner.gap.unwrap_or(0.0);
706    let mut wrapper = column(inner.blocks)
707        .gap(gap)
708        .width(Size::Fill(1.0))
709        .height(Size::Hug);
710    // Fold the inner sequence's leading / trailing margin pads onto
711    // the wrapper's padding when CSS hasn't declared one explicitly.
712    if style.padding.is_none() {
713        if let Some(top) = inner.leading_pad {
714            wrapper = wrapper.pt(top);
715        }
716        if let Some(bottom) = inner.trailing_pad {
717            wrapper = wrapper.pb(bottom);
718        }
719    }
720    wrapper = style.apply_container_layout(wrapper);
721    wrapper = style.apply_to_block(wrapper);
722    wrapper = style.wrap_with_overflow(wrapper);
723    blocks.push((wrapper, margin));
724}
725
726// ---------- Inline walker ----------
727
728fn walk_inline_node(node: &Handle, state: &InlineState, runs: &mut Vec<El>, cx: &WalkCx<'_>) {
729    match &node.data {
730        NodeData::Text { contents } => {
731            let s = contents.borrow().to_string();
732            if s.is_empty() {
733                return;
734            }
735            // CSS `white-space: normal`: runs of document whitespace
736            // (including the newlines + indentation of pretty-printed
737            // source) collapse to a single space. Damascene's text
738            // pipeline treats a literal `\n` as a hard line break, so
739            // skipping this would turn source formatting into visible
740            // breaks. Block-edge trimming happens later, in
741            // `normalize_inline_runs`, once the full run sequence is
742            // known.
743            runs.push(state.apply(text(collapse_whitespace(&s))));
744        }
745        NodeData::Comment { .. } => {}
746        NodeData::Element { name, .. } => {
747            if name.ns != ns!(html) {
748                // Foreign-namespace subtree — inline `<svg>`, MathML
749                // `<math>`. Nothing in it maps onto the widget
750                // vocabulary, so the whole subtree drops; the finding
751                // is the author's only signal.
752                cx.lints.push(
753                    FindingKind::UnsupportedTag,
754                    format!(
755                        "<{}> (foreign-namespace subtree dropped)",
756                        name.local.as_ref()
757                    ),
758                );
759                return;
760            }
761            let tag = name.local.as_ref().to_ascii_lowercase();
762            if is_blocked_tag(&tag) {
763                return;
764            }
765            dispatch_inline_element(node, &tag, state, runs, cx);
766        }
767        _ => {}
768    }
769}
770
771fn dispatch_inline_element(
772    node: &Handle,
773    tag: &str,
774    state: &InlineState,
775    runs: &mut Vec<El>,
776    cx: &WalkCx<'_>,
777) {
778    match tag {
779        "br" => runs.push(hard_break()),
780        "img" => {
781            if let Some(placeholder) = build_image_placeholder(node) {
782                // The placeholder builder returns a text El styled as
783                // muted italic plus an optional link; reapply the
784                // current inline state so an `<img>` inside `<strong>`
785                // still reads as bold-italic.
786                runs.push(state.apply(placeholder));
787            }
788        }
789        "button" => {
790            let label = inline_text_only(node);
791            runs.push(button(label));
792        }
793        "input" => {
794            if let Some(el) = build_html_input(node) {
795                runs.push(el);
796            }
797        }
798        _ => {
799            let next = child_inline_state(node, tag, state, cx);
800            walk_inline_children(node, &next, runs, cx);
801        }
802    }
803}
804
805/// Collect plain text content from an element's subtree, ignoring
806/// inline markup. Used by `<button>` to extract a flat label string.
807fn inline_text_only(node: &Handle) -> String {
808    let mut out = String::new();
809    collect_text_recursive(node, &mut out);
810    out.split_whitespace().collect::<Vec<_>>().join(" ")
811}
812
813/// Build the cosmetic Damascene widget for an HTML `<input>` element. v1
814/// only honours `type="checkbox"`; other input types return `None` so
815/// the walker silently skips them.
816fn build_html_input(node: &Handle) -> Option<El> {
817    let ty = element_attr(node, "type").unwrap_or_else(|| "text".to_string());
818    if !ty.eq_ignore_ascii_case("checkbox") {
819        return None;
820    }
821    let checked = element_attr(node, "checked").is_some();
822    // Cosmetic checkbox: derive the routing key from the element's
823    // `id` when present so repeated inputs stay distinguishable.
824    let key = match element_attr(node, "id") {
825        Some(id) => format!("html-checkbox-{id}"),
826        None => "html-checkbox".to_string(),
827    };
828    Some(checkbox(key, checked))
829}
830
831/// Build the inline state that should govern an element's children.
832/// Folds the tag's semantic effect (`<strong>` bumps bold depth,
833/// `<mark>` sets the highlight background, `<a>` adopts the href)
834/// then layers the element's `style="..."` declarations on top so
835/// explicit CSS always wins over the tag default.
836fn child_inline_state(
837    node: &Handle,
838    tag: &str,
839    state: &InlineState,
840    cx: &WalkCx<'_>,
841) -> InlineState {
842    let mut next = state.clone();
843    match tag {
844        "strong" | "b" => next.bold_depth += 1,
845        "em" | "i" | "cite" | "dfn" | "var" => next.italic_depth += 1,
846        "u" => next.underline_depth += 1,
847        "s" | "strike" | "del" => next.strike_depth += 1,
848        "code" => next.code_depth += 1,
849        "kbd" | "samp" => next.mono_depth += 1,
850        "mark" => {
851            // Soft yellow band behind the glyphs. Snapshot of the
852            // theme's WARNING token at build time — explicit
853            // `style="background: ..."` on the `<mark>` overrides
854            // because the style merge runs after this assignment.
855            next.text_bg = Some(tokens::WARNING.with_alpha_u8(60));
856        }
857        "a" => {
858            // Inner `<a>` overrides outer href (browser semantics:
859            // nested `<a>` is invalid, but we take the innermost).
860            if let Some(href) = element_attr(node, "href").filter(|h| is_safe_url(h)) {
861                next.link = Some(href);
862            }
863        }
864        // Pass-through inline tags — no state mutation, but style
865        // overrides on a `<span>` still take effect via the merge
866        // below. `<sub>` / `<sup>` lose their baseline shift in v1
867        // (no inline baseline-shift primitive yet) but their content
868        // still renders.
869        "span" | "abbr" | "bdi" | "bdo" | "data" | "q" | "small" | "time" | "wbr" | "sub"
870        | "sup" => {}
871        // Unknown tag in inline context: flatten its children. This
872        // includes block-shaped tags appearing inside an inline
873        // buffer — exactly the tag-soup coercion browsers do.
874        _ => {}
875    }
876    let style = cascade_style(node, cx);
877    next.merge_style_overrides(&style);
878    next
879}
880
881fn walk_inline_children(node: &Handle, state: &InlineState, runs: &mut Vec<El>, cx: &WalkCx<'_>) {
882    for child in node.children.borrow().iter() {
883        walk_inline_node(child, state, runs, cx);
884    }
885}
886
887fn collect_inline_runs(node: &Handle, state: &InlineState, cx: &WalkCx<'_>) -> Vec<El> {
888    let mut runs = Vec::new();
889    walk_inline_children(node, state, &mut runs, cx);
890    normalize_inline_runs(runs)
891}
892
/// Collapse each run of HTML document whitespace (space, tab, CR, LF,
/// FF) into one space, per CSS `white-space: normal`.
///
/// Leading and trailing runs are reduced to a single edge space rather
/// than removed; [`normalize_inline_runs`] decides whether that space
/// survives once the neighbouring runs are known. U+00A0 and other
/// non-ASCII spaces pass through untouched, since `&nbsp;` exists to
/// defeat collapsing.
fn collapse_whitespace(s: &str) -> String {
    const fn is_html_space(c: char) -> bool {
        matches!(c, ' ' | '\t' | '\n' | '\r' | '\x0C')
    }

    let mut collapsed = String::with_capacity(s.len());
    let mut prev_was_space = false;
    for c in s.chars() {
        let is_space = is_html_space(c);
        match (is_space, prev_was_space) {
            // Second or later character of a whitespace run: already
            // represented by the space emitted for the first one.
            (true, true) => {}
            (true, false) => collapsed.push(' '),
            (false, _) => collapsed.push(c),
        }
        prev_was_space = is_space;
    }
    collapsed
}
915
916/// The cross-run half of CSS whitespace processing, applied to a
917/// block's fully assembled inline runs (in-node collapsing is
918/// [`collapse_whitespace`]'s job). Removes the spaces that only become
919/// removable once neighbours are known: leading space at the block
920/// start or after a `<br>`, a space whose preceding text run already
921/// ends in one (`foo <b> bar</b>`), and trailing space at the block end
922/// or before a `<br>`. Text runs reduced to nothing are dropped.
923/// Non-text atoms (buttons, checkboxes, images) act like words —
924/// spaces around them survive.
925fn normalize_inline_runs(runs: Vec<El>) -> Vec<El> {
926    let mut out: Vec<El> = Vec::with_capacity(runs.len());
927    // True at the block start and just after a hard break — positions
928    // where leading whitespace is dropped entirely.
929    let mut at_boundary = true;
930    for mut run in runs {
931        match run.kind {
932            Kind::HardBreak => {
933                trim_trailing_edge(&mut out);
934                at_boundary = true;
935                out.push(run);
936            }
937            Kind::Text => {
938                let prev_ends_in_space = out
939                    .last()
940                    .is_some_and(|p| matches!(&p.text, Some(t) if t.ends_with(' ')));
941                if let Some(t) = run.text.take() {
942                    let t = if at_boundary || prev_ends_in_space {
943                        t.trim_start_matches(' ').to_string()
944                    } else {
945                        t
946                    };
947                    if t.is_empty() {
948                        // A whitespace-only run absorbed by the
949                        // boundary; the boundary state carries over.
950                        continue;
951                    }
952                    at_boundary = false;
953                    run.text = Some(t);
954                } else {
955                    at_boundary = false;
956                }
957                out.push(run);
958            }
959            _ => {
960                at_boundary = false;
961                out.push(run);
962            }
963        }
964    }
965    trim_trailing_edge(&mut out);
966    out
967}
968
969/// Strip trailing spaces from the text runs at the tail of `out`,
970/// dropping runs that become empty. Stops at the first non-text run
971/// (a space before a trailing button is interior, not edge).
972fn trim_trailing_edge(out: &mut Vec<El>) {
973    while let Some(last) = out.last_mut() {
974        if last.kind != Kind::Text {
975            return;
976        }
977        let Some(t) = &last.text else {
978            return;
979        };
980        let trimmed = t.trim_end_matches(' ');
981        if trimmed.is_empty() {
982            out.pop();
983            continue;
984        }
985        if trimmed.len() != t.len() {
986            last.text = Some(trimmed.to_string());
987        }
988        return;
989    }
990}
991
992// ---------- Builders ----------
993
994fn build_heading(tag: &str, runs: Vec<El>) -> El {
995    // Headings h4–h6 clamp to h3 to match damascene-markdown's behaviour.
996    let plain = single_plain_text(&runs);
997    if let Some(plain) = plain {
998        return match tag {
999            "h1" => h1(plain),
1000            "h2" => h2(plain),
1001            _ => h3(plain),
1002        };
1003    }
1004    let role = match tag {
1005        "h1" => TextRole::Display,
1006        "h2" => TextRole::Heading,
1007        _ => TextRole::Title,
1008    };
1009    text_runs(runs)
1010        .text_role(role)
1011        .wrap_text()
1012        .width(Size::Fill(1.0))
1013        .height(Size::Hug)
1014}
1015
1016fn build_unordered_list(node: &Handle, state: &InlineState, cx: &WalkCx<'_>) -> El {
1017    let items = collect_list_items(node, state, cx);
1018    // Detect a task-list shape — non-empty, and every item begins with
1019    // `<input type="checkbox">`. GFM and many static-site generators
1020    // emit markdown task lists as HTML in this shape.
1021    if !items.is_empty() && items.iter().all(|item| item.checkbox_state.is_some()) {
1022        return task_list(
1023            items
1024                .into_iter()
1025                .map(|item| (item.checkbox_state.unwrap_or(false), item.content)),
1026        );
1027    }
1028    bullet_list(items.into_iter().map(|item| item.content))
1029}
1030
1031fn build_ordered_list(node: &Handle, state: &InlineState, cx: &WalkCx<'_>) -> El {
1032    let start = element_attr(node, "start")
1033        .and_then(|s| s.parse::<u64>().ok())
1034        .unwrap_or(1);
1035    let items = collect_list_items(node, state, cx);
1036    numbered_list_from(start, items.into_iter().map(|item| item.content))
1037}
1038
/// One `<li>` as gathered by `collect_list_items`: its rendered content
/// plus the task-list probe result for that item.
struct CollectedItem {
    /// The rendered body of the list item.
    content: El,
    /// `Some(checked)` when the item's first significant child (after
    /// any whitespace-only text) is `<input type="checkbox">`; `None`
    /// otherwise. Used to detect a GFM task-list shape.
    checkbox_state: Option<bool>,
}
1045
1046fn collect_list_items(node: &Handle, state: &InlineState, cx: &WalkCx<'_>) -> Vec<CollectedItem> {
1047    let mut items = Vec::new();
1048    for child in node.children.borrow().iter() {
1049        let Some(tag) = element_tag(child) else {
1050            continue;
1051        };
1052        if tag != "li" {
1053            continue;
1054        }
1055        let checkbox_state = first_checkbox_state(child);
1056        let seq = walk_block_children(child, state, cx);
1057        let content = if seq.blocks.len() == 1 {
1058            seq.blocks.into_iter().next().unwrap()
1059        } else if seq.blocks.is_empty() {
1060            paragraph("")
1061        } else {
1062            column(seq.blocks)
1063                .gap(seq.gap.unwrap_or(tokens::SPACE_2))
1064                .width(Size::Fill(1.0))
1065                .height(Size::Hug)
1066        };
1067        items.push(CollectedItem {
1068            content,
1069            checkbox_state,
1070        });
1071    }
1072    items
1073}
1074
1075/// If the first element child of `<li>` is `<input type="checkbox">`,
1076/// return its checked state. The walker hides the actual `<input>` Els
1077/// when classifying the list as a task list.
1078fn first_checkbox_state(li: &Handle) -> Option<bool> {
1079    for child in li.children.borrow().iter() {
1080        if let NodeData::Text { contents } = &child.data {
1081            if contents.borrow().trim().is_empty() {
1082                continue;
1083            }
1084            return None;
1085        }
1086        if let Some(tag) = element_tag(child) {
1087            if tag != "input" {
1088                return None;
1089            }
1090            let ty = element_attr(child, "type").unwrap_or_default();
1091            if !ty.eq_ignore_ascii_case("checkbox") {
1092                return None;
1093            }
1094            let checked = element_attr(child, "checked").is_some();
1095            return Some(checked);
1096        }
1097    }
1098    None
1099}
1100
1101/// Render a `<details>` element as a static cosmetic disclosure: a
1102/// summary row with a leading chevron (▼ when `open`, ▶ when not),
1103/// followed by the rest of the children when `open`. No toggle wiring
1104/// — apps that want interactive behaviour can fork the tier-1 widget
1105/// or compose `accordion_item` directly with their own state.
1106fn build_details(node: &Handle, state: &InlineState, cx: &WalkCx<'_>) -> El {
1107    let open = element_attr(node, "open").is_some();
1108    let chevron = if open { "\u{25BE}" } else { "\u{25B8}" };
1109    let mut summary_runs: Vec<El> = Vec::new();
1110    let mut body_blocks: Vec<(El, Option<Sides>)> = Vec::new();
1111    for child in node.children.borrow().iter() {
1112        match element_tag(child).as_deref() {
1113            Some("summary") => {
1114                summary_runs = collect_inline_runs(child, state, cx);
1115            }
1116            _ => {
1117                if open {
1118                    let mut buf = Vec::new();
1119                    let was_inline = is_inline_node(child);
1120                    if was_inline {
1121                        walk_inline_node(child, state, &mut buf, cx);
1122                        if !runs_are_blank(&buf) {
1123                            body_blocks.push((build_paragraph(buf), None));
1124                        }
1125                    } else {
1126                        walk_block_node(child, state, &mut body_blocks, cx);
1127                    }
1128                }
1129            }
1130        }
1131    }
1132    let body_blocks: Vec<El> = body_blocks.into_iter().map(|(el, _)| el).collect();
1133    let summary_label: El = if summary_runs.is_empty() {
1134        text("Details").label()
1135    } else if let Some(plain) = single_plain_text(&summary_runs) {
1136        text(plain).label().font_weight(FontWeight::Medium)
1137    } else {
1138        text_runs(summary_runs).width(Size::Fill(1.0))
1139    };
1140    let summary_row = row([
1141        text(chevron).text_color(tokens::MUTED_FOREGROUND),
1142        summary_label,
1143    ])
1144    .gap(tokens::SPACE_2)
1145    .align(Align::Center)
1146    .width(Size::Fill(1.0));
1147    let mut parts: Vec<El> = vec![summary_row];
1148    if open && !body_blocks.is_empty() {
1149        parts.push(
1150            column(body_blocks)
1151                .gap(tokens::SPACE_2)
1152                .width(Size::Fill(1.0))
1153                .height(Size::Hug)
1154                .padding(Sides::left(tokens::SPACE_4)),
1155        );
1156    }
1157    column(parts)
1158        .gap(tokens::SPACE_2)
1159        .width(Size::Fill(1.0))
1160        .height(Size::Hug)
1161}
1162
1163/// Render a `<figure>` as a column where `<figcaption>` children get
1164/// their inner blocks muted + italicised. Mirrors the markdown image-
1165/// placeholder visual treatment so figures sit next to images in the
1166/// same tone.
1167fn build_figure(node: &Handle, state: &InlineState, cx: &WalkCx<'_>) -> El {
1168    let mut parts: Vec<(El, Option<Sides>)> = Vec::new();
1169    for child in node.children.borrow().iter() {
1170        match element_tag(child).as_deref() {
1171            Some("figcaption") => {
1172                let seq = walk_block_children(child, state, cx);
1173                for el in seq.blocks {
1174                    parts.push((el.muted().italic(), None));
1175                }
1176            }
1177            Some(_) => walk_block_node(child, state, &mut parts, cx),
1178            None => {
1179                if is_inline_node(child) {
1180                    let mut buf = Vec::new();
1181                    walk_inline_node(child, state, &mut buf, cx);
1182                    if !runs_are_blank(&buf) {
1183                        parts.push((build_paragraph(buf), None));
1184                    }
1185                }
1186            }
1187        }
1188    }
1189    column(parts.into_iter().map(|(el, _)| el).collect::<Vec<_>>())
1190        .gap(tokens::SPACE_2)
1191        .width(Size::Fill(1.0))
1192        .height(Size::Hug)
1193}
1194
1195fn build_pre(node: &Handle) -> El {
1196    // If the `<pre>` wraps a single `<code>` element, take its text
1197    // content as the code body (the common
1198    // `<pre><code class="language-X">…</code></pre>` shape). Otherwise
1199    // collect the `<pre>`'s own text content.
1200    let body = inner_code_text(node);
1201    code_block(body)
1202}
1203
1204fn inner_code_text(pre: &Handle) -> String {
1205    let children = pre.children.borrow();
1206    let code_child = children.iter().find_map(|c| {
1207        if let NodeData::Element { name, .. } = &c.data {
1208            if name.local.as_ref().eq_ignore_ascii_case("code") {
1209                return Some(c.clone());
1210            }
1211        }
1212        None
1213    });
1214    let target = code_child.as_ref().unwrap_or(pre);
1215    let mut out = String::new();
1216    collect_text_recursive(target, &mut out);
1217    out
1218}
1219
1220fn collect_text_recursive(node: &Handle, out: &mut String) {
1221    match &node.data {
1222        NodeData::Text { contents } => out.push_str(&contents.borrow()),
1223        NodeData::Element { .. } => {
1224            for child in node.children.borrow().iter() {
1225                collect_text_recursive(child, out);
1226            }
1227        }
1228        _ => {}
1229    }
1230}
1231
1232// ---------- Definition lists ----------
1233
1234/// `<dl>` renders as a column of semibold terms (`<dt>`) with their
1235/// definitions (`<dd>`) indented underneath — the classic browser
1236/// shape, minus the hanging-indent refinements. Definitions walk the
1237/// full block walker, so a `<dd>` holding paragraphs or a list keeps
1238/// its structure. Per the HTML spec, `<div>` wrappers around dt/dd
1239/// pairs are transparent.
1240fn build_definition_list(node: &Handle, state: &InlineState, cx: &WalkCx<'_>) -> El {
1241    let mut items = Vec::new();
1242    collect_definition_items(node, state, cx, &mut items);
1243    column(items)
1244        .gap(tokens::SPACE_1)
1245        .width(Size::Fill(1.0))
1246        .height(Size::Hug)
1247}
1248
1249fn collect_definition_items(
1250    node: &Handle,
1251    state: &InlineState,
1252    cx: &WalkCx<'_>,
1253    items: &mut Vec<El>,
1254) {
1255    for child in node.children.borrow().iter() {
1256        let Some(tag) = element_tag(child) else {
1257            continue;
1258        };
1259        match tag.as_str() {
1260            "dt" => {
1261                let runs = collect_inline_runs(child, state, cx);
1262                if !runs_are_blank(&runs) {
1263                    items.push(build_paragraph(runs).semibold());
1264                }
1265            }
1266            "dd" => {
1267                let inner = walk_block_children(child, state, cx);
1268                let gap = inner.gap.unwrap_or(0.0);
1269                items.push(
1270                    column(inner.blocks)
1271                        .gap(gap)
1272                        .pl(tokens::SPACE_4)
1273                        .width(Size::Fill(1.0))
1274                        .height(Size::Hug),
1275                );
1276            }
1277            // The spec allows wrapping each name/value group in a div.
1278            "div" => collect_definition_items(child, state, cx, items),
1279            _ => {}
1280        }
1281    }
1282}
1283
1284// ---------- Tables ----------
1285
1286fn build_table(node: &Handle, state: &InlineState, cx: &WalkCx<'_>) -> El {
1287    let mut header_rows = Vec::new();
1288    let mut body_rows = Vec::new();
1289    let mut explicit_header = false;
1290    walk_table_sections(
1291        node,
1292        state,
1293        cx,
1294        &mut header_rows,
1295        &mut body_rows,
1296        &mut explicit_header,
1297        false,
1298    );
1299    let mut sections = Vec::new();
1300    if !header_rows.is_empty() {
1301        sections.push(table_header(header_rows));
1302    }
1303    if !body_rows.is_empty() {
1304        sections.push(table_body(body_rows));
1305    }
1306    let table = table(sections);
1307    // `<caption>` renders as a muted-italic line above the table —
1308    // the same treatment `<figcaption>` gets under `<figure>`.
1309    let caption = node.children.borrow().iter().find_map(|child| {
1310        (element_tag(child).as_deref() == Some("caption"))
1311            .then(|| collect_inline_runs(child, state, cx))
1312            .filter(|runs| !runs_are_blank(runs))
1313    });
1314    match caption {
1315        Some(runs) => column([build_paragraph(runs).muted().italic(), table])
1316            .gap(4.0)
1317            .width(Size::Fill(1.0))
1318            .height(Size::Hug),
1319        None => table,
1320    }
1321}
1322
1323fn walk_table_sections(
1324    node: &Handle,
1325    state: &InlineState,
1326    cx: &WalkCx<'_>,
1327    header_rows: &mut Vec<El>,
1328    body_rows: &mut Vec<El>,
1329    explicit_header: &mut bool,
1330    in_thead: bool,
1331) {
1332    for child in node.children.borrow().iter() {
1333        let Some(tag) = element_tag(child) else {
1334            continue;
1335        };
1336        match tag.as_str() {
1337            "thead" => {
1338                *explicit_header = true;
1339                walk_table_sections(
1340                    child,
1341                    state,
1342                    cx,
1343                    header_rows,
1344                    body_rows,
1345                    explicit_header,
1346                    true,
1347                );
1348            }
1349            "tbody" | "tfoot" => {
1350                walk_table_sections(
1351                    child,
1352                    state,
1353                    cx,
1354                    header_rows,
1355                    body_rows,
1356                    explicit_header,
1357                    false,
1358                );
1359            }
1360            "tr" => {
1361                let row = build_table_row(child, state, cx);
1362                if in_thead {
1363                    header_rows.push(row);
1364                } else if !*explicit_header && header_rows.is_empty() && row_is_all_headers(child) {
1365                    // First row of a header-less table that contains
1366                    // only `<th>` cells reads as a header row, matching
1367                    // common authoring.
1368                    header_rows.push(row);
1369                } else {
1370                    body_rows.push(row);
1371                }
1372            }
1373            // Handled by `build_table` (rendered above the table).
1374            "caption" => {}
1375            // Column-level width/styling has no Damascene table
1376            // equivalent — columns size to content.
1377            "colgroup" | "col" => {
1378                cx.lints.push(
1379                    FindingKind::UnsupportedTag,
1380                    format!("<{tag}> dropped (column-level table styling is unsupported)"),
1381                );
1382            }
1383            _ => {}
1384        }
1385    }
1386}
1387
1388fn row_is_all_headers(row: &Handle) -> bool {
1389    let mut any = false;
1390    for child in row.children.borrow().iter() {
1391        let Some(tag) = element_tag(child) else {
1392            continue;
1393        };
1394        match tag.as_str() {
1395            "th" => any = true,
1396            "td" => return false,
1397            _ => {}
1398        }
1399    }
1400    any
1401}
1402
1403fn build_table_row(node: &Handle, state: &InlineState, cx: &WalkCx<'_>) -> El {
1404    let mut cells: Vec<El> = Vec::new();
1405    for child in node.children.borrow().iter() {
1406        let Some(tag) = element_tag(child) else {
1407            continue;
1408        };
1409        match tag.as_str() {
1410            "th" => cells.push(build_table_head_cell(child, state, cx)),
1411            "td" => cells.push(build_table_body_cell(child, state, cx)),
1412            _ => {}
1413        }
1414        if matches!(tag.as_str(), "th" | "td") {
1415            lint_cell_spans(child, &tag, cx);
1416            lint_cell_block_content(child, &tag, cx);
1417        }
1418    }
1419    table_row(cells)
1420}
1421
1422/// Damascene's table renders every cell unmerged — a `colspan`/`rowspan`
1423/// other than 1 produces a misaligned grid, which the author can only
1424/// learn about from this finding.
1425fn lint_cell_spans(cell: &Handle, tag: &str, cx: &WalkCx<'_>) {
1426    for attr in ["colspan", "rowspan"] {
1427        if let Some(v) = element_attr(cell, attr)
1428            && v.trim() != "1"
1429        {
1430            cx.lints.push(
1431                FindingKind::UnsupportedAttribute,
1432                format!("{attr}=\"{v}\" on <{tag}> ignored (cells render unmerged)"),
1433            );
1434        }
1435    }
1436}
1437
1438/// Cell contents render as inline runs; block-level children (`<ul>`,
1439/// `<p>`, nested tables) lose their block structure — text survives,
1440/// breaks and nesting don't.
1441fn lint_cell_block_content(cell: &Handle, tag: &str, cx: &WalkCx<'_>) {
1442    let has_block = cell
1443        .children
1444        .borrow()
1445        .iter()
1446        .any(|child| !is_inline_node(child));
1447    if has_block {
1448        cx.lints.push(
1449            FindingKind::FlattenedContent,
1450            format!("block content in <{tag}> flattened to inline runs"),
1451        );
1452    }
1453}
1454
1455fn build_table_head_cell(node: &Handle, state: &InlineState, cx: &WalkCx<'_>) -> El {
1456    let runs = collect_inline_runs(node, state, cx);
1457    if let Some(plain) = single_plain_text(&runs) {
1458        table_head(plain)
1459    } else if runs.is_empty() {
1460        table_head("")
1461    } else {
1462        table_head_el(text_runs(runs).width(Size::Fill(1.0)))
1463    }
1464}
1465
1466fn build_table_body_cell(node: &Handle, state: &InlineState, cx: &WalkCx<'_>) -> El {
1467    let runs = collect_inline_runs(node, state, cx);
1468    if let Some(plain) = single_plain_text(&runs) {
1469        table_cell(text(plain))
1470    } else if runs.is_empty() {
1471        table_cell(text(""))
1472    } else {
1473        table_cell(text_runs(runs).width(Size::Fill(1.0)))
1474    }
1475}
1476
1477// ---------- Images ----------
1478
1479fn build_image_placeholder(node: &Handle) -> Option<El> {
1480    let alt = element_attr(node, "alt").unwrap_or_default();
1481    let src = element_attr(node, "src")
1482        .filter(|s| is_safe_url(s))
1483        .unwrap_or_default();
1484    let title = element_attr(node, "title").unwrap_or_default();
1485    if alt.is_empty() && src.is_empty() && title.is_empty() {
1486        return None;
1487    }
1488    let label = image_placeholder_label(&alt, &src, &title);
1489    let mut el = text(label).muted().italic();
1490    // data: URLs are fine as an <img> src but make no sense as a click
1491    // target — navigating to one opens the raw payload as a document.
1492    if !src.is_empty() && !src.trim().to_ascii_lowercase().starts_with("data:") {
1493        el = el.link(src);
1494    }
1495    Some(el)
1496}
1497
/// Compose the visible label for an image placeholder.
///
/// The bracketed part shows `alt` when present, otherwise `src`, or
/// just `[image]` when both are empty. When both are present, `src`
/// follows the bracket. A non-empty `title` is appended in double
/// quotes.
fn image_placeholder_label(alt: &str, src: &str, title: &str) -> String {
    let bracketed = if alt.is_empty() { src } else { alt };
    let mut label = if bracketed.is_empty() {
        String::from("[image]")
    } else {
        format!("[image: {bracketed}]")
    };
    let show_src_after = !alt.is_empty() && !src.is_empty();
    if show_src_after {
        label.push(' ');
        label.push_str(src);
    }
    if !title.is_empty() {
        label += &format!(" \"{title}\"");
    }
    label
}
1512
1513// ---------- Run helpers ----------
1514
1515/// Mirrors `damascene-markdown::single_plain_text`. Returns a single plain
1516/// string when every run is a default-styled `Kind::Text` leaf — drives
1517/// the `paragraph(s)` / `h1(s)` fast paths.
1518fn single_plain_text(runs: &[El]) -> Option<String> {
1519    let mut out = String::new();
1520    for run in runs {
1521        if run.kind != Kind::Text {
1522            return None;
1523        }
1524        if run.font_weight != FontWeight::default()
1525            || run.text_italic
1526            || run.text_underline
1527            || run.text_strikethrough
1528            || run.text_link.is_some()
1529            || run.text_bg.is_some()
1530            || run.font_mono
1531        {
1532            return None;
1533        }
1534        // The Body role's auto-applied FOREGROUND token counts as
1535        // "default"; any other explicit color (from `style="color:..."`)
1536        // forces the rich-runs path so the per-run colour survives.
1537        if let Some(c) = run.text_color
1538            && c != tokens::FOREGROUND
1539        {
1540            return None;
1541        }
1542        let Some(s) = &run.text else {
1543            return None;
1544        };
1545        out.push_str(s);
1546    }
1547    Some(out)
1548}
1549
1550fn runs_are_blank(runs: &[El]) -> bool {
1551    for run in runs {
1552        if run.kind != Kind::Text {
1553            return false;
1554        }
1555        let Some(s) = &run.text else {
1556            continue;
1557        };
1558        if !s.chars().all(char::is_whitespace) {
1559            return false;
1560        }
1561    }
1562    true
1563}
1564
1565#[cfg(test)]
1566mod tests {
1567    use super::*;
1568
1569    fn blocks(input: &str) -> Vec<El> {
1570        let root = html(input);
1571        assert_eq!(root.kind, Kind::Group);
1572        assert_eq!(root.axis, Axis::Column);
1573        root.children
1574    }
1575
1576    fn flatten_text(el: &El) -> String {
1577        let mut out = String::new();
1578        if let Some(s) = &el.text {
1579            out.push_str(s);
1580        }
1581        for child in &el.children {
1582            out.push_str(&flatten_text(child));
1583        }
1584        out
1585    }
1586
1587    #[test]
1588    fn empty_document_yields_an_empty_column() {
1589        assert!(blocks("").is_empty());
1590    }
1591
1592    #[test]
1593    fn plain_paragraph_collapses_to_paragraph_fast_path() {
1594        let bs = blocks("<p>Hello world.</p>");
1595        assert_eq!(bs.len(), 1);
1596        assert_eq!(bs[0].kind, Kind::Text);
1597        assert_eq!(bs[0].text.as_deref(), Some("Hello world."));
1598    }
1599
1600    #[test]
1601    fn pretty_printed_source_whitespace_collapses_to_single_spaces() {
1602        // Newlines + indentation in the source are formatting, not
1603        // line breaks (CSS `white-space: normal`). Damascene treats a
1604        // literal \n as a hard break, so without collapsing this
1605        // paragraph would render as three short lines.
1606        let bs = blocks("<p>\n  This paragraph wraps across\n  indented source lines.\n</p>");
1607        assert_eq!(bs.len(), 1);
1608        assert_eq!(
1609            bs[0].text.as_deref(),
1610            Some("This paragraph wraps across indented source lines.")
1611        );
1612    }
1613
1614    #[test]
1615    fn whitespace_collapses_across_inline_element_boundaries() {
1616        // "foo " + " bar" (bold) must not yield a double space, and
1617        // block-edge whitespace must trim.
1618        let bs = blocks("<p>\n  foo <b>\n bar</b>\n</p>");
1619        assert_eq!(bs.len(), 1);
1620        assert_eq!(bs[0].kind, Kind::Inlines);
1621        let texts: Vec<&str> = bs[0]
1622            .children
1623            .iter()
1624            .filter_map(|r| r.text.as_deref())
1625            .collect();
1626        assert_eq!(texts, vec!["foo ", "bar"]);
1627    }
1628
1629    #[test]
1630    fn whitespace_only_text_between_inline_elements_survives_as_separator() {
1631        let bs = blocks("<p><b>a</b> <b>b</b></p>");
1632        assert_eq!(bs.len(), 1);
1633        let texts: Vec<&str> = bs[0]
1634            .children
1635            .iter()
1636            .filter_map(|r| r.text.as_deref())
1637            .collect();
1638        assert_eq!(texts, vec!["a", " ", "b"]);
1639    }
1640
1641    #[test]
1642    fn br_still_breaks_and_swallows_adjacent_source_whitespace() {
1643        let bs = blocks("<p>alpha\n  <br>\n  beta</p>");
1644        assert_eq!(bs.len(), 1);
1645        let kinds: Vec<Kind> = bs[0].children.iter().map(|r| r.kind.clone()).collect();
1646        assert_eq!(kinds, vec![Kind::Text, Kind::HardBreak, Kind::Text]);
1647        assert_eq!(bs[0].children[0].text.as_deref(), Some("alpha"));
1648        assert_eq!(bs[0].children[2].text.as_deref(), Some("beta"));
1649    }
1650
1651    #[test]
1652    fn nbsp_is_not_collapsed() {
1653        let bs = blocks("<p>a&nbsp;&nbsp;b</p>");
1654        assert_eq!(bs.len(), 1);
1655        assert_eq!(bs[0].text.as_deref(), Some("a\u{a0}\u{a0}b"));
1656    }
1657
1658    #[test]
1659    fn h1_h2_h3_map_to_heading_kinds_with_roles() {
1660        let bs = blocks("<h1>One</h1><h2>Two</h2><h3>Three</h3>");
1661        assert_eq!(bs.len(), 3);
1662        for b in &bs {
1663            assert_eq!(b.kind, Kind::Heading);
1664        }
1665        assert_eq!(bs[0].text_role, TextRole::Display);
1666        assert_eq!(bs[1].text_role, TextRole::Heading);
1667        assert_eq!(bs[2].text_role, TextRole::Title);
1668        assert_eq!(bs[0].text.as_deref(), Some("One"));
1669    }
1670
1671    #[test]
1672    fn h4_h5_h6_clamp_to_h3() {
1673        let bs = blocks("<h4>Four</h4><h5>Five</h5><h6>Six</h6>");
1674        for b in &bs {
1675            assert_eq!(b.kind, Kind::Heading);
1676            assert_eq!(b.text_role, TextRole::Title);
1677        }
1678    }
1679
1680    #[test]
1681    fn mixed_inline_paragraph_becomes_text_runs_with_styled_children() {
1682        let bs = blocks("<p>Hello <strong>bold</strong> and <em>italic</em>.</p>");
1683        assert_eq!(bs.len(), 1);
1684        let p = &bs[0];
1685        assert_eq!(p.kind, Kind::Inlines);
1686        // 5 runs: "Hello ", "bold", " and ", "italic", "."
1687        assert_eq!(p.children.len(), 5);
1688        assert_eq!(p.children[0].text.as_deref(), Some("Hello "));
1689        assert_eq!(p.children[1].text.as_deref(), Some("bold"));
1690        assert_eq!(p.children[1].font_weight, FontWeight::Bold);
1691        assert_eq!(p.children[3].text.as_deref(), Some("italic"));
1692        assert!(p.children[3].text_italic);
1693    }
1694
1695    #[test]
1696    fn nested_inline_state_composes() {
1697        let bs = blocks("<p><strong>bold and <em>both</em></strong></p>");
1698        assert_eq!(bs.len(), 1);
1699        let p = &bs[0];
1700        assert_eq!(p.kind, Kind::Inlines);
1701        let bold_only = &p.children[0];
1702        assert_eq!(bold_only.text.as_deref(), Some("bold and "));
1703        assert_eq!(bold_only.font_weight, FontWeight::Bold);
1704        assert!(!bold_only.text_italic);
1705        let bold_and_italic = &p.children[1];
1706        assert_eq!(bold_and_italic.text.as_deref(), Some("both"));
1707        assert_eq!(bold_and_italic.font_weight, FontWeight::Bold);
1708        assert!(bold_and_italic.text_italic);
1709    }
1710
1711    #[test]
1712    fn anchor_propagates_href_through_nested_runs() {
1713        let bs =
1714            blocks("<p>Go to <a href=\"https://damascene.dev\">the <strong>site</strong></a>.</p>");
1715        let p = &bs[0];
1716        assert_eq!(p.kind, Kind::Inlines);
1717        let linked_runs: Vec<&El> = p
1718            .children
1719            .iter()
1720            .filter(|r| r.text_link.is_some())
1721            .collect();
1722        assert_eq!(linked_runs.len(), 2);
1723        for r in linked_runs {
1724            assert_eq!(r.text_link.as_deref(), Some("https://damascene.dev"));
1725        }
1726    }
1727
1728    #[test]
1729    fn br_in_paragraph_emits_hard_break_run() {
1730        let bs = blocks("<p>line one<br>line two</p>");
1731        let p = &bs[0];
1732        assert_eq!(p.kind, Kind::Inlines);
1733        assert!(p.children.iter().any(|r| r.kind == Kind::HardBreak));
1734    }
1735
1736    #[test]
1737    fn hr_emits_divider() {
1738        let bs = blocks("<hr>");
1739        assert_eq!(bs.len(), 1);
1740        assert_eq!(bs[0].height, Size::Fixed(1.0));
1741    }
1742
1743    #[test]
1744    fn ul_emits_one_block_per_item() {
1745        let bs = blocks("<ul><li>apple</li><li>banana</li><li>cherry</li></ul>");
1746        assert_eq!(bs.len(), 1);
1747        let list = &bs[0];
1748        // bullet_list returns a column of N item-rows.
1749        assert_eq!(list.children.len(), 3);
1750    }
1751
1752    #[test]
1753    fn ol_with_start_attribute_offsets_marker() {
1754        let bs = blocks("<ol start=\"5\"><li>five</li><li>six</li></ol>");
1755        let list = &bs[0];
1756        // numbered_list_from(5, ...) labels the first marker as "5.".
1757        let first_marker_text = flatten_text(&list.children[0]);
1758        assert!(first_marker_text.starts_with("5."));
1759        assert!(first_marker_text.contains("five"));
1760    }
1761
1762    #[test]
1763    fn ul_with_checkbox_first_children_becomes_task_list() {
1764        let bs = blocks(
1765            "<ul>\
1766                <li><input type=\"checkbox\" checked> done thing</li>\
1767                <li><input type=\"checkbox\"> open thing</li>\
1768            </ul>",
1769        );
1770        let list = &bs[0];
1771        // task_list also produces a column with one row per item.
1772        assert_eq!(list.children.len(), 2);
1773        // Item text should not include the literal `<input>` markup —
1774        // the marker is consumed by the task-list shape detector.
1775        let combined = flatten_text(list);
1776        assert!(combined.contains("done thing"));
1777        assert!(combined.contains("open thing"));
1778        assert!(!combined.contains("checkbox"));
1779    }
1780
1781    #[test]
1782    fn nested_ul_renders_as_nested_blocks() {
1783        let bs = blocks("<ul><li>outer<ul><li>inner</li></ul></li></ul>");
1784        let outer = &bs[0];
1785        assert_eq!(outer.children.len(), 1);
1786        let combined = flatten_text(outer);
1787        assert!(combined.contains("outer"));
1788        assert!(combined.contains("inner"));
1789    }
1790
1791    #[test]
1792    fn pre_code_block_preserves_body_text() {
1793        let bs = blocks(
1794            "<pre><code class=\"language-rust\">fn main() {\n    println!(\"hi\");\n}</code></pre>",
1795        );
1796        assert_eq!(bs.len(), 1);
1797        let combined = flatten_text(&bs[0]);
1798        assert!(combined.contains("fn main()"));
1799        assert!(combined.contains("println!"));
1800    }
1801
1802    #[test]
1803    fn blockquote_wraps_inner_blocks() {
1804        let bs = blocks("<blockquote><p>quoted text</p></blockquote>");
1805        assert_eq!(bs.len(), 1);
1806        // blockquote's exact shape is a widget composition, so we
1807        // only assert the quoted text survives the wrap.
1808        assert!(flatten_text(&bs[0]).contains("quoted text"));
1809    }
1810
1811    #[test]
1812    fn table_with_thead_and_tbody_emits_header_and_body_sections() {
1813        let bs = blocks(
1814            "<table>\
1815                <thead><tr><th>Col A</th><th>Col B</th></tr></thead>\
1816                <tbody>\
1817                    <tr><td>a1</td><td>b1</td></tr>\
1818                    <tr><td>a2</td><td>b2</td></tr>\
1819                </tbody>\
1820            </table>",
1821        );
1822        assert_eq!(bs.len(), 1);
1823        let t = &bs[0];
1824        assert_eq!(t.kind, Kind::Custom("table"));
1825        // First section is the header; subsequent rows live in the body.
1826        let combined = flatten_text(t);
1827        for needle in ["Col A", "Col B", "a1", "b1", "a2", "b2"] {
1828            assert!(combined.contains(needle), "missing {needle}");
1829        }
1830    }
1831
1832    #[test]
1833    fn table_without_thead_promotes_all_th_first_row_to_header() {
1834        let bs = blocks(
1835            "<table>\
1836                <tr><th>Name</th><th>Score</th></tr>\
1837                <tr><td>Alice</td><td>10</td></tr>\
1838            </table>",
1839        );
1840        let t = &bs[0];
1841        // The first child after the implicit promotion should be the
1842        // header section (table_header). Walk in and check its first
1843        // cell is a TableHeaderCell row.
1844        let combined = flatten_text(t);
1845        assert!(combined.contains("Name"));
1846        assert!(combined.contains("Alice"));
1847    }
1848
1849    #[test]
1850    fn img_with_alt_and_src_renders_as_muted_italic_link() {
1851        let bs = blocks("<p><img src=\"https://damascene.dev/x.png\" alt=\"Damascene mark\"></p>");
1852        let p = &bs[0];
1853        // Either an Inlines containing the placeholder, or the
1854        // placeholder run promoted via the single-run fast path.
1855        let combined = flatten_text(p);
1856        assert!(combined.contains("Damascene mark"));
1857        assert!(combined.contains("https://damascene.dev/x.png"));
1858    }
1859
1860    #[test]
1861    fn script_tag_is_dropped_entirely() {
1862        let bs = blocks("<p>before</p><script>alert('xss')</script><p>after</p>");
1863        let combined: String = bs.iter().map(flatten_text).collect();
1864        assert!(combined.contains("before"));
1865        assert!(combined.contains("after"));
1866        assert!(!combined.contains("alert"));
1867    }
1868
1869    #[test]
1870    fn iframe_object_noscript_are_dropped_with_their_contents() {
1871        // `<embed>` is a void element in HTML5 so it can't contain
1872        // text and is exercised by the script test instead.
1873        for tag in ["iframe", "object", "noscript"] {
1874            let bs = blocks(&format!("<p>x</p><{tag}>danger</{tag}><p>y</p>"));
1875            let combined: String = bs.iter().map(flatten_text).collect();
1876            assert!(!combined.contains("danger"), "tag {tag} not dropped");
1877        }
1878    }
1879
1880    #[test]
1881    fn javascript_href_is_treated_as_no_href() {
1882        let bs = blocks("<p><a href=\"javascript:alert(1)\">click</a></p>");
1883        let p = &bs[0];
1884        let runs: Vec<&El> = match p.kind {
1885            Kind::Inlines => p.children.iter().collect(),
1886            Kind::Text => vec![p],
1887            _ => panic!("unexpected paragraph kind: {:?}", p.kind),
1888        };
1889        for r in runs {
1890            assert!(r.text_link.is_none(), "javascript: href should be stripped");
1891        }
1892    }
1893
1894    #[test]
1895    fn on_attrs_are_dropped() {
1896        // The walker should never see an `onclick` handler — it gets
1897        // filtered at the attribute layer. Easiest test: ensure the
1898        // anchor still parses and the href passes through, with no
1899        // crash from the handler attribute.
1900        let bs = blocks("<p><a href=\"https://damascene.dev\" onclick=\"alert(1)\">link</a></p>");
1901        let p = &bs[0];
1902        let combined = flatten_text(p);
1903        assert!(combined.contains("link"));
1904        // No way to assert the handler was dropped beyond "didn't
1905        // panic"; the dedicated sanitizer test exercises the rule.
1906    }
1907
1908    #[test]
1909    fn unknown_block_tag_passes_through_children() {
1910        let bs = blocks("<section><p>inside</p></section><article><h2>also</h2></article>");
1911        assert!(bs.iter().any(|b| flatten_text(b).contains("inside")));
1912        assert!(bs.iter().any(|b| flatten_text(b).contains("also")));
1913    }
1914
1915    #[test]
1916    fn loose_text_between_blocks_becomes_anonymous_paragraph() {
1917        let bs = blocks("loose text<p>real paragraph</p>");
1918        assert_eq!(bs.len(), 2);
1919        assert_eq!(flatten_text(&bs[0]), "loose text");
1920        assert_eq!(flatten_text(&bs[1]), "real paragraph");
1921    }
1922
1923    #[test]
1924    fn html_fragment_inline_returns_runs_only() {
1925        let runs = html_fragment_inline(
1926            "hello <strong>strong</strong> world",
1927            HtmlOptions::default(),
1928        );
1929        assert_eq!(runs.len(), 3);
1930        assert_eq!(runs[0].text.as_deref(), Some("hello "));
1931        assert_eq!(runs[1].text.as_deref(), Some("strong"));
1932        assert_eq!(runs[1].font_weight, FontWeight::Bold);
1933        assert_eq!(runs[2].text.as_deref(), Some(" world"));
1934    }
1935
1936    #[test]
1937    fn html_fragment_inline_coerces_block_tag_to_its_inline_content() {
1938        // A `<div>` arriving inside an inline buffer should flatten —
1939        // its children become inline runs rather than terminating the
1940        // paragraph.
1941        let runs = html_fragment_inline(
1942            "a <div>b <strong>c</strong></div> d",
1943            HtmlOptions::default(),
1944        );
1945        let joined: String = runs
1946            .iter()
1947            .filter_map(|r| r.text.as_deref())
1948            .collect::<Vec<_>>()
1949            .join("");
1950        assert!(joined.contains("a "));
1951        assert!(joined.contains("b "));
1952        assert!(joined.contains("c"));
1953        assert!(joined.contains(" d"));
1954    }
1955
1956    #[test]
1957    fn mark_run_carries_inline_background() {
1958        let bs = blocks("<p>see <mark>this</mark> here</p>");
1959        let p = &bs[0];
1960        let mark_run = p
1961            .children
1962            .iter()
1963            .find(|r| r.text.as_deref() == Some("this"))
1964            .expect("mark run");
1965        assert!(mark_run.text_bg.is_some());
1966    }
1967
1968    #[test]
1969    fn kbd_run_renders_as_monospace_inline() {
1970        let bs = blocks("<p>press <kbd>Ctrl</kbd>+<kbd>K</kbd>.</p>");
1971        let p = &bs[0];
1972        let kbd_runs: Vec<&El> = p.children.iter().filter(|r| r.font_mono).collect();
1973        assert_eq!(kbd_runs.len(), 2);
1974    }
1975
1976    #[test]
1977    fn link_run_with_strong_inside_still_links() {
1978        let bs = blocks("<p><a href=\"https://damascene.dev\"><strong>bold link</strong></a></p>");
1979        let p = &bs[0];
1980        let bold_link = match p.kind {
1981            Kind::Inlines => p.children[0].clone(),
1982            Kind::Text => p.clone(),
1983            _ => panic!("unexpected kind: {:?}", p.kind),
1984        };
1985        assert_eq!(bold_link.text.as_deref(), Some("bold link"));
1986        assert_eq!(bold_link.font_weight, FontWeight::Bold);
1987        assert_eq!(
1988            bold_link.text_link.as_deref(),
1989            Some("https://damascene.dev")
1990        );
1991    }
1992
1993    // ---------- Work-or-lint coverage (issue #70) ----------
1994
1995    #[test]
1996    fn font_style_normal_cancels_inherited_italic() {
1997        let bs = blocks("<p><em>it <span style=\"font-style: normal\">up</span></em></p>");
1998        let p = &bs[0];
1999        let it = p
2000            .children
2001            .iter()
2002            .find(|r| r.text.as_deref() == Some("it "))
2003            .expect("italic run");
2004        assert!(it.text_italic);
2005        let up = p
2006            .children
2007            .iter()
2008            .find(|r| r.text.as_deref() == Some("up"))
2009            .expect("cancelled run");
2010        assert!(!up.text_italic, "font-style: normal must cancel <em>");
2011    }
2012
2013    #[test]
2014    fn text_decoration_none_cancels_inherited_underline() {
2015        let bs = blocks("<p><u>under <span style=\"text-decoration: none\">plain</span></u></p>");
2016        let p = &bs[0];
2017        let plain = p
2018            .children
2019            .iter()
2020            .find(|r| r.text.as_deref() == Some("plain"))
2021            .expect("cancelled run");
2022        assert!(!plain.text_underline);
2023    }
2024
2025    #[test]
2026    fn foreign_namespace_subtree_drops_with_lint() {
2027        let (root, findings) = html_with_lints(
2028            "<p>before <svg viewBox=\"0 0 1 1\"><circle r=\"1\"/></svg> after</p>",
2029            HtmlOptions::default(),
2030        );
2031        assert!(
2032            findings
2033                .iter()
2034                .any(|f| matches!(f.kind, FindingKind::UnsupportedTag) && f.detail.contains("svg")),
2035            "expected an UnsupportedTag finding for <svg>, got {findings:?}"
2036        );
2037        // The surrounding text survives (plain runs collapse onto the
2038        // paragraph's own text via the fast path).
2039        let p = &root.children[0];
2040        let joined: String = p
2041            .text
2042            .clone()
2043            .into_iter()
2044            .chain(p.children.iter().filter_map(|r| r.text.clone()))
2045            .collect();
2046        assert!(joined.contains("before"), "got {joined:?}");
2047        assert!(joined.contains("after"), "got {joined:?}");
2048    }
2049
2050    #[test]
2051    fn colspan_lints_as_unsupported_attribute() {
2052        let (_, findings) = html_with_lints(
2053            "<table><tr><td colspan=\"2\">a</td></tr><tr><td>b</td><td>c</td></tr></table>",
2054            HtmlOptions::default(),
2055        );
2056        assert!(
2057            findings
2058                .iter()
2059                .any(|f| matches!(f.kind, FindingKind::UnsupportedAttribute)
2060                    && f.detail.contains("colspan")),
2061            "expected colspan finding, got {findings:?}"
2062        );
2063        // colspan="1" stays silent.
2064        let (_, quiet) = html_with_lints(
2065            "<table><tr><td colspan=\"1\">a</td></tr></table>",
2066            HtmlOptions::default(),
2067        );
2068        assert!(
2069            !quiet
2070                .iter()
2071                .any(|f| matches!(f.kind, FindingKind::UnsupportedAttribute)),
2072            "colspan=1 must not lint, got {quiet:?}"
2073        );
2074    }
2075
2076    #[test]
2077    fn block_content_in_cell_lints_as_flattened() {
2078        let (_, findings) = html_with_lints(
2079            "<table><tr><td><ul><li>x</li></ul></td></tr></table>",
2080            HtmlOptions::default(),
2081        );
2082        assert!(
2083            findings
2084                .iter()
2085                .any(|f| matches!(f.kind, FindingKind::FlattenedContent)),
2086            "expected FlattenedContent finding, got {findings:?}"
2087        );
2088    }
2089
2090    #[test]
2091    fn table_caption_renders_above_the_table() {
2092        let bs = blocks("<table><caption>Quarterly results</caption><tr><th>Q</th></tr></table>");
2093        // The table block becomes a column: caption paragraph then table.
2094        let wrapper = &bs[0];
2095        let caption = &wrapper.children[0];
2096        assert_eq!(caption.text.as_deref(), Some("Quarterly results"));
2097        assert!(caption.text_italic);
2098    }
2099
2100    #[test]
2101    fn colgroup_lints_as_unsupported() {
2102        let (_, findings) = html_with_lints(
2103            "<table><colgroup><col style=\"width: 40px\"></colgroup><tr><td>a</td></tr></table>",
2104            HtmlOptions::default(),
2105        );
2106        assert!(
2107            findings
2108                .iter()
2109                .any(|f| matches!(f.kind, FindingKind::UnsupportedTag)
2110                    && f.detail.contains("colgroup")),
2111            "expected colgroup finding, got {findings:?}"
2112        );
2113    }
2114
2115    #[test]
2116    fn definition_list_renders_terms_and_indented_definitions() {
2117        let bs = blocks(
2118            "<dl><dt>Term</dt><dd>Definition body</dd><div><dt>T2</dt><dd>D2</dd></div></dl>",
2119        );
2120        let dl = &bs[0];
2121        assert_eq!(dl.children.len(), 4, "two dt + two dd: {dl:?}");
2122        let term = &dl.children[0];
2123        assert_eq!(term.text.as_deref(), Some("Term"));
2124        assert_eq!(term.font_weight, FontWeight::Semibold);
2125        let def = &dl.children[1];
2126        // The definition is an indented column holding the paragraph.
2127        assert!(def.padding.left > 0.0);
2128        assert_eq!(def.children[0].text.as_deref(), Some("Definition body"));
2129        // div-wrapped pairs are transparent.
2130        assert_eq!(dl.children[2].text.as_deref(), Some("T2"));
2131    }
2132
2133    #[test]
2134    fn data_svg_image_placeholder_is_not_clickable() {
2135        let bs = blocks("<p><img src=\"data:image/png;base64,AAAA\" alt=\"chart\"></p>");
2136        let p = &bs[0];
2137        let placeholder = p
2138            .children
2139            .iter()
2140            .find(|r| r.text.as_deref().is_some_and(|t| t.contains("chart")))
2141            .expect("placeholder run");
2142        assert!(
2143            placeholder.text_link.is_none(),
2144            "data: image src must not become a click target"
2145        );
2146    }
2147
2148    #[test]
2149    fn unsupported_color_syntax_lints() {
2150        let (_, findings) = html_with_lints(
2151            "<p style=\"color: oklch(0.7 0.1 200)\">x</p>",
2152            HtmlOptions::default(),
2153        );
2154        assert!(
2155            findings
2156                .iter()
2157                .any(|f| matches!(f.kind, FindingKind::DroppedDeclaration)
2158                    && f.detail.contains("oklch")),
2159            "expected dropped-color finding, got {findings:?}"
2160        );
2161    }
2162
2163    // ---------- CSS tier-2A integration ----------
2164
2165    #[test]
2166    fn block_style_attr_applies_background_padding_and_radius() {
2167        let bs = blocks(
2168            "<div style=\"background: #ff0000; padding: 12px; border-radius: 4px\">\
2169                <p>inside</p>\
2170            </div>",
2171        );
2172        // The styled <div> wraps its children in a column with the
2173        // style applied.
2174        assert_eq!(bs.len(), 1);
2175        let wrap = &bs[0];
2176        assert_eq!(wrap.fill, Some(Color::srgb_u8(255, 0, 0)));
2177        assert_eq!(wrap.padding, Sides::all(12.0));
2178        assert_eq!(wrap.radius.tl, 4.0);
2179    }
2180
2181    #[test]
2182    fn unstyled_div_stays_flat_no_extra_nesting() {
2183        // Existing behaviour: <div> with no style passes children through.
2184        let bs = blocks("<div><p>inside</p></div>");
2185        assert_eq!(bs.len(), 1);
2186        assert_eq!(bs[0].kind, Kind::Text);
2187        assert_eq!(bs[0].text.as_deref(), Some("inside"));
2188    }
2189
2190    #[test]
2191    fn paragraph_style_applies_to_paragraph_el() {
2192        let bs = blocks(r#"<p style="text-align: center; color: blue">hi</p>"#);
2193        let p = &bs[0];
2194        assert_eq!(p.kind, Kind::Text);
2195        assert_eq!(p.text.as_deref(), Some("hi"));
2196        assert_eq!(p.text_align, TextAlign::Center);
2197        assert_eq!(p.text_color, Some(Color::srgb_u8(0, 0, 255)));
2198    }
2199
2200    #[test]
2201    fn block_style_width_height_resolve_to_damascene_size() {
2202        let bs = blocks(r#"<div style="width: 240px; height: 50%"><p>x</p></div>"#);
2203        let wrap = &bs[0];
2204        assert_eq!(wrap.width, Size::Fixed(240.0));
2205        assert_eq!(wrap.height, Size::Fill(0.5));
2206    }
2207
2208    #[test]
2209    fn span_style_color_applies_to_inline_run() {
2210        let bs = blocks(r#"<p>hello <span style="color: #00ff00">green</span> world</p>"#);
2211        let p = &bs[0];
2212        assert_eq!(p.kind, Kind::Inlines);
2213        let green = p
2214            .children
2215            .iter()
2216            .find(|r| r.text.as_deref() == Some("green"))
2217            .expect("green run");
2218        assert_eq!(green.text_color, Some(Color::srgb_u8(0, 255, 0)));
2219    }
2220
2221    #[test]
2222    fn span_style_overrides_outer_mark_background() {
2223        let bs =
2224            blocks(r#"<p><mark>outer <span style="background: #0000ff">inner</span></mark></p>"#);
2225        let p = &bs[0];
2226        assert_eq!(p.kind, Kind::Inlines);
2227        let outer = p
2228            .children
2229            .iter()
2230            .find(|r| r.text.as_deref() == Some("outer "))
2231            .expect("outer run");
2232        let inner = p
2233            .children
2234            .iter()
2235            .find(|r| r.text.as_deref() == Some("inner"))
2236            .expect("inner run");
2237        // Outer keeps the mark's yellow.
2238        assert_eq!(outer.text_bg, Some(tokens::WARNING.with_alpha_u8(60)));
2239        // Inner's style attr wins.
2240        assert_eq!(inner.text_bg, Some(Color::srgb_u8(0, 0, 255)));
2241    }
2242
2243    #[test]
2244    fn span_style_font_weight_and_font_style_compose_with_tag_state() {
2245        let bs = blocks(
2246            r#"<p><strong>bold <span style="font-style: italic; font-size: 24px">and italic</span></strong></p>"#,
2247        );
2248        let p = &bs[0];
2249        assert_eq!(p.kind, Kind::Inlines);
2250        let bold_only = p
2251            .children
2252            .iter()
2253            .find(|r| r.text.as_deref() == Some("bold "))
2254            .expect("bold-only run");
2255        let bold_italic = p
2256            .children
2257            .iter()
2258            .find(|r| r.text.as_deref() == Some("and italic"))
2259            .expect("bold + italic run");
2260        assert_eq!(bold_only.font_weight, FontWeight::Bold);
2261        assert!(!bold_only.text_italic);
2262        assert_eq!(bold_italic.font_weight, FontWeight::Bold);
2263        assert!(bold_italic.text_italic);
2264        assert_eq!(bold_italic.font_size, 24.0);
2265    }
2266
2267    #[test]
2268    fn style_attr_with_invalid_value_silently_drops_that_decl() {
2269        // padding is malformed; color and font-weight still apply.
2270        let bs = blocks(r#"<p style="color: red; padding: bogus; font-weight: 700">hello</p>"#);
2271        let p = &bs[0];
2272        assert_eq!(p.text_color, Some(Color::srgb_u8(255, 0, 0)));
2273        assert_eq!(p.font_weight, FontWeight::Bold);
2274        assert_eq!(p.padding, Sides::zero());
2275    }
2276
2277    #[test]
2278    fn ul_style_applies_to_outer_list_container() {
2279        let bs = blocks(r#"<ul style="padding: 16px; background: #eee"><li>a</li><li>b</li></ul>"#);
2280        let list = &bs[0];
2281        assert_eq!(list.padding, Sides::all(16.0));
2282        assert_eq!(list.fill, Some(Color::srgb_u8(238, 238, 238)));
2283    }
2284
2285    // ---------- tier-2C: details / figure / button / input ----------
2286
2287    #[test]
2288    fn details_without_open_shows_only_summary() {
2289        let bs = blocks("<details><summary>more</summary><p>body</p></details>");
2290        assert_eq!(bs.len(), 1);
2291        let combined = flatten_text(&bs[0]);
2292        assert!(combined.contains("more"));
2293        assert!(!combined.contains("body"));
2294    }
2295
2296    #[test]
2297    fn details_with_open_attr_shows_summary_and_body() {
2298        let bs = blocks("<details open><summary>more</summary><p>body</p></details>");
2299        let combined = flatten_text(&bs[0]);
2300        assert!(combined.contains("more"));
2301        assert!(combined.contains("body"));
2302    }
2303
2304    #[test]
2305    fn details_without_summary_renders_placeholder_label() {
2306        let bs = blocks("<details open><p>orphan body</p></details>");
2307        let combined = flatten_text(&bs[0]);
2308        assert!(combined.contains("Details"));
2309        assert!(combined.contains("orphan body"));
2310    }
2311
2312    #[test]
2313    fn figure_with_figcaption_applies_muted_italic_to_caption() {
2314        let bs = blocks(
2315            "<figure><img src=\"https://damascene.dev/x.png\" alt=\"img\"><figcaption>caption text</figcaption></figure>",
2316        );
2317        assert_eq!(bs.len(), 1);
2318        let fig = &bs[0];
2319        // The figcaption's block is the last child, with muted+italic.
2320        let caption = fig
2321            .children
2322            .iter()
2323            .find(|c| c.text.as_deref() == Some("caption text"))
2324            .expect("caption block");
2325        assert!(caption.text_italic);
2326        // .muted() on a text leaf swaps text_color to MUTED_FOREGROUND.
2327        assert_eq!(caption.text_color, Some(tokens::MUTED_FOREGROUND));
2328    }
2329
2330    #[test]
2331    fn standalone_button_renders_as_button_widget() {
2332        // <button> is inline-classified; a standalone block-level
2333        // button arrives as an anonymous paragraph wrapping the
2334        // button run.
2335        let bs = blocks("<button>Save</button>");
2336        assert_eq!(bs.len(), 1);
2337        // Find a Custom("button") leaf anywhere in the produced tree.
2338        let mut found = false;
2339        fn search(el: &El, found: &mut bool) {
2340            if el.kind == Kind::Custom("button") {
2341                *found = true;
2342            }
2343            for c in &el.children {
2344                search(c, found);
2345            }
2346        }
2347        search(&bs[0], &mut found);
2348        assert!(found, "expected a button widget in the tree");
2349    }
2350
2351    #[test]
2352    fn button_inside_paragraph_flows_inline_with_text() {
2353        let bs = blocks("<p>click <button>here</button> please</p>");
2354        let p = &bs[0];
2355        assert_eq!(p.kind, Kind::Inlines);
2356        // 3 runs: "click ", <button>here</button>, " please"
2357        assert_eq!(p.children.len(), 3);
2358        assert_eq!(p.children[0].text.as_deref(), Some("click "));
2359        assert_eq!(p.children[1].kind, Kind::Custom("button"));
2360        assert_eq!(p.children[2].text.as_deref(), Some(" please"));
2361    }
2362
2363    #[test]
2364    fn standalone_input_checkbox_renders_with_checked_state() {
2365        let bs = blocks(r#"<input type="checkbox" checked>"#);
2366        // The checkbox widget is a styled bool. Tag is Kind::Custom.
2367        let mut found_kind: Option<Kind> = None;
2368        fn search(el: &El, found: &mut Option<Kind>) {
2369            if matches!(el.kind, Kind::Custom(_)) && found.is_none() {
2370                *found = Some(el.kind.clone());
2371            }
2372            for c in &el.children {
2373                search(c, found);
2374            }
2375        }
2376        search(&bs[0], &mut found_kind);
2377        assert!(found_kind.is_some(), "expected a custom widget kind");
2378    }
2379
2380    #[test]
2381    fn input_non_checkbox_is_silently_dropped() {
2382        let bs = blocks(r#"<p>before <input type="text" value="ignored"> after</p>"#);
2383        let p = &bs[0];
2384        // Should be one paragraph with "before  after" — input dropped.
2385        let combined = flatten_text(p);
2386        assert!(combined.contains("before"));
2387        assert!(combined.contains("after"));
2388        assert!(!combined.contains("ignored"));
2389    }
2390
2391    // ---------- tier-2B: <style> block + selector cascade ----------
2392
/// A bare tag selector in a `<style>` block colours the matching `<p>` and
/// leaves the `<h1>` on `tokens::FOREGROUND`.
#[test]
fn style_block_tag_selector_applies_to_matching_elements() {
    let markup = r#"<style>p { color: red }</style><p>red text</p><h1>untouched heading</h1>"#;
    let rendered = blocks(markup);

    // None of the stylesheet source may show up as rendered text.
    let all_text = rendered.iter().map(flatten_text).collect::<String>();
    assert!(!all_text.contains("color: red"));

    // The paragraph picks up the declared colour.
    let paragraph = rendered
        .iter()
        .find(|el| matches!(el.text.as_deref(), Some("red text")))
        .expect("matching paragraph");
    assert_eq!(paragraph.text_color, Some(Color::srgb_u8(255, 0, 0)));

    // The heading is not matched by `p`, so its colour is unchanged.
    let heading = rendered
        .iter()
        .find(|el| matches!(el.text.as_deref(), Some("untouched heading")))
        .expect("heading");
    assert_eq!(heading.text_color, Some(tokens::FOREGROUND));
}
2413
/// A `.callout` class rule styles only the `<div>` carrying that class; the
/// unclassed `<div>` yields a plain, unfilled "outside" block.
#[test]
fn style_block_class_selector_matches_by_class_attr() {
    let rendered = blocks(
        r#"<style>.callout { background: #ff0000; padding: 8px }</style>
               <div class="callout"><p>inside</p></div>
               <div><p>outside</p></div>"#,
    );

    // The classed div is the block that received the red fill and the padding.
    let callout = rendered
        .iter()
        .find(|el| el.fill == Some(Color::srgb_u8(255, 0, 0)))
        .expect("styled callout div");
    assert_eq!(callout.padding, Sides::all(8.0));
    assert!(flatten_text(callout).contains("inside"));

    // The other div adds no wrapper: its paragraph is a top-level block
    // without a fill.
    let plain_passthrough = rendered
        .iter()
        .any(|el| el.fill.is_none() && el.text.as_deref() == Some("outside"));
    assert!(plain_passthrough);
}
2434
/// An `#id` selector in a `<style>` block colours the element whose `id`
/// attribute matches.
#[test]
fn style_block_id_selector_matches_by_id_attr() {
    let markup = r#"<style>#hero { color: #00ff00 }</style>
               <p id="hero">hello</p>"#;
    let rendered = blocks(markup);
    let hero = &rendered[0];
    let pure_green = Color::srgb_u8(0, 255, 0);
    assert_eq!(hero.text_color, Some(pure_green));
}
2444
/// When a `<style>` rule and an inline `style` attribute both set `color`,
/// the inline declaration is the one that lands on the element.
#[test]
fn inline_style_attr_beats_style_block_rule() {
    let markup = r#"<style>p { color: red }</style>
               <p style="color: blue">overridden</p>"#;
    let rendered = blocks(markup);
    let paragraph = &rendered[0];
    // Blue comes from the inline attribute; the stylesheet asked for red.
    let inline_blue = Color::srgb_u8(0, 0, 255);
    assert_eq!(paragraph.text_color, Some(inline_blue));
}
2455
/// Specificity ordering across tag, tag+class and id selectors: the id rule
/// beats the class rule, which in turn beats the bare tag rule.
#[test]
fn higher_specificity_rule_wins_over_lower() {
    let rendered = blocks(
        r#"<style>
                 p { color: red }
                 p.note { color: blue }
                 #hero { color: green }
               </style>
               <p>plain → red</p>
               <p class="note">class → blue</p>
               <p id="hero">id → green</p>
               <p class="note" id="hero">id beats class</p>"#,
    );
    // One paragraph per scenario, in document order.
    let (tag_only, with_class, with_id, with_both) =
        (&rendered[0], &rendered[1], &rendered[2], &rendered[3]);

    // Only `p` matches.
    assert_eq!(tag_only.text_color, Some(Color::srgb_u8(255, 0, 0)));
    // `p.note` outranks `p`.
    assert_eq!(with_class.text_color, Some(Color::srgb_u8(0, 0, 255)));
    // `#hero` outranks `p`; the expected green is (0, 128, 0).
    assert_eq!(with_id.text_color, Some(Color::srgb_u8(0, 128, 0)));
    // `#hero` also outranks `p.note`.
    assert_eq!(with_both.text_color, Some(Color::srgb_u8(0, 128, 0)));
}
2478
/// Two rules with the same selector: the one that appears later in the
/// stylesheet decides the colour.
#[test]
fn later_rule_wins_at_equal_specificity() {
    let markup = r#"<style>p { color: red } p { color: blue }</style>
               <p>later wins</p>"#;
    let rendered = blocks(markup);
    let paragraph = &rendered[0];
    let later_blue = Color::srgb_u8(0, 0, 255);
    assert_eq!(paragraph.text_color, Some(later_blue));
}
2488
/// A `<style>` block nested in `<head>` of a full `<html>` document must
/// still colour the paragraph in `<body>`.
#[test]
fn style_block_inside_head_still_applies() {
    // Input may arrive wrapped in a complete document skeleton; the rule
    // declared inside <head> has to reach the body content regardless.
    let document = r#"<html>
                 <head><style>p { color: red }</style></head>
                 <body><p>red</p></body>
               </html>"#;
    let rendered = blocks(document);
    let paragraph = &rendered[0];
    let red = Color::srgb_u8(255, 0, 0);
    assert_eq!(paragraph.text_color, Some(red));
}
2503
/// With `sanitize_styles(true)` a `<style>` block has no effect on the
/// output and a `SanitizedStyle` finding is reported.
#[test]
fn sanitize_styles_option_drops_style_blocks() {
    let (root, findings) = html_with_lints(
        "<style>p { color: red }</style><p>plain</p>",
        HtmlOptions::default().sanitize_styles(true),
    );

    // The red rule was not applied: the paragraph is still on the default
    // foreground token.
    let paragraph = &root.children[0];
    assert_eq!(paragraph.text_color, Some(tokens::FOREGROUND));

    // The dropped stylesheet is surfaced through the lint channel.
    let reported = findings
        .iter()
        .any(|finding| matches!(finding.kind, FindingKind::SanitizedStyle));
    assert!(reported);
}
2517
/// With `sanitize_styles(true)` an inline `style` attribute is ignored and
/// reported; without the option the same attribute is honoured.
#[test]
fn sanitize_styles_option_drops_inline_style_attributes() {
    // Untrusted-input mode: author-supplied inline CSS (e.g. near-invisible
    // colours or tiny font sizes) must not influence the rendered element.
    let (sanitized_root, findings) = html_with_lints(
        "<p style=\"color: #112233; font-size: 1px\">styled</p>",
        HtmlOptions::default().sanitize_styles(true),
    );
    let sanitized_p = &sanitized_root.children[0];
    assert_eq!(sanitized_p.text_color, Some(tokens::FOREGROUND));

    // The finding's detail quotes the declaration that was discarded.
    let reported = findings
        .iter()
        .filter(|finding| matches!(finding.kind, FindingKind::SanitizedStyle))
        .any(|finding| finding.detail.contains("color: #112233"));
    assert!(reported);

    // Default options: the inline colour is applied as written.
    let trusted_root = html("<p style=\"color: #112233\">styled</p>");
    assert_eq!(
        trusted_root.children[0].text_color,
        Some(Color::srgb_u8(0x11, 0x22, 0x33))
    );
}
2542
/// A comma-separated selector list (`h1, h2, h3`) applies its declarations
/// to every tag named in the list.
#[test]
fn comma_grouped_selectors_apply_to_each_listed_tag() {
    let bs = blocks(
        r#"<style>h1, h2, h3 { color: #ff0000 }</style>
               <h1>one</h1><h2>two</h2><h3>three</h3>"#,
    );
    // Guard against a vacuous pass: without this, an empty result would make
    // the loop below succeed without checking a single heading.
    assert_eq!(bs.len(), 3, "expected one block per heading");
    for h in &bs {
        assert_eq!(h.text_color, Some(Color::srgb_u8(255, 0, 0)));
    }
}
2553
/// A class rule that matches an inline `<span>` colours that span's run
/// inside the paragraph's inline container.
#[test]
fn class_rule_applies_to_inline_span_runs() {
    let markup = r#"<style>.hl { color: #ff8800 }</style>
               <p>before <span class="hl">marked</span> after</p>"#;
    let rendered = blocks(markup);
    let paragraph = &rendered[0];
    // Mixed styling makes the paragraph an inline-run container.
    assert_eq!(paragraph.kind, Kind::Inlines);

    // Locate the run that carries the span's text and check its colour.
    let marked_run = paragraph
        .children
        .iter()
        .find(|run| matches!(run.text.as_deref(), Some("marked")))
        .expect("highlighted run");
    assert_eq!(marked_run.text_color, Some(Color::srgb_u8(255, 136, 0)));
}
2569
2570    // ---------- Tier-2D — layout reconciliation + lint surface ----------
2571
/// Three siblings with identical vertical margins produce a root `gap` of
/// that margin and no asymmetry lint.
#[test]
fn uniform_sibling_margins_become_outer_column_gap() {
    let markup = "<p style=\"margin: 12px 0\">a</p>\
                  <p style=\"margin: 12px 0\">b</p>\
                  <p style=\"margin: 12px 0\">c</p>";
    let (root, findings) = html_with_lints(markup, HtmlOptions::default());

    // Every sibling pair agrees, so nothing should be flagged as flattened.
    let no_asymmetry_lint = findings
        .iter()
        .all(|finding| !matches!(finding.kind, FindingKind::MarginAsymmetryFlattened));
    assert!(no_asymmetry_lint);

    // The shared 12px margin becomes the gap on the returned root element.
    assert_eq!(root.gap, 12.0);
}
2589
/// When sibling pairs disagree on their margin, the root `gap` takes the
/// largest value and a `MarginAsymmetryFlattened` finding is emitted.
#[test]
fn asymmetric_sibling_margins_lint_and_flatten_to_max() {
    let markup = "<p style=\"margin-bottom: 20px\">a</p>\
                  <p style=\"margin: 4px 0\">b</p>\
                  <p style=\"margin: 4px 0\">c</p>";
    let (root, findings) = html_with_lints(markup, HtmlOptions::default());

    // Pair (a, b) resolves to max(20, 4) = 20 while pair (b, c) resolves to
    // max(4, 4) = 4; the mismatch has to be reported ...
    let flagged = findings
        .iter()
        .any(|finding| matches!(finding.kind, FindingKind::MarginAsymmetryFlattened));
    assert!(flagged);

    // ... and the single gap that survives is the larger of the two.
    assert_eq!(root.gap, 20.0);
}
2607
/// The first child's `margin-top` shows up as top padding on the returned
/// root element.
#[test]
fn first_child_margin_top_folds_into_outer_padding_top() {
    let markup = "<p style=\"margin-top: 32px\">a</p><p>b</p>";
    // Findings are irrelevant to this check.
    let (root, _lints) = html_with_lints(markup, HtmlOptions::default());
    assert_eq!(root.padding.top, 32.0);
}
2616
/// `display: flex` with row direction, `align-items` and `justify-content`
/// maps onto the wrapper's `axis`, `align` and `justify` fields.
#[test]
fn display_flex_with_row_direction_sets_axis_on_styled_div() {
    let markup = "<div style=\"display: flex; flex-direction: row; \
             align-items: center; justify-content: space-between\">\
                <p>left</p><p>right</p>\
             </div>";
    let rendered = blocks(markup);
    // The styled div is the only top-level block.
    assert_eq!(rendered.len(), 1);

    let flex_box = &rendered[0];
    assert_eq!(flex_box.axis, Axis::Row);
    assert_eq!(flex_box.align, Align::Center);
    assert_eq!(flex_box.justify, Justify::SpaceBetween);
}
2631
/// `overflow: hidden` on a styled container turns on its `clip` flag.
#[test]
fn overflow_hidden_sets_clip_on_styled_container() {
    let markup = "<div style=\"overflow: hidden; padding: 8px\"><p>x</p></div>";
    let rendered = blocks(markup);
    let container = &rendered[0];
    assert!(container.clip);
}
2637
/// `overflow: auto` makes the top-level block for the container a
/// `Kind::Scroll` element.
#[test]
fn overflow_auto_wraps_container_in_scroll() {
    let markup = "<div style=\"overflow: auto; padding: 8px\"><p>x</p></div>";
    let rendered = blocks(markup);
    let outermost = &rendered[0];
    assert_eq!(outermost.kind, Kind::Scroll);
}
2643
/// The blur component of `box-shadow` (12px here) is stored in the
/// container's `shadow` field.
#[test]
fn box_shadow_blur_lands_on_shadow_modifier() {
    let markup = "<div style=\"padding: 4px; box-shadow: 0 2px 12px black\"><p>x</p></div>";
    let rendered = blocks(markup);
    // Compare with a small tolerance instead of exact float equality.
    let blur_error = (rendered[0].shadow - 12.0).abs();
    assert!(blur_error < 0.001);
}
2649
/// `font-family: monospace` on an inline `<span>` sets `font_mono` on the
/// run generated for that span.
#[test]
fn font_family_monospace_flips_mono_on_inline_run() {
    let markup = "<p>plain <span style=\"font-family: monospace\">mono</span> tail</p>";
    let rendered = blocks(markup);
    let paragraph = &rendered[0];
    assert_eq!(paragraph.kind, Kind::Inlines);

    // Pick out the run holding the span's text.
    let styled_run = paragraph
        .children
        .iter()
        .find(|run| matches!(run.text.as_deref(), Some("mono")))
        .expect("mono run");
    assert!(styled_run.font_mono, "expected font_mono on the styled span");
}
2662
/// A length in an unsupported unit (`4vw`) is reported as a
/// `DroppedDeclaration` finding whose detail names the offending value.
#[test]
fn unsupported_unit_in_inline_style_emits_finding() {
    let markup = "<p style=\"font-size: 4vw\">a</p>";
    let (_root, findings) = html_with_lints(markup, HtmlOptions::default());
    let reported = findings
        .iter()
        .filter(|finding| matches!(finding.kind, FindingKind::DroppedDeclaration))
        .any(|finding| finding.detail.contains("4vw"));
    assert!(reported);
}
2671
/// `position: absolute` is reported as a `DroppedDeclaration`, and the
/// element's text is rendered all the same.
#[test]
fn position_absolute_emits_finding_but_keeps_content() {
    let markup = "<p style=\"position: absolute\">still rendered</p>";
    let (root, findings) = html_with_lints(markup, HtmlOptions::default());

    // The declaration is surfaced through the lint channel ...
    let reported = findings
        .iter()
        .filter(|finding| matches!(finding.kind, FindingKind::DroppedDeclaration))
        .any(|finding| finding.detail.contains("position"));
    assert!(reported);

    // ... while the paragraph text is left intact.
    assert_eq!(flatten_text(&root), "still rendered");
}
2684
/// `float: left` is reported as a `DroppedDeclaration` finding that
/// mentions `float`.
#[test]
fn float_left_emits_finding() {
    let markup = "<div style=\"float: left\"><p>x</p></div>";
    let (_root, findings) = html_with_lints(markup, HtmlOptions::default());
    let reported = findings
        .iter()
        .filter(|finding| matches!(finding.kind, FindingKind::DroppedDeclaration))
        .any(|finding| finding.detail.contains("float"));
    assert!(reported);
}
2695
/// A `<video>` element triggers an `UnsupportedTag` finding, yet the text
/// nested inside it still appears in the output.
#[test]
fn unsupported_video_tag_emits_finding_and_flattens_text() {
    let markup = "<p>before</p><video><p>video body</p></video><p>after</p>";
    let (root, findings) = html_with_lints(markup, HtmlOptions::default());

    // The tag is called out by name in the finding's detail.
    let reported = findings
        .iter()
        .filter(|finding| matches!(finding.kind, FindingKind::UnsupportedTag))
        .any(|finding| finding.detail.contains("video"));
    assert!(reported);

    // Author text nested in the unsupported tag is not lost.
    let rendered_text = flatten_text(&root);
    assert!(rendered_text.contains("video body"));
}
2709
/// A selector the cascade cannot handle (`p > span`) is reported as
/// `UnsupportedSelector`; the supported `.note` rule in the same stylesheet
/// is applied regardless.
#[test]
fn unsupported_style_selector_emits_finding_other_rules_still_apply() {
    let markup = "<style>p > span { color: red } .note { color: blue }</style>\
             <p class=\"note\">styled</p>";
    let (root, findings) = html_with_lints(markup, HtmlOptions::default());

    // The rejected selector is quoted in the finding's detail.
    let reported = findings
        .iter()
        .filter(|finding| matches!(finding.kind, FindingKind::UnsupportedSelector))
        .any(|finding| finding.detail.contains("p > span"));
    assert!(reported);

    // The neighbouring `.note` rule still coloured the paragraph blue.
    let noted = &root.children[0];
    assert_eq!(noted.text_color, Some(Color::srgb_u8(0, 0, 255)));
}
2723}