Skip to main content

pdf_xfa/
template_parser.rs

1//! XDP template XML → FormTree parser.
2//!
3//! Reads the `<template>` packet from an XFA XDP document and builds a
4//! `FormTree` suitable for `LayoutEngine::layout()`.
5//!
6//! XFA Spec 3.3 §2.1 — Form Structural Building Blocks:
7//!   Container elements: subform, field, draw, exclGroup, area
8//!   Page-level:         pageSet, pageArea, contentArea, medium
9//!   Metadata:           caption, value, ui, font, border, margin, para
10//!
11//! XFA Spec 3.3 §2.2 — Basic Composition:
12//!   Measurements use absolute units (in, cm, mm, pt) with inches as default.
13//!   Dimension strings ("0.5in", "72pt", "10mm") are converted to PDF
14//!   points via `Measurement::parse`.
15
16use roxmltree::Node;
17
18use xfa_layout_engine::form::{
19    AnchorType, ContentArea, DrawContent, EventScript, FieldKind, FormNode, FormNodeId,
20    FormNodeMeta, FormNodeStyle, FormNodeType, FormTree, GroupKind, Occur, Presence,
21    ScriptLanguage,
22};
23use xfa_layout_engine::text::{FontFamily, FontMetrics};
24use xfa_layout_engine::types::{
25    BoxModel, Caption, CaptionPlacement, Insets, LayoutStrategy, Measurement, TextAlign,
26    VerticalAlign,
27};
28
29use crate::error::{Result, XfaError};
30
31/// Parse a `<template>` XML packet into a `FormTree`.
32///
33/// `xml` should be the raw content of the template packet, starting with
34/// the `<template …>` element (with or without an XML declaration).
35///
36/// If `datasets_xml` is provided, field values are merged from the
37/// `<xfa:data>` section of the datasets packet.
38pub fn parse_template(xml: &str, datasets_xml: Option<&str>) -> Result<(FormTree, FormNodeId)> {
39    let doc = roxmltree::Document::parse(xml)
40        .map_err(|e| XfaError::ParseFailed(format!("template XML parse error: {e}")))?;
41
42    let root_elem = doc.root_element();
43    // The packet may start with <template> directly, or may have been
44    // wrapped inside <xdp:xdp>. Accept both.
45    let template_elem = if root_elem.tag_name().name() == "template" {
46        root_elem
47    } else {
48        // Find the first <template> descendant.
49        find_first_child_by_name(root_elem, "template")
50            .ok_or_else(|| XfaError::PacketNotFound("no <template> element found".to_string()))?
51    };
52
53    let mut tree = FormTree::new();
54    let (root_id, _trailing) = parse_node(&mut tree, template_elem, true)?;
55
56    // Data binding: merge field values from datasets XML.
57    if let Some(ds_xml) = datasets_xml {
58        if let Ok(ds_doc) = roxmltree::Document::parse(ds_xml) {
59            if let Some(data_root) = find_data_root(ds_doc.root_element()) {
60                bind_data(&mut tree, root_id, &data_root, &data_root);
61            }
62        }
63    }
64
65    Ok((tree, root_id))
66}
67
68// ---------------------------------------------------------------------------
69// Recursive node parser
70// ---------------------------------------------------------------------------
71
72/// Parse an XML element into a form node and add it to the tree.
73///
74/// Returns `(FormNodeId, trailing_break)` where `trailing_break` is true when
75/// the node's `add_children` ended with a pending breakBefore that could not
76/// be consumed within the node (i.e. the breakBefore appeared after the last
77/// content child and should propagate to the next sibling at the parent level).
78fn parse_node(
79    tree: &mut FormTree,
80    elem: Node<'_, '_>,
81    is_root: bool,
82) -> Result<(FormNodeId, (bool, Option<String>))> {
83    let tag = elem.tag_name().name();
84
85    let (node, trailing_info) = match tag {
86        "template" => {
87            let mut n = parse_root_node(tree, elem)?;
88            let ti = add_children(tree, &mut n, elem)?;
89            (n, ti)
90        }
91        // XFA Spec 3.3 §2.1 — Five building blocks: subform, field, draw, exclGroup, area.
92        // `area` is a fixed-size container identical to subform but does not grow;
93        // if capacity is exceeded a new area is created. We parse it like a subform.
94        "subform" | "exclGroup" | "area" => {
95            let mut n = parse_subform_node(tree, elem, is_root)?;
96            let ti = add_children(tree, &mut n, elem)?;
97            // Per XFA 3.3 §2.1: checkButton inside exclGroup renders as radio button (circle).
98            if tag == "exclGroup" {
99                for &child_id in &n.children {
100                    let child_meta = tree.meta_mut(child_id);
101                    if child_meta.field_kind == FieldKind::Checkbox {
102                        child_meta.field_kind = FieldKind::Radio;
103                    }
104                }
105            }
106            (n, ti)
107        }
108        "field" => (parse_field(tree, elem)?, (false, None)),
109        "draw" => (parse_draw(tree, elem)?, (false, None)),
110        "pageSet" => (parse_page_set(tree, elem)?, (false, None)),
111        "pageArea" => (parse_page_area(tree, elem)?, (false, None)),
112        _ => {
113            let mut n = blank_node(tag);
114            let ti = add_children(tree, &mut n, elem)?;
115            (n, ti)
116        }
117    };
118
119    let mut meta = parse_node_meta(elem);
120    // Only fields need their <margin ...Inset> values forwarded to
121    // style.inset_*_pt — the renderer uses that to offset the value and
122    // shrink the border/bg to the inner rect. Draws are skipped because
123    // their decorative insets in background-image forms (see 49f8705c)
124    // cause visible misalignment with the pre-rendered backdrop; subforms
125    // are skipped to avoid the double-application the layout engine
126    // already handles via content_width()/content_height() subtraction.
127    if tag == "field" {
128        meta.style.inset_top_pt = Some(node.box_model.margins.top);
129        meta.style.inset_bottom_pt = Some(node.box_model.margins.bottom);
130        meta.style.inset_left_pt = Some(node.box_model.margins.left);
131        meta.style.inset_right_pt = Some(node.box_model.margins.right);
132    }
133    Ok((tree.add_node_with_meta(node, meta), trailing_info))
134}
135
136/// Build the root FormNode (without children — `parse_node` calls `add_children`).
137fn parse_root_node(_tree: &mut FormTree, _elem: Node<'_, '_>) -> Result<FormNode> {
138    Ok(FormNode {
139        name: "root".to_string(),
140        node_type: FormNodeType::Root,
141        box_model: BoxModel {
142            max_width: f64::MAX,
143            max_height: f64::MAX,
144            ..Default::default()
145        },
146        layout: LayoutStrategy::TopToBottom,
147        children: Vec::new(),
148        occur: Occur::once(),
149        font: FontMetrics::default(),
150        calculate: None,
151        validate: None,
152        column_widths: Vec::new(),
153        col_span: 1,
154    })
155}
156
157/// Build a subform FormNode (without children — `parse_node` calls `add_children`).
158fn parse_subform_node(
159    _tree: &mut FormTree,
160    elem: Node<'_, '_>,
161    _is_root: bool,
162) -> Result<FormNode> {
163    let name = attr(elem, "name").unwrap_or("").to_string();
164    let layout = parse_layout_attr(elem);
165    let mut bm = parse_box_model(elem);
166    // For paginate-layout root subforms, set a default US Letter size if
167    // no explicit size is given.
168    if layout == LayoutStrategy::TopToBottom && bm.width.is_none() {
169        bm.width = Some(612.0);
170    }
171
172    Ok(FormNode {
173        name,
174        node_type: FormNodeType::Subform,
175        box_model: bm,
176        layout,
177        children: Vec::new(),
178        occur: parse_occur(elem),
179        font: FontMetrics::default(),
180        calculate: None,
181        validate: None,
182        column_widths: Vec::new(),
183        col_span: 1,
184    })
185}
186
187fn parse_field(tree: &mut FormTree, elem: Node<'_, '_>) -> Result<FormNode> {
188    let name = attr(elem, "name").unwrap_or("").to_string();
189    let bm = parse_box_model(elem);
190
191    // Always extract the field value and preserve the Field node type.
192    // Visibility is controlled by FormNodeMeta.presence -- the layout
193    // engine skips invisible/inactive nodes; hidden ones produce empty space.
194    // Dynamic scripts can later toggle presence to "visible", so we must
195    // preserve the content for all fields.
196    let value = extract_value_text(elem).unwrap_or_default();
197
198    // Extract caption from <caption> child element (placement, reserve, text).
199    let mut bm_with_caption = bm.clone();
200    if !is_hidden(elem) {
201        if let Some(cap) = parse_caption(elem) {
202            bm_with_caption.caption = Some(cap);
203        }
204    }
205
206    let mut font = parse_font_metrics(elem);
207    // Override font size from exData HTML if present.
208    if let Some(html_size) = extract_exdata_font_size(elem) {
209        font.size = html_size;
210    }
211
212    let node = FormNode {
213        name,
214        node_type: FormNodeType::Field { value },
215        box_model: bm_with_caption,
216        layout: LayoutStrategy::Positioned,
217        children: Vec::new(),
218        occur: Occur::once(),
219        font,
220        calculate: None,
221        validate: None,
222        column_widths: Vec::new(),
223        col_span: parse_col_span(elem),
224    };
225    // Fields are leaf nodes — no child traversal needed.
226    let _ = tree; // suppress unused warning
227    Ok(node)
228}
229
230fn parse_draw(tree: &mut FormTree, elem: Node<'_, '_>) -> Result<FormNode> {
231    let name = attr(elem, "name").unwrap_or("").to_string();
232    let bm = parse_box_model(elem);
233
234    // Try geometric draw content (line, rectangle, arc) first
235    if let Some(draw_content) = extract_draw_content(elem) {
236        let node = FormNode {
237            name,
238            node_type: FormNodeType::Draw(draw_content),
239            box_model: bm,
240            layout: LayoutStrategy::Positioned,
241            children: Vec::new(),
242            occur: Occur::once(),
243            font: FontMetrics::default(),
244            calculate: None,
245            validate: None,
246            column_widths: Vec::new(),
247            col_span: 1,
248        };
249        let _ = tree;
250        return Ok(node);
251    }
252
253    if let Some((image_data, mime_type)) = extract_value_image(elem) {
254        let node = FormNode {
255            name,
256            node_type: FormNodeType::Image {
257                data: image_data,
258                mime_type,
259            },
260            box_model: bm,
261            layout: LayoutStrategy::Positioned,
262            children: Vec::new(),
263            occur: Occur::once(),
264            font: FontMetrics::default(),
265            calculate: None,
266            validate: None,
267            column_widths: Vec::new(),
268            col_span: 1,
269        };
270        let _ = tree;
271        return Ok(node);
272    }
273
274    let mut font = parse_font_metrics(elem);
275    // If the content came from <exData contentType="text/html">, extract
276    // the dominant font size from the HTML <span style="font-size:Xpt">.
277    // This overrides the default 10pt when the HTML specifies differently.
278    if let Some(html_size) = extract_exdata_font_size(elem) {
279        font.size = html_size;
280    }
281
282    // Always extract content — visibility is controlled by metadata.
283    let content = extract_value_text(elem).unwrap_or_default();
284
285    let node = FormNode {
286        name,
287        node_type: FormNodeType::Draw(DrawContent::Text(content)),
288        box_model: bm,
289        layout: LayoutStrategy::Positioned,
290        children: Vec::new(),
291        occur: Occur::once(),
292        font,
293        calculate: None,
294        validate: None,
295        column_widths: Vec::new(),
296        col_span: 1,
297    };
298    let _ = tree;
299    Ok(node)
300}
301
302/// Return `true` when the element has a `presence` attribute value that means
303/// the element should not be rendered (`"hidden"`, `"invisible"`, `"inactive"`).
304/// Elements with no `presence` attribute or `presence="visible"` are rendered.
305fn is_hidden(elem: Node<'_, '_>) -> bool {
306    matches!(
307        attr(elem, "presence"),
308        Some("hidden") | Some("invisible") | Some("inactive")
309    )
310}
311
312/// Parse a font size string where bare numbers are in **points** (not inches).
313///
314/// XFA `<font size="…">` uses points as the default unit — a bare number like `"10"`
315/// means 10pt, not 10 inches.  General dimension attributes (w, h, x, y) use inches
316/// as the default, so they go through `parse_dim` instead.
317fn parse_font_size(s: &str) -> Option<f64> {
318    // Bare number → points (XFA default for font sizes).
319    if let Ok(v) = s.trim().parse::<f64>() {
320        return if v > 0.0 { Some(v) } else { None };
321    }
322    // Explicit unit ("10pt", "3mm", …) → convert to points.
323    Measurement::parse(s).map(|m| m.to_points())
324}
325
/// Parse a percentage string like `"96%"` → `0.96`, `"110%"` → `1.1`.
///
/// Surrounding whitespace, and whitespace between the number and the `%`
/// sign, is ignored. Returns `None` if the string does not end in `%`, if the
/// numeric part does not parse, or if it parses to a non-finite value
/// (`f64::from_str` accepts spellings such as `"inf"` and `"nan"`).
fn parse_percentage(s: &str) -> Option<f64> {
    let number = s.trim().strip_suffix('%')?.trim();
    number
        .parse::<f64>()
        .ok()
        .filter(|value| value.is_finite())
        .map(|value| value / 100.0)
}
334
335/// Parse a letter-spacing string. Supported formats:
336/// - `"-0.018em"` → converted to points using the given font size
337/// - `"0.5pt"` → points directly
338/// - `"1mm"` → converted to points via Measurement
339///   Returns `None` if the string cannot be parsed.
340fn parse_letter_spacing(s: &str, font_size_pt: f64) -> Option<f64> {
341    let s = s.trim();
342    if s == "0" {
343        return Some(0.0);
344    }
345    // Try em-based value: "-0.018em"
346    if let Some(num_str) = s.strip_suffix("em") {
347        let v: f64 = num_str.trim().parse().ok()?;
348        return Some(v * font_size_pt);
349    }
350    // Try standard measurement ("0.5pt", "1mm", etc.)
351    Measurement::parse(s).map(|m| m.to_points())
352}
353
354/// Parse font size and text alignment from `<font size="…">` and `<para hAlign="…">` child
355/// elements (XFA 3.3 §7.1). Returns `FontMetrics::default()` when no matching elements found.
356///
357/// XFA Spec 3.3 §2.6 (p57-58) — Font properties: typeface (default Courier for
358/// data-entry), size (default 10pt), posture (normal/italic), weight (bold/normal),
359/// baselineShift, fontHorizontalScale, fontVerticalScale, kerningMode, letterSpacing,
360/// lineThrough/lineThroughPeriod, overline/overlinePeriod, underline/underlinePeriod.
361///
362/// TODO(§2.6): baselineShift, kerningMode, lineThrough, underline not parsed.
363/// TODO(§2.4 p59): hAlign="radix" and hAlign="justifyAll" not handled.
364///
365/// XFA Spec 3.3 §28.1 — Adobe Non-conformance: Adobe ignores the "overline"
366/// attribute on the font element (p1227) and the "lineThroughPeriod" attribute
367/// (p1228). We intentionally skip these to match Adobe's behavior for SSIM.
368fn parse_font_metrics(elem: Node<'_, '_>) -> FontMetrics {
369    let font_elem = find_first_child_by_name(elem, "font");
370    let size = font_elem
371        .and_then(|f| attr(f, "size"))
372        .and_then(parse_font_size)
373        .unwrap_or(FontMetrics::default().size);
374    // XFA Spec 3.3 §17 (p716) — genericFamily: fallback classification hint.
375    // If present, use it to classify the font family instead of guessing from
376    // the typeface name. This matches Adobe's §28.2 step 4 behavior.
377    let generic_family_str = font_elem.and_then(|f| attr(f, "genericFamily"));
378    let typeface = if let Some(gf) = generic_family_str {
379        FontFamily::from_generic_family(gf)
380    } else {
381        font_elem
382            .and_then(|f| attr(f, "typeface"))
383            .map(FontFamily::from_typeface)
384            .unwrap_or_default()
385    };
386    // XFA Spec 3.3 §2.4 (p44, p59-60) — hAlign values:
387    //   left, center, right, justify, justifyAll, radix
388    let text_align = find_first_child_by_name(elem, "para")
389        .and_then(|p| attr(p, "hAlign"))
390        .map(|a| match a {
391            "center" => TextAlign::Center,
392            "right" => TextAlign::Right,
393            "justify" | "justifyAll" => TextAlign::Justify,
394            // TODO(§2.4): "radix" alignment needs radixOffset support
395            _ => TextAlign::Left,
396        })
397        .unwrap_or_default();
398    FontMetrics {
399        size,
400        text_align,
401        typeface,
402        ..FontMetrics::default()
403    }
404}
405
406fn parse_page_set(tree: &mut FormTree, elem: Node<'_, '_>) -> Result<FormNode> {
407    let name = attr(elem, "name").unwrap_or("pageSet").to_string();
408    let mut node = FormNode {
409        name,
410        node_type: FormNodeType::PageSet,
411        box_model: BoxModel {
412            max_width: f64::MAX,
413            max_height: f64::MAX,
414            ..Default::default()
415        },
416        layout: LayoutStrategy::TopToBottom,
417        children: Vec::new(),
418        occur: Occur::once(),
419        font: FontMetrics::default(),
420        calculate: None,
421        validate: None,
422        column_widths: Vec::new(),
423        col_span: 1,
424    };
425    // Children of pageSet are pageArea elements.
426    for child in elem.children().filter(|n| n.is_element()) {
427        if child.tag_name().name() == "pageArea" {
428            let (child_id, _) = parse_node(tree, child, false)?;
429            node.children.push(child_id);
430        }
431    }
432    Ok(node)
433}
434
435fn parse_page_area(tree: &mut FormTree, elem: Node<'_, '_>) -> Result<FormNode> {
436    let name = attr(elem, "name").unwrap_or("").to_string();
437
438    // Read <medium> for page dimensions.
439    let (page_w, page_h) = read_medium(elem);
440
441    // Read <contentArea> elements.
442    let content_areas = read_content_areas(elem, page_w, page_h);
443
444    let bm = BoxModel {
445        width: Some(page_w),
446        height: Some(page_h),
447        ..Default::default()
448    };
449
450    let mut node = FormNode {
451        name,
452        node_type: FormNodeType::PageArea { content_areas },
453        box_model: bm,
454        layout: LayoutStrategy::Positioned,
455        children: Vec::new(),
456        occur: Occur::once(),
457        font: FontMetrics::default(),
458        calculate: None,
459        validate: None,
460        column_widths: Vec::new(),
461        col_span: 1,
462    };
463    // Parse child draw/subform elements (page-level headers, footers, lines).
464    add_children(tree, &mut node, elem)?;
465    Ok(node)
466}
467
468// ---------------------------------------------------------------------------
469// Metadata parsing
470// ---------------------------------------------------------------------------
471
472/// Build `FormNodeMeta` from XFA element attributes and child elements.
473fn parse_node_meta(elem: Node<'_, '_>) -> FormNodeMeta {
474    let tag = elem.tag_name().name();
475
476    // (a) Presence attribute (XFA 3.3 §2.6 p67-68):
477    //     visible  — normal rendering (default)
478    //     invisible — takes space but not visible
479    //     hidden   — no space, no visible rendering
480    //     inactive — completely ignored (no binding, no space)
481    let presence = match attr(elem, "presence") {
482        Some("hidden") => Presence::Hidden,
483        Some("invisible") => Presence::Invisible,
484        Some("inactive") => Presence::Inactive,
485        _ => Presence::Visible,
486    };
487
488    // (b) Page break detection: look for <breakBefore>, <breakAfter>, or <break> child.
489    let (page_break_before, break_before_target) = detect_page_break_before(elem);
490    let (page_break_after, break_after_target) = detect_page_break_after(elem);
491    let content_area_break = detect_content_area_break(elem);
492
493    // Prefer break_before_target, then break_after_target.
494    let break_target = break_before_target.or(break_after_target);
495
496    // (c) Event scripts.
497    let event_scripts = collect_event_scripts(elem);
498
499    // (d) Keep child attributes.
500    let (keep_next_content_area, keep_previous_content_area, keep_intact_content_area) =
501        parse_keep(elem);
502
503    // (e) Overflow leader/trailer.
504    let (overflow_leader, overflow_trailer) = parse_overflow(elem);
505
506    // (f) Group kind: exclGroup → ExclusiveChoice.
507    let group_kind = if tag == "exclGroup" {
508        GroupKind::ExclusiveChoice
509    } else {
510        GroupKind::None
511    };
512
513    // (g) Item value for field elements: <items><text>VALUE</text></items>.
514    let item_value = if tag == "field" {
515        parse_item_value(elem)
516    } else {
517        None
518    };
519
520    // (g2) Choice list items for dropdown fields (XFA 3.3 §7.7).
521    let (display_items, save_items) = if tag == "field" {
522        parse_items_lists(elem)
523    } else {
524        (Vec::new(), Vec::new())
525    };
526
527    // (h) XFA id attribute.
528    let xfa_id = attr(elem, "id").map(|s| s.to_string());
529
530    // (i) Field UI kind: detect <checkButton>, <choiceList>, etc. inside <ui>.
531    let field_kind = detect_field_kind(elem);
532
533    // (j) Visual style: colors, borders, font from XFA template elements.
534    let style = parse_node_style(elem);
535    let (data_bind_ref, data_bind_none) = parse_bind(elem);
536    let anchor_type = parse_anchor_type(elem);
537
538    FormNodeMeta {
539        xfa_id,
540        presence,
541        page_break_before,
542        page_break_after,
543        break_target,
544        content_area_break,
545        overflow_leader,
546        overflow_trailer,
547        keep_next_content_area,
548        keep_previous_content_area,
549        keep_intact_content_area,
550        event_scripts,
551        data_bind_ref,
552        data_bind_none,
553        group_kind,
554        item_value,
555        field_kind,
556        style,
557        display_items,
558        save_items,
559        anchor_type,
560        ..Default::default()
561    }
562}
563
564fn parse_bind(elem: Node<'_, '_>) -> (Option<String>, bool) {
565    let Some(bind) = find_first_child_by_name(elem, "bind") else {
566        return (None, false);
567    };
568
569    let bind_none = attr(bind, "match") == Some("none");
570    let bind_ref = if bind_none {
571        None
572    } else {
573        attr(bind, "ref").map(|s| s.trim().to_string())
574    };
575    (bind_ref, bind_none)
576}
577
578/// Parse visual style from XFA template elements.
579///
580/// Extracts colors from `<fill><color value="r,g,b"/>`, border from
581/// `<border><edge><color value="r,g,b"/>`, and font from `<font>`.
582fn parse_node_style(elem: Node<'_, '_>) -> FormNodeStyle {
583    let mut style = FormNodeStyle {
584        check_button_mark: parse_check_button_mark(elem),
585        ..Default::default()
586    };
587
588    // Parse <fill><color value="r,g,b"/> for background color.
589    // Skip when presence="hidden"/"invisible"/"inactive".
590    if let Some(fill) = find_first_child_by_name(elem, "fill") {
591        if !is_hidden(fill) {
592            if let Some(color) = find_first_child_by_name(fill, "color") {
593                if let Some(rgb) = parse_xfa_color(color) {
594                    style.bg_color = Some(rgb);
595                }
596            }
597            // Also check <fill><solid><color .../> pattern.
598            if style.bg_color.is_none() {
599                if let Some(solid) = find_first_child_by_name(fill, "solid") {
600                    if let Some(color) = find_first_child_by_name(solid, "color") {
601                        if let Some(rgb) = parse_xfa_color(color) {
602                            style.bg_color = Some(rgb);
603                        }
604                    }
605                }
606            }
607        }
608    }
609
610    // Parse <border><edge> for border color and thickness.
611    // Borders can live directly on the element OR inside <ui><textEdit|…><border>.
612    let border = find_first_child_by_name(elem, "border").or_else(|| {
613        let ui = find_first_child_by_name(elem, "ui")?;
614        ui.children()
615            .filter(|c| c.is_element() && c.tag_name().name() != "border")
616            .find_map(|widget| find_first_child_by_name(widget, "border"))
617    });
618    if let Some(border) = border {
619        // Collect all <edge> children for per-edge visibility (XFA §D.7).
620        let edges: Vec<_> = border
621            .children()
622            .filter(|c| c.is_element() && c.tag_name().name() == "edge")
623            .collect();
624        // Use first visible edge for color/thickness (backward compat).
625        let first_visible = edges
626            .iter()
627            .find(|e| !is_hidden(**e))
628            .or_else(|| edges.first());
629        if let Some(edge) = first_visible {
630            if let Some(color) = find_first_child_by_name(*edge, "color") {
631                if let Some(rgb) = parse_xfa_color(color) {
632                    style.border_color = Some(rgb);
633                }
634            }
635            let stroke = attr(*edge, "stroke").unwrap_or("solid");
636            if stroke != "none" {
637                // fix(#808): XFA default edge thickness is 1pt (matches BoxModel::border_width
638                // default and Adobe's behavior). Previously defaulted to 0.5pt, causing borders
639                // to appear ~1px thinner at 150 DPI rendering.
640                let thickness = attr(*edge, "thickness")
641                    .and_then(Measurement::parse)
642                    .map(|m| m.to_points())
643                    .unwrap_or(1.0);
644                if thickness > 0.0 {
645                    style.border_width_pt = Some(thickness);
646                }
647            }
648        }
649        // Per-edge visibility: 1=all, 2=even/odd, 3=T/RL/B, 4=T/R/B/L.
650        let edge_visible = |e: &roxmltree::Node| -> bool {
651            !is_hidden(*e) && attr(*e, "stroke").unwrap_or("solid") != "none"
652        };
653        style.border_edges = match edges.len() {
654            0 => [true, true, true, true],
655            1 => {
656                let v = edge_visible(&edges[0]);
657                [v, v, v, v]
658            }
659            2 => {
660                let even = edge_visible(&edges[0]);
661                let odd = edge_visible(&edges[1]);
662                [even, odd, even, odd]
663            }
664            3 => {
665                let top = edge_visible(&edges[0]);
666                let rl = edge_visible(&edges[1]);
667                let bot = edge_visible(&edges[2]);
668                [top, rl, bot, rl]
669            }
670            _ => [
671                edge_visible(&edges[0]),
672                edge_visible(&edges[1]),
673                edge_visible(&edges[2]),
674                edge_visible(&edges[3]),
675            ],
676        };
677        let default_thickness = style.border_width_pt.unwrap_or(0.5);
678        let edge_thickness = |edge: roxmltree::Node<'_, '_>| -> f64 {
679            attr(edge, "thickness")
680                .and_then(Measurement::parse)
681                .map(|m| m.to_points())
682                .unwrap_or(default_thickness)
683        };
684        let per_edge_widths = match edges.len() {
685            0 | 1 => None,
686            2 => Some([
687                edge_thickness(edges[0]),
688                edge_thickness(edges[1]),
689                edge_thickness(edges[0]),
690                edge_thickness(edges[1]),
691            ]),
692            3 => Some([
693                edge_thickness(edges[0]),
694                edge_thickness(edges[1]),
695                edge_thickness(edges[2]),
696                edge_thickness(edges[1]),
697            ]),
698            _ => Some([
699                edge_thickness(edges[0]),
700                edge_thickness(edges[1]),
701                edge_thickness(edges[2]),
702                edge_thickness(edges[3]),
703            ]),
704        };
705        if let Some(widths) = per_edge_widths {
706            if !(widths[0] == widths[1] && widths[1] == widths[2] && widths[2] == widths[3]) {
707                style.border_widths = Some(widths);
708            }
709        }
710        // Also parse <border><fill><color .../> for border background (field bg).
711        // Skip when fill has presence="hidden"/"invisible"/"inactive".
712        if style.bg_color.is_none() {
713            if let Some(fill) = find_first_child_by_name(border, "fill") {
714                if !is_hidden(fill) {
715                    if let Some(color) = find_first_child_by_name(fill, "color") {
716                        if let Some(rgb) = parse_xfa_color(color) {
717                            style.bg_color = Some(rgb);
718                        }
719                    }
720                }
721            }
722        }
723    }
724
725    // Parse <value><rectangle><edge><color> for draw rectangle border color.
726    // XFA rectangle elements define their stroke color on <edge> children
727    // inside the <value> container, NOT inside <border>.
728    if style.border_color.is_none() {
729        if let Some(value) = find_first_child_by_name(elem, "value") {
730            if let Some(rect) = find_first_child_by_name(value, "rectangle") {
731                let edges: Vec<_> = rect
732                    .children()
733                    .filter(|c| c.is_element() && c.tag_name().name() == "edge")
734                    .collect();
735                if let Some(edge) = edges.first() {
736                    if let Some(color) = find_first_child_by_name(*edge, "color") {
737                        if let Some(rgb) = parse_xfa_color(color) {
738                            style.border_color = Some(rgb);
739                        }
740                    }
741                    if style.border_width_pt.is_none() {
742                        let thickness = attr(*edge, "thickness")
743                            .and_then(Measurement::parse)
744                            .map(|m| m.to_points())
745                            .unwrap_or(1.0);
746                        if thickness > 0.0 {
747                            style.border_width_pt = Some(thickness);
748                        }
749                    }
750                }
751            }
752        }
753    }
754
755    // Parse <font typeface="..." size="..." weight="..."> for font properties.
756    // XFA Spec 3.3 §28.1 — Adobe Non-conformance:
757    //   - font-weight: numeric values (100-900) ignored, only "bold"/"normal" (p1229)
758    //   - font-stretch: not implemented in rich text (p1228)
759    //   - font-family: only first name used in rich text (p1228)
760    // We follow Adobe's behavior for all three.
761    if let Some(font) = find_first_child_by_name(elem, "font") {
762        if let Some(typeface) = attr(font, "typeface") {
763            style.font_family = Some(typeface.to_string());
764        }
765        // XFA Spec 3.3 §17 (p716) — genericFamily attribute.
766        if let Some(gf) = attr(font, "genericFamily") {
767            style.generic_family = Some(gf.to_string());
768        }
769        if let Some(size_str) = attr(font, "size") {
770            if let Some(m) = Measurement::parse(size_str) {
771                style.font_size = Some(m.to_points());
772            }
773        }
774        if let Some(weight) = attr(font, "weight") {
775            style.font_weight = Some(weight.to_string());
776        }
777        if let Some(posture) = attr(font, "posture") {
778            style.font_style = Some(posture.to_string());
779        }
780        // <font><fill><color .../> for text color
781        if let Some(fill) = find_first_child_by_name(font, "fill") {
782            if let Some(color) = find_first_child_by_name(fill, "color") {
783                if let Some(rgb) = parse_xfa_color(color) {
784                    style.text_color = Some(rgb);
785                }
786            }
787        }
788        // <font color="#RRGGBB"> attribute (fallback when <fill><color> not present)
789        if style.text_color.is_none() {
790            if let Some(color_str) = attr(font, "color") {
791                if let Some(rgb) = parse_font_color_attr(color_str) {
792                    style.text_color = Some(rgb);
793                }
794            }
795        }
796        // fontHorizontalScale="96%" → 0.96
797        if let Some(scale_str) = attr(font, "fontHorizontalScale") {
798            if let Some(v) = parse_percentage(scale_str) {
799                style.font_horizontal_scale = Some(v);
800            }
801        }
802        // letterSpacing="-0.018em" or "0.5pt"
803        if let Some(ls_str) = attr(font, "letterSpacing") {
804            if let Some(v) = parse_letter_spacing(ls_str, style.font_size.unwrap_or(10.0)) {
805                style.letter_spacing_pt = Some(v);
806            }
807        }
808        // XFA Spec 3.3 §2.6 — underline="1" (single) or "2" (double)
809        if let Some(underline_str) = attr(font, "underline") {
810            style.underline = underline_str == "1" || underline_str == "2";
811        }
812        // XFA Spec 3.3 §2.6 — lineThrough="1"
813        if let Some(line_through_str) = attr(font, "lineThrough") {
814            style.line_through = line_through_str == "1";
815        }
816    }
817
818    // XFA Spec 3.3 §17 "para" (p803) — Paragraph-level formatting attributes:
819    // hAlign: left/center/right/justify (handled in parse_font_metrics)
820    // vAlign: top/middle/bottom for vertical alignment within container
821    // spaceAbove, spaceBelow: paragraph spacing in points
822    // marginLeft, marginRight: paragraph indentation
823    // Note: hAlign/vAlign on container elements are deprecated since XFA 2.4
824    // and ignored by Adobe; we correctly read these only from <para>.
825    if let Some(para) = find_first_child_by_name(elem, "para") {
826        if let Some(v) = attr(para, "spaceAbove").and_then(Measurement::parse) {
827            style.space_above_pt = Some(v.to_points());
828        }
829        if let Some(v) = attr(para, "spaceBelow").and_then(Measurement::parse) {
830            style.space_below_pt = Some(v.to_points());
831        }
832        if let Some(v) = attr(para, "marginLeft").and_then(Measurement::parse) {
833            style.margin_left_pt = Some(v.to_points());
834        }
835        if let Some(v) = attr(para, "marginRight").and_then(Measurement::parse) {
836            style.margin_right_pt = Some(v.to_points());
837        }
838        // XFA Spec 3.3 §17 "para" (p803) — lineHeight / textIndent.
839        if let Some(v) = attr(para, "lineHeight").and_then(Measurement::parse) {
840            style.line_height_pt = Some(v.to_points());
841        }
842        if let Some(v) = attr(para, "textIndent").and_then(Measurement::parse) {
843            style.text_indent_pt = Some(v.to_points());
844        }
845        if let Some(va) = attr(para, "vAlign") {
846            style.v_align = Some(match va {
847                "middle" => VerticalAlign::Middle,
848                "bottom" => VerticalAlign::Bottom,
849                _ => VerticalAlign::Top,
850            });
851        }
852        // XFA Spec 3.3 §8.3 (p282-284) — hAlign positions child within parent
853        // layout container. Stored separately from FontMetrics.text_align which
854        // controls text rendering alignment within the element itself.
855        if let Some(ha) = attr(para, "hAlign") {
856            style.h_align = Some(match ha {
857                "center" => TextAlign::Center,
858                "right" => TextAlign::Right,
859                _ => TextAlign::Left,
860            });
861        }
862    }
863
864    // Parse <border><corner> for border radius and <border><edge> for border style.
865    if let Some(border) = border {
866        if let Some(corner) = find_first_child_by_name(border, "corner") {
867            if let Some(v) = attr(corner, "radius").and_then(Measurement::parse) {
868                style.border_radius_pt = Some(v.to_points());
869            }
870        }
871        if let Some(edge) = find_first_child_by_name(border, "edge") {
872            if let Some(stroke) = attr(edge, "stroke") {
873                if stroke != "none" {
874                    style.border_style = Some(stroke.to_string());
875                }
876            }
877        }
878    }
879
880    // Parse <format><picture> for numeric/date/time formatting patterns.
881    if let Some(format) = find_first_child_by_name(elem, "format") {
882        if let Some(picture) = find_first_child_by_name(format, "picture") {
883            if let Some(text) = picture.text() {
884                let trimmed = text.trim();
885                if !trimmed.is_empty() {
886                    style.format_pattern = Some(trimmed.to_string());
887                }
888            }
889        }
890    }
891
892    style
893}
894
895fn parse_check_button_mark(elem: Node<'_, '_>) -> Option<String> {
896    let ui = find_first_child_by_name(elem, "ui")?;
897    let check_button = ui
898        .children()
899        .find(|n| n.is_element() && n.tag_name().name() == "checkButton")?;
900    let mark = attr(check_button, "mark")?.to_ascii_lowercase();
901    match mark.as_str() {
902        "check" | "circle" | "cross" | "diamond" | "square" | "star" => Some(mark),
903        _ => None,
904    }
905}
906
907/// Parse XFA `<color value="r,g,b"/>` into (u8, u8, u8).
908fn parse_xfa_color(color_node: Node<'_, '_>) -> Option<(u8, u8, u8)> {
909    let value = attr(color_node, "value")?;
910    let parts: Vec<&str> = value.split(',').collect();
911    if parts.len() >= 3 {
912        let r = parts[0].trim().parse::<u8>().ok()?;
913        let g = parts[1].trim().parse::<u8>().ok()?;
914        let b = parts[2].trim().parse::<u8>().ok()?;
915        Some((r, g, b))
916    } else {
917        None
918    }
919}
920
/// Parse a color string from a `color` attribute on `<font>`.
///
/// Supported formats (surrounding whitespace is ignored):
/// - `#RRGGBB` (e.g. `#000080`)
/// - `#RGB` shorthand (e.g. `#00F` → `#0000FF`)
/// - `r,g,b` with decimal values 0-255 (e.g. `0,0,128`); components past the
///   third are ignored
///
/// Returns `None` for any other input. The hex forms are validated one
/// character at a time, so malformed attribute values (non-ASCII characters,
/// sign characters such as `+`) are rejected instead of panicking on a
/// non-character-boundary slice or being silently accepted.
fn parse_font_color_attr(s: &str) -> Option<(u8, u8, u8)> {
    let s = s.trim();

    if let Some(hex) = s.strip_prefix('#') {
        // Convert every character to its hex value; a single non-hex character
        // (including any multi-byte character) rejects the whole string.
        let nibbles = hex
            .chars()
            // `to_digit(16)` yields 0..=15, so the cast to u8 is lossless.
            .map(|c| c.to_digit(16).map(|d| d as u8))
            .collect::<Option<Vec<u8>>>()?;

        return match nibbles.as_slice() {
            // Max value is 15 * 16 + 15 = 255, so this cannot overflow.
            &[r_hi, r_lo, g_hi, g_lo, b_hi, b_lo] => {
                Some((r_hi * 16 + r_lo, g_hi * 16 + g_lo, b_hi * 16 + b_lo))
            }
            // Shorthand: duplicate each nibble (0xN -> 0xNN == N * 17).
            &[r, g, b] => Some((r * 17, g * 17, b * 17)),
            _ => None,
        };
    }

    // Otherwise try the "r,g,b" decimal format.
    let mut channels = s.split(',').map(|part| part.trim().parse::<u8>().ok());
    let r = channels.next()??;
    let g = channels.next()??;
    let b = channels.next()??;
    Some((r, g, b))
}
958
959/// Detect field UI type from `<ui>` child element.
960///
961/// XFA Spec 3.3 §2.1 (p35) — User Interface: each container may have a `<ui>`
962/// subelement specifying the widget type. If absent, defaults based on content type.
963/// Supported: textEdit, checkButton, button, choiceList, dateTimeEdit,
964///            numericEdit, passwordEdit, imageEdit, signature, barcode.
965fn detect_field_kind(elem: Node<'_, '_>) -> FieldKind {
966    let Some(ui) = find_first_child_by_name(elem, "ui") else {
967        return FieldKind::Text;
968    };
969    for child in ui.children().filter(|n| n.is_element()) {
970        let tag = child.tag_name().name();
971        match tag {
972            "checkButton" => {
973                // XFA 3.3 §7.2.7: shape="round" → radio button (circle).
974                let shape = attr(child, "shape").unwrap_or("square");
975                return if shape == "round" {
976                    FieldKind::Radio
977                } else {
978                    FieldKind::Checkbox
979                };
980            }
981            "choiceList" => return FieldKind::Dropdown,
982            "button" => return FieldKind::Button,
983            "dateTimeEdit" => return FieldKind::DateTimePicker,
984            "numericEdit" => return FieldKind::NumericEdit,
985            "passwordEdit" => return FieldKind::PasswordEdit,
986            "imageEdit" => return FieldKind::ImageEdit,
987            "signature" => return FieldKind::Signature,
988            "barcode" => return FieldKind::Barcode,
989            _ => {}
990        }
991    }
992    FieldKind::Text
993}
994
995/// Detect page breaks: look for a child element named `breakBefore` or `break`.
996///
997/// Only considers breakBefore/break elements that appear BEFORE the first
998/// content child (subform/field/draw/exclGroup). Inline breakBefore elements
999/// between content children are handled by `add_children` which propagates
1000/// them to the next sibling's metadata.
1001///
1002/// Returns `(break_found, target_name)`.
1003fn detect_page_break_before(elem: Node<'_, '_>) -> (bool, Option<String>) {
1004    for child in elem.children().filter(|n| n.is_element()) {
1005        let tag = child.tag_name().name();
1006        if matches!(tag, "subform" | "field" | "draw" | "exclGroup") {
1007            break;
1008        }
1009        if tag == "breakBefore" && attr(child, "targetType") == Some("pageArea") {
1010            return (true, attr(child, "target").map(|s| s.to_string()));
1011        }
1012        if tag == "break" && attr(child, "before") == Some("pageArea") {
1013            // XFA §9.2.1: `before="pageArea"` triggers a page break
1014            // regardless of targetType. targetType only constrains which
1015            // specific page area to target.
1016            return (true, attr(child, "target").map(|s| s.to_string()));
1017        }
1018    }
1019    (false, None)
1020}
1021
1022/// Detect page-break-after: look for a child element named `breakAfter` or `break`.
1023///
1024/// Scans all children after the last content child.
1025fn detect_page_break_after(elem: Node<'_, '_>) -> (bool, Option<String>) {
1026    let mut last_content_idx = 0;
1027    let children: Vec<_> = elem.children().filter(|n| n.is_element()).collect();
1028    for (i, child) in children.iter().enumerate() {
1029        let tag = child.tag_name().name();
1030        if matches!(tag, "subform" | "field" | "draw" | "exclGroup") {
1031            last_content_idx = i;
1032        }
1033    }
1034
1035    // Check elements after the last content node.
1036    for child in children.iter().skip(last_content_idx) {
1037        let tag = child.tag_name().name();
1038        if tag == "breakAfter" && attr(*child, "targetType") == Some("pageArea") {
1039            return (true, attr(*child, "target").map(|s| s.to_string()));
1040        }
1041        if tag == "break" && attr(*child, "after") == Some("pageArea") {
1042            return (true, attr(*child, "target").map(|s| s.to_string()));
1043        }
1044    }
1045    (false, None)
1046}
1047
1048/// Detect `breakBefore targetType="contentArea"` — the node targets a
1049/// specific named content area (e.g. "flatten", "eSign") and should be
1050/// excluded from the primary content flow.
1051///
1052/// Scans ALL children (not just before the first content child) because
1053/// contentArea breaks often appear as trailing elements inside subforms
1054/// (DOT form pattern: eSign/lock break appears after their content fields).
1055fn detect_content_area_break(elem: Node<'_, '_>) -> bool {
1056    for child in elem.children().filter(|n| n.is_element()) {
1057        let tag = child.tag_name().name();
1058        if tag == "breakBefore" && attr(child, "targetType") == Some("contentArea") {
1059            return true;
1060        }
1061    }
1062    false
1063}
1064
1065/// Collect event scripts from `<event>` and `<calculate>` children.
1066fn collect_event_scripts(elem: Node<'_, '_>) -> Vec<EventScript> {
1067    let mut scripts = Vec::new();
1068    for child in elem.children().filter(|n| n.is_element()) {
1069        let child_tag = child.tag_name().name();
1070        if child_tag == "event" {
1071            // Skip layout-ready events (activity="ready" ref="$layout").
1072            let activity = attr(child, "activity");
1073            let event_ref = attr(child, "ref");
1074            if activity == Some("ready") && event_ref == Some("$layout") {
1075                continue;
1076            }
1077            // Look for a <script> child.
1078            if let Some(script_elem) = find_first_child_by_name(child, "script") {
1079                if let Some(script) =
1080                    build_event_script(script_elem, activity, event_ref, attr(script_elem, "runAt"))
1081                {
1082                    scripts.push(script);
1083                }
1084            }
1085        } else if child_tag == "calculate" {
1086            // Direct <calculate><script>...</script></calculate>
1087            if let Some(script_elem) = find_first_child_by_name(child, "script") {
1088                if let Some(script) = build_event_script(
1089                    script_elem,
1090                    Some("calculate"),
1091                    None,
1092                    attr(script_elem, "runAt"),
1093                ) {
1094                    scripts.push(script);
1095                }
1096            }
1097        }
1098    }
1099    scripts
1100}
1101
1102fn build_event_script(
1103    script_elem: Node<'_, '_>,
1104    activity: Option<&str>,
1105    event_ref: Option<&str>,
1106    run_at: Option<&str>,
1107) -> Option<EventScript> {
1108    let text = script_elem.text()?.trim();
1109    if text.is_empty() {
1110        return None;
1111    }
1112
1113    Some(EventScript::new(
1114        text.to_string(),
1115        detect_script_language(attr(script_elem, "contentType")),
1116        activity.map(str::to_string),
1117        event_ref.map(str::to_string),
1118        run_at.map(str::to_string),
1119    ))
1120}
1121
1122fn detect_script_language(content_type: Option<&str>) -> ScriptLanguage {
1123    match content_type.map(|value| value.trim().to_ascii_lowercase()) {
1124        None => ScriptLanguage::FormCalc,
1125        Some(value) if value == "application/x-formcalc" || value.ends_with("/x-formcalc") => {
1126            ScriptLanguage::FormCalc
1127        }
1128        Some(value)
1129            if value == "application/x-javascript"
1130                || value == "application/javascript"
1131                || value == "text/javascript"
1132                || value.ends_with("/x-javascript") =>
1133        {
1134            ScriptLanguage::JavaScript
1135        }
1136        Some(_) => ScriptLanguage::Other,
1137    }
1138}
1139
1140// XFA Spec 3.3 §17 "keep" (p776-777) — Controls whether content Area breaks are allowed:
1141// next: keep next content area together
1142// previous: keep previous content area together
1143// intact: keep content area intact (no breaks within)
1144fn parse_keep(elem: Node<'_, '_>) -> (bool, bool, bool) {
1145    if let Some(keep) = find_first_child_by_name(elem, "keep") {
1146        let next = attr(keep, "next") == Some("contentArea");
1147        let prev = attr(keep, "previous") == Some("contentArea");
1148        let intact = attr(keep, "intact") == Some("contentArea");
1149        (next, prev, intact)
1150    } else {
1151        (false, false, false)
1152    }
1153}
1154
1155// XFA Spec 3.3 §17 "overflow" (p804-805) — Overflow leader/trailer for pagination:
1156// leader: reference to element to render before overflow content
1157// trailer: reference to element to render after overflow content
1158fn parse_overflow(elem: Node<'_, '_>) -> (Option<String>, Option<String>) {
1159    if let Some(overflow) = find_first_child_by_name(elem, "overflow") {
1160        let leader = attr(overflow, "leader").map(|s| s.to_string());
1161        let trailer = attr(overflow, "trailer").map(|s| s.to_string());
1162        (leader, trailer)
1163    } else {
1164        (None, None)
1165    }
1166}
1167
1168/// Parse item value from `<items><text>VALUE</text></items>`.
1169fn parse_item_value(elem: Node<'_, '_>) -> Option<String> {
1170    let items = find_first_child_by_name(elem, "items")?;
1171    let text_elem = find_first_child_by_name(items, "text")?;
1172    let text = text_elem.text()?.trim();
1173    if text.is_empty() {
1174        None
1175    } else {
1176        Some(text.to_string())
1177    }
1178}
1179
1180/// Extract all text values from an `<items>` element.
1181fn collect_items_texts(items_elem: Node<'_, '_>) -> Vec<String> {
1182    items_elem
1183        .children()
1184        .filter(|n| n.is_element())
1185        .filter_map(|child| {
1186            let txt = child.text().unwrap_or("").trim().to_string();
1187            if txt.is_empty() {
1188                None
1189            } else {
1190                Some(txt)
1191            }
1192        })
1193        .collect()
1194}
1195
1196/// Parse choice list `<items>` elements from a `<field>` node (XFA 3.3 §7.7).
1197fn parse_items_lists(elem: Node<'_, '_>) -> (Vec<String>, Vec<String>) {
1198    let items_elems: Vec<_> = elem
1199        .children()
1200        .filter(|n| n.is_element() && n.tag_name().name() == "items")
1201        .collect();
1202    match items_elems.len() {
1203        0 => (Vec::new(), Vec::new()),
1204        1 => {
1205            let vals = collect_items_texts(items_elems[0]);
1206            (vals, Vec::new())
1207        }
1208        _ => {
1209            let first = items_elems[0];
1210            let second = items_elems[1];
1211            let first_is_save = attr(first, "save") == Some("1");
1212            if first_is_save {
1213                (collect_items_texts(second), collect_items_texts(first))
1214            } else {
1215                (collect_items_texts(first), collect_items_texts(second))
1216            }
1217        }
1218    }
1219}
1220
1221// ---------------------------------------------------------------------------
1222// Data binding
1223// ---------------------------------------------------------------------------
1224
1225/// Find the data root element from a datasets document.
1226/// Datasets packet: `<xfa:datasets><xfa:data>...</xfa:data></xfa:datasets>`.
1227fn find_data_root<'a, 'input>(root: Node<'a, 'input>) -> Option<Node<'a, 'input>> {
1228    // Look for a child named "data".
1229    for child in root.children().filter(|n| n.is_element()) {
1230        if child.tag_name().name() == "data" {
1231            // Return first element child of <data>, or <data> itself.
1232            return child.children().find(|n| n.is_element()).or(Some(child));
1233        }
1234    }
1235    // If root is the data element itself.
1236    if root.tag_name().name() == "data" {
1237        return root.children().find(|n| n.is_element()).or(Some(root));
1238    }
1239    // Fall back to first element child.
1240    root.children().find(|n| n.is_element())
1241}
1242
1243/// Recursively walk the form tree and bind data values from the datasets.
1244fn bind_data(
1245    tree: &mut FormTree,
1246    node_id: FormNodeId,
1247    data_root: &Node<'_, '_>,
1248    data_node: &Node<'_, '_>,
1249) {
1250    let name = tree.get(node_id).name.clone();
1251    let children: Vec<FormNodeId> = tree.get(node_id).children.clone();
1252    let meta = tree.meta(node_id).clone();
1253    let group_kind = meta.group_kind;
1254
1255    // For exclGroups: look up group value, set matching child, clear others.
1256    if group_kind == GroupKind::ExclusiveChoice && !name.is_empty() {
1257        let data_value = lookup_bound_text(data_root, data_node, &meta, &name);
1258        // Pre-collect item values to avoid borrow conflicts.
1259        let child_item_vals: Vec<(FormNodeId, Option<String>)> = children
1260            .iter()
1261            .map(|&cid| (cid, tree.meta(cid).item_value.clone()))
1262            .collect();
1263        for (child_id, item_val) in child_item_vals {
1264            if let FormNodeType::Field { ref mut value } = tree.get_mut(child_id).node_type {
1265                if let Some(ref dv) = data_value {
1266                    if item_val.as_deref() == Some(dv.as_str()) {
1267                        *value = dv.clone();
1268                    } else {
1269                        *value = String::new();
1270                    }
1271                } else {
1272                    // No data found: clear all children to prevent template defaults
1273                    // from firing wrong scripts.
1274                    *value = String::new();
1275                }
1276            }
1277        }
1278        return;
1279    }
1280
1281    // For fields: look up data value directly.
1282    if let FormNodeType::Field { ref mut value } = tree.get_mut(node_id).node_type {
1283        if let Some(dv) = lookup_bound_text(data_root, data_node, &meta, &name) {
1284            *value = dv;
1285        }
1286        return; // Fields are leaf nodes.
1287    }
1288
1289    // For subforms: find matching data child and recurse.
1290    // When occur max > 1 and data has multiple matching children,
1291    // clone the subform for each additional data instance.
1292    let bound_nodes = resolve_bound_nodes(data_root, data_node, &meta, &name);
1293    let effective_data = bound_nodes.first().copied().unwrap_or(*data_node);
1294
1295    // Check for repeating subform instances in the data.
1296    let occur = tree.get(node_id).occur.clone();
1297    let max_instances = occur.max.map(|max| max as usize).unwrap_or(usize::MAX);
1298    if max_instances > 1 && !meta.data_bind_none {
1299        let data_instances: Vec<_> = if let Some(bind_ref) = meta.data_bind_ref.as_deref() {
1300            resolve_bind_nodes(data_root, data_node, bind_ref)
1301        } else if !name.is_empty() {
1302            data_node
1303                .children()
1304                .filter(|child| child.is_element() && child.tag_name().name() == name)
1305                .collect()
1306        } else {
1307            Vec::new()
1308        };
1309
1310        if !data_instances.is_empty() {
1311            tree.get_mut(node_id).occur.initial = 1;
1312        }
1313
1314        if data_instances.len() > 1 {
1315            // Bind the first instance to the existing subform node.
1316            bind_data_children(tree, node_id, &children, data_root, &data_instances[0]);
1317
1318            // Clone the subform for each additional data instance.
1319            let parent_id = tree
1320                .nodes
1321                .iter()
1322                .enumerate()
1323                .find(|(_, n)| n.children.contains(&node_id))
1324                .map(|(i, _)| FormNodeId(i));
1325            let mut insert_pos = parent_id.and_then(|pid| {
1326                tree.get(pid)
1327                    .children
1328                    .iter()
1329                    .position(|&c| c == node_id)
1330                    .map(|pos| (pid, pos + 1))
1331            });
1332
1333            for data_inst in &data_instances[1..data_instances.len().min(max_instances)] {
1334                let cloned_id = clone_subtree(tree, node_id);
1335                tree.get_mut(cloned_id).occur.initial = 1;
1336                bind_data_children(
1337                    tree,
1338                    cloned_id,
1339                    &tree.get(cloned_id).children.clone(),
1340                    data_root,
1341                    data_inst,
1342                );
1343                // Insert clone after the original in the parent's children list.
1344                if let Some((pid, pos)) = insert_pos.as_mut() {
1345                    let parent = tree.get_mut(*pid);
1346                    parent.children.insert(*pos, cloned_id);
1347                    *pos += 1;
1348                }
1349            }
1350            return;
1351        }
1352    }
1353
1354    bind_data_children(tree, node_id, &children, data_root, &effective_data);
1355}
1356
1357/// Bind data to a subform's children.
1358fn bind_data_children(
1359    tree: &mut FormTree,
1360    _parent_id: FormNodeId,
1361    children: &[FormNodeId],
1362    data_root: &Node<'_, '_>,
1363    data_node: &Node<'_, '_>,
1364) {
1365    for &child_id in children {
1366        bind_data(tree, child_id, data_root, data_node);
1367    }
1368}
1369
1370/// Deep-clone a subtree in the FormTree, returning the new root's ID.
1371fn clone_subtree(tree: &mut FormTree, source_id: FormNodeId) -> FormNodeId {
1372    let source = tree.get(source_id).clone();
1373    let source_meta = tree.meta(source_id).clone();
1374
1375    // Clone children recursively first.
1376    let new_children: Vec<FormNodeId> = source
1377        .children
1378        .iter()
1379        .map(|&child_id| clone_subtree(tree, child_id))
1380        .collect();
1381
1382    let mut new_node = source;
1383    new_node.children = new_children;
1384    tree.add_node_with_meta(new_node, source_meta)
1385}
1386
1387/// Look up a text value for a named element in the data node.
1388fn lookup_data_text(data_node: &Node<'_, '_>, name: &str) -> Option<String> {
1389    let child = find_child_element_by_name(data_node, name)?;
1390    child.text().map(|s| s.to_string())
1391}
1392
1393fn lookup_bound_text(
1394    data_root: &Node<'_, '_>,
1395    data_node: &Node<'_, '_>,
1396    meta: &FormNodeMeta,
1397    fallback_name: &str,
1398) -> Option<String> {
1399    if meta.data_bind_none {
1400        return None;
1401    }
1402
1403    if let Some(bind_ref) = meta.data_bind_ref.as_deref() {
1404        return resolve_bind_nodes(data_root, data_node, bind_ref)
1405            .into_iter()
1406            .next()
1407            .and_then(|node| node.text().map(|s| s.to_string()));
1408    }
1409
1410    if fallback_name.is_empty() {
1411        None
1412    } else {
1413        lookup_data_text(data_node, fallback_name)
1414    }
1415}
1416
1417fn resolve_bound_nodes<'a, 'input>(
1418    data_root: &Node<'a, 'input>,
1419    data_node: &Node<'a, 'input>,
1420    meta: &FormNodeMeta,
1421    fallback_name: &str,
1422) -> Vec<Node<'a, 'input>> {
1423    if meta.data_bind_none {
1424        return Vec::new();
1425    }
1426
1427    if let Some(bind_ref) = meta.data_bind_ref.as_deref() {
1428        return resolve_bind_nodes(data_root, data_node, bind_ref);
1429    }
1430
1431    if fallback_name.is_empty() {
1432        Vec::new()
1433    } else {
1434        find_child_element_by_name(data_node, fallback_name)
1435            .into_iter()
1436            .collect()
1437    }
1438}
1439
1440fn resolve_bind_nodes<'a, 'input>(
1441    data_root: &Node<'a, 'input>,
1442    data_node: &Node<'a, 'input>,
1443    bind_ref: &str,
1444) -> Vec<Node<'a, 'input>> {
1445    let mut path = bind_ref.trim();
1446    if path.is_empty() {
1447        return Vec::new();
1448    }
1449
1450    let mut current = if let Some(rest) = path.strip_prefix("$record.") {
1451        path = rest;
1452        vec![*data_root]
1453    } else if path == "$record" {
1454        return vec![*data_root];
1455    } else if let Some(rest) = path.strip_prefix("$.") {
1456        path = rest;
1457        vec![*data_node]
1458    } else {
1459        vec![*data_node]
1460    };
1461
1462    let segments: Vec<&str> = path
1463        .split('.')
1464        .map(str::trim)
1465        .filter(|segment| !segment.is_empty())
1466        .collect();
1467    if segments.is_empty() {
1468        return current;
1469    }
1470
1471    for segment in segments {
1472        let (name, selector) = parse_bind_segment(segment);
1473        if name.is_empty() {
1474            continue;
1475        }
1476
1477        let mut next = Vec::new();
1478        for node in current {
1479            let matches: Vec<Node<'a, 'input>> = node
1480                .children()
1481                .filter(|child| child.is_element() && child.tag_name().name() == name)
1482                .collect();
1483            match selector {
1484                BindSelector::First => {
1485                    if let Some(first) = matches.into_iter().next() {
1486                        next.push(first);
1487                    }
1488                }
1489                BindSelector::All => next.extend(matches),
1490                BindSelector::Index(idx) => {
1491                    if let Some(found) = matches.into_iter().nth(idx) {
1492                        next.push(found);
1493                    }
1494                }
1495            }
1496        }
1497        current = next;
1498        if current.is_empty() {
1499            break;
1500        }
1501    }
1502
1503    current
1504}
1505
/// Which of the same-named children a bind-path segment selects.
#[derive(Clone, Copy)]
enum BindSelector {
    /// The first matching child (no subscript, or an unusable one).
    First,
    /// Every matching child (`name[*]`).
    All,
    /// The matching child at this zero-based position (`name[n]`).
    Index(usize),
}

/// Split one bind-path segment into its element name and selector.
///
/// The name is everything before the first `[`. The subscript is the text
/// between that `[` and a trailing `]`, trimmed: `*` selects all matches and
/// an unsigned integer selects by index. A segment without `[`, or with a
/// subscript that is empty, unterminated, or not a valid `usize`, selects
/// the first match.
fn parse_bind_segment(segment: &str) -> (&str, BindSelector) {
    let (name, bracketed) = match segment.split_once('[') {
        Some(parts) => parts,
        None => return (segment, BindSelector::First),
    };

    // A missing closing bracket is treated like an empty subscript.
    let subscript = bracketed.strip_suffix(']').map_or("", str::trim);

    let selector = if subscript == "*" {
        BindSelector::All
    } else {
        subscript
            .parse::<usize>()
            .map_or(BindSelector::First, BindSelector::Index)
    };
    (name, selector)
}
1531
1532/// Find a direct child element by name.
1533fn find_child_element_by_name<'a, 'input>(
1534    node: &Node<'a, 'input>,
1535    name: &str,
1536) -> Option<Node<'a, 'input>> {
1537    node.children()
1538        .filter(|n| n.is_element())
1539        .find(|n| n.tag_name().name() == name)
1540}
1541
1542// ---------------------------------------------------------------------------
1543// Helpers
1544// ---------------------------------------------------------------------------
1545
1546/// Recursively add child form nodes (subform, field, draw, pageSet, pageArea).
1547///
1548/// When a `<breakBefore>` element appears between content children (inline
1549/// break), it is propagated as `page_break_before` on the next content
1550/// sibling's metadata.
1551///
1552/// Returns `(break_found, target_name)` if a pending break remains (i.e. a
1553/// `breakBefore` was found after the last content child), meaning the NEXT
1554/// sibling at the parent level should receive the break.
1555fn add_children(
1556    tree: &mut FormTree,
1557    node: &mut FormNode,
1558    elem: Node<'_, '_>,
1559) -> std::result::Result<(bool, Option<String>), crate::error::XfaError> {
1560    let mut pending_break = false;
1561    let mut pending_break_target = None;
1562    let mut pending_ca_break = false;
1563    for child in elem.children().filter(|n| n.is_element()) {
1564        let tag = child.tag_name().name();
1565        match tag {
1566            // XFA Spec 3.3 §2.1 — Container elements that produce form nodes.
1567            "subform" | "field" | "draw" | "pageSet" | "pageArea" | "exclGroup" | "area" => {
1568                let (child_id, (trailing_break, trailing_target)) = parse_node(tree, child, false)?;
1569                if pending_break {
1570                    let meta = tree.meta_mut(child_id);
1571                    meta.page_break_before = true;
1572                    if meta.break_target.is_none() {
1573                        meta.break_target = pending_break_target.take();
1574                    }
1575                    pending_break = false;
1576                }
1577                if pending_ca_break {
1578                    tree.meta_mut(child_id).content_area_break = true;
1579                    pending_ca_break = false;
1580                }
1581                node.children.push(child_id);
1582                if trailing_break {
1583                    pending_break = true;
1584                    pending_break_target = trailing_target;
1585                }
1586            }
1587            "breakBefore" => {
1588                let target_type = attr(child, "targetType");
1589                if target_type == Some("pageArea") {
1590                    pending_break = true;
1591                    pending_break_target = attr(child, "target").map(|s| s.to_string());
1592                } else if target_type == Some("contentArea") {
1593                    pending_ca_break = true;
1594                }
1595            }
1596            // Legacy <break> element between content children.
1597            "break"
1598                if attr(child, "before") == Some("pageArea")
1599                    && attr(child, "targetType") == Some("pageArea") =>
1600            {
1601                pending_break = true;
1602                pending_break_target = attr(child, "target").map(|s| s.to_string());
1603            }
1604            // Ignore XML elements that are layout metadata, not form nodes.
1605            // XFA Spec 3.3 §28.1 (p1229) — Adobe Non-conformance: stipple rate only
1606            // supports 25, 50, 75; others→100. Blends with WHITE, not bg color.
1607            // Currently not rendered; matches Adobe's limited implementation.
1608            "caption" | "value" | "ui" | "font" | "border" | "margin" | "para" | "format"
1609            | "items" | "medium" | "contentArea" | "desc" | "occur" | "event" | "bind"
1610            | "calculate" | "validate" | "assist" | "toolTip" | "fill" | "edge" | "corner"
1611            | "linear" | "radial" | "pattern" | "stipple" | "color" | "extras" | "traversal"
1612            | "proto" | "overflow" => {
1613                // Handled elsewhere or not needed for layout.
1614            }
1615            _ => {
1616                // Unknown element — skip silently.
1617            }
1618        }
1619    }
1620    Ok((pending_break, pending_break_target))
1621}
1622
1623fn blank_node(tag: &str) -> FormNode {
1624    FormNode {
1625        name: tag.to_string(),
1626        node_type: FormNodeType::Subform,
1627        box_model: BoxModel {
1628            max_width: f64::MAX,
1629            max_height: f64::MAX,
1630            ..Default::default()
1631        },
1632        layout: LayoutStrategy::TopToBottom,
1633        children: Vec::new(),
1634        occur: Occur::once(),
1635        font: FontMetrics::default(),
1636        calculate: None,
1637        validate: None,
1638        column_widths: Vec::new(),
1639        col_span: 1,
1640    }
1641}
1642
1643/// Parse the `layout` attribute into a `LayoutStrategy`.
1644///
1645/// XFA Spec 3.3 §2.6 — Layout Strategies: positioned (fixed x,y) and
1646/// flowing (tb, lr-tb, rl-tb, table, row). `pageArea` uses positioned only.
1647/// Default for subforms without an explicit layout attribute is "position".
1648fn parse_layout_attr(elem: Node<'_, '_>) -> LayoutStrategy {
1649    match attr(elem, "layout").unwrap_or("") {
1650        "tb" => LayoutStrategy::TopToBottom,
1651        "lr-tb" => LayoutStrategy::LeftToRightTB,
1652        "rl-tb" => LayoutStrategy::RightToLeftTB,
1653        "table" => LayoutStrategy::Table,
1654        "row" => LayoutStrategy::Row,
1655        "paginate" => LayoutStrategy::TopToBottom, // root layout
1656        "position" => LayoutStrategy::Positioned,
1657        _ => LayoutStrategy::Positioned,
1658    }
1659}
1660
1661/// Parse the `anchorType` attribute (XFA 3.3 §2.6, App A p1510).
1662///
1663/// Determines which anchor point of the element is placed at (x,y) in
1664/// positioned layout.  Default is `topLeft`.
1665fn parse_anchor_type(elem: Node<'_, '_>) -> AnchorType {
1666    match attr(elem, "anchorType").unwrap_or("") {
1667        "topCenter" => AnchorType::TopCenter,
1668        "topRight" => AnchorType::TopRight,
1669        "middleLeft" => AnchorType::MiddleLeft,
1670        "middleCenter" => AnchorType::MiddleCenter,
1671        "middleRight" => AnchorType::MiddleRight,
1672        "bottomLeft" => AnchorType::BottomLeft,
1673        "bottomCenter" => AnchorType::BottomCenter,
1674        "bottomRight" => AnchorType::BottomRight,
1675        _ => AnchorType::TopLeft,
1676    }
1677}
1678
1679/// Parse dimensional attributes (w, h, x, y) into a `BoxModel`.
1680///
1681/// XFA Spec 3.3 §2.6 — Box Model (p49): nominal extent is w × h.
1682/// Margins lie inside the nominal extent. Borders lie inside margins.
1683/// Caption may occupy part of the nominal content region.
1684/// Constraints: minW/minH/maxW/maxH (§2.6 p53).
1685///
1686/// TODO(§2.6): rotate not parsed — counter-clockwise rotation in degrees (multiples of 90).
1687fn parse_box_model(elem: Node<'_, '_>) -> BoxModel {
1688    let w = attr(elem, "w").and_then(parse_dim);
1689    let h = attr(elem, "h").and_then(parse_dim);
1690    let x = attr(elem, "x").and_then(parse_dim).unwrap_or(0.0);
1691    let y = attr(elem, "y").and_then(parse_dim).unwrap_or(0.0);
1692    let min_h = attr(elem, "minH").and_then(parse_dim).unwrap_or(0.0);
1693    let min_w = attr(elem, "minW").and_then(parse_dim).unwrap_or(0.0);
1694    let max_h = attr(elem, "maxH").and_then(parse_dim).unwrap_or(f64::MAX);
1695    let max_w = attr(elem, "maxW").and_then(parse_dim).unwrap_or(f64::MAX);
1696    let margins = parse_margin(elem);
1697
1698    BoxModel {
1699        width: w,
1700        height: h,
1701        x,
1702        y,
1703        margins,
1704        min_width: min_w,
1705        max_width: max_w,
1706        min_height: min_h,
1707        max_height: max_h,
1708        ..Default::default()
1709    }
1710}
1711
1712fn parse_margin(elem: Node<'_, '_>) -> Insets {
1713    if let Some(margin) = find_first_child_by_name(elem, "margin") {
1714        Insets {
1715            top: attr(margin, "topInset").and_then(parse_dim).unwrap_or(0.0),
1716            bottom: attr(margin, "bottomInset")
1717                .and_then(parse_dim)
1718                .unwrap_or(0.0),
1719            left: attr(margin, "leftInset").and_then(parse_dim).unwrap_or(0.0),
1720            right: attr(margin, "rightInset")
1721                .and_then(parse_dim)
1722                .unwrap_or(0.0),
1723        }
1724    } else {
1725        Insets::default()
1726    }
1727}
1728
1729/// Parse an XFA dimension string ("0.5in", "72pt", "10mm") to PDF points.
1730fn parse_dim(s: &str) -> Option<f64> {
1731    // Handle bare numbers as inches (common in XFA)
1732    if s.trim().parse::<f64>().is_ok() {
1733        return Measurement::parse(&format!("{s}in")).map(|m| m.to_points());
1734    }
1735    Measurement::parse(s).map(|m| m.to_points())
1736}
1737
1738// XFA Spec 3.3 §17 "occur" (p800-802) — Specifies min/max/initial occurrences:
1739// min: minimum instances (default 1)
1740// max: maximum instances (-1 means unlimited)
1741// initial: number of instances at initialization
1742fn parse_occur(elem: Node<'_, '_>) -> Occur {
1743    if let Some(occur) = find_first_child_by_name(elem, "occur") {
1744        let min: u32 = attr(occur, "min").and_then(|s| s.parse().ok()).unwrap_or(1);
1745        let max: Option<u32> = attr(occur, "max")
1746            .map(|s| if s == "-1" { None } else { s.parse().ok() })
1747            .unwrap_or(Some(1));
1748        // XFA 3.3 §3.2.5: when initial is absent, default to at least 1 —
1749        // the subform exists in the template and should render once unless
1750        // explicitly suppressed by initial="0".
1751        let initial: u32 = attr(occur, "initial")
1752            .and_then(|s| s.parse().ok())
1753            .unwrap_or(min.max(1));
1754        Occur::repeating(min, max, initial)
1755    } else {
1756        Occur::once()
1757    }
1758}
1759
1760/// Parse `colSpan` attribute.
1761fn parse_col_span(elem: Node<'_, '_>) -> i32 {
1762    attr(elem, "colSpan")
1763        .and_then(|s| s.parse().ok())
1764        .unwrap_or(1)
1765}
1766
1767/// Read the `<medium>` child and return (page_width, page_height) in points.
1768fn read_medium(page_area: Node<'_, '_>) -> (f64, f64) {
1769    if let Some(m) = find_first_child_by_name(page_area, "medium") {
1770        // XFA: short = narrow dimension, long = tall dimension.
1771        let short = attr(m, "short").and_then(parse_dim).unwrap_or(612.0);
1772        let long_ = attr(m, "long").and_then(parse_dim).unwrap_or(792.0);
1773        (short, long_)
1774    } else {
1775        (612.0, 792.0)
1776    }
1777}
1778
1779/// Read all `<contentArea>` children and return their `ContentArea` structs.
1780fn read_content_areas(
1781    page_area: Node<'_, '_>,
1782    page_width: f64,
1783    page_height: f64,
1784) -> Vec<ContentArea> {
1785    let mut areas = Vec::new();
1786    for child in page_area.children().filter(|n| n.is_element()) {
1787        if child.tag_name().name() == "contentArea" {
1788            // XFA 3.3 §8.3.1 — contentArea x/y default to 0 when omitted.
1789            // Treating missing coordinates as a 0.5in inset shifts the entire
1790            // page content down/right for templates that define full-page
1791            // content areas with only w/h.
1792            let x = attr(child, "x").and_then(parse_dim).unwrap_or(0.0);
1793            let y = attr(child, "y").and_then(parse_dim).unwrap_or(0.0);
1794            // w/h default to full page dimensions when omitted, matching the
1795            // behavior of the empty (no contentArea) fallback. Previously used
1796            // hardcoded 540×720 (US Letter body), which was inconsistent with
1797            // the no-contentArea path and had no spec basis.
1798            let w = attr(child, "w").and_then(parse_dim).unwrap_or(page_width);
1799            let h = attr(child, "h").and_then(parse_dim).unwrap_or(page_height);
1800            areas.push(ContentArea {
1801                name: attr(child, "name").unwrap_or("").to_string(),
1802                x,
1803                y,
1804                width: w,
1805                height: h,
1806                leader: None,
1807                trailer: None,
1808            });
1809        }
1810    }
1811    if areas.is_empty() {
1812        areas.push(ContentArea {
1813            name: String::new(),
1814            x: 0.0,
1815            y: 0.0,
1816            width: page_width,
1817            height: page_height,
1818            leader: None,
1819            trailer: None,
1820        });
1821    }
1822    areas
1823}
1824
1825/// Extract text from `<value><text>…</text></value>` or `<value><float>…</float></value>`.
1826///
1827/// Also handles `<value><exData contentType="text/html">…</exData></value>` by
1828/// stripping the HTML/XHTML markup and returning the concatenated plain text.
1829/// This covers XFA draw elements whose content is rich-text (e.g. IRS form
1830/// instructions stored as inline XHTML). (#557)
1831/// Extract image data from `<value><image contentType="image/…">…</image></value>`.
1832///
1833/// Returns `(raw_image_data, mime_type)` for supported image types:
1834/// - `image/jpeg` → JPEG bytes
1835/// - `image/png` → PNG bytes
1836/// - `image/bmp` → converted to PNG (PDF doesn't support BMP natively)
1837///
1838/// BMP images (magic bytes `0x42 0x4D`) are automatically converted to PNG
1839/// regardless of the declared `contentType`.
1840fn extract_value_image(elem: Node<'_, '_>) -> Option<(Vec<u8>, String)> {
1841    let value = find_first_child_by_name(elem, "value")?;
1842    let image = find_first_child_by_name(value, "image")?;
1843    let content_type = attr(image, "contentType")
1844        .unwrap_or("image/png")
1845        .to_string();
1846    let data = image.text().unwrap_or_default();
1847    let decoded = base64_decode(data);
1848
1849    // BMP is not supported by PDF — convert to PNG.
1850    // Detect by magic bytes (0x42 0x4D = "BM") or declared content type.
1851    if decoded.starts_with(b"BM") || content_type == "image/bmp" {
1852        if let Some(png_data) = bmp_to_png(&decoded) {
1853            return Some((png_data, "image/png".to_string()));
1854        }
1855        // Conversion failed — log and skip this image.
1856        log::warn!("BMP to PNG conversion failed; skipping image");
1857        return None;
1858    }
1859
1860    Some((decoded, content_type))
1861}
1862
1863/// Convert BMP image data to PNG format.
1864fn bmp_to_png(bmp_data: &[u8]) -> Option<Vec<u8>> {
1865    let img = image::load_from_memory_with_format(bmp_data, image::ImageFormat::Bmp).ok()?;
1866    let mut buf = Vec::new();
1867    img.write_to(&mut std::io::Cursor::new(&mut buf), image::ImageFormat::Png)
1868        .ok()?;
1869    Some(buf)
1870}
1871
1872fn extract_value_text(elem: Node<'_, '_>) -> Option<String> {
1873    let value = find_first_child_by_name(elem, "value")?;
1874    // Try <text>, <float>, <integer>, <date>
1875    for tag in &["text", "float", "integer", "date", "dateTime", "decimal"] {
1876        if let Some(child) = find_first_child_by_name(value, tag) {
1877            let text = child.text().unwrap_or("");
1878            let trimmed = text.trim_start_matches(|c: char| c.is_whitespace() && c != '\n');
1879            let trimmed = trimmed.trim_end_matches(|c: char| c.is_whitespace() && c != '\n');
1880            if !trimmed.is_empty() {
1881                return Some(trimmed.to_string());
1882            }
1883        }
1884    }
1885    // Fall back to <exData contentType="text/html|text/xml|…"> — collect all
1886    // descendant text nodes and join them, stripping the XHTML markup.
1887    if let Some(ex) = find_first_child_by_name(value, "exData") {
1888        let text = extract_text_from_descendants(ex);
1889        if !text.is_empty() {
1890            return Some(text);
1891        }
1892    }
1893    None
1894}
1895
1896/// XFA Spec 3.3 §2.1 (p24) — Draw element: container for fixed content (boilerplate).
1897/// Contains text, lines, rectangles, arcs, or images that remain unchanged.
1898/// Also handles `<value><image>` for embedded image data (§2.3 p41-42).
1899fn extract_draw_content(elem: Node<'_, '_>) -> Option<DrawContent> {
1900    let value = find_first_child_by_name(elem, "value")?;
1901
1902    if let Some(line) = find_first_child_by_name(value, "line") {
1903        let x1 = attr_as_f64(line, "x1").unwrap_or(0.0);
1904        let y1 = attr_as_f64(line, "y1").unwrap_or(0.0);
1905        let x2 = attr_as_f64(line, "x2").unwrap_or(0.0);
1906        let y2 = attr_as_f64(line, "y2").unwrap_or(0.0);
1907        return Some(DrawContent::Line { x1, y1, x2, y2 });
1908    }
1909
1910    if let Some(rect) = find_first_child_by_name(value, "rectangle") {
1911        let x = attr_as_f64(rect, "x").unwrap_or(0.0);
1912        let y = attr_as_f64(rect, "y").unwrap_or(0.0);
1913        let w = attr_as_f64(rect, "w").unwrap_or(attr_as_f64(rect, "width").unwrap_or(0.0));
1914        let h = attr_as_f64(rect, "h").unwrap_or(attr_as_f64(rect, "height").unwrap_or(0.0));
1915        let radius =
1916            attr_as_f64(rect, "r").unwrap_or(attr_as_f64(rect, "cornerRadius").unwrap_or(0.0));
1917        return Some(DrawContent::Rectangle { x, y, w, h, radius });
1918    }
1919
1920    if let Some(arc) = find_first_child_by_name(value, "arc") {
1921        let x = attr_as_f64(arc, "x").unwrap_or(0.0);
1922        let y = attr_as_f64(arc, "y").unwrap_or(0.0);
1923        let w = attr_as_f64(arc, "w").unwrap_or(attr_as_f64(arc, "width").unwrap_or(0.0));
1924        let h = attr_as_f64(arc, "h").unwrap_or(attr_as_f64(arc, "height").unwrap_or(0.0));
1925        let start_angle = attr_as_f64(arc, "startAngle").unwrap_or(0.0);
1926        let sweep_angle = attr_as_f64(arc, "sweepAngle").unwrap_or(0.0);
1927        return Some(DrawContent::Arc {
1928            x,
1929            y,
1930            w,
1931            h,
1932            start_angle,
1933            sweep_angle,
1934        });
1935    }
1936
1937    None
1938}
1939
1940/// Walk all descendant nodes of `node` and return text content joined
1941/// by newlines between block-level elements (e.g. `<p>`, `<div>`), preserving
1942/// paragraph structure. Used to extract plain text from XHTML-encoded
1943/// `<exData>` nodes. (#686)
1944fn extract_text_from_descendants(node: Node<'_, '_>) -> String {
1945    let block_tags = [
1946        "p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "li", "tr", "br",
1947    ];
1948    let mut result = String::new();
1949    let mut last_was_block = false;
1950
1951    for desc in node.descendants() {
1952        let is_block = desc.is_element() && block_tags.contains(&desc.tag_name().name());
1953
1954        if is_block && !result.is_empty() {
1955            result.push('\n');
1956            last_was_block = true;
1957        }
1958
1959        if desc.is_text() {
1960            if let Some(t) = desc.text() {
1961                let t = t.trim();
1962                if !t.is_empty() {
1963                    if last_was_block || result.is_empty() {
1964                        result.push_str(t);
1965                    } else {
1966                        result.push(' ');
1967                        result.push_str(t);
1968                    }
1969                    last_was_block = false;
1970                }
1971            }
1972        }
1973    }
1974
1975    result.trim().to_string()
1976}
1977
1978fn base64_decode(input: &str) -> Vec<u8> {
1979    use base64::Engine;
1980    base64::engine::general_purpose::STANDARD
1981        .decode(input.trim())
1982        .unwrap_or_default()
1983}
1984
1985/// Extract the dominant font size from `<exData contentType="text/html">` HTML.
1986///
1987/// Scans `<span style="font-size:Xpt">` attributes and returns the first
1988/// (typically dominant) font size in points.  Returns `None` when the element
1989/// has no exData HTML or no font-size is specified.
1990fn extract_exdata_font_size(elem: Node<'_, '_>) -> Option<f64> {
1991    let value = find_first_child_by_name(elem, "value")?;
1992    let ex = find_first_child_by_name(value, "exData")?;
1993    // Walk all descendant elements looking for style="...font-size:Xpt..."
1994    for desc in ex.descendants() {
1995        if !desc.is_element() {
1996            continue;
1997        }
1998        let style = desc
1999            .attribute("style")
2000            .or_else(|| desc.attribute("Style"))?;
2001        // Parse font-size from CSS style string
2002        for part in style.split(';') {
2003            let part = part.trim();
2004            if let Some(val) = part
2005                .strip_prefix("font-size:")
2006                .or_else(|| part.strip_prefix("font-size :"))
2007            {
2008                let val = val.trim();
2009                if let Some(pt) = val.strip_suffix("pt") {
2010                    if let Ok(size) = pt.trim().parse::<f64>() {
2011                        if size > 0.0 {
2012                            return Some(size);
2013                        }
2014                    }
2015                }
2016            }
2017        }
2018    }
2019    None
2020}
2021
2022/// Parse caption from `<caption placement="..." reserve="...">` element.
2023///
2024/// XFA Spec 3.3 §2.6 (p51) — Captions: reserve is a height for top/bottom
2025/// placement and a width for left/right placement. When reserve is absent
2026/// or zero, the layout processor calculates the minimum size.
2027/// Note: Acrobat only renders captions on button and barcode fields (§2.1 p32 Note).
2028fn parse_caption(elem: Node<'_, '_>) -> Option<Caption> {
2029    let cap_elem = find_first_child_by_name(elem, "caption")?;
2030    // Skip captions with presence="hidden"/"invisible"/"inactive".
2031    if is_hidden(cap_elem) {
2032        return None;
2033    }
2034    let text = extract_value_text(cap_elem)?;
2035    if text.is_empty() {
2036        return None;
2037    }
2038    let placement = match attr(cap_elem, "placement") {
2039        Some("right") => CaptionPlacement::Right,
2040        Some("top") => CaptionPlacement::Top,
2041        Some("bottom") => CaptionPlacement::Bottom,
2042        Some("inline") => CaptionPlacement::Inline,
2043        _ => CaptionPlacement::Left,
2044    };
2045    let reserve = attr(cap_elem, "reserve")
2046        .and_then(Measurement::parse)
2047        .map(|m| m.to_points());
2048    Some(Caption {
2049        placement,
2050        reserve,
2051        text,
2052    })
2053}
2054
2055/// Get an attribute value by local name, ignoring namespace prefixes.
2056fn attr<'a>(elem: Node<'a, '_>, name: &str) -> Option<&'a str> {
2057    elem.attributes()
2058        .find(|a| a.name() == name)
2059        .map(|a| a.value())
2060}
2061
2062fn attr_as_f64(elem: Node<'_, '_>, name: &str) -> Option<f64> {
2063    attr(elem, name)?.parse().ok()
2064}
2065
2066/// Find the first direct child element with a given local tag name.
2067fn find_first_child_by_name<'a, 'input>(
2068    elem: Node<'a, 'input>,
2069    name: &str,
2070) -> Option<Node<'a, 'input>> {
2071    elem.children()
2072        .filter(|n| n.is_element())
2073        .find(|n| n.tag_name().name() == name)
2074}
2075
2076// ---------------------------------------------------------------------------
2077// Tests
2078// ---------------------------------------------------------------------------
2079
2080#[cfg(test)]
2081mod tests {
2082    use super::*;
2083
2084    const SIMPLE_TEMPLATE: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
2085<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/">
2086<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
2087  <subform name="form1" layout="paginate">
2088    <pageSet>
2089      <pageArea name="Page1">
2090        <contentArea x="0.5in" y="0.5in" w="7.5in" h="10in"/>
2091        <medium stock="default" short="8.5in" long="11in"/>
2092      </pageArea>
2093    </pageSet>
2094    <subform name="section" layout="tb" w="7.5in">
2095      <field name="firstName" w="3.5in" h="0.3in">
2096        <caption><value><text>First Name</text></value></caption>
2097        <ui><textEdit/></ui>
2098        <value><text/></value>
2099      </field>
2100      <field name="lastName" w="3.5in" h="0.3in">
2101        <caption><value><text>Last Name</text></value></caption>
2102        <ui><textEdit/></ui>
2103        <value><text>Default</text></value>
2104      </field>
2105    </subform>
2106  </subform>
2107</template>
2108</xdp:xdp>"#;
2109
2110    #[test]
2111    fn parse_simple_form() {
2112        let (tree, root_id) = parse_template(SIMPLE_TEMPLATE, None).unwrap();
2113        let root = tree.get(root_id);
2114        // Root should have children (the paginate subform)
2115        assert!(!root.children.is_empty(), "root has no children");
2116    }
2117
2118    #[test]
2119    fn field_with_default_value() {
2120        let (tree, root_id) = parse_template(SIMPLE_TEMPLATE, None).unwrap();
2121        // Walk to find lastName field
2122        let found = find_node_by_name(&tree, root_id, "lastName");
2123        assert!(found.is_some(), "lastName field not found");
2124        if let Some(n) = found {
2125            match &n.node_type {
2126                FormNodeType::Field { value } => assert_eq!(value, "Default"),
2127                other => panic!("expected Field, got {other:?}"),
2128            }
2129        }
2130    }
2131
2132    #[test]
2133    fn unlimited_occur_expands_all_dataset_instances_in_order() {
2134        let template = r#"<?xml version="1.0" encoding="UTF-8"?>
2135<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
2136  <subform name="form1" layout="paginate">
2137    <pageSet>
2138      <pageArea name="Page1">
2139        <contentArea x="0.5in" y="0.5in" w="7.5in" h="10in"/>
2140      </pageArea>
2141    </pageSet>
2142    <subform name="items" layout="tb" w="7in">
2143      <subform name="row" layout="tb" w="7in">
2144        <occur min="0" max="-1"/>
2145        <field name="value" w="2in" h="0.3in">
2146          <ui><textEdit/></ui>
2147          <value><text/></value>
2148        </field>
2149      </subform>
2150    </subform>
2151  </subform>
2152</template>"#;
2153        let datasets = r#"<?xml version="1.0" encoding="UTF-8"?>
2154<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
2155  <xfa:data>
2156    <form1>
2157      <items>
2158        <row><value>A</value></row>
2159        <row><value>B</value></row>
2160        <row><value>C</value></row>
2161      </items>
2162    </form1>
2163  </xfa:data>
2164</xfa:datasets>"#;
2165
2166        let (tree, root_id) = parse_template(template, Some(datasets)).unwrap();
2167        let items_id = find_node_id_by_name(&tree, root_id, "items").unwrap();
2168        let row_ids = tree.get(items_id).children.clone();
2169
2170        assert_eq!(row_ids.len(), 3);
2171        assert!(row_ids
2172            .iter()
2173            .all(|&row_id| tree.get(row_id).occur.count() == 1));
2174
2175        let values: Vec<String> = row_ids
2176            .iter()
2177            .map(|&row_id| {
2178                let field_id = tree.get(row_id).children[0];
2179                match &tree.get(field_id).node_type {
2180                    FormNodeType::Field { value } => value.clone(),
2181                    other => panic!("expected Field, got {other:?}"),
2182                }
2183            })
2184            .collect();
2185
2186        assert_eq!(values, vec!["A", "B", "C"]);
2187    }
2188
2189    #[test]
2190    fn explicit_dataref_bind_repeats_subform_instances() {
2191        let template = r#"<?xml version="1.0" encoding="UTF-8"?>
2192<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
2193  <subform name="form1" layout="paginate">
2194    <pageSet>
2195      <pageArea name="Page1">
2196        <contentArea x="0.5in" y="0.5in" w="7.5in" h="10in"/>
2197      </pageArea>
2198    </pageSet>
2199    <subform name="items" layout="tb" w="7in">
2200      <subform name="entryRow" layout="tb" w="7in">
2201        <occur min="0" max="-1"/>
2202        <bind match="dataRef" ref="$.item[*]"/>
2203        <field name="label" w="2in" h="0.3in">
2204          <bind match="dataRef" ref="$.value"/>
2205          <ui><textEdit/></ui>
2206          <value><text/></value>
2207        </field>
2208      </subform>
2209    </subform>
2210  </subform>
2211</template>"#;
2212        let datasets = r#"<?xml version="1.0" encoding="UTF-8"?>
2213<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
2214  <xfa:data>
2215    <form1>
2216      <items>
2217        <item><value>One</value></item>
2218        <item><value>Two</value></item>
2219        <item><value>Three</value></item>
2220      </items>
2221    </form1>
2222  </xfa:data>
2223</xfa:datasets>"#;
2224
2225        let (tree, root_id) = parse_template(template, Some(datasets)).unwrap();
2226        let items_id = find_node_id_by_name(&tree, root_id, "items").unwrap();
2227        let row_ids = tree.get(items_id).children.clone();
2228
2229        assert_eq!(row_ids.len(), 3);
2230        assert!(row_ids
2231            .iter()
2232            .all(|&row_id| tree.get(row_id).occur.count() == 1));
2233
2234        let values: Vec<String> = row_ids
2235            .iter()
2236            .map(|&row_id| {
2237                let field_id = tree.get(row_id).children[0];
2238                match &tree.get(field_id).node_type {
2239                    FormNodeType::Field { value } => value.clone(),
2240                    other => panic!("expected Field, got {other:?}"),
2241                }
2242            })
2243            .collect();
2244
2245        assert_eq!(values, vec!["One", "Two", "Three"]);
2246    }
2247
2248    #[test]
2249    fn dimension_parsing() {
2250        assert!((parse_dim("0.5in").unwrap() - 36.0).abs() < 0.01);
2251        assert!((parse_dim("72pt").unwrap() - 72.0).abs() < 0.01);
2252        assert!((parse_dim("1in").unwrap() - 72.0).abs() < 0.01);
2253        assert!((parse_dim("8.5in").unwrap() - 612.0).abs() < 0.1);
2254        assert!((parse_dim("11in").unwrap() - 792.0).abs() < 0.1);
2255    }
2256
2257    #[test]
2258    fn layout_attr_parsing() {
2259        assert_eq!(parse_layout_str("tb"), LayoutStrategy::TopToBottom);
2260        assert_eq!(parse_layout_str("lr-tb"), LayoutStrategy::LeftToRightTB);
2261        assert_eq!(parse_layout_str("paginate"), LayoutStrategy::TopToBottom);
2262        assert_eq!(parse_layout_str("position"), LayoutStrategy::Positioned);
2263    }
2264
2265    fn parse_layout_str(s: &str) -> LayoutStrategy {
2266        let xml = format!(
2267            r#"<?xml version="1.0"?><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform layout="{s}"/></template>"#
2268        );
2269        let doc = roxmltree::Document::parse(&xml).unwrap();
2270        let root = doc.root_element();
2271        let subform = root.children().filter(|n| n.is_element()).next().unwrap();
2272        parse_layout_attr(subform)
2273    }
2274
2275    fn find_node_by_name<'a>(
2276        tree: &'a FormTree,
2277        id: FormNodeId,
2278        name: &str,
2279    ) -> Option<&'a FormNode> {
2280        let node = tree.get(id);
2281        if node.name == name {
2282            return Some(node);
2283        }
2284        for &child_id in &node.children {
2285            if let Some(found) = find_node_by_name(tree, child_id, name) {
2286                return Some(found);
2287            }
2288        }
2289        None
2290    }
2291
2292    fn find_node_id_by_name(tree: &FormTree, id: FormNodeId, name: &str) -> Option<FormNodeId> {
2293        if tree.get(id).name == name {
2294            return Some(id);
2295        }
2296        for &child_id in &tree.get(id).children.clone() {
2297            if let Some(found) = find_node_id_by_name(tree, child_id, name) {
2298                return Some(found);
2299            }
2300        }
2301        None
2302    }
2303
2304    /// <exData contentType="text/html"> rich-text draw nodes must have their
2305    /// HTML stripped and plain text extracted so LayoutEngine can render them.
2306    #[test]
2307    fn draw_exdata_html_text_extracted() {
2308        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
2309<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
2310  <subform name="form1" layout="paginate">
2311    <pageSet>
2312      <pageArea name="Page1">
2313        <contentArea x="0.5in" y="0.5in" w="7.5in" h="10in"/>
2314        <medium stock="default" short="8.5in" long="11in"/>
2315      </pageArea>
2316    </pageSet>
2317    <subform name="body" layout="tb" w="7.5in">
2318      <draw name="instructions" w="7in" h="1in">
2319        <value>
2320          <exData contentType="text/html">
2321            <body xmlns="http://www.w3.org/1999/xhtml">
2322              <p>Do <span>not</span> file this form.</p>
2323            </body>
2324          </exData>
2325        </value>
2326      </draw>
2327    </subform>
2328  </subform>
2329</template>"#;
2330        let (tree, root_id) = parse_template(xml, None).unwrap();
2331        let node =
2332            find_node_by_name(&tree, root_id, "instructions").expect("instructions draw not found");
2333        match &node.node_type {
2334            FormNodeType::Draw(DrawContent::Text(content)) => {
2335                assert!(
2336                    content.contains("not") && content.contains("file"),
2337                    "expected HTML text extracted, got: {content:?}"
2338                );
2339            }
2340            other => panic!("expected Draw, got {other:?}"),
2341        }
2342    }
2343
2344    /// Draw and field elements with presence="hidden" must not expose content
2345    /// to the renderer (content should be empty) while still occupying layout
2346    /// space (node is still present in the tree). (#557)
2347    #[test]
2348    fn hidden_elements_have_empty_content() {
2349        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
2350<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
2351  <subform name="form1" layout="paginate">
2352    <pageSet>
2353      <pageArea name="Page1">
2354        <contentArea x="0.5in" y="0.5in" w="7.5in" h="10in"/>
2355        <medium stock="default" short="8.5in" long="11in"/>
2356      </pageArea>
2357    </pageSet>
2358    <subform name="body" layout="tb" w="7.5in">
2359      <draw name="visible_draw" w="7in" h="0.5in">
2360        <value><text>Visible text</text></value>
2361      </draw>
2362      <draw name="hidden_draw" w="7in" h="0.5in" presence="hidden">
2363        <value><text>DRAFT</text></value>
2364      </draw>
2365      <field name="hidden_field" w="3in" h="0.3in" presence="hidden">
2366        <value><text>secret</text></value>
2367      </field>
2368    </subform>
2369  </subform>
2370</template>"#;
2371        let (tree, root_id) = parse_template(xml, None).unwrap();
2372
2373        // Visible draw retains its content.
2374        let visible = find_node_by_name(&tree, root_id, "visible_draw").unwrap();
2375        match &visible.node_type {
2376            FormNodeType::Draw(DrawContent::Text(content)) => assert_eq!(content, "Visible text"),
2377            other => panic!("expected Draw, got {other:?}"),
2378        }
2379
2380        // Hidden draw preserves content (scripts may make it visible).
2381        // Visibility is tracked in FormNodeMeta.
2382        let hidden_draw = find_node_by_name(&tree, root_id, "hidden_draw").unwrap();
2383        match &hidden_draw.node_type {
2384            FormNodeType::Draw(DrawContent::Text(content)) => assert_eq!(content, "DRAFT"),
2385            other => panic!("expected Draw, got {other:?}"),
2386        }
2387        let hidden_draw_id = find_node_id_by_name(&tree, root_id, "hidden_draw").unwrap();
2388        assert!(tree.meta(hidden_draw_id).presence.is_not_visible());
2389
2390        // Hidden fields preserve content and remain Field type — layout
2391        // engine skips them via metadata.
2392        let hidden_field = find_node_by_name(&tree, root_id, "hidden_field").unwrap();
2393        match &hidden_field.node_type {
2394            FormNodeType::Field { value } => assert_eq!(value, "secret"),
2395            other => panic!("expected Field, got {other:?}"),
2396        }
2397        let hidden_field_id = find_node_id_by_name(&tree, root_id, "hidden_field").unwrap();
2398        assert!(tree.meta(hidden_field_id).presence.is_not_visible());
2399    }
2400
2401    /// Font sizes given as bare numbers (`<font size="10">`) must be treated as
2402    /// **points**, not inches.  A bare "10" used to go through `parse_dim` which
2403    /// added the "in" suffix, turning 10pt → 720pt and making text enormous. (#557)
2404    #[test]
2405    fn font_size_bare_number_is_points() {
2406        // parse_font_size must not multiply by 72.
2407        assert_eq!(parse_font_size("10"), Some(10.0));
2408        assert_eq!(parse_font_size("8"), Some(8.0));
2409        assert_eq!(parse_font_size("12"), Some(12.0));
2410        // Explicit unit must still work.
2411        assert!((parse_font_size("10pt").unwrap() - 10.0).abs() < 0.01);
2412        // Zero/negative → None.
2413        assert_eq!(parse_font_size("0"), None);
2414    }
2415
2416    /// `<para hAlign="center/right/justify">` on draw/field elements must be
2417    /// reflected in `FontMetrics.text_align` so the renderer can align text
2418    /// within the element's bounding box. (#557)
2419    #[test]
2420    fn para_halign_parsed_into_font_metrics() {
2421        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
2422<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
2423  <subform name="form1" layout="paginate">
2424    <pageSet>
2425      <pageArea name="Page1">
2426        <contentArea x="0.5in" y="0.5in" w="7.5in" h="10in"/>
2427        <medium stock="default" short="8.5in" long="11in"/>
2428      </pageArea>
2429    </pageSet>
2430    <subform name="body" layout="tb" w="7.5in">
2431      <draw name="left_draw" w="7in" h="0.5in">
2432        <value><text>Left</text></value>
2433        <para hAlign="left"/>
2434      </draw>
2435      <draw name="center_draw" w="7in" h="0.5in">
2436        <value><text>Centered</text></value>
2437        <para hAlign="center"/>
2438      </draw>
2439      <draw name="right_draw" w="7in" h="0.5in">
2440        <value><text>Right</text></value>
2441        <para hAlign="right"/>
2442      </draw>
2443    </subform>
2444  </subform>
2445</template>"#;
2446        let (tree, root_id) = parse_template(xml, None).unwrap();
2447
2448        let left = find_node_by_name(&tree, root_id, "left_draw").unwrap();
2449        assert_eq!(
2450            left.font.text_align,
2451            TextAlign::Left,
2452            "left_draw should be Left"
2453        );
2454
2455        let center = find_node_by_name(&tree, root_id, "center_draw").unwrap();
2456        assert_eq!(
2457            center.font.text_align,
2458            TextAlign::Center,
2459            "center_draw should be Center"
2460        );
2461
2462        let right = find_node_by_name(&tree, root_id, "right_draw").unwrap();
2463        assert_eq!(
2464            right.font.text_align,
2465            TextAlign::Right,
2466            "right_draw should be Right"
2467        );
2468    }
2469
2470    /// BMP images in `<image contentType="image/bmp">` must be converted to PNG.
2471    /// PDF does not support BMP natively, so the parser converts on extraction. (#670)
2472    #[test]
2473    fn bmp_image_converted_to_png() {
2474        // Minimal 1×1 BMP (24-bit, no compression): 58 bytes.
2475        let bmp_bytes: [u8; 58] = [
2476            0x42, 0x4D, // "BM" magic
2477            0x3A, 0x00, 0x00, 0x00, // file size = 58
2478            0x00, 0x00, 0x00, 0x00, // reserved
2479            0x36, 0x00, 0x00, 0x00, // pixel data offset = 54
2480            0x28, 0x00, 0x00, 0x00, // DIB header size = 40
2481            0x01, 0x00, 0x00, 0x00, // width = 1
2482            0x01, 0x00, 0x00, 0x00, // height = 1
2483            0x01, 0x00, // planes = 1
2484            0x18, 0x00, // bits per pixel = 24
2485            0x00, 0x00, 0x00, 0x00, // compression = 0
2486            0x04, 0x00, 0x00, 0x00, // image size = 4 (1 pixel + 1 byte padding)
2487            0x13, 0x0B, 0x00, 0x00, // h-res
2488            0x13, 0x0B, 0x00, 0x00, // v-res
2489            0x00, 0x00, 0x00, 0x00, // colors
2490            0x00, 0x00, 0x00, 0x00, // important colors
2491            0xFF, 0x00, 0x00,
2492            0x00, // pixel (BGR: blue=FF, green=0, red=0) + 1 byte row padding
2493        ];
2494        use base64::Engine;
2495        let b64 = base64::engine::general_purpose::STANDARD.encode(&bmp_bytes);
2496
2497        let xml = format!(
2498            r#"<?xml version="1.0" encoding="UTF-8"?>
2499<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
2500  <subform name="form1" layout="paginate">
2501    <pageSet>
2502      <pageArea name="Page1">
2503        <contentArea x="0.5in" y="0.5in" w="7.5in" h="10in"/>
2504        <medium stock="default" short="8.5in" long="11in"/>
2505      </pageArea>
2506    </pageSet>
2507    <subform name="body" layout="tb" w="7.5in">
2508      <draw name="barcode_img" w="2in" h="0.5in">
2509        <value>
2510          <image contentType="image/bmp">{b64}</image>
2511        </value>
2512      </draw>
2513    </subform>
2514  </subform>
2515</template>"#
2516        );
2517        let (tree, root_id) = parse_template(&xml, None).unwrap();
2518        let node = find_node_by_name(&tree, root_id, "barcode_img").expect("barcode_img not found");
2519        match &node.node_type {
2520            FormNodeType::Image { data, mime_type } => {
2521                assert_eq!(mime_type, "image/png", "BMP should be converted to PNG");
2522                // PNG magic bytes: 0x89 P N G
2523                assert!(
2524                    data.starts_with(&[0x89, 0x50, 0x4E, 0x47]),
2525                    "expected PNG magic bytes, got {:?}",
2526                    &data[..4.min(data.len())]
2527                );
2528            }
2529            other => panic!("expected Image, got {other:?}"),
2530        }
2531    }
2532
2533    #[test]
2534    fn parse_percentage_values() {
2535        assert!((parse_percentage("96%").unwrap() - 0.96).abs() < 1e-10);
2536        assert!((parse_percentage("110%").unwrap() - 1.10).abs() < 1e-10);
2537        assert!((parse_percentage("100%").unwrap() - 1.0).abs() < 1e-10);
2538        assert!((parse_percentage("50%").unwrap() - 0.50).abs() < 1e-10);
2539        assert!(parse_percentage("notanumber%").is_none());
2540        assert!(parse_percentage("96").is_none()); // no % suffix
2541    }
2542
2543    #[test]
2544    fn parse_letter_spacing_values() {
2545        let font_size = 10.0;
2546        // em-based
2547        let v = parse_letter_spacing("-0.018em", font_size).unwrap();
2548        assert!((v - (-0.018 * 10.0)).abs() < 1e-10);
2549        let v = parse_letter_spacing("0.1em", font_size).unwrap();
2550        assert!((v - 1.0).abs() < 1e-10);
2551        // bare zero
2552        assert_eq!(parse_letter_spacing("0", font_size), Some(0.0));
2553        // pt-based (via Measurement)
2554        let v = parse_letter_spacing("0.5pt", font_size).unwrap();
2555        assert!((v - 0.5).abs() < 0.01);
2556    }
2557
2558    #[test]
2559    fn parse_font_color_attr_hex6() {
2560        assert_eq!(parse_font_color_attr("#000080"), Some((0, 0, 128)));
2561        assert_eq!(parse_font_color_attr("#FF0000"), Some((255, 0, 0)));
2562        assert_eq!(parse_font_color_attr("#00ff00"), Some((0, 255, 0)));
2563        assert_eq!(parse_font_color_attr("#ABCDEF"), Some((0xAB, 0xCD, 0xEF)));
2564    }
2565
2566    #[test]
2567    fn parse_font_color_attr_hex3() {
2568        // #RGB shorthand: each digit is doubled (e.g. #F00 → #FF0000)
2569        assert_eq!(parse_font_color_attr("#F00"), Some((255, 0, 0)));
2570        assert_eq!(parse_font_color_attr("#0F0"), Some((0, 255, 0)));
2571        assert_eq!(parse_font_color_attr("#00F"), Some((0, 0, 255)));
2572        assert_eq!(parse_font_color_attr("#ABC"), Some((0xAA, 0xBB, 0xCC)));
2573    }
2574
2575    #[test]
2576    fn parse_font_color_attr_decimal_csv() {
2577        assert_eq!(parse_font_color_attr("0,0,128"), Some((0, 0, 128)));
2578        assert_eq!(parse_font_color_attr("255, 128, 0"), Some((255, 128, 0)));
2579    }
2580
2581    #[test]
2582    fn parse_font_color_attr_invalid() {
2583        assert_eq!(parse_font_color_attr(""), None);
2584        assert_eq!(parse_font_color_attr("#GG0000"), None);
2585        assert_eq!(parse_font_color_attr("#12345"), None);
2586        assert_eq!(parse_font_color_attr("not_a_color"), None);
2587    }
2588
2589    /// `<font color="#000080">` attribute must be parsed into
2590    /// `style.text_color` when no `<fill><color>` child is present. (#740)
2591    #[test]
2592    fn font_color_attribute_parsed() {
2593        let xml = r##"<?xml version="1.0" encoding="UTF-8"?>
2594<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
2595  <subform name="form1" layout="paginate">
2596    <pageSet>
2597      <pageArea name="Page1">
2598        <contentArea x="0.5in" y="0.5in" w="7.5in" h="10in"/>
2599        <medium stock="default" short="8.5in" long="11in"/>
2600      </pageArea>
2601    </pageSet>
2602    <subform name="body" layout="tb" w="7.5in">
2603      <draw name="blue_text" w="7in" h="0.5in">
2604        <value><text>Navy blue</text></value>
2605        <font typeface="Arial" size="10pt" color="#000080"/>
2606      </draw>
2607    </subform>
2608  </subform>
2609</template>"##;
2610        let (tree, root_id) = parse_template(xml, None).unwrap();
2611        let id = find_node_id_by_name(&tree, root_id, "blue_text").unwrap();
2612        let style = &tree.meta(id).style;
2613        assert_eq!(
2614            style.text_color,
2615            Some((0, 0, 128)),
2616            "font color=#000080 should parse to (0, 0, 128)"
2617        );
2618    }
2619
2620    #[test]
2621    fn font_horizontal_scale_and_letter_spacing_parsed() {
2622        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
2623<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
2624  <subform name="form1" layout="paginate">
2625    <pageSet>
2626      <pageArea name="Page1">
2627        <contentArea x="0.5in" y="0.5in" w="7.5in" h="10in"/>
2628        <medium stock="default" short="8.5in" long="11in"/>
2629      </pageArea>
2630    </pageSet>
2631    <subform name="body" layout="tb" w="7.5in">
2632      <draw name="scaled_text" w="7in" h="0.5in">
2633        <value><text>Scaled</text></value>
2634        <font typeface="Arial" size="10pt" fontHorizontalScale="96%" letterSpacing="-0.018em"/>
2635      </draw>
2636    </subform>
2637  </subform>
2638</template>"#;
2639        let (tree, root_id) = parse_template(xml, None).unwrap();
2640        let id = find_node_id_by_name(&tree, root_id, "scaled_text").unwrap();
2641        let style = &tree.meta(id).style;
2642        assert!(
2643            (style.font_horizontal_scale.unwrap() - 0.96).abs() < 1e-10,
2644            "expected 0.96, got {:?}",
2645            style.font_horizontal_scale
2646        );
2647        assert!(
2648            (style.letter_spacing_pt.unwrap() - (-0.18)).abs() < 0.01,
2649            "expected -0.18pt, got {:?}",
2650            style.letter_spacing_pt
2651        );
2652    }
2653
2654    #[test]
2655    fn choice_list_items_parsed() {
2656        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
2657<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
2658  <subform name="root" layout="tb">
2659    <pageSet>
2660      <pageArea name="Page1">
2661        <contentArea w="8in" h="10in"/>
2662      </pageArea>
2663    </pageSet>
2664    <field name="country" w="3in" h="0.3in">
2665      <ui><choiceList/></ui>
2666      <value><text>US</text></value>
2667      <items>
2668        <text>United States</text>
2669        <text>United Kingdom</text>
2670        <text>Canada</text>
2671      </items>
2672      <items save="1">
2673        <text>US</text>
2674        <text>UK</text>
2675        <text>CA</text>
2676      </items>
2677    </field>
2678    <field name="single_items" w="3in" h="0.3in">
2679      <ui><choiceList/></ui>
2680      <value><text>Red</text></value>
2681      <items>
2682        <text>Red</text>
2683        <text>Green</text>
2684        <text>Blue</text>
2685      </items>
2686    </field>
2687  </subform>
2688</template>"#;
2689
2690        let (tree, _pages) = parse_template(xml, None).unwrap();
2691
2692        // Find the "country" field — should have display + save items
2693        let country = tree
2694            .nodes
2695            .iter()
2696            .enumerate()
2697            .find(|(_, n)| n.name == "country")
2698            .map(|(i, _)| FormNodeId(i))
2699            .expect("country field not found");
2700        let meta = tree.meta(country);
2701        assert_eq!(meta.field_kind, FieldKind::Dropdown);
2702        assert_eq!(
2703            meta.display_items,
2704            vec!["United States", "United Kingdom", "Canada"]
2705        );
2706        assert_eq!(meta.save_items, vec!["US", "UK", "CA"]);
2707
2708        // Find the "single_items" field — only display items, no save
2709        let single = tree
2710            .nodes
2711            .iter()
2712            .enumerate()
2713            .find(|(_, n)| n.name == "single_items")
2714            .map(|(i, _)| FormNodeId(i))
2715            .expect("single_items field not found");
2716        let meta_s = tree.meta(single);
2717        assert_eq!(meta_s.display_items, vec!["Red", "Green", "Blue"]);
2718        assert!(meta_s.save_items.is_empty());
2719    }
2720
2721    #[test]
2722    fn content_area_without_xy_defaults_to_origin() {
2723        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
2724<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
2725  <subform name="root" layout="tb">
2726    <pageSet>
2727      <pageArea name="Page1">
2728        <contentArea w="8in" h="10in"/>
2729      </pageArea>
2730    </pageSet>
2731  </subform>
2732</template>"#;
2733
2734        let (tree, root_id) = parse_template(xml, None).unwrap();
2735        let page_area_id = find_node_id_by_name(&tree, root_id, "Page1").unwrap();
2736        let page_area = tree.get(page_area_id);
2737
2738        match &page_area.node_type {
2739            FormNodeType::PageArea { content_areas } => {
2740                assert_eq!(content_areas.len(), 1);
2741                assert_eq!(content_areas[0].x, 0.0);
2742                assert_eq!(content_areas[0].y, 0.0);
2743                assert!((content_areas[0].width - 576.0).abs() < 0.01);
2744                assert!((content_areas[0].height - 720.0).abs() < 0.01);
2745            }
2746            other => panic!("expected PageArea, got {other:?}"),
2747        }
2748    }
2749
2750    #[test]
2751    fn check_button_mark_parsed_into_style() {
2752        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
2753<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
2754  <subform name="root" layout="tb">
2755    <pageSet>
2756      <pageArea name="Page1">
2757        <contentArea w="8in" h="10in"/>
2758      </pageArea>
2759    </pageSet>
2760    <field name="agree" w="0.3in" h="0.3in">
2761      <ui><checkButton mark="circle"/></ui>
2762      <value><text>1</text></value>
2763    </field>
2764  </subform>
2765</template>"#;
2766
2767        let (tree, root_id) = parse_template(xml, None).unwrap();
2768        let id = find_node_id_by_name(&tree, root_id, "agree").unwrap();
2769        let meta = tree.meta(id);
2770        assert_eq!(meta.field_kind, FieldKind::Checkbox);
2771        assert_eq!(meta.style.check_button_mark.as_deref(), Some("circle"));
2772    }
2773
2774    #[test]
2775    fn border_widths_parsed_from_per_edge_template_border() {
2776        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
2777<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
2778  <subform name="root" layout="tb">
2779    <pageSet>
2780      <pageArea name="Page1">
2781        <contentArea w="8in" h="10in"/>
2782      </pageArea>
2783    </pageSet>
2784    <field name="amount" w="2in" h="0.3in">
2785      <ui><textEdit/></ui>
2786      <value><text>42</text></value>
2787      <border>
2788        <edge thickness="1pt"/>
2789        <edge thickness="2pt"/>
2790        <edge thickness="3pt"/>
2791        <edge thickness="4pt"/>
2792      </border>
2793    </field>
2794  </subform>
2795</template>"#;
2796
2797        let (tree, root_id) = parse_template(xml, None).unwrap();
2798        let id = find_node_id_by_name(&tree, root_id, "amount").unwrap();
2799        let style = &tree.meta(id).style;
2800
2801        assert_eq!(style.border_width_pt, Some(1.0));
2802        assert_eq!(style.border_widths, Some([1.0, 2.0, 3.0, 4.0]));
2803    }
2804}