// pdf_xfa/merger.rs

//! XFA Form DOM — the merged result of template + data.
//!
//! Implements the Form DOM from XFA 3.3 §3 and §5.
//! The Form DOM is a hierarchical tree of merged nodes, where repeating
//! subforms have been expanded based on data instances.
//!
//! Data binding follows XFA Spec 3.3 §4.4 p176-214 ("Merging Data with a
//! Template"). Implements both `consumeData` and `matchTemplate` merge modes.
//!
//! ## Spec gaps (see individual TODOs):
//! - §4.4.3 p176: `bind match="global"` not implemented (approximated)
//! - §4.4 p197: attribute matching step skipped
//! - §4.4 p198: re-normalization not implemented
//! - §4.2 p143: localization/canonicalization not implemented
//! - §4.4 p195: exclusion group short/long format not implemented
//! - §4.4 p199: setProperty/bindItems not implemented
17
/// XFA Spec 3.3 §4.4 p176 — Merge mode controlling how data is bound to template.
///
/// - `ConsumeData` (default): walk the template tree top-down, binding each
///   node against the current data context positionally (by name within context).
/// - `MatchTemplate`: data drives the merge — for each data node, find the
///   template node by name match regardless of hierarchy depth.
///
/// `MergeMode::default()` yields [`MergeMode::ConsumeData`], matching the
/// documented default so callers do not have to hard-code the variant.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum MergeMode {
    /// XFA §4.4 p176 — template-driven merge (default).
    #[default]
    ConsumeData,
    /// XFA §4.4 p176 — data-driven merge: match data nodes to template by name.
    MatchTemplate,
}
31
32/// Detect the merge mode from the XFA config packet or default to `ConsumeData`.
33///
34/// XFA Spec 3.3 §4.4 p176: the merge mode can be specified via the
35/// `<config><present><xfa:generator>` or `<config><acrobat><xdp><packet>` elements.
36/// In practice most forms do not specify a mode and `consumeData` is implied.
37/// We detect `matchTemplate` by scanning for explicit `matchTemplate` text in
38/// a `<config>` packet when one is present in the template XML.
39pub fn detect_merge_mode(template_xml: &str) -> MergeMode {
40    // Quick scan: if the template explicitly requests matchTemplate mode.
41    if template_xml.contains("matchTemplate") {
42        return MergeMode::MatchTemplate;
43    }
44    MergeMode::ConsumeData
45}
46
47use crate::error::{Result, XfaError};
48use roxmltree::Node;
49use xfa_dom_resolver::data_dom::{DataDom, DataNodeId};
50use xfa_dom_resolver::som::resolve_data_path;
51use xfa_layout_engine::form::{
52    AnchorType, ContentArea, DrawContent, EventScript, FieldKind, FormNode, FormNodeId,
53    FormNodeMeta, FormNodeStyle, FormNodeType, FormTree, GroupKind, Occur, Presence, RichTextSpan,
54    ScriptLanguage,
55};
56use xfa_layout_engine::text::{FontFamily, FontMetrics};
57use xfa_layout_engine::types::{
58    BoxModel, Caption, CaptionPlacement, Insets, LayoutStrategy, Measurement, TextAlign,
59    VerticalAlign,
60};
61
/// Merges an XFA template (XML) with data from a DataDom to produce a FormTree.
///
/// Created with [`FormMerger::new`], optionally given embedded images through
/// [`FormMerger::with_image_files`], and consumed by [`FormMerger::merge`],
/// which returns the finished tree together with the root node id.
pub struct FormMerger<'a> {
    /// Borrowed data DOM that template nodes are bound against.
    data_dom: &'a DataDom,
    /// Form tree under construction; moved out to the caller by `merge()`.
    form_tree: FormTree,
    /// Embedded image files from the PDF's Names/EmbeddedFiles tree,
    /// keyed by filename (e.g. `.\lintje.jpg`).  Used to resolve
    /// `<image href="…">` references in the XFA template (XFA §2.3).
    image_files: std::collections::HashMap<String, Vec<u8>>,
    /// M5.2b — running count of field bindings whose `presence` attribute
    /// was `invisible`, `hidden`, or `inactive` and therefore did NOT set
    /// the global `any_data_bound` signal. Reported via
    /// `emit_invisible_binding_summary` just before `merge()` returns.
    invisible_bindings_ignored: usize,
}
76
77fn area_layout(elem: Node<'_, '_>) -> LayoutStrategy {
78    if elem.tag_name().name() == "area" && attr(elem, "layout").is_none() {
79        let has_positioned_child = elem.children().filter(|n| n.is_element()).any(|child| {
80            attr(child, "x")
81                .and_then(parse_dim)
82                .is_some_and(|v| v > 0.0)
83                || attr(child, "y")
84                    .and_then(parse_dim)
85                    .is_some_and(|v| v > 0.0)
86        });
87        if has_positioned_child {
88            LayoutStrategy::Positioned
89        } else {
90            LayoutStrategy::TopToBottom
91        }
92    } else {
93        parse_layout_attr(elem)
94    }
95}
96
97impl<'a> FormMerger<'a> {
98    /// new.
99    pub fn new(data_dom: &'a DataDom) -> Self {
100        Self {
101            data_dom,
102            form_tree: FormTree::new(),
103            image_files: std::collections::HashMap::new(),
104            invisible_bindings_ignored: 0,
105        }
106    }
107
108    /// Set embedded image files for resolving `<image href="…">` references.
109    pub fn with_image_files(mut self, files: std::collections::HashMap<String, Vec<u8>>) -> Self {
110        self.image_files = files;
111        self
112    }
113
114    /// Merge the template XML into a FormTree.
115    ///
116    /// XFA Spec 3.3 §4.4 p176 — supports both merge modes:
117    /// - `consumeData` (default): walk template top-down, bind each node
118    ///   against matching data context positionally.
119    /// - `matchTemplate`: walk data nodes, find template counterparts by name,
120    ///   bind regardless of hierarchy depth.
121    pub fn merge(mut self, template_xml: &str) -> Result<(FormTree, FormNodeId)> {
122        let mode = detect_merge_mode(template_xml);
123        log::debug!(
124            "XFA merge: mode={:?}, {} data nodes",
125            mode,
126            self.data_dom.len()
127        );
128        let doc = roxmltree::Document::parse(template_xml)
129            .map_err(|e| XfaError::ParseFailed(format!("template XML parse error: {e}")))?;
130        let root_elem = doc.root_element();
131
132        // The packet may start with <template> directly, or may have been
133        // wrapped inside <xdp:xdp>. Accept both.
134        let template_elem = if root_elem.tag_name().name() == "template" {
135            root_elem
136        } else {
137            // Find the first <template> descendant.
138            find_first_child_by_name(root_elem, "template").ok_or_else(|| {
139                XfaError::PacketNotFound("no <template> element found".to_string())
140            })?
141        };
142
143        if mode == MergeMode::MatchTemplate {
144            // XFA §4.4 p176 matchTemplate: pre-index all template nodes by name,
145            // then for each data node find and bind to its template counterpart.
146            self.apply_match_template_bindings(template_elem);
147        }
148
149        // Phase D-ι: collect <variables> <script name="X"> blocks at the root
150        // subform level. XFA 3.3 §5.5 — these named scripts define
151        // form-level objects whose top-level `var`/`function` declarations
152        // are accessible from event/calculate scripts via `<scriptName>.X`.
153        // Sandboxed JS runtime evaluates these once per document.
154        self.collect_variables_scripts(template_elem);
155        let (root_id, _trailing) = self.parse_node(template_elem, None, true)?;
156
157        // M5.2b: emit the once-per-merge summary for rule
158        // `IgnoreInvisibleServerMetadataBindingsForDataBoundSignal`.
159        // Silent when no invisible binding was observed.
160        crate::adobe_compat::emit_invisible_binding_summary(self.invisible_bindings_ignored);
161
162        Ok((self.form_tree, root_id))
163    }
164
165    /// Phase D-ι / D-ι.2: walk `<variables>` blocks at every subform level
166    /// below the template root (XFA 3.3 §5.5). Root-subform scripts get
167    /// `subform_scope = None` (globally accessible); named descendant-subform
168    /// scripts get `subform_scope = Some(subform_name)` so they are accessible
169    /// via `subformHandle.variables.scriptName`.
170    fn collect_variables_scripts(&mut self, template_elem: Node<'_, '_>) {
171        let Some(root_subform) = find_first_child_by_name(template_elem, "subform") else {
172            return;
173        };
174        self.collect_variables_scripts_recursive(root_subform, None);
175    }
176
177    fn collect_variables_scripts_recursive(
178        &mut self,
179        subform: Node<'_, '_>,
180        subform_scope: Option<String>,
181    ) {
182        for child in subform.children().filter(|n| n.is_element()) {
183            match child.tag_name().name() {
184                "variables" => {
185                    for var_child in child.children().filter(|n| n.is_element()) {
186                        if var_child.tag_name().name() != "script" {
187                            continue;
188                        }
189                        let Some(name) = attr(var_child, "name") else {
190                            continue;
191                        };
192                        let body: String = var_child
193                            .children()
194                            .filter(|n| n.is_text())
195                            .filter_map(|n| n.text())
196                            .collect::<String>();
197                        if body.trim().is_empty() {
198                            continue;
199                        }
200                        self.form_tree.variables_scripts.push((
201                            subform_scope.clone(),
202                            name.to_string(),
203                            body,
204                        ));
205                    }
206                }
207                "subform" | "area" | "exclGroup" => {
208                    let child_scope = attr(child, "name")
209                        .filter(|n| !n.is_empty())
210                        .map(|n| n.to_string());
211                    self.collect_variables_scripts_recursive(child, child_scope);
212                }
213                _ => {}
214            }
215        }
216    }
217
218    /// XFA Spec 3.3 §4.4 p176 — matchTemplate pre-pass.
219    ///
220    /// In matchTemplate mode the data DOM drives the merge: for each data node
221    /// that has a matching (same-named) template node, the binding is resolved
222    /// purely by name without regard to hierarchy depth.  We implement this by
223    /// walking the data DOM once and storing the resolved data value for every
224    /// field whose name appears as a data child anywhere in the data tree.
225    ///
226    /// Because `parse_field` already performs a global fallback via
227    /// `find_value_in_descendants`, matchTemplate mode for most real-world forms
228    /// reduces to ensuring the global fallback is preferred over the positional
229    /// context match.  The actual switch happens in `lookup_value_by_name`:
230    /// when matchTemplate is active we skip the context-child check and go
231    /// straight to the global search.
232    fn apply_match_template_bindings(&mut self, _template_elem: roxmltree::Node<'_, '_>) {
233        // Intentionally empty: the matchTemplate semantics are realised by
234        // passing `None` as the data_context for all field lookups when the
235        // mode is MatchTemplate, which forces `lookup_value_by_name` to use
236        // the global descendant search (find_value_in_descendants) exclusively.
237        // The parse_node / parse_field paths already handle None context.
238        // This hook exists for future extension (e.g. explicit bind-ref tables).
239    }
240
241    fn parse_node(
242        &mut self,
243        elem: Node<'_, '_>,
244        data_context: Option<DataNodeId>,
245        is_root: bool,
246    ) -> Result<(FormNodeId, (bool, Option<String>))> {
247        let tag = elem.tag_name().name();
248
249        let (node, trailing_info) = match tag {
250            "template" => {
251                let mut n = self.blank_node("root", FormNodeType::Root);
252                n.layout = LayoutStrategy::TopToBottom;
253                let ti = self.add_children(&mut n, elem, data_context)?;
254                (n, ti)
255            }
256            "subform" | "exclGroup" | "area" => {
257                let name = attr(elem, "name").unwrap_or("").to_string();
258                let layout = area_layout(elem);
259                let bm = parse_box_model(elem);
260                let occur = parse_occur(elem);
261                self.build_subform_instance(elem, data_context, is_root, occur, name, layout, bm)?
262            }
263            "field" => (self.parse_field(elem, data_context)?, (false, None)),
264            "draw" => (self.parse_draw(elem, data_context)?, (false, None)),
265            "pageSet" => (self.parse_page_set(elem, data_context)?, (false, None)),
266            "pageArea" => (self.parse_page_area(elem, data_context)?, (false, None)),
267            _ => {
268                let mut n = self.blank_node(tag, FormNodeType::Subform);
269                let ti = self.add_children(&mut n, elem, data_context)?;
270                (n, ti)
271            }
272        };
273
274        if tag == "exclGroup" {
275            // fixes #798: flatten.rs uses FormMerger, not template_parser, so
276            // exclusion-group selection must be resolved here. XFA 3.3 §4.4.5
277            // / §11.2 says assigning a value to an exclGroup selects the child
278            // whose asserted value matches the group's bound value.
279            self.apply_exclusive_choice_value(elem, data_context, &node.children);
280        }
281
282        let mut meta = parse_node_meta(elem);
283
284        // XFA Spec 3.3 §7.4.2 — presence binding: a data node named "presence"
285        // as a child of this element's data context can override the template's
286        // static `presence` attribute.  Check for a bound data value and apply it.
287        if let Some(bound_presence) = self.lookup_value_by_name("presence", data_context) {
288            meta.presence = parse_presence_str(&bound_presence);
289        }
290
291        let is_draw_or_field = tag == "draw" || tag == "field";
292        if is_draw_or_field {
293            // Fields get their <margin topInset/...> bridged to style.inset_*_pt
294            // so the renderer offsets the value and shrinks the border/bg to
295            // the inner rect (render_bridge.rs:238-280). Draws skip this bridge:
296            // draws in complex background-image forms (e.g. VA Form 10-2478,
297            // 49f8705c) carry decorative insets that visually misalign with the
298            // pre-rendered form backdrop when shifted inward. Keeping the
299            // transfer only for fields preserves the #841 field-padding fix
300            // without re-introducing the −0.18 regression on 49f8705c that
301            // c24798f7a originally set out to fix.
302            if tag == "field" {
303                meta.style.inset_top_pt = Some(node.box_model.margins.top);
304                meta.style.inset_bottom_pt = Some(node.box_model.margins.bottom);
305                meta.style.inset_left_pt = Some(node.box_model.margins.left);
306                meta.style.inset_right_pt = Some(node.box_model.margins.right);
307            }
308            if meta.style.font_weight.is_none() {
309                if let Some(weight) = extract_exdata_font_weight(elem) {
310                    meta.style.font_weight = Some(weight);
311                }
312            }
313            if meta.style.font_style.is_none() {
314                if let Some(style) = extract_exdata_font_style(elem) {
315                    meta.style.font_style = Some(style);
316                }
317            }
318            if meta.style.font_family.is_none() {
319                if let Some(family) = extract_exdata_font_family(elem) {
320                    meta.style.font_family = Some(family);
321                }
322            }
323            if meta.style.text_color.is_none() {
324                if let Some(color) = extract_exdata_color(elem) {
325                    meta.style.text_color = Some(color);
326                }
327            }
328            if meta.style.space_above_pt.is_none() || meta.style.space_below_pt.is_none() {
329                if let Some((above, below)) = extract_exdata_margins(elem) {
330                    if meta.style.space_above_pt.is_none() {
331                        meta.style.space_above_pt = Some(above);
332                    }
333                    if meta.style.space_below_pt.is_none() {
334                        meta.style.space_below_pt = Some(below);
335                    }
336                }
337            }
338            if meta.style.rich_text_spans.is_none() {
339                meta.style.rich_text_spans = parse_exdata_rich_text_spans(elem);
340            }
341        }
342        // Phase D-γ: store the data binding so the JS runtime can resolve
343        // `$record` (the data record for the current subform's context).
344        meta.bound_data_node = data_context.map(|c| c.as_raw());
345        let id = self.form_tree.add_node_with_meta(node, meta);
346        Ok((id, trailing_info))
347    }
348
349    #[allow(clippy::too_many_arguments)]
350    fn build_subform_instance(
351        &mut self,
352        elem: Node<'_, '_>,
353        data_context: Option<DataNodeId>,
354        is_root: bool,
355        occur: Occur,
356        name: String,
357        layout: LayoutStrategy,
358        mut bm: BoxModel,
359    ) -> Result<(FormNode, (bool, Option<String>))> {
360        if layout == LayoutStrategy::TopToBottom && bm.width.is_none() && is_root {
361            bm.width = Some(612.0);
362        }
363
364        // XFA Spec 3.3 §4.4 p193 — transparent nodes: a subform with no `name`
365        // attribute is "transparent" to data binding.  Its children bind against
366        // the parent's data context directly — we do NOT descend into a child
367        // data group.
368        if name.is_empty() {
369            // Transparent subform: pass parent context straight through.
370            let mut n = FormNode {
371                name,
372                node_type: FormNodeType::Subform,
373                box_model: bm,
374                layout,
375                children: Vec::new(),
376                occur,
377                font: FontMetrics::default(),
378                calculate: None,
379                validate: None,
380                column_widths: Vec::new(),
381                col_span: 1,
382            };
383            let ti = self.add_children(&mut n, elem, data_context)?;
384            return Ok((n, ti));
385        }
386
387        // XFA Spec 3.3 §4.4.3 p180-185 — Data binding for subforms:
388        // Step 1: "direct match" — find a data node with matching name
389        // among the current context's children.
390        // Step 2: scope matching — walk ancestor chain if direct match fails
391        //   (XFA §4.4.3 p185).
392        //
393        // Wave-3 fidelity fix: when `expand_repeating_subform_instances`
394        // builds an instance from a pre-bound per-instance data record
395        // (i.e. `data_context` is *already* the bound node, whose name
396        // matches the subform's), do **not** re-resolve. The scope-walk
397        // fallback would otherwise return the first sibling named
398        // `name` (always instance 0) and every clamped instance would
399        // collapse to the first data record's values. The wave-3
400        // dossier covers this in detail.
401        let already_bound_instance = data_context
402            .and_then(|ctx| self.data_dom.get(ctx))
403            .is_some_and(|n| n.name() == name);
404        let mut child_context = data_context;
405        if !already_bound_instance {
406            if let Some(ctx) = data_context {
407                // Direct child match
408                let matches = self.data_dom.children_by_name(ctx, &name);
409                if let Some(&first) = matches.first() {
410                    child_context = Some(first);
411                } else {
412                    // Scope match: walk up ancestor chain
413                    child_context = self.resolve_with_scope_group(ctx, &name).or(data_context);
414                }
415            } else if let Some(root) = self.data_dom.root() {
416                if self.data_dom.get(root).is_some_and(|n| n.name() == name) {
417                    // Exact name match: the data root IS the subform's record.
418                    child_context = Some(root);
419                } else {
420                    let matches = self.data_dom.children_by_name(root, &name);
421                    if let Some(&first) = matches.first() {
422                        // Named child match (e.g. template "form1" → data <form1>).
423                        child_context = Some(first);
424                    } else {
425                        // No name match: bind the root subform to the data root so
426                        // that `$record.FIELD` paths in scripts resolve correctly.
427                        // XFA Spec 3.3 §4.4.3 p180: when the root subform has no
428                        // explicit bind element and its name doesn't match any data
429                        // node, the form context is the data DOM root — making
430                        // `$record` refer to the top-level data element (e.g.
431                        // `$record.COMPANY_ADDRESS.nodes` works when data root has
432                        // `<COMPANY_ADDRESS>` as a direct child).
433                        child_context = Some(root);
434                    }
435                }
436            }
437        }
438
439        let mut n = FormNode {
440            name,
441            node_type: FormNodeType::Subform,
442            box_model: bm,
443            layout,
444            children: Vec::new(),
445            occur,
446            font: FontMetrics::default(),
447            calculate: None,
448            validate: None,
449            column_widths: Vec::new(),
450            col_span: 1,
451        };
452        let ti = self.add_children(&mut n, elem, child_context)?;
453        Ok((n, ti))
454    }
455
456    // XFA Spec 3.3 §9.2 "Variable Number of Subforms" (p336):
457    // The data binding process creates min copies, then adds more copies
458    // for each additional data match up to max. When max=-1 the template
459    // is unbounded, but merger-time expansion must still stop at the
460    // number of matching data records so we do not synthesize extra
461    // siblings and over-paginate repeating content.
462    /// XFA Spec 3.3 §4.4 p186-192 — Repeating subforms: when `<occur>` allows
463    /// multiple instances (max > 1 or max = -1), the number of form subform
464    /// instances is driven by matching data records. Each data record creates
465    /// one subform instance, clamped to [occur.min, occur.max].
466    #[allow(clippy::type_complexity)]
467    fn expand_repeating_subform_instances(
468        &mut self,
469        element: Node<'_, '_>,
470        data_context: Option<DataNodeId>,
471        is_root: bool,
472    ) -> Result<Vec<(FormNodeId, (bool, Option<String>))>> {
473        let name = attr(element, "name").unwrap_or("").to_string();
474        let occur = parse_occur(element);
475
476        // XFA §4.4.3: match="none" means no data binding, no automatic expansion.
477        // The subform exists as a single instance; only scriptable instanceManager
478        // may add more.
479        let (_bind_ref_unused, bind_none) = parse_bind(element);
480        if bind_none {
481            return Ok(vec![self.parse_node(element, data_context, is_root)?]);
482        }
483
484        // Get raw bind ref for multi-segment SOM path resolution.
485        // parse_bind_data_name() only takes the last segment, which fails for
486        // multi-segment refs like "$.group.field[*]" (XFA §4.4 p199).
487        let bind_ref = find_first_child_by_name(element, "bind")
488            .and_then(|b| attr(b, "ref"))
489            .map(|s| s.to_string());
490
491        let data_instances = if let Some(ref raw_ref) = bind_ref {
492            // Normalize $record → $ for SOM parser compatibility
493            let normalized = if let Some(rest) = raw_ref.strip_prefix("$record") {
494                format!("${}", rest)
495            } else {
496                raw_ref.clone()
497            };
498            // Ensure [*] on last segment: repeating subforms need ALL matches
499            let with_wildcard = if normalized.ends_with("[*]") {
500                normalized
501            } else {
502                format!("{}[*]", normalized)
503            };
504            // Full SOM resolution; fall back to root when no data_context
505            resolve_data_path(self.data_dom, &with_wildcard, data_context)
506                .or_else(|_| resolve_data_path(self.data_dom, &with_wildcard, self.data_dom.root()))
507                .unwrap_or_default()
508        } else {
509            // No bind ref: look up by element name
510            if let Some(ctx) = data_context {
511                self.data_dom.children_by_name(ctx, &name)
512            } else if let Some(root) = self.data_dom.root() {
513                self.data_dom.children_by_name(root, &name)
514            } else {
515                Vec::new()
516            }
517        };
518
519        let data_count = data_instances.len() as u32;
520        // M5.3: instance count comes from the executable
521        // `repeating_subform_instance_count_clamped_to_occur_range` rule.
522        // Behaviour bit-identical to the prior inline
523        // `data_count.clamp(min, max)`; the rule emits an `(occur, …)`
524        // trace anchor when the clamp fires (max-cap or min-lift) and
525        // stays silent when the data count is already in range.
526        let (count, _outcome) =
527            crate::adobe_compat::repeating_subform_instance_count_clamped_to_occur_range(
528                &name, data_count, occur.min, occur.max,
529            );
530
531        let layout = area_layout(element);
532        let bm = parse_box_model(element);
533
534        if count == 0 && occur.max.unwrap_or(u32::MAX) > 0 {
535            // Build a single Hidden skeleton instance so `parent._<name>` resolves
536            // to a usable InstanceManager. The skeleton is layout-skipped via
537            // presence=Hidden; the flag tells the host that count() should return 0.
538            let (inst_node, trailing) = self.build_subform_instance(
539                element,
540                None,
541                is_root,
542                Occur::once(),
543                name.clone(),
544                layout,
545                bm.clone(),
546            )?;
547            let mut meta = parse_node_meta(element);
548            meta.is_zero_instance_prototype = true;
549            meta.presence = Presence::Hidden;
550            let inst_id = self.form_tree.add_node_with_meta(inst_node, meta);
551            return Ok(vec![(inst_id, trailing)]);
552        }
553
554        let mut instances = Vec::with_capacity(count as usize);
555
556        for i in 0..count {
557            let instance_data_ctx = data_instances.get(i as usize).copied();
558            let (inst_node, trailing) = self.build_subform_instance(
559                element,
560                instance_data_ctx,
561                is_root,
562                Occur::once(),
563                name.clone(),
564                layout,
565                bm.clone(),
566            )?;
567            if element.tag_name().name() == "exclGroup" {
568                self.apply_exclusive_choice_value(element, instance_data_ctx, &inst_node.children);
569            }
570            let mut meta = parse_node_meta(element);
571            // Phase D-γ: bind each repeating instance to its own data record.
572            meta.bound_data_node = instance_data_ctx.map(|c| c.as_raw());
573            let inst_id = self.form_tree.add_node_with_meta(inst_node, meta);
574            instances.push((inst_id, trailing));
575        }
576
577        Ok(instances)
578    }
579
580    /// Search descendants of a data node for a DataValue with the given name.
581    /// Returns the first matching value (breadth-first).
582    ///
583    /// XFA Spec 3.3 §4.4.3 p185 — global search used as final fallback after
584    /// scope matching (direct child → sibling → ancestor chain) all fail.
585    fn find_value_in_descendants(&self, node: DataNodeId, name: &str) -> Option<String> {
586        for &child in self.data_dom.children(node) {
587            if let Some(cn) = self.data_dom.get(child) {
588                if cn.name() == name && cn.is_value() {
589                    return self.data_dom.value(child).ok().map(|s| s.to_string());
590                }
591            }
592        }
593        // Recurse into child groups
594        for &child in self.data_dom.children(node) {
595            if self.data_dom.get(child).is_some_and(|n| n.is_group()) {
596                if let Some(val) = self.find_value_in_descendants(child, name) {
597                    return Some(val);
598                }
599            }
600        }
601        None
602    }
603
604    /// XFA Spec 3.3 §4.4.3 p185 — Scope matching for DataValue nodes.
605    ///
606    /// Resolution order per spec:
607    /// 1. Direct child of current data context.
608    /// 2. Sibling (direct child of parent data context).
609    /// 3. Ancestor chain — walk up to root, try children at each level.
610    /// 4. Global scope (root-level and all descendants).
611    fn resolve_with_scope_value(&self, data_context: DataNodeId, name: &str) -> Option<String> {
612        // 1. Direct child
613        let direct = self.data_dom.children_by_name(data_context, name);
614        if let Some(&val_id) = direct.first() {
615            if self.data_dom.get(val_id).is_some_and(|n| n.is_value()) {
616                return self.data_dom.value(val_id).ok().map(|s| s.to_string());
617            }
618        }
619
620        // 2. Sibling (parent's children)
621        if let Some(parent) = self.data_dom.get(data_context).and_then(|n| n.parent()) {
622            let sibling_matches = self.data_dom.children_by_name(parent, name);
623            if let Some(&val_id) = sibling_matches.first() {
624                if self.data_dom.get(val_id).is_some_and(|n| n.is_value()) {
625                    return self.data_dom.value(val_id).ok().map(|s| s.to_string());
626                }
627            }
628
629            // 3. Walk up ancestor chain
630            let mut ancestor = self.data_dom.get(parent).and_then(|n| n.parent());
631            while let Some(anc_id) = ancestor {
632                let anc_matches = self.data_dom.children_by_name(anc_id, name);
633                if let Some(&val_id) = anc_matches.first() {
634                    if self.data_dom.get(val_id).is_some_and(|n| n.is_value()) {
635                        return self.data_dom.value(val_id).ok().map(|s| s.to_string());
636                    }
637                }
638                ancestor = self.data_dom.get(anc_id).and_then(|n| n.parent());
639            }
640        }
641
642        // 4. Global fallback from root
643        self.data_dom
644            .root()
645            .and_then(|root| self.find_value_in_descendants(root, name))
646    }
647
648    /// XFA Spec 3.3 §4.4.3 p185 — Scope matching for DataGroup nodes.
649    ///
650    /// Used when looking for a named child data group for subform binding.
651    /// Walks the ancestor chain to find a matching group node by name.
652    fn resolve_with_scope_group(&self, data_context: DataNodeId, name: &str) -> Option<DataNodeId> {
653        // Walk up the ancestor chain looking for a same-named group
654        let mut cursor = self.data_dom.get(data_context).and_then(|n| n.parent());
655        while let Some(anc_id) = cursor {
656            let matches = self.data_dom.children_by_name(anc_id, name);
657            if let Some(&grp_id) = matches.first() {
658                if self.data_dom.get(grp_id).is_some_and(|n| n.is_group()) {
659                    return Some(grp_id);
660                }
661            }
662            cursor = self.data_dom.get(anc_id).and_then(|n| n.parent());
663        }
664        None
665    }
666
667    fn lookup_value_by_name(&self, name: &str, data_context: Option<DataNodeId>) -> Option<String> {
668        if name.is_empty() {
669            return None;
670        }
671
672        if let Some(ctx) = data_context {
673            // Use full scope resolution: direct → sibling → ancestor → global
674            return self.resolve_with_scope_value(ctx, name);
675        }
676
677        self.data_dom
678            .root()
679            .and_then(|root| self.find_value_in_descendants(root, name))
680    }
681
682    fn apply_exclusive_choice_value(
683        &mut self,
684        elem: Node<'_, '_>,
685        data_context: Option<DataNodeId>,
686        child_ids: &[FormNodeId],
687    ) {
688        let Some(group_name) = attr(elem, "name") else {
689            return;
690        };
691        let Some(group_value) = self.lookup_value_by_name(group_name, data_context) else {
692            return;
693        };
694
695        for &child_id in child_ids {
696            let item_value = self.form_tree.meta(child_id).item_value.clone();
697            if let FormNodeType::Field { value } = &mut self.form_tree.get_mut(child_id).node_type {
698                // fixes #798: only the matching radio/check child should stay
699                // asserted. The others render as off/null so Acrobat-style
700                // radio groups do not show multiple active choices.
701                *value = if item_value.as_deref() == Some(group_value.as_str()) {
702                    group_value.clone()
703                } else {
704                    String::new()
705                };
706            }
707        }
708    }
709
710    /// XFA Spec 3.3 §4.4.3 p180 — Field data binding: fields are leaf nodes
711    /// that bind to DataValue nodes in the data DOM.
712    fn parse_field(
713        &mut self,
714        elem: Node<'_, '_>,
715        data_context: Option<DataNodeId>,
716    ) -> Result<FormNode> {
717        let name = attr(elem, "name").unwrap_or("").to_string();
718        let bm = parse_box_model(elem);
719
720        let mut value = extract_value_text(elem).unwrap_or_default();
721
722        // XFA Spec 3.3 §4.4.3 p180-185 — Field binding with scope resolution:
723        // 1. Direct match: search current context children by name (§4.4.3 p180)
724        // 2. Sibling match: children of parent context (§4.4.3 p185)
725        // 3. Ancestor chain walk (§4.4.3 p185)
726        // 4. Global fallback: descendant search from data root
727        //
728        // NOTE: §4.4 p197 — attribute matching (data attributes → fields) is
729        // not implemented; we only match element nodes.
730        if let Some(bound_value) = self.lookup_value_by_name(&name, data_context) {
731            value = bound_value;
732            // M5.2b: `ignore_invisible_server_metadata_bindings` is the
733            // executable Adobe-compat rule that decides whether this
734            // binding sets the global `any_data_bound` signal. Behaviour
735            // is bit-identical to the prior inline match. The per-field
736            // call is silent; the merge() exit point reports the total
737            // ignored count via `emit_invisible_binding_summary`.
738            let presence = attr(elem, "presence").unwrap_or("");
739            match crate::adobe_compat::ignore_invisible_server_metadata_bindings(presence) {
740                crate::adobe_compat::InvisibleBindingDecision::AcceptedVisible => {
741                    self.form_tree.any_data_bound = true;
742                }
743                crate::adobe_compat::InvisibleBindingDecision::IgnoredInvisible => {
744                    self.invisible_bindings_ignored += 1;
745                }
746            }
747        }
748
749        let mut bm_with_caption = bm.clone();
750        if !is_hidden(elem) {
751            if let Some(cap) = parse_caption(elem) {
752                bm_with_caption.caption = Some(cap);
753            }
754        }
755
756        let mut font = parse_font_metrics(elem);
757        if let Some(html_size) = extract_exdata_font_size(elem) {
758            font.size = html_size;
759        }
760        if let Some(css_align) = extract_exdata_text_align(elem) {
761            font.text_align = css_align;
762        }
763
764        Ok(FormNode {
765            name,
766            node_type: FormNodeType::Field { value },
767            box_model: bm_with_caption,
768            layout: LayoutStrategy::Positioned,
769            children: Vec::new(),
770            occur: Occur::once(),
771            font,
772            calculate: None,
773            validate: None,
774            column_widths: Vec::new(),
775            col_span: parse_col_span(elem),
776        })
777    }
778
779    /// XFA Spec 3.3 §4.4 p180 — Draw elements are static content that do
780    /// not participate in data binding (they have no `<bind>` element).
781    /// The `_data_context` parameter is unused.
782    fn parse_draw(
783        &mut self,
784        elem: Node<'_, '_>,
785        _data_context: Option<DataNodeId>,
786    ) -> Result<FormNode> {
787        let name = attr(elem, "name").unwrap_or("").to_string();
788        let bm = parse_box_model(elem);
789
790        if let Some(draw_content) = extract_draw_content(elem) {
791            return Ok(FormNode {
792                name,
793                node_type: FormNodeType::Draw(draw_content),
794                box_model: bm,
795                layout: LayoutStrategy::Positioned,
796                children: Vec::new(),
797                occur: Occur::once(),
798                font: FontMetrics::default(),
799                calculate: None,
800                validate: None,
801                column_widths: Vec::new(),
802                col_span: 1,
803            });
804        }
805
806        if let Some((image_data, mime_type)) = extract_value_image(elem, &self.image_files) {
807            return Ok(FormNode {
808                name,
809                node_type: FormNodeType::Image {
810                    data: image_data,
811                    mime_type,
812                },
813                box_model: bm,
814                layout: LayoutStrategy::Positioned,
815                children: Vec::new(),
816                occur: Occur::once(),
817                font: FontMetrics::default(),
818                calculate: None,
819                validate: None,
820                column_widths: Vec::new(),
821                col_span: 1,
822            });
823        }
824
825        let mut font = parse_font_metrics(elem);
826        if let Some(html_size) = extract_exdata_font_size(elem) {
827            font.size = html_size;
828        }
829        if let Some(css_align) = extract_exdata_text_align(elem) {
830            font.text_align = css_align;
831        }
832
833        let content = extract_value_text(elem).unwrap_or_default();
834
835        Ok(FormNode {
836            name,
837            node_type: FormNodeType::Draw(DrawContent::Text(content)),
838            box_model: bm,
839            layout: LayoutStrategy::Positioned,
840            children: Vec::new(),
841            occur: Occur::once(),
842            font,
843            calculate: None,
844            validate: None,
845            column_widths: Vec::new(),
846            col_span: 1,
847        })
848    }
849
850    fn parse_page_set(
851        &mut self,
852        elem: Node<'_, '_>,
853        data_context: Option<DataNodeId>,
854    ) -> Result<FormNode> {
855        let name = attr(elem, "name").unwrap_or("pageSet").to_string();
856        let mut node = FormNode {
857            name,
858            node_type: FormNodeType::PageSet,
859            box_model: BoxModel {
860                max_width: f64::MAX,
861                max_height: f64::MAX,
862                ..Default::default()
863            },
864            layout: LayoutStrategy::TopToBottom,
865            children: Vec::new(),
866            occur: Occur::once(),
867            font: FontMetrics::default(),
868            calculate: None,
869            validate: None,
870            column_widths: Vec::new(),
871            col_span: 1,
872        };
873        for child in elem.children().filter(|n| n.is_element()) {
874            if child.tag_name().name() == "pageArea" {
875                let (child_id, _) = self.parse_node(child, data_context, false)?;
876                node.children.push(child_id);
877            }
878        }
879        Ok(node)
880    }
881
882    fn parse_page_area(
883        &mut self,
884        elem: Node<'_, '_>,
885        data_context: Option<DataNodeId>,
886    ) -> Result<FormNode> {
887        let name = attr(elem, "name").unwrap_or("").to_string();
888        let (page_w, page_h) = read_medium(elem);
889        let content_areas = read_content_areas(elem, page_w, page_h);
890
891        let bm = BoxModel {
892            width: Some(page_w),
893            height: Some(page_h),
894            ..Default::default()
895        };
896
897        let mut node = FormNode {
898            name,
899            node_type: FormNodeType::PageArea { content_areas },
900            box_model: bm,
901            layout: LayoutStrategy::Positioned,
902            children: Vec::new(),
903            occur: Occur::once(),
904            font: FontMetrics::default(),
905            calculate: None,
906            validate: None,
907            column_widths: Vec::new(),
908            col_span: 1,
909        };
910        self.add_children(&mut node, elem, data_context)?;
911        Ok(node)
912    }
913
914    /// Recursively process child elements of a template node.
915    ///
916    /// XFA Spec 3.3 §4.4 p180 — the merge walks template children in document
917    /// order, binding each to the current data context.
918    ///
919    /// TODO: XFA Spec 3.3 §4.4 p198 — re-normalization step: after all template
920    /// children are bound, excess/unmatched data nodes should create new form
921    /// nodes. Not implemented.
922    ///
923    /// TODO: XFA Spec 3.3 §4.4 p199 — setProperty and bindItems: after binding,
924    /// `<setProperty>` and `<bindItems>` on `<bind>` should be evaluated to
925    /// dynamically set properties or populate choice lists from data. Not implemented.
926    ///
927    /// TODO: XFA Spec 3.3 §4.2 p143-170 — localization: data values should be
928    /// canonicalized using picture clauses before comparison/binding. Not implemented.
929    fn add_children(
930        &mut self,
931        node: &mut FormNode,
932        elem: Node<'_, '_>,
933        data_context: Option<DataNodeId>,
934    ) -> Result<(bool, Option<String>)> {
935        let mut pending_break = false;
936        let mut pending_break_target = None;
937        let mut pending_ca_break = false;
938
939        for child in elem.children().filter(|n| n.is_element()) {
940            let tag = child.tag_name().name();
941            match tag {
942                "subform" | "field" | "draw" | "pageSet" | "pageArea" | "exclGroup" | "area" => {
943                    let child_entries = if matches!(tag, "subform" | "exclGroup" | "area") {
944                        let name = attr(child, "name").unwrap_or("");
945                        let occur = parse_occur(child);
946                        let (_, bind_none) = parse_bind(child);
947                        // M5.3b: bind-none short-circuit is now the
948                        // executable `bind_none_subform_does_not_auto_expand`
949                        // rule. The gate returns `Blocked` iff bind=none is
950                        // set; the caller still composes structural gates
951                        // (`occur.is_repeating()`, `!name.is_empty()`) around
952                        // it. Behaviour bit-identical to the prior `!bind_none`
953                        // clause in the inline `if`.
954                        let gate = crate::adobe_compat::bind_none_subform_does_not_auto_expand(
955                            name,
956                            occur.is_repeating(),
957                            name.is_empty(),
958                            bind_none,
959                        );
960                        let bind_none_allows =
961                            matches!(gate, crate::adobe_compat::BindNoneExpansionGate::Allowed);
962                        if occur.is_repeating() && !name.is_empty() && bind_none_allows {
963                            self.expand_repeating_subform_instances(child, data_context, false)?
964                        } else {
965                            vec![self.parse_node(child, data_context, false)?]
966                        }
967                    } else {
968                        vec![self.parse_node(child, data_context, false)?]
969                    };
970
971                    for (child_id, (trailing_break, trailing_target)) in child_entries {
972                        if pending_break {
973                            let meta = self.form_tree.meta_mut(child_id);
974                            meta.page_break_before = true;
975                            if meta.break_target.is_none() {
976                                meta.break_target = pending_break_target.take();
977                            }
978                            pending_break = false;
979                        }
980                        if pending_ca_break {
981                            self.form_tree.meta_mut(child_id).content_area_break = true;
982                            pending_ca_break = false;
983                        }
984                        node.children.push(child_id);
985                        if trailing_break {
986                            pending_break = true;
987                            pending_break_target = trailing_target;
988                        }
989                    }
990                }
991                "breakBefore" => {
992                    let target_type = attr(child, "targetType");
993                    if target_type == Some("pageArea") {
994                        pending_break = true;
995                        pending_break_target = attr(child, "target").map(|s| s.to_string());
996                    } else if target_type == Some("contentArea") {
997                        pending_ca_break = true;
998                    }
999                }
1000                "break"
1001                    if attr(child, "before") == Some("pageArea")
1002                        && attr(child, "targetType") == Some("pageArea") =>
1003                {
1004                    pending_break = true;
1005                    pending_break_target = attr(child, "target").map(|s| s.to_string());
1006                }
1007                _ => {}
1008            }
1009        }
1010        Ok((pending_break, pending_break_target))
1011    }
1012
1013    fn blank_node(&self, name: &str, node_type: FormNodeType) -> FormNode {
1014        FormNode {
1015            name: name.to_string(),
1016            node_type,
1017            box_model: BoxModel {
1018                max_width: f64::MAX,
1019                max_height: f64::MAX,
1020                ..Default::default()
1021            },
1022            layout: LayoutStrategy::TopToBottom,
1023            children: Vec::new(),
1024            occur: Occur::once(),
1025            font: FontMetrics::default(),
1026            calculate: None,
1027            validate: None,
1028            column_widths: Vec::new(),
1029            col_span: 1,
1030        }
1031    }
1032}
1033
1034// ---------------------------------------------------------------------------
1035// Helper functions (mirrored from template_parser.rs)
1036// ---------------------------------------------------------------------------
1037
1038fn attr<'a>(elem: Node<'a, '_>, name: &str) -> Option<&'a str> {
1039    elem.attributes()
1040        .find(|a| a.name() == name)
1041        .map(|a| a.value())
1042}
1043
1044fn find_first_child_by_name<'a, 'input>(
1045    elem: Node<'a, 'input>,
1046    name: &str,
1047) -> Option<Node<'a, 'input>> {
1048    elem.children()
1049        .filter(|n| n.is_element())
1050        .find(|n| n.tag_name().name() == name)
1051}
1052
1053fn parse_layout_attr(elem: Node<'_, '_>) -> LayoutStrategy {
1054    match attr(elem, "layout").unwrap_or("") {
1055        "tb" => LayoutStrategy::TopToBottom,
1056        "lr-tb" => LayoutStrategy::LeftToRightTB,
1057        "rl-tb" => LayoutStrategy::RightToLeftTB,
1058        "table" => LayoutStrategy::Table,
1059        "row" => LayoutStrategy::Row,
1060        "paginate" => LayoutStrategy::TopToBottom,
1061        "position" => LayoutStrategy::Positioned,
1062        _ => LayoutStrategy::Positioned,
1063    }
1064}
1065
1066fn parse_anchor_type(elem: Node<'_, '_>) -> AnchorType {
1067    match attr(elem, "anchorType").unwrap_or("") {
1068        "topCenter" => AnchorType::TopCenter,
1069        "topRight" => AnchorType::TopRight,
1070        "middleLeft" => AnchorType::MiddleLeft,
1071        "middleCenter" => AnchorType::MiddleCenter,
1072        "middleRight" => AnchorType::MiddleRight,
1073        "bottomLeft" => AnchorType::BottomLeft,
1074        "bottomCenter" => AnchorType::BottomCenter,
1075        "bottomRight" => AnchorType::BottomRight,
1076        _ => AnchorType::TopLeft,
1077    }
1078}
1079
1080fn parse_box_model(elem: Node<'_, '_>) -> BoxModel {
1081    let w = attr(elem, "w").and_then(parse_dim);
1082    let h = attr(elem, "h").and_then(parse_dim);
1083    let x = attr(elem, "x").and_then(parse_dim).unwrap_or(0.0);
1084    let y = attr(elem, "y").and_then(parse_dim).unwrap_or(0.0);
1085    let min_h = attr(elem, "minH").and_then(parse_dim).unwrap_or(0.0);
1086    let min_w = attr(elem, "minW").and_then(parse_dim).unwrap_or(0.0);
1087    let max_h = attr(elem, "maxH").and_then(parse_dim).unwrap_or(f64::MAX);
1088    let max_w = attr(elem, "maxW").and_then(parse_dim).unwrap_or(f64::MAX);
1089    let margins = parse_margin(elem);
1090
1091    BoxModel {
1092        width: w,
1093        height: h,
1094        x,
1095        y,
1096        margins,
1097        min_width: min_w,
1098        max_width: max_w,
1099        min_height: min_h,
1100        max_height: max_h,
1101        ..Default::default()
1102    }
1103}
1104
1105fn parse_margin(elem: Node<'_, '_>) -> Insets {
1106    if let Some(margin) = find_first_child_by_name(elem, "margin") {
1107        Insets {
1108            top: attr(margin, "topInset").and_then(parse_dim).unwrap_or(0.0),
1109            bottom: attr(margin, "bottomInset")
1110                .and_then(parse_dim)
1111                .unwrap_or(0.0),
1112            left: attr(margin, "leftInset").and_then(parse_dim).unwrap_or(0.0),
1113            right: attr(margin, "rightInset")
1114                .and_then(parse_dim)
1115                .unwrap_or(0.0),
1116        }
1117    } else {
1118        Insets::default()
1119    }
1120}
1121
1122fn parse_dim(s: &str) -> Option<f64> {
1123    if s.trim().parse::<f64>().is_ok() {
1124        return Measurement::parse(&format!("{s}in")).map(|m: Measurement| m.to_points());
1125    }
1126    Measurement::parse(s).map(|m: Measurement| m.to_points())
1127}
1128
/// Parse a percentage string into a fraction: `"96%"` → `0.96`, `"110%"` → `1.1`.
///
/// Whitespace around the whole string and around the number is ignored.
/// Returns `None` when the `%` suffix is missing or the remainder is not a
/// valid `f64`.
fn parse_percentage(s: &str) -> Option<f64> {
    s.trim()
        .strip_suffix('%')
        .and_then(|number| number.trim().parse::<f64>().ok())
        .map(|percent| percent / 100.0)
}
1136
1137/// Parse a letter-spacing string (`"-0.018em"`, `"0.5pt"`, etc.) to points.
1138fn parse_letter_spacing(s: &str, font_size_pt: f64) -> Option<f64> {
1139    let s = s.trim();
1140    if s == "0" {
1141        return Some(0.0);
1142    }
1143    if let Some(num_str) = s.strip_suffix("em") {
1144        let v: f64 = num_str.trim().parse().ok()?;
1145        return Some(v * font_size_pt);
1146    }
1147    Measurement::parse(s).map(|m| m.to_points())
1148}
1149
1150// XFA Spec 3.3 §9.2 "The Occur Element" (p339):
1151// - min: defaults to 1 — minimum copies during non-empty merge
1152// - max: defaults to min (NOT to 1); -1 means unlimited
1153// - initial: defaults to min — copies during empty merge
1154//
1155// §9.2 p357: if max is not supplied, max defaults to min.
1156// §9.2 p357: if initial is not supplied, initial defaults to min.
1157//
1158// Our implementation correctly handles these defaults and the -1 sentinel.
1159fn parse_occur(elem: Node<'_, '_>) -> Occur {
1160    if let Some(occur) = find_first_child_by_name(elem, "occur") {
1161        let min: u32 = attr(occur, "min").and_then(|s| s.parse().ok()).unwrap_or(1);
1162        // XFA Spec 3.3 §9.2 p357: "if the max attribute is not supplied then
1163        // the max property defaults to the value of min."
1164        //
1165        // NOTE: the previous `.map(…).unwrap_or()` chain let parse failures
1166        // (e.g. max="") collapse to `None` (unlimited), causing massive
1167        // over-pagination.  Use an explicit match instead.
1168        let max: Option<u32> = match attr(occur, "max") {
1169            Some("-1") => None, // -1 means unlimited
1170            Some(s) => Some(s.parse::<u32>().unwrap_or(min)),
1171            None => Some(min),
1172        };
1173        // XFA Spec 3.3 §9.2 p357: "if the initial attribute is not supplied
1174        // then the initial property defaults to the value of min."
1175        let initial: u32 = attr(occur, "initial")
1176            .and_then(|s| s.parse().ok())
1177            .unwrap_or(min);
1178        Occur::repeating(min, max, initial)
1179    } else {
1180        Occur::once()
1181    }
1182}
1183
1184fn parse_font_size(s: &str) -> Option<f64> {
1185    if let Ok(v) = s.trim().parse::<f64>() {
1186        return if v > 0.0 { Some(v) } else { None };
1187    }
1188    Measurement::parse(s).map(|m: Measurement| m.to_points())
1189}
1190
1191fn parse_font_metrics(elem: Node<'_, '_>) -> FontMetrics {
1192    let font_elem = find_first_child_by_name(elem, "font");
1193    let size = font_elem
1194        .and_then(|f| attr(f, "size"))
1195        .and_then(parse_font_size)
1196        .unwrap_or(FontMetrics::default().size);
1197    let typeface = font_elem
1198        .and_then(|f| attr(f, "typeface"))
1199        .map(FontFamily::from_typeface)
1200        .unwrap_or_default();
1201    let text_align = find_first_child_by_name(elem, "para")
1202        .and_then(|p| attr(p, "hAlign"))
1203        .map(|a| match a {
1204            "center" => TextAlign::Center,
1205            "right" => TextAlign::Right,
1206            "justify" => TextAlign::Justify,
1207            _ => TextAlign::Left,
1208        })
1209        .unwrap_or_default();
1210    FontMetrics {
1211        size,
1212        text_align,
1213        typeface,
1214        ..FontMetrics::default()
1215    }
1216}
1217
1218fn read_medium(page_area: Node<'_, '_>) -> (f64, f64) {
1219    if let Some(m) = find_first_child_by_name(page_area, "medium") {
1220        let short = attr(m, "short").and_then(parse_dim).unwrap_or(612.0);
1221        let long_ = attr(m, "long").and_then(parse_dim).unwrap_or(792.0);
1222        (short, long_)
1223    } else {
1224        (612.0, 792.0)
1225    }
1226}
1227
1228fn read_content_areas(
1229    page_area: Node<'_, '_>,
1230    page_width: f64,
1231    page_height: f64,
1232) -> Vec<ContentArea> {
1233    let mut areas = Vec::new();
1234    for child in page_area.children().filter(|n| n.is_element()) {
1235        if child.tag_name().name() == "contentArea" {
1236            // XFA 3.3 §8.3.1 — contentArea x/y default to "0in" (0pt).
1237            // Previously defaulted to 36pt (0.5in margin), which shifted
1238            // all content down/right for templates omitting x/y. Fixes #797.
1239            let x = attr(child, "x").and_then(parse_dim).unwrap_or(0.0);
1240            let y = attr(child, "y").and_then(parse_dim).unwrap_or(0.0);
1241            // w/h default to full page dimensions when omitted, matching the
1242            // behavior of the empty (no contentArea) fallback. Previously used
1243            // hardcoded 540×720 (US Letter body), which was inconsistent with
1244            // the no-contentArea path and had no spec basis.
1245            let w = attr(child, "w").and_then(parse_dim).unwrap_or(page_width);
1246            let h = attr(child, "h").and_then(parse_dim).unwrap_or(page_height);
1247            areas.push(ContentArea {
1248                name: attr(child, "name").unwrap_or("").to_string(),
1249                x,
1250                y,
1251                width: w,
1252                height: h,
1253                leader: None,
1254                trailer: None,
1255            });
1256        }
1257    }
1258    if areas.is_empty() {
1259        areas.push(ContentArea {
1260            name: String::new(),
1261            x: 0.0,
1262            y: 0.0,
1263            width: page_width,
1264            height: page_height,
1265            leader: None,
1266            trailer: None,
1267        });
1268    }
1269    areas
1270}
1271
1272fn extract_value_text(elem: Node<'_, '_>) -> Option<String> {
1273    let value = find_first_child_by_name(elem, "value")?;
1274    for tag in &["text", "float", "integer", "date", "dateTime", "decimal"] {
1275        if let Some(child) = find_first_child_by_name(value, tag) {
1276            let text = child.text().unwrap_or("");
1277            let trimmed = text.trim_start_matches(|c: char| c.is_whitespace() && c != '\n');
1278            let trimmed = trimmed.trim_end_matches(|c: char| c.is_whitespace() && c != '\n');
1279            if !trimmed.is_empty() {
1280                return Some(trimmed.to_string());
1281            }
1282        }
1283    }
1284    if let Some(ex) = find_first_child_by_name(value, "exData") {
1285        let text = extract_text_from_descendants(ex);
1286        if !text.is_empty() {
1287            return Some(text);
1288        }
1289    }
1290    None
1291}
1292
1293fn extract_text_from_descendants(node: Node<'_, '_>) -> String {
1294    let block_tags = [
1295        "p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "li", "tr", "br",
1296    ];
1297    let mut result = String::new();
1298    let mut last_was_block = false;
1299
1300    for desc in node.descendants() {
1301        let is_block = desc.is_element() && block_tags.contains(&desc.tag_name().name());
1302
1303        if is_block && !result.is_empty() {
1304            result.push('\n');
1305            last_was_block = true;
1306        }
1307
1308        if desc.is_text() {
1309            if let Some(t) = desc.text() {
1310                let t = t.trim();
1311                if !t.is_empty() {
1312                    if last_was_block || result.is_empty() {
1313                        result.push_str(t);
1314                    } else {
1315                        result.push(' ');
1316                        result.push_str(t);
1317                    }
1318                    last_was_block = false;
1319                }
1320            }
1321        }
1322    }
1323
1324    result.trim().to_string()
1325}
1326
1327fn extract_exdata_font_size(elem: Node<'_, '_>) -> Option<f64> {
1328    let value = find_first_child_by_name(elem, "value")?;
1329    let ex = find_first_child_by_name(value, "exData")?;
1330    for desc in ex.descendants() {
1331        if !desc.is_element() {
1332            continue;
1333        }
1334        let style = desc
1335            .attribute("style")
1336            .or_else(|| desc.attribute("Style"))?;
1337        for part in style.split(';') {
1338            let part = part.trim();
1339            if let Some(val) = part
1340                .strip_prefix("font-size:")
1341                .or_else(|| part.strip_prefix("font-size :"))
1342            {
1343                let val = val.trim();
1344                if let Some(pt) = val.strip_suffix("pt") {
1345                    if let Ok(size) = pt.trim().parse::<f64>() {
1346                        if size > 0.0 {
1347                            return Some(size);
1348                        }
1349                    }
1350                }
1351            }
1352        }
1353    }
1354    None
1355}
1356
1357/// Extract the dominant `font-weight` from `<exData contentType="text/html">` styles.
1358/// Returns `Some("bold")` when the first styled `<p>` or `<span>` has `font-weight:bold`.
1359/// XFA Spec 3.3 §27.4: font-weight supports normal, bold, and numeric values 100-900.
1360fn extract_exdata_font_weight(elem: Node<'_, '_>) -> Option<String> {
1361    let value = find_first_child_by_name(elem, "value")?;
1362    let ex = find_first_child_by_name(value, "exData")?;
1363    for desc in ex.descendants() {
1364        if !desc.is_element() {
1365            continue;
1366        }
1367        let style = desc.attribute("style")?;
1368        for part in style.split(';') {
1369            let part = part.trim();
1370            if let Some(val) = part
1371                .strip_prefix("font-weight:")
1372                .or_else(|| part.strip_prefix("font-weight :"))
1373            {
1374                let val = val.trim();
1375                if val == "bold" || val == "700" || val == "800" || val == "900" {
1376                    return Some("bold".to_string());
1377                }
1378                if val == "normal" || val == "400" || val == "500" || val == "600" {
1379                    return Some("normal".to_string());
1380                }
1381            }
1382        }
1383    }
1384    None
1385}
1386
1387/// Extract `font-style` from `<exData contentType="text/html">` CSS styles.
1388/// XFA Spec 3.3 §27.4: font-style supports normal, italic, oblique.
1389fn extract_exdata_font_style(elem: Node<'_, '_>) -> Option<String> {
1390    let value = find_first_child_by_name(elem, "value")?;
1391    let ex = find_first_child_by_name(value, "exData")?;
1392    for desc in ex.descendants() {
1393        if !desc.is_element() {
1394            continue;
1395        }
1396        let style = desc.attribute("style")?;
1397        for part in style.split(';') {
1398            let part = part.trim();
1399            if let Some(val) = part
1400                .strip_prefix("font-style:")
1401                .or_else(|| part.strip_prefix("font-style :"))
1402            {
1403                let val = val.trim();
1404                if val == "italic" || val == "oblique" {
1405                    return Some("italic".to_string());
1406                }
1407                if val == "normal" {
1408                    return Some("normal".to_string());
1409                }
1410            }
1411        }
1412    }
1413    None
1414}
1415
1416/// Extract `color` from `<exData contentType="text/html">` CSS styles.
1417/// XFA Spec 3.3 §27.4: color supports #RGB, #RRGGBB, and rgb(r,g,b) formats.
1418/// Returns RGB tuple (0-255).
1419fn extract_exdata_color(elem: Node<'_, '_>) -> Option<(u8, u8, u8)> {
1420    let value = find_first_child_by_name(elem, "value")?;
1421    let ex = find_first_child_by_name(value, "exData")?;
1422    for desc in ex.descendants() {
1423        if !desc.is_element() {
1424            continue;
1425        }
1426        let style = desc.attribute("style")?;
1427        for part in style.split(';') {
1428            let part = part.trim();
1429            if let Some(val) = part
1430                .strip_prefix("color:")
1431                .or_else(|| part.strip_prefix("color :"))
1432            {
1433                let val = val.trim();
1434                if let Some(rgb) = parse_css_color(val) {
1435                    return Some(rgb);
1436                }
1437            }
1438        }
1439    }
1440    None
1441}
1442
/// Parse the hex digits of a CSS colour (the part after `#`) into RGB.
///
/// Accepts exactly three digits (`RGB`, each digit expanded so `f` becomes
/// `0xff`) or exactly six digits (`RRGGBB`). Digits may be upper or lower
/// case. Any other length, or any non-hex byte, yields `None`.
fn parse_ascii_hex_color(hex: &str) -> Option<(u8, u8, u8)> {
    let raw = hex.as_bytes();
    if raw.len() != 3 && raw.len() != 6 {
        return None;
    }

    // Decode every byte to its 4-bit value up front; unused slots stay zero.
    let mut nibbles = [0u8; 6];
    for (slot, byte) in nibbles.iter_mut().zip(raw) {
        // `to_digit(16)` yields 0..=15, so the narrowing cast cannot truncate.
        *slot = char::from(*byte).to_digit(16)? as u8;
    }

    if raw.len() == 3 {
        let [r, g, b, ..] = nibbles;
        // Multiplying by 17 (0x11) duplicates the digit: 0xN -> 0xNN.
        Some((r * 17, g * 17, b * 17))
    } else {
        let [r_hi, r_lo, g_hi, g_lo, b_hi, b_lo] = nibbles;
        Some(((r_hi << 4) | r_lo, (g_hi << 4) | g_lo, (b_hi << 4) | b_lo))
    }
}
1468
1469fn parse_css_color(s: &str) -> Option<(u8, u8, u8)> {
1470    let s = s.trim();
1471    if let Some(hex) = s.strip_prefix('#') {
1472        parse_ascii_hex_color(hex)
1473    } else if let Some(rgb_part) = s.strip_prefix("rgb(") {
1474        if let Some(inner) = rgb_part.strip_suffix(')') {
1475            let parts: Vec<&str> = inner.split(',').collect();
1476            if parts.len() >= 3 {
1477                let r: f64 = parts[0].trim().parse().ok()?;
1478                let g: f64 = parts[1].trim().parse().ok()?;
1479                let b: f64 = parts[2].trim().parse().ok()?;
1480                return Some((r as u8, g as u8, b as u8));
1481            }
1482        }
1483        None
1484    } else {
1485        let parts: Vec<&str> = s.split(',').collect();
1486        if parts.len() >= 3 {
1487            let r = parts[0].trim().parse::<u8>().ok()?;
1488            let g = parts[1].trim().parse::<u8>().ok()?;
1489            let b = parts[2].trim().parse::<u8>().ok()?;
1490            Some((r, g, b))
1491        } else {
1492            None
1493        }
1494    }
1495}
1496
1497/// Extract the first `font-family` name from `<exData contentType="text/html">` CSS.
1498/// XFA Spec 3.3 §27.5 (p1198) — Character Formatting: font-family specifies
1499/// the typeface. §28.1 (p1228): Adobe uses only the first family name.
1500fn extract_exdata_font_family(elem: Node<'_, '_>) -> Option<String> {
1501    let value = find_first_child_by_name(elem, "value")?;
1502    let ex = find_first_child_by_name(value, "exData")?;
1503    for desc in ex.descendants() {
1504        if !desc.is_element() {
1505            continue;
1506        }
1507        let style = desc.attribute("style")?;
1508        for part in style.split(';') {
1509            let part = part.trim();
1510            if let Some(val) = part
1511                .strip_prefix("font-family:")
1512                .or_else(|| part.strip_prefix("font-family :"))
1513            {
1514                let val = val.trim();
1515                let first_family = val
1516                    .split(',')
1517                    .next()
1518                    .map(|s| s.trim().trim_matches(['"', '\'']))
1519                    .filter(|s| !s.is_empty())?;
1520                return Some(first_family.to_string());
1521            }
1522        }
1523    }
1524    None
1525}
1526
1527/// Extract `text-align` from `<exData contentType="text/html">` CSS styles.
1528/// XFA Spec 3.3 §27.4 (p1190-1197): text-align supports left, center, right,
1529/// justify. Maps to XFA `<para hAlign>`.
1530fn extract_exdata_text_align(elem: Node<'_, '_>) -> Option<TextAlign> {
1531    let value = find_first_child_by_name(elem, "value")?;
1532    let ex = find_first_child_by_name(value, "exData")?;
1533    for desc in ex.descendants() {
1534        if !desc.is_element() {
1535            continue;
1536        }
1537        let style = desc.attribute("style")?;
1538        for part in style.split(';') {
1539            let part = part.trim();
1540            if let Some(val) = part
1541                .strip_prefix("text-align:")
1542                .or_else(|| part.strip_prefix("text-align :"))
1543            {
1544                let val = val.trim();
1545                return Some(match val {
1546                    "center" => TextAlign::Center,
1547                    "right" => TextAlign::Right,
1548                    "justify" => TextAlign::Justify,
1549                    _ => TextAlign::Left,
1550                });
1551            }
1552        }
1553    }
1554    None
1555}
1556
1557/// Extract `margin-top` and `margin-bottom` from `<exData contentType="text/html">` CSS.
1558/// XFA Spec 3.3 §27.4 (p1190-1197): margin-top maps to XFA `<para spaceAbove>`,
1559/// margin-bottom maps to `<para spaceBelow>`. Returns (space_above_pt, space_below_pt).
1560fn extract_exdata_margins(elem: Node<'_, '_>) -> Option<(f64, f64)> {
1561    let value = find_first_child_by_name(elem, "value")?;
1562    let ex = find_first_child_by_name(value, "exData")?;
1563    let mut space_above: Option<f64> = None;
1564    let mut space_below: Option<f64> = None;
1565    for desc in ex.descendants() {
1566        if !desc.is_element() {
1567            continue;
1568        }
1569        let style = match desc.attribute("style") {
1570            Some(s) => s,
1571            None => continue,
1572        };
1573        for part in style.split(';') {
1574            let part = part.trim();
1575            if space_above.is_none() {
1576                if let Some(val) = part
1577                    .strip_prefix("margin-top:")
1578                    .or_else(|| part.strip_prefix("margin-top :"))
1579                {
1580                    let val = val.trim();
1581                    if let Some(m) = Measurement::parse(val) {
1582                        let pt = m.to_points();
1583                        if pt >= 0.0 {
1584                            space_above = Some(pt);
1585                        }
1586                    }
1587                }
1588            }
1589            if space_below.is_none() {
1590                if let Some(val) = part
1591                    .strip_prefix("margin-bottom:")
1592                    .or_else(|| part.strip_prefix("margin-bottom :"))
1593                {
1594                    let val = val.trim();
1595                    if let Some(m) = Measurement::parse(val) {
1596                        let pt = m.to_points();
1597                        if pt >= 0.0 {
1598                            space_below = Some(pt);
1599                        }
1600                    }
1601                }
1602            }
1603        }
1604        if space_above.is_some() && space_below.is_some() {
1605            break;
1606        }
1607    }
1608    match (space_above, space_below) {
1609        (Some(above), Some(below)) => Some((above, below)),
1610        (Some(above), None) => Some((above, 0.0)),
1611        (None, Some(below)) => Some((0.0, below)),
1612        (None, None) => None,
1613    }
1614}
1615
1616/// Inline CSS style properties accumulated from parent elements.
1617#[derive(Debug, Clone, Default)]
1618struct InheritedStyle {
1619    font_size: Option<f64>,
1620    font_family: Option<String>,
1621    font_weight: Option<String>,
1622    font_style: Option<String>,
1623    text_color: Option<(u8, u8, u8)>,
1624    underline: bool,
1625    line_through: bool,
1626}
1627
1628impl InheritedStyle {
1629    fn merge_with_css(&self, css: &str) -> Self {
1630        let mut child = self.clone();
1631        for part in css.split(';') {
1632            let part = part.trim();
1633            if let Some(val) = strip_css_prop(part, "font-size") {
1634                if let Some(pt) = val.strip_suffix("pt") {
1635                    if let Ok(size) = pt.trim().parse::<f64>() {
1636                        if size > 0.0 {
1637                            child.font_size = Some(size);
1638                        }
1639                    }
1640                }
1641            } else if let Some(val) = strip_css_prop(part, "font-family") {
1642                let first = val
1643                    .split(',')
1644                    .next()
1645                    .map(|s| s.trim().trim_matches(['"', '\'']))
1646                    .filter(|s| !s.is_empty());
1647                if let Some(fam) = first {
1648                    child.font_family = Some(fam.to_string());
1649                }
1650            } else if let Some(val) = strip_css_prop(part, "font-weight") {
1651                if val == "bold" || val == "700" || val == "800" || val == "900" {
1652                    child.font_weight = Some("bold".to_string());
1653                } else if val == "normal" || val == "400" || val == "500" {
1654                    child.font_weight = Some("normal".to_string());
1655                }
1656            } else if let Some(val) = strip_css_prop(part, "font-style") {
1657                if val == "italic" || val == "oblique" {
1658                    child.font_style = Some("italic".to_string());
1659                } else if val == "normal" {
1660                    child.font_style = Some("normal".to_string());
1661                }
1662            } else if let Some(val) = strip_css_prop(part, "text-decoration") {
1663                if val.contains("underline") {
1664                    child.underline = true;
1665                } else if val.contains("line-through") {
1666                    child.line_through = true;
1667                } else if val == "none" {
1668                    child.underline = false;
1669                    child.line_through = false;
1670                }
1671            } else if let Some(val) = strip_css_prop(part, "color") {
1672                if let Some(rgb) = parse_css_color(val) {
1673                    child.text_color = Some(rgb);
1674                }
1675            }
1676        }
1677        child
1678    }
1679
1680    fn to_span(&self, text: String) -> RichTextSpan {
1681        RichTextSpan {
1682            text,
1683            font_size: self.font_size,
1684            font_family: self.font_family.clone(),
1685            font_weight: self.font_weight.clone(),
1686            font_style: self.font_style.clone(),
1687            text_color: self.text_color,
1688            underline: self.underline,
1689            line_through: self.line_through,
1690        }
1691    }
1692}
1693
/// Return the value of a CSS declaration if it sets property `prop`.
///
/// `decl` must start with `prop` exactly (no leading whitespace), followed by
/// optional whitespace and a `:`. The text after the colon is returned with
/// surrounding whitespace trimmed. Returns `None` for any other declaration.
fn strip_css_prop<'a>(decl: &'a str, prop: &str) -> Option<&'a str> {
    let after_name = decl.strip_prefix(prop)?;
    let after_colon = after_name.trim_start().strip_prefix(':')?;
    Some(after_colon.trim())
}
1699
1700/// Parse `<exData contentType="text/html">` XHTML into rich text spans.
1701fn parse_exdata_rich_text_spans(elem: Node<'_, '_>) -> Option<Vec<RichTextSpan>> {
1702    let value = find_first_child_by_name(elem, "value")?;
1703    let ex = find_first_child_by_name(value, "exData")?;
1704    let ct = ex.attribute("contentType").unwrap_or("");
1705    if ct != "text/html" && ct != "text/xml" {
1706        return None;
1707    }
1708    let body = ex
1709        .descendants()
1710        .find(|d| d.is_element() && d.tag_name().name() == "body")?;
1711    let mut spans: Vec<RichTextSpan> = Vec::new();
1712    let base_style = InheritedStyle::default();
1713    let mut first_para = true;
1714    for child in body.children() {
1715        if child.is_element() && child.tag_name().name() == "p" {
1716            if !first_para && !spans.is_empty() {
1717                spans.push(RichTextSpan {
1718                    text: "\n".to_string(),
1719                    font_size: None,
1720                    font_family: None,
1721                    font_weight: None,
1722                    font_style: None,
1723                    text_color: None,
1724                    underline: false,
1725                    line_through: false,
1726                });
1727            }
1728            first_para = false;
1729            let p_style = match child.attribute("style") {
1730                Some(css) => base_style.merge_with_css(css),
1731                None => base_style.clone(),
1732            };
1733            collect_inline_spans(child, &p_style, &mut spans);
1734        } else if child.is_text() {
1735            if let Some(t) = child.text() {
1736                let t = t.trim();
1737                if !t.is_empty() {
1738                    spans.push(base_style.to_span(t.to_string()));
1739                }
1740            }
1741        }
1742    }
1743    if spans.is_empty() {
1744        return None;
1745    }
1746    let all_default = spans.iter().all(|s| {
1747        s.font_size.is_none()
1748            && s.font_family.is_none()
1749            && s.font_weight.is_none()
1750            && s.font_style.is_none()
1751            && s.text_color.is_none()
1752            && !s.underline
1753    });
1754    if all_default {
1755        return None;
1756    }
1757    Some(spans)
1758}
1759
1760fn collect_inline_spans(
1761    node: Node<'_, '_>,
1762    inherited: &InheritedStyle,
1763    spans: &mut Vec<RichTextSpan>,
1764) {
1765    for child in node.children() {
1766        if child.is_text() {
1767            if let Some(t) = child.text() {
1768                let t = t.trim();
1769                if !t.is_empty() {
1770                    spans.push(inherited.to_span(t.to_string()));
1771                }
1772            }
1773        } else if child.is_element() {
1774            let tag = child.tag_name().name();
1775            match tag {
1776                "br" => {
1777                    spans.push(RichTextSpan {
1778                        text: "\n".to_string(),
1779                        font_size: None,
1780                        font_family: None,
1781                        font_weight: None,
1782                        font_style: None,
1783                        text_color: None,
1784                        underline: false,
1785                        line_through: false,
1786                    });
1787                }
1788                "span" => {
1789                    let child_style = match child.attribute("style") {
1790                        Some(css) => {
1791                            if css.contains("xfa-spacerun:yes") {
1792                                if let Some(t) = child.text() {
1793                                    if !t.is_empty() {
1794                                        spans.push(inherited.to_span(t.to_string()));
1795                                    }
1796                                }
1797                                collect_inline_spans(child, inherited, spans);
1798                                continue;
1799                            }
1800                            inherited.merge_with_css(css)
1801                        }
1802                        None => inherited.clone(),
1803                    };
1804                    collect_inline_spans(child, &child_style, spans);
1805                }
1806                "b" | "strong" => {
1807                    let mut s = inherited.clone();
1808                    s.font_weight = Some("bold".to_string());
1809                    if let Some(css) = child.attribute("style") {
1810                        s = s.merge_with_css(css);
1811                    }
1812                    collect_inline_spans(child, &s, spans);
1813                }
1814                "i" | "em" => {
1815                    let mut s = inherited.clone();
1816                    s.font_style = Some("italic".to_string());
1817                    if let Some(css) = child.attribute("style") {
1818                        s = s.merge_with_css(css);
1819                    }
1820                    collect_inline_spans(child, &s, spans);
1821                }
1822                "u" => {
1823                    let mut s = inherited.clone();
1824                    s.underline = true;
1825                    if let Some(css) = child.attribute("style") {
1826                        s = s.merge_with_css(css);
1827                    }
1828                    collect_inline_spans(child, &s, spans);
1829                }
1830                _ => {
1831                    let child_style = match child.attribute("style") {
1832                        Some(css) => inherited.merge_with_css(css),
1833                        None => inherited.clone(),
1834                    };
1835                    collect_inline_spans(child, &child_style, spans);
1836                }
1837            }
1838        }
1839    }
1840}
1841
1842fn parse_caption(elem: Node<'_, '_>) -> Option<Caption> {
1843    let cap_elem = find_first_child_by_name(elem, "caption")?;
1844    if is_hidden(cap_elem) {
1845        return None;
1846    }
1847    let text = extract_value_text(cap_elem)?;
1848    if text.is_empty() {
1849        return None;
1850    }
1851    let placement = match attr(cap_elem, "placement") {
1852        Some("right") => CaptionPlacement::Right,
1853        Some("top") => CaptionPlacement::Top,
1854        Some("bottom") => CaptionPlacement::Bottom,
1855        Some("inline") => CaptionPlacement::Inline,
1856        _ => CaptionPlacement::Left,
1857    };
1858    let reserve = attr(cap_elem, "reserve")
1859        .and_then(Measurement::parse)
1860        .map(|m| m.to_points());
1861    Some(Caption {
1862        placement,
1863        reserve,
1864        text,
1865    })
1866}
1867
1868fn is_hidden(elem: Node<'_, '_>) -> bool {
1869    matches!(
1870        attr(elem, "presence"),
1871        Some("hidden") | Some("invisible") | Some("inactive")
1872    )
1873}
1874
1875/// XFA Spec 3.3 §7.4.2 — Parse a presence string to the `Presence` enum.
1876///
1877/// Used when applying data-bound presence overrides.  Returns `Visible` for
1878/// any unrecognised value (spec default).
1879fn parse_presence_str(s: &str) -> Presence {
1880    match s.trim() {
1881        "hidden" => Presence::Hidden,
1882        "invisible" => Presence::Invisible,
1883        "inactive" => Presence::Inactive,
1884        _ => Presence::Visible,
1885    }
1886}
1887
1888fn extract_value_image(
1889    elem: Node<'_, '_>,
1890    image_files: &std::collections::HashMap<String, Vec<u8>>,
1891) -> Option<(Vec<u8>, String)> {
1892    let value = find_first_child_by_name(elem, "value")?;
1893    let image = find_first_child_by_name(value, "image")?;
1894    let content_type = attr(image, "contentType")
1895        .unwrap_or("image/png")
1896        .to_string();
1897
1898    // XFA §2.3: `href` references an image embedded in the PDF's Names tree.
1899    // When present, the inline text content is empty — resolve from the PDF.
1900    if let Some(href) = attr(image, "href") {
1901        if let Some(data) = image_files.get(href) {
1902            let decoded = data.clone();
1903            if decoded.starts_with(b"BM") || content_type == "image/bmp" {
1904                if let Some(png_data) = bmp_to_png(&decoded) {
1905                    return Some((png_data, "image/png".to_string()));
1906                }
1907                return None;
1908            }
1909            return Some((decoded, content_type));
1910        }
1911        // Try normalized key: strip leading ".\" or "./"
1912        let normalized = href.trim_start_matches(".\\").trim_start_matches("./");
1913        for (k, data) in image_files {
1914            let k_norm = k.trim_start_matches(".\\").trim_start_matches("./");
1915            if k_norm == normalized {
1916                let decoded = data.clone();
1917                if decoded.starts_with(b"BM") || content_type == "image/bmp" {
1918                    if let Some(png_data) = bmp_to_png(&decoded) {
1919                        return Some((png_data, "image/png".to_string()));
1920                    }
1921                    return None;
1922                }
1923                return Some((decoded, content_type));
1924            }
1925        }
1926    }
1927
1928    let data = image.text().unwrap_or_default();
1929    let decoded = base64_decode(data);
1930
1931    // Skip empty images from unresolved href references.
1932    if decoded.is_empty() {
1933        return None;
1934    }
1935
1936    if decoded.starts_with(b"BM") || content_type == "image/bmp" {
1937        if let Some(png_data) = bmp_to_png(&decoded) {
1938            return Some((png_data, "image/png".to_string()));
1939        }
1940        log::warn!("BMP to PNG conversion failed; skipping image");
1941        return None;
1942    }
1943
1944    Some((decoded, content_type))
1945}
1946
1947fn extract_draw_content(elem: Node<'_, '_>) -> Option<DrawContent> {
1948    let value = find_first_child_by_name(elem, "value")?;
1949
1950    if let Some(line) = find_first_child_by_name(value, "line") {
1951        let x1 = attr_as_f64(line, "x1").unwrap_or(0.0);
1952        let y1 = attr_as_f64(line, "y1").unwrap_or(0.0);
1953        let x2 = attr_as_f64(line, "x2").unwrap_or(0.0);
1954        let y2 = attr_as_f64(line, "y2").unwrap_or(0.0);
1955        return Some(DrawContent::Line { x1, y1, x2, y2 });
1956    }
1957
1958    if let Some(rect) = find_first_child_by_name(value, "rectangle") {
1959        let x = attr_as_f64(rect, "x").unwrap_or(0.0);
1960        let y = attr_as_f64(rect, "y").unwrap_or(0.0);
1961        let w = attr_as_f64(rect, "w").unwrap_or(attr_as_f64(rect, "width").unwrap_or(0.0));
1962        let h = attr_as_f64(rect, "h").unwrap_or(attr_as_f64(rect, "height").unwrap_or(0.0));
1963        let radius =
1964            attr_as_f64(rect, "r").unwrap_or(attr_as_f64(rect, "cornerRadius").unwrap_or(0.0));
1965        return Some(DrawContent::Rectangle { x, y, w, h, radius });
1966    }
1967
1968    if let Some(arc) = find_first_child_by_name(value, "arc") {
1969        let x = attr_as_f64(arc, "x").unwrap_or(0.0);
1970        let y = attr_as_f64(arc, "y").unwrap_or(0.0);
1971        let w = attr_as_f64(arc, "w").unwrap_or(attr_as_f64(arc, "width").unwrap_or(0.0));
1972        let h = attr_as_f64(arc, "h").unwrap_or(attr_as_f64(arc, "height").unwrap_or(0.0));
1973        let start_angle = attr_as_f64(arc, "startAngle").unwrap_or(0.0);
1974        let sweep_angle = attr_as_f64(arc, "sweepAngle").unwrap_or(0.0);
1975        return Some(DrawContent::Arc {
1976            x,
1977            y,
1978            w,
1979            h,
1980            start_angle,
1981            sweep_angle,
1982        });
1983    }
1984
1985    None
1986}
1987
1988fn base64_decode(input: &str) -> Vec<u8> {
1989    use base64::Engine;
1990    base64::engine::general_purpose::STANDARD
1991        .decode(input.trim())
1992        .unwrap_or_default()
1993}
1994
1995fn bmp_to_png(bmp_data: &[u8]) -> Option<Vec<u8>> {
1996    let img = image::load_from_memory_with_format(bmp_data, image::ImageFormat::Bmp).ok()?;
1997    let mut buf = Vec::new();
1998    img.write_to(&mut std::io::Cursor::new(&mut buf), image::ImageFormat::Png)
1999        .ok()?;
2000    Some(buf)
2001}
2002
2003fn attr_as_f64(elem: Node<'_, '_>, name: &str) -> Option<f64> {
2004    attr(elem, name)?.parse().ok()
2005}
2006
2007fn parse_col_span(elem: Node<'_, '_>) -> i32 {
2008    attr(elem, "colSpan")
2009        .and_then(|s| s.parse().ok())
2010        .unwrap_or(1)
2011}
2012
2013fn parse_node_meta(elem: Node<'_, '_>) -> FormNodeMeta {
2014    let tag = elem.tag_name().name();
2015    let presence = match attr(elem, "presence") {
2016        Some("hidden") => Presence::Hidden,
2017        Some("invisible") => Presence::Invisible,
2018        Some("inactive") => Presence::Inactive,
2019        _ => Presence::Visible,
2020    };
2021
2022    let (page_break_before, break_before_target) = detect_page_break_before(elem);
2023    let (page_break_after, break_after_target) = detect_page_break_after(elem);
2024    let content_area_break = detect_content_area_break(elem);
2025    let break_target = break_before_target.or(break_after_target);
2026
2027    let event_scripts = collect_event_scripts(elem);
2028    let (keep_next_content_area, keep_previous_content_area, keep_intact_content_area) =
2029        parse_keep(elem);
2030    let (overflow_leader, overflow_trailer) = parse_overflow(elem);
2031
2032    let group_kind = if tag == "exclGroup" {
2033        GroupKind::ExclusiveChoice
2034    } else {
2035        GroupKind::None
2036    };
2037
2038    let item_value = if tag == "field" {
2039        parse_item_value(elem)
2040    } else {
2041        None
2042    };
2043
2044    let (display_items, save_items) = if tag == "field" {
2045        parse_items_lists(elem)
2046    } else {
2047        (Vec::new(), Vec::new())
2048    };
2049
2050    let xfa_id = attr(elem, "id").map(|s| s.to_string());
2051    let field_kind = detect_field_kind(elem);
2052    let style = parse_node_style(elem);
2053    let (data_bind_ref, data_bind_none) = parse_bind(elem);
2054    let anchor_type = parse_anchor_type(elem);
2055
2056    FormNodeMeta {
2057        xfa_id,
2058        presence,
2059        page_break_before,
2060        page_break_after,
2061        break_target,
2062        content_area_break,
2063        overflow_leader,
2064        overflow_trailer,
2065        keep_next_content_area,
2066        keep_previous_content_area,
2067        keep_intact_content_area,
2068        event_scripts,
2069        data_bind_ref,
2070        data_bind_none,
2071        group_kind,
2072        item_value,
2073        field_kind,
2074        style,
2075        display_items,
2076        save_items,
2077        anchor_type,
2078        ..Default::default()
2079    }
2080}
2081
2082fn detect_page_break_before(elem: Node<'_, '_>) -> (bool, Option<String>) {
2083    for child in elem.children().filter(|n| n.is_element()) {
2084        let tag = child.tag_name().name();
2085        if matches!(tag, "subform" | "field" | "draw" | "exclGroup") {
2086            break;
2087        }
2088        if tag == "breakBefore" && attr(child, "targetType") == Some("pageArea") {
2089            return (true, attr(child, "target").map(|s| s.to_string()));
2090        }
2091        if tag == "break" && attr(child, "before") == Some("pageArea") {
2092            return (true, attr(child, "target").map(|s| s.to_string()));
2093        }
2094    }
2095    (false, None)
2096}
2097
2098fn detect_page_break_after(elem: Node<'_, '_>) -> (bool, Option<String>) {
2099    let mut last_content_idx = 0;
2100    let children: Vec<_> = elem.children().filter(|n| n.is_element()).collect();
2101    for (i, child) in children.iter().enumerate() {
2102        let tag = child.tag_name().name();
2103        if matches!(tag, "subform" | "field" | "draw" | "exclGroup") {
2104            last_content_idx = i;
2105        }
2106    }
2107
2108    for child in children.iter().skip(last_content_idx) {
2109        let tag = child.tag_name().name();
2110        if tag == "breakAfter" && attr(*child, "targetType") == Some("pageArea") {
2111            return (true, attr(*child, "target").map(|s| s.to_string()));
2112        }
2113        if tag == "break" && attr(*child, "after") == Some("pageArea") {
2114            return (true, attr(*child, "target").map(|s| s.to_string()));
2115        }
2116    }
2117    (false, None)
2118}
2119
2120fn detect_content_area_break(elem: Node<'_, '_>) -> bool {
2121    for child in elem.children().filter(|n| n.is_element()) {
2122        let tag = child.tag_name().name();
2123        if tag == "breakBefore" && attr(child, "targetType") == Some("contentArea") {
2124            return true;
2125        }
2126    }
2127    false
2128}
2129
2130fn collect_event_scripts(elem: Node<'_, '_>) -> Vec<EventScript> {
2131    let mut scripts = Vec::new();
2132    for child in elem.children().filter(|n| n.is_element()) {
2133        let child_tag = child.tag_name().name();
2134        if child_tag == "event" {
2135            let activity = attr(child, "activity");
2136            let event_ref = attr(child, "ref");
2137            if activity == Some("ready") && event_ref == Some("$layout") {
2138                continue;
2139            }
2140            if let Some(script_elem) = find_first_child_by_name(child, "script") {
2141                if let Some(script) =
2142                    build_event_script(script_elem, activity, event_ref, attr(script_elem, "runAt"))
2143                {
2144                    scripts.push(script);
2145                }
2146            }
2147        } else if child_tag == "calculate" {
2148            if let Some(script_elem) = find_first_child_by_name(child, "script") {
2149                if let Some(script) = build_event_script(
2150                    script_elem,
2151                    Some("calculate"),
2152                    None,
2153                    attr(script_elem, "runAt"),
2154                ) {
2155                    scripts.push(script);
2156                }
2157            }
2158        }
2159    }
2160    scripts
2161}
2162
2163fn build_event_script(
2164    script_elem: Node<'_, '_>,
2165    activity: Option<&str>,
2166    event_ref: Option<&str>,
2167    run_at: Option<&str>,
2168) -> Option<EventScript> {
2169    let text = script_elem.text()?.trim();
2170    if text.is_empty() {
2171        return None;
2172    }
2173
2174    Some(EventScript::new(
2175        text.to_string(),
2176        detect_script_language(attr(script_elem, "contentType")),
2177        activity.map(str::to_string),
2178        event_ref.map(str::to_string),
2179        run_at.map(str::to_string),
2180    ))
2181}
2182
2183fn detect_script_language(content_type: Option<&str>) -> ScriptLanguage {
2184    match content_type.map(|value| value.trim().to_ascii_lowercase()) {
2185        None => ScriptLanguage::FormCalc,
2186        Some(value) if value == "application/x-formcalc" || value.ends_with("/x-formcalc") => {
2187            ScriptLanguage::FormCalc
2188        }
2189        Some(value)
2190            if value == "application/x-javascript"
2191                || value == "application/javascript"
2192                || value == "text/javascript"
2193                || value.ends_with("/x-javascript") =>
2194        {
2195            ScriptLanguage::JavaScript
2196        }
2197        Some(_) => ScriptLanguage::Other,
2198    }
2199}
2200
2201fn parse_keep(elem: Node<'_, '_>) -> (bool, bool, bool) {
2202    if let Some(keep) = find_first_child_by_name(elem, "keep") {
2203        let next = attr(keep, "next") == Some("contentArea");
2204        let prev = attr(keep, "previous") == Some("contentArea");
2205        let intact = attr(keep, "intact") == Some("contentArea");
2206        (next, prev, intact)
2207    } else {
2208        (false, false, false)
2209    }
2210}
2211
2212fn parse_overflow(elem: Node<'_, '_>) -> (Option<String>, Option<String>) {
2213    if let Some(overflow) = find_first_child_by_name(elem, "overflow") {
2214        let leader = attr(overflow, "leader").map(|s| s.to_string());
2215        let trailer = attr(overflow, "trailer").map(|s| s.to_string());
2216        (leader, trailer)
2217    } else {
2218        (None, None)
2219    }
2220}
2221
2222fn parse_item_value(elem: Node<'_, '_>) -> Option<String> {
2223    let items = find_first_child_by_name(elem, "items")?;
2224    let text_elem = find_first_child_by_name(items, "text")?;
2225    let text = text_elem.text()?.trim();
2226    if text.is_empty() {
2227        None
2228    } else {
2229        Some(text.to_string())
2230    }
2231}
2232
2233/// Extract all text values from an `<items>` element.
2234fn collect_items_texts(items_elem: Node<'_, '_>) -> Vec<String> {
2235    items_elem
2236        .children()
2237        .filter(|n| n.is_element())
2238        .filter_map(|child| {
2239            let txt = child.text().unwrap_or("").trim().to_string();
2240            if txt.is_empty() {
2241                None
2242            } else {
2243                Some(txt)
2244            }
2245        })
2246        .collect()
2247}
2248
2249/// Parse choice list `<items>` elements from a `<field>` node (XFA 3.3 §7.7).
2250fn parse_items_lists(elem: Node<'_, '_>) -> (Vec<String>, Vec<String>) {
2251    let items_elems: Vec<_> = elem
2252        .children()
2253        .filter(|n| n.is_element() && n.tag_name().name() == "items")
2254        .collect();
2255    match items_elems.len() {
2256        0 => (Vec::new(), Vec::new()),
2257        1 => {
2258            let vals = collect_items_texts(items_elems[0]);
2259            (vals, Vec::new())
2260        }
2261        _ => {
2262            let first = items_elems[0];
2263            let second = items_elems[1];
2264            let first_is_save = attr(first, "save") == Some("1");
2265            if first_is_save {
2266                (collect_items_texts(second), collect_items_texts(first))
2267            } else {
2268                (collect_items_texts(first), collect_items_texts(second))
2269            }
2270        }
2271    }
2272}
2273
2274fn detect_field_kind(elem: Node<'_, '_>) -> FieldKind {
2275    let Some(ui) = find_first_child_by_name(elem, "ui") else {
2276        return FieldKind::Text;
2277    };
2278    for child in ui.children().filter(|n| n.is_element()) {
2279        match child.tag_name().name() {
2280            "button" => return FieldKind::Button,
2281            "checkButton" => {
2282                // fixes #798: XFA 3.3 §11.2 / §17.8 uses round checkButtons
2283                // for exclusion-group radio widgets. Treating every
2284                // checkButton as a checkbox loses the correct renderer.
2285                let shape = attr(child, "shape").unwrap_or("square");
2286                return if shape == "round" {
2287                    FieldKind::Radio
2288                } else {
2289                    FieldKind::Checkbox
2290                };
2291            }
2292            "choiceList" => return FieldKind::Dropdown,
2293            "dateTimeEdit" => return FieldKind::DateTimePicker,
2294            "numericEdit" => return FieldKind::NumericEdit,
2295            "passwordEdit" => return FieldKind::PasswordEdit,
2296            "imageEdit" => return FieldKind::ImageEdit,
2297            "signature" => return FieldKind::Signature,
2298            "barcode" => return FieldKind::Barcode,
2299            _ => {}
2300        }
2301    }
2302    FieldKind::Text
2303}
2304
2305fn parse_node_style(elem: Node<'_, '_>) -> FormNodeStyle {
2306    let (check_on_value, check_off_value, check_neutral_value) = parse_check_button_values(elem);
2307    let mut style = FormNodeStyle {
2308        check_button_mark: parse_check_button_mark(elem),
2309        check_button_on_value: check_on_value,
2310        check_button_off_value: check_off_value,
2311        check_button_neutral_value: check_neutral_value,
2312        ..Default::default()
2313    };
2314    if let Some(fill) = find_first_child_by_name(elem, "fill") {
2315        if !is_hidden(fill) {
2316            style.bg_color = parse_fill_color(fill);
2317        }
2318    }
2319    // Borders can live directly on the element OR inside <ui><textEdit|…><border>.
2320    let border = find_first_child_by_name(elem, "border").or_else(|| {
2321        let ui = find_first_child_by_name(elem, "ui")?;
2322        ui.children()
2323            .filter(|c| c.is_element() && c.tag_name().name() != "border")
2324            .find_map(|widget| find_first_child_by_name(widget, "border"))
2325    });
2326    if let Some(border) = border {
2327        // Collect all <edge> elements (XFA §9.3.3).
2328        let edge_elems: Vec<_> = border
2329            .children()
2330            .filter(|n| n.is_element() && n.tag_name().name() == "edge")
2331            .collect();
2332        if !edge_elems.is_empty() {
2333            let first_visible = edge_elems
2334                .iter()
2335                .find(|e| !is_hidden(**e) && attr(**e, "stroke").unwrap_or("solid") != "none")
2336                .or_else(|| edge_elems.first())
2337                .copied();
2338            let first = first_visible.unwrap_or(edge_elems[0]);
2339            if let Some(color) = find_first_child_by_name(first, "color") {
2340                if let Some(rgb) = parse_xfa_color(color) {
2341                    style.border_color = Some(rgb);
2342                }
2343            }
2344            let stroke = attr(first, "stroke").unwrap_or("solid");
2345            if stroke != "none" {
2346                let thickness = attr(first, "thickness")
2347                    .and_then(Measurement::parse)
2348                    .map(|m: Measurement| m.to_points())
2349                    .unwrap_or(0.5);
2350                if thickness > 0.0 {
2351                    style.border_width_pt = Some(thickness);
2352                }
2353            }
2354
2355            // Keep edge ordering consistent with render_bridge expectations:
2356            // [top, right, bottom, left]. The previous mapping treated 4-edge
2357            // borders as [top, bottom, left, right], which swapped bottom-only
2358            // underlines into left vertical lines.
2359            let edge_visible = |e: &roxmltree::Node<'_, '_>| -> bool {
2360                !is_hidden(*e) && attr(*e, "stroke").unwrap_or("solid") != "none"
2361            };
2362            style.border_edges = match edge_elems.len() {
2363                0 => [true, true, true, true],
2364                1 => {
2365                    let v = edge_visible(&edge_elems[0]);
2366                    [v, v, v, v]
2367                }
2368                2 => {
2369                    let even = edge_visible(&edge_elems[0]);
2370                    let odd = edge_visible(&edge_elems[1]);
2371                    [even, odd, even, odd]
2372                }
2373                3 => {
2374                    let top = edge_visible(&edge_elems[0]);
2375                    let rl = edge_visible(&edge_elems[1]);
2376                    let bot = edge_visible(&edge_elems[2]);
2377                    [top, rl, bot, rl]
2378                }
2379                _ => [
2380                    edge_visible(&edge_elems[0]),
2381                    edge_visible(&edge_elems[1]),
2382                    edge_visible(&edge_elems[2]),
2383                    edge_visible(&edge_elems[3]),
2384                ],
2385            };
2386
2387            if edge_elems.len() > 1 {
2388                let default_rgb = style.border_color.unwrap_or((0, 0, 0));
2389                let edge_color = |edge: roxmltree::Node<'_, '_>| -> (u8, u8, u8) {
2390                    find_first_child_by_name(edge, "color")
2391                        .and_then(parse_xfa_color)
2392                        .unwrap_or(default_rgb)
2393                };
2394                let per_edge_colors = match edge_elems.len() {
2395                    2 => Some([
2396                        edge_color(edge_elems[0]),
2397                        edge_color(edge_elems[1]),
2398                        edge_color(edge_elems[0]),
2399                        edge_color(edge_elems[1]),
2400                    ]),
2401                    3 => Some([
2402                        edge_color(edge_elems[0]),
2403                        edge_color(edge_elems[1]),
2404                        edge_color(edge_elems[2]),
2405                        edge_color(edge_elems[1]),
2406                    ]),
2407                    _ => Some([
2408                        edge_color(edge_elems[0]),
2409                        edge_color(edge_elems[1]),
2410                        edge_color(edge_elems[2]),
2411                        edge_color(edge_elems[3]),
2412                    ]),
2413                };
2414                if let Some([top, right, bottom, left]) = per_edge_colors {
2415                    if !(top == bottom && bottom == left && left == right) {
2416                        style.border_colors = Some([top, right, bottom, left]);
2417                    }
2418                }
2419
2420                let default_thickness = style.border_width_pt.unwrap_or(0.5);
2421                let edge_thickness = |edge: roxmltree::Node<'_, '_>| -> f64 {
2422                    attr(edge, "thickness")
2423                        .and_then(Measurement::parse)
2424                        .map(|m: Measurement| m.to_points())
2425                        .unwrap_or(default_thickness)
2426                };
2427                let per_edge_widths = match edge_elems.len() {
2428                    2 => Some([
2429                        edge_thickness(edge_elems[0]),
2430                        edge_thickness(edge_elems[1]),
2431                        edge_thickness(edge_elems[0]),
2432                        edge_thickness(edge_elems[1]),
2433                    ]),
2434                    3 => Some([
2435                        edge_thickness(edge_elems[0]),
2436                        edge_thickness(edge_elems[1]),
2437                        edge_thickness(edge_elems[2]),
2438                        edge_thickness(edge_elems[1]),
2439                    ]),
2440                    _ => Some([
2441                        edge_thickness(edge_elems[0]),
2442                        edge_thickness(edge_elems[1]),
2443                        edge_thickness(edge_elems[2]),
2444                        edge_thickness(edge_elems[3]),
2445                    ]),
2446                };
2447                if let Some([top_t, right_t, bottom_t, left_t]) = per_edge_widths {
2448                    if !(top_t == bottom_t && bottom_t == left_t && left_t == right_t) {
2449                        style.border_widths = Some([top_t, right_t, bottom_t, left_t]);
2450                    }
2451                }
2452            }
2453        }
2454        if style.bg_color.is_none() {
2455            if let Some(fill) = find_first_child_by_name(border, "fill") {
2456                if !is_hidden(fill) {
2457                    // fixes #809: XFA 3.3 §7.6.3 allows widget fills to be
2458                    // expressed as either <fill><color> or <fill><solid><color>.
2459                    // Supporting both keeps explicit field backgrounds aligned
2460                    // with Adobe/pdfRest output instead of leaving them white.
2461                    style.bg_color = parse_fill_color(fill);
2462                }
2463            }
2464        }
2465    }
2466    if let Some(font) = find_first_child_by_name(elem, "font") {
2467        if let Some(typeface) = attr(font, "typeface") {
2468            style.font_family = Some(typeface.to_string());
2469        }
2470        if let Some(gf) = attr(font, "genericFamily") {
2471            style.generic_family = Some(gf.to_string());
2472        }
2473        if let Some(size_str) = attr(font, "size") {
2474            if let Some(m) = Measurement::parse(size_str) {
2475                let m: Measurement = m;
2476                style.font_size = Some(m.to_points());
2477            }
2478        }
2479        if let Some(weight) = attr(font, "weight") {
2480            style.font_weight = Some(weight.to_string());
2481        }
2482        if let Some(posture) = attr(font, "posture") {
2483            style.font_style = Some(posture.to_string());
2484        }
2485        if let Some(fill) = find_first_child_by_name(font, "fill") {
2486            if let Some(color) = find_first_child_by_name(fill, "color") {
2487                if let Some(rgb) = parse_xfa_color(color) {
2488                    style.text_color = Some(rgb);
2489                }
2490            }
2491        }
2492        // <font color="#RRGGBB"> attribute (fallback when <fill><color> not present)
2493        if style.text_color.is_none() {
2494            if let Some(color_str) = attr(font, "color") {
2495                if let Some(rgb) = parse_font_color_attr(color_str) {
2496                    style.text_color = Some(rgb);
2497                }
2498            }
2499        }
2500        // fontHorizontalScale="96%" → 0.96
2501        if let Some(scale_str) = attr(font, "fontHorizontalScale") {
2502            if let Some(v) = parse_percentage(scale_str) {
2503                style.font_horizontal_scale = Some(v);
2504            }
2505        }
2506        // letterSpacing="-0.018em" or "0.5pt"
2507        if let Some(ls_str) = attr(font, "letterSpacing") {
2508            if let Some(v) = parse_letter_spacing(ls_str, style.font_size.unwrap_or(10.0)) {
2509                style.letter_spacing_pt = Some(v);
2510            }
2511        }
2512        // XFA Spec 3.3 §2.6 — underline="1" (single) or "2" (double)
2513        if let Some(underline_str) = attr(font, "underline") {
2514            style.underline = underline_str == "1" || underline_str == "2";
2515        }
2516        // XFA Spec 3.3 §2.6 — lineThrough="1"
2517        if let Some(line_through_str) = attr(font, "lineThrough") {
2518            style.line_through = line_through_str == "1";
2519        }
2520    }
2521
2522    // Parse <para> for paragraph attributes (XFA 3.3 §D.7).
2523    if let Some(para) = find_first_child_by_name(elem, "para") {
2524        if let Some(v) = attr(para, "spaceAbove").and_then(Measurement::parse) {
2525            style.space_above_pt = Some(v.to_points());
2526        }
2527        if let Some(v) = attr(para, "spaceBelow").and_then(Measurement::parse) {
2528            style.space_below_pt = Some(v.to_points());
2529        }
2530        if let Some(v) = attr(para, "marginLeft").and_then(Measurement::parse) {
2531            style.margin_left_pt = Some(v.to_points());
2532        }
2533        if let Some(v) = attr(para, "marginRight").and_then(Measurement::parse) {
2534            style.margin_right_pt = Some(v.to_points());
2535        }
2536        if let Some(va) = attr(para, "vAlign") {
2537            style.v_align = Some(match va {
2538                "middle" => VerticalAlign::Middle,
2539                "bottom" => VerticalAlign::Bottom,
2540                _ => VerticalAlign::Top,
2541            });
2542        }
2543    }
2544
2545    // Parse <border><corner> for border radius and <border><edge> for style/visibility.
2546    if let Some(border) = border {
2547        if let Some(corner) = find_first_child_by_name(border, "corner") {
2548            if let Some(v) = attr(corner, "radius").and_then(Measurement::parse) {
2549                style.border_radius_pt = Some(v.to_points());
2550            }
2551        }
2552        let edge_elems2: Vec<_> = border
2553            .children()
2554            .filter(|n| n.is_element() && n.tag_name().name() == "edge")
2555            .collect();
2556        if !edge_elems2.is_empty() {
2557            if let Some(stroke) = attr(edge_elems2[0], "stroke") {
2558                if stroke != "none" {
2559                    style.border_style = Some(stroke.to_string());
2560                }
2561            }
2562        }
2563    }
2564
2565    // Parse <caption> for caption text, placement, and reserve (XFA 3.3 §7.4).
2566    if let Some(cap) = parse_caption(elem) {
2567        style.caption_text = Some(cap.text);
2568        style.caption_placement = Some(
2569            match cap.placement {
2570                CaptionPlacement::Left => "left",
2571                CaptionPlacement::Right => "right",
2572                CaptionPlacement::Top => "top",
2573                CaptionPlacement::Bottom => "bottom",
2574                CaptionPlacement::Inline => "inline",
2575            }
2576            .to_string(),
2577        );
2578        style.caption_reserve = cap.reserve;
2579    }
2580
2581    style
2582}
2583
2584fn parse_fill_color(fill_node: Node<'_, '_>) -> Option<(u8, u8, u8)> {
2585    find_first_child_by_name(fill_node, "color")
2586        .and_then(parse_xfa_color)
2587        .or_else(|| {
2588            find_first_child_by_name(fill_node, "solid")
2589                .and_then(|solid| find_first_child_by_name(solid, "color"))
2590                .and_then(parse_xfa_color)
2591        })
2592}
2593
2594fn parse_check_button_mark(elem: Node<'_, '_>) -> Option<String> {
2595    let ui = find_first_child_by_name(elem, "ui")?;
2596    let check_button = ui
2597        .children()
2598        .find(|n| n.is_element() && n.tag_name().name() == "checkButton")?;
2599    let mark = attr(check_button, "mark")?.to_ascii_lowercase();
2600    match mark.as_str() {
2601        "check" | "circle" | "cross" | "diamond" | "square" | "star" => Some(mark),
2602        _ => None,
2603    }
2604}
2605
2606fn parse_check_button_values(
2607    elem: Node<'_, '_>,
2608) -> (Option<String>, Option<String>, Option<String>) {
2609    let ui = match find_first_child_by_name(elem, "ui") {
2610        Some(ui) => ui,
2611        None => return (None, None, None),
2612    };
2613    let check_button = match find_first_child_by_name(ui, "checkButton") {
2614        Some(check_button) => check_button,
2615        None => return (None, None, None),
2616    };
2617
2618    // XFA Spec 3.3 §7.2.21 allows `<items>` to be authored either directly on
2619    // the field or nested inside `<ui><checkButton>`. Adobe honors both forms;
2620    // if we only look on the field, asserted values like "Yes"/"No" collapse
2621    // back to the hardcoded 1/0 fallback and the checkbox renders unchecked.
2622    let items = find_first_child_by_name(elem, "items")
2623        .or_else(|| find_first_child_by_name(check_button, "items"));
2624    let Some(items) = items else {
2625        return (None, None, None);
2626    };
2627
2628    let values: Vec<String> = items
2629        .children()
2630        .filter(|n| n.is_element())
2631        .map(|child| child.text().unwrap_or("").trim().to_string())
2632        .collect();
2633
2634    (
2635        values.first().cloned(),
2636        values.get(1).cloned(),
2637        values.get(2).cloned(),
2638    )
2639}
2640
2641fn parse_xfa_color(color_node: Node<'_, '_>) -> Option<(u8, u8, u8)> {
2642    let value = attr(color_node, "value")?;
2643    let parts: Vec<&str> = value.split(',').collect();
2644    if parts.len() >= 3 {
2645        let r = parts[0].trim().parse::<u8>().ok()?;
2646        let g = parts[1].trim().parse::<u8>().ok()?;
2647        let b = parts[2].trim().parse::<u8>().ok()?;
2648        Some((r, g, b))
2649    } else {
2650        None
2651    }
2652}
2653
2654/// Parse a color string from a `color` attribute on `<font>`.
2655///
2656/// Supported formats:
2657/// - `#RRGGBB` (e.g. `#000080`)
2658/// - `#RGB` shorthand (e.g. `#00F` → `#0000FF`)
2659/// - `r,g,b` with decimal values 0-255 (e.g. `0,0,128`)
2660fn parse_font_color_attr(s: &str) -> Option<(u8, u8, u8)> {
2661    let s = s.trim();
2662    if let Some(hex) = s.strip_prefix('#') {
2663        parse_ascii_hex_color(hex)
2664    } else {
2665        // Try "r,g,b" decimal format
2666        let parts: Vec<&str> = s.split(',').collect();
2667        if parts.len() >= 3 {
2668            let r = parts[0].trim().parse::<u8>().ok()?;
2669            let g = parts[1].trim().parse::<u8>().ok()?;
2670            let b = parts[2].trim().parse::<u8>().ok()?;
2671            Some((r, g, b))
2672        } else {
2673            None
2674        }
2675    }
2676}
2677
2678/// Extract the data field name from a bind ref like `$.listInitiales[*]`.
2679/// Returns `Some("listInitiales")` for that example, or None if no bind/ref.
2680///
2681/// XFA Spec 3.3 §4.4 p199-201 — explicit data references via
2682/// `<bind match="dataRef" ref="SOM.expression">`. The ref is a SOM
2683/// expression that resolves to a data node. We extract the last segment
2684/// name for use as the data matching key.
2685///
2686/// TODO: XFA Spec 3.3 §4.4 p199 — full SOM expression evaluation not
2687/// implemented. Only simple `$.name` and `$.name[*]` patterns are supported.
2688/// Multi-segment paths like `$record.group.field` are partially supported
2689/// (we take the last segment).
2690#[allow(dead_code)]
2691fn parse_bind_data_name(elem: Node<'_, '_>) -> Option<String> {
2692    let bind = find_first_child_by_name(elem, "bind")?;
2693    let ref_val = attr(bind, "ref")?;
2694    // Typical refs: "$.fieldName", "$.fieldName[*]", "$record.fieldName"
2695    // Extract the last dot-separated segment, strip any trailing [*] etc.
2696    let segment = ref_val.rsplit('.').next().unwrap_or(ref_val);
2697    let name = segment.split('[').next().unwrap_or(segment).trim();
2698    if name.is_empty() || name == "$" {
2699        None
2700    } else {
2701        Some(name.to_string())
2702    }
2703}
2704
2705/// XFA Spec 3.3 §4.4.3 p176 — `<bind>` element controls data binding:
2706/// - `match="once"` (default): bind to first matching data node
2707/// - `match="none"`: skip data binding entirely
2708/// - `match="dataRef"`: use explicit SOM ref to locate data
2709/// - `match="global"`: search entire data DOM
2710///
2711/// TODO: XFA Spec 3.3 §4.4.3 p176 — `match="global"` not implemented.
2712/// Currently treated the same as `match="once"`.
2713fn parse_bind(elem: Node<'_, '_>) -> (Option<String>, bool) {
2714    let Some(bind) = find_first_child_by_name(elem, "bind") else {
2715        return (None, false);
2716    };
2717    let bind_none = attr(bind, "match") == Some("none");
2718    let bind_ref = if bind_none {
2719        None
2720    } else {
2721        attr(bind, "ref").map(|s| s.trim().to_string())
2722    };
2723    (bind_ref, bind_none)
2724}
2725
2726#[cfg(test)]
2727mod tests {
2728    use super::*;
2729    use xfa_dom_resolver::data_dom::DataDom;
2730
2731    #[test]
2732    fn parse_css_color_rejects_non_ascii_hex_without_panicking() {
2733        assert_eq!(parse_css_color("#€"), None);
2734        assert_eq!(parse_css_color("#€abc"), None);
2735        assert_eq!(parse_css_color("#0f8"), Some((0x00, 0xff, 0x88)));
2736        assert_eq!(parse_css_color("#00ff88"), Some((0x00, 0xff, 0x88)));
2737    }
2738
2739    #[test]
2740    fn parse_font_color_attr_rejects_non_ascii_hex_without_panicking() {
2741        assert_eq!(parse_font_color_attr("#€"), None);
2742        assert_eq!(parse_font_color_attr("#€abc"), None);
2743        assert_eq!(parse_font_color_attr("#0f8"), Some((0x00, 0xff, 0x88)));
2744        assert_eq!(parse_font_color_attr("#00ff88"), Some((0x00, 0xff, 0x88)));
2745    }
2746
2747    #[test]
2748    fn repeating_subform_expands_from_data() {
2749        let template = r#"<?xml version="1.0"?>
2750<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
2751  <subform name="form1" layout="tb">
2752    <pageSet>
2753      <pageArea name="Page1">
2754        <contentArea w="595pt" h="842pt"/>
2755        <medium short="595pt" long="842pt"/>
2756      </pageArea>
2757    </pageSet>
2758    <subform name="Orders" layout="tb" w="500pt">
2759      <subform name="Order" layout="position" w="500pt" h="60pt">
2760        <occur min="0" max="10" initial="1"/>
2761        <field name="Item" w="200pt" h="20pt" x="0pt" y="0pt"/>
2762        <field name="Qty" w="100pt" h="20pt" x="200pt" y="0pt"/>
2763      </subform>
2764    </subform>
2765  </subform>
2766</template>"#;
2767
2768        let data_xml = r#"<?xml version="1.0"?>
2769<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
2770  <xfa:data>
2771    <form1>
2772      <Order><Item>Widget A</Item><Qty>5</Qty></Order>
2773      <Order><Item>Widget B</Item><Qty>3</Qty></Order>
2774      <Order><Item>Widget C</Item><Qty>7</Qty></Order>
2775    </form1>
2776  </xfa:data>
2777</xfa:datasets>"#;
2778
2779        let data_dom = DataDom::from_xml(data_xml).unwrap();
2780        let merger = FormMerger::new(&data_dom);
2781        let (tree, _root_id) = merger.merge(template).unwrap();
2782
2783        let orders_id = tree
2784            .nodes
2785            .iter()
2786            .enumerate()
2787            .find(|(_, n)| n.name == "Orders")
2788            .map(|(i, _)| FormNodeId(i))
2789            .unwrap();
2790        let orders = tree.get(orders_id);
2791        assert_eq!(orders.children.len(), 3);
2792        assert!(orders
2793            .children
2794            .iter()
2795            .all(|&id| tree.get(id).name == "Order" && tree.get(id).occur.count() == 1));
2796    }
2797
2798    /// Double-wrapped <xfa:data> must be unwrapped for data binding to work.
2799    #[test]
2800    fn repeating_subform_double_wrapped_data() {
2801        let template = r#"<?xml version="1.0"?>
2802<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
2803  <subform name="form1" layout="tb">
2804    <pageSet>
2805      <pageArea name="Page">
2806        <contentArea w="595pt" h="842pt"/>
2807        <medium short="595pt" long="842pt"/>
2808      </pageArea>
2809    </pageSet>
2810    <subform name="Orders" layout="tb" w="559pt">
2811      <subform name="CoreOrders" layout="tb" w="361pt">
2812        <subform name="Order" layout="position" w="360pt" h="162pt">
2813          <occur min="0" max="3" initial="1"/>
2814          <field name="Item" w="200pt" h="25pt" x="9pt" y="9pt"/>
2815        </subform>
2816      </subform>
2817    </subform>
2818  </subform>
2819</template>"#;
2820
2821        let data_xml = concat!(
2822            r#"<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">"#,
2823            r#"<xfa:data><xfa:data xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">"#,
2824            r#"<form1>"#,
2825            r#"<Order><Item>A</Item></Order>"#,
2826            r#"<Order><Item>B</Item></Order>"#,
2827            r#"<Order><Item>C</Item></Order>"#,
2828            r#"</form1>"#,
2829            r#"</xfa:data></xfa:data></xfa:datasets>"#,
2830        );
2831
2832        let data_dom = DataDom::from_xml(data_xml).unwrap();
2833        let merger = FormMerger::new(&data_dom);
2834        let (tree, _root_id) = merger.merge(template).unwrap();
2835
2836        let core_orders_id = tree
2837            .nodes
2838            .iter()
2839            .enumerate()
2840            .find(|(_, n)| n.name == "CoreOrders")
2841            .map(|(i, _)| FormNodeId(i))
2842            .unwrap();
2843        let core_orders = tree.get(core_orders_id);
2844        assert_eq!(
2845            core_orders.children.len(),
2846            3,
2847            "Expected 3 Order instances from double-wrapped data"
2848        );
2849    }
2850
2851    /// Bind ref resolution: when subform name differs from data name,
2852    /// <bind match="dataRef" ref="$.dataName[*]"> should use the data name.
2853    #[test]
2854    fn repeating_subform_bind_ref_resolves_data_name() {
2855        let template = r#"<?xml version="1.0"?>
2856<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
2857  <subform name="form" layout="tb">
2858    <pageSet>
2859      <pageArea name="Page1">
2860        <contentArea w="595pt" h="842pt"/>
2861        <medium short="595pt" long="842pt"/>
2862      </pageArea>
2863    </pageSet>
2864    <subform name="ListItems" layout="tb" w="500pt">
2865      <subform name="ItemGroup" layout="tb" w="500pt" h="50pt">
2866        <occur min="0" max="-1"/>
2867        <bind match="dataRef" ref="$.itemGroup[*]"/>
2868        <field name="title" w="200pt" h="20pt" x="0pt" y="0pt"/>
2869      </subform>
2870    </subform>
2871  </subform>
2872</template>"#;
2873
2874        let data_xml = r#"<?xml version="1.0"?>
2875<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
2876  <xfa:data>
2877    <form>
2878      <itemGroup><title>Group A</title></itemGroup>
2879      <itemGroup><title>Group B</title></itemGroup>
2880    </form>
2881  </xfa:data>
2882</xfa:datasets>"#;
2883
2884        let data_dom = DataDom::from_xml(data_xml).unwrap();
2885        let merger = FormMerger::new(&data_dom);
2886        let (tree, _root_id) = merger.merge(template).unwrap();
2887
2888        let list_items = tree
2889            .nodes
2890            .iter()
2891            .find(|n| n.name == "ListItems")
2892            .expect("ListItems must exist");
2893        assert_eq!(
2894            list_items.children.len(),
2895            2,
2896            "Expected 2 instances from bind ref $.itemGroup[*]"
2897        );
2898    }
2899
2900    #[test]
2901    fn repeating_table_rows_expand_as_sibling_rows() {
2902        let template = r#"<?xml version="1.0"?>
2903<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
2904  <subform name="form1" layout="tb">
2905    <pageSet>
2906      <pageArea name="Page1">
2907        <contentArea w="595pt" h="842pt"/>
2908        <medium short="595pt" long="842pt"/>
2909      </pageArea>
2910    </pageSet>
2911    <subform name="Table1" layout="table" columnWidths="100pt 100pt 100pt">
2912      <subform name="HeaderRow" layout="row">
2913        <draw name="H1" w="100pt" h="20pt"/>
2914        <draw name="H2" w="100pt" h="20pt"/>
2915        <draw name="H3" w="100pt" h="20pt"/>
2916      </subform>
2917      <subform name="Row1" layout="row">
2918        <occur min="0" max="-1"/>
2919        <field name="Cell1" w="100pt" h="20pt"/>
2920        <field name="Cell2" w="100pt" h="20pt"/>
2921        <field name="Cell3" w="100pt" h="20pt"/>
2922      </subform>
2923    </subform>
2924  </subform>
2925</template>"#;
2926
2927        let data_xml = r#"<?xml version="1.0"?>
2928<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
2929  <xfa:data>
2930    <form1>
2931      <Table1>
2932        <Row1><Cell1>A1</Cell1><Cell2>B1</Cell2><Cell3>C1</Cell3></Row1>
2933        <Row1><Cell1>A2</Cell1><Cell2>B2</Cell2><Cell3>C2</Cell3></Row1>
2934        <Row1><Cell1>A3</Cell1><Cell2>B3</Cell2><Cell3>C3</Cell3></Row1>
2935        <Row1><Cell1>A4</Cell1><Cell2>B4</Cell2><Cell3>C4</Cell3></Row1>
2936      </Table1>
2937    </form1>
2938  </xfa:data>
2939</xfa:datasets>"#;
2940
2941        let data_dom = DataDom::from_xml(data_xml).unwrap();
2942        let merger = FormMerger::new(&data_dom);
2943        let (tree, _root_id) = merger.merge(template).unwrap();
2944
2945        let table_id = tree
2946            .nodes
2947            .iter()
2948            .enumerate()
2949            .find(|(_, n)| n.name == "Table1")
2950            .map(|(i, _)| FormNodeId(i))
2951            .unwrap();
2952        let table = tree.get(table_id);
2953        assert_eq!(table.children.len(), 5, "header + 4 repeated data rows");
2954        assert_eq!(tree.get(table.children[0]).name, "HeaderRow");
2955        assert!(table.children[1..]
2956            .iter()
2957            .all(|&id| tree.get(id).name == "Row1" && tree.get(id).children.len() == 3));
2958    }
2959
2960    #[test]
2961    fn repeating_subform_clamps_to_occur_max() {
2962        let template = r#"<?xml version="1.0"?>
2963<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
2964  <subform name="form1" layout="tb">
2965    <subform name="Orders" layout="tb">
2966      <subform name="Order" layout="position" w="200pt" h="20pt">
2967        <occur min="0" max="2"/>
2968        <field name="Item" w="100pt" h="20pt"/>
2969      </subform>
2970    </subform>
2971  </subform>
2972</template>"#;
2973
2974        let data_xml = r#"<?xml version="1.0"?>
2975<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
2976  <xfa:data>
2977    <form1>
2978      <Order><Item>A</Item></Order>
2979      <Order><Item>B</Item></Order>
2980      <Order><Item>C</Item></Order>
2981      <Order><Item>D</Item></Order>
2982      <Order><Item>E</Item></Order>
2983    </form1>
2984  </xfa:data>
2985</xfa:datasets>"#;
2986
2987        let data_dom = DataDom::from_xml(data_xml).unwrap();
2988        let merger = FormMerger::new(&data_dom);
2989        let (tree, _root_id) = merger.merge(template).unwrap();
2990
2991        let orders_id = tree
2992            .nodes
2993            .iter()
2994            .enumerate()
2995            .find(|(_, n)| n.name == "Orders")
2996            .map(|(i, _)| FormNodeId(i))
2997            .unwrap();
2998        assert_eq!(tree.get(orders_id).children.len(), 2);
2999    }
3000
3001    #[test]
3002    fn repeating_subform_unbounded_uses_data_count() {
3003        let template = r#"<?xml version="1.0"?>
3004<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
3005  <subform name="form1" layout="tb">
3006    <subform name="Orders" layout="tb">
3007      <subform name="Order" layout="position" w="200pt" h="20pt">
3008        <occur min="0" max="-1"/>
3009        <field name="Item" w="100pt" h="20pt"/>
3010      </subform>
3011    </subform>
3012  </subform>
3013</template>"#;
3014
3015        let data_xml = r#"<?xml version="1.0"?>
3016<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
3017  <xfa:data>
3018    <form1>
3019      <Order><Item>A</Item></Order>
3020      <Order><Item>B</Item></Order>
3021      <Order><Item>C</Item></Order>
3022    </form1>
3023  </xfa:data>
3024</xfa:datasets>"#;
3025
3026        let data_dom = DataDom::from_xml(data_xml).unwrap();
3027        let merger = FormMerger::new(&data_dom);
3028        let (tree, _root_id) = merger.merge(template).unwrap();
3029
3030        let orders_id = tree
3031            .nodes
3032            .iter()
3033            .enumerate()
3034            .find(|(_, n)| n.name == "Orders")
3035            .map(|(i, _)| FormNodeId(i))
3036            .unwrap();
3037        assert_eq!(tree.get(orders_id).children.len(), 3);
3038    }
3039
3040    #[test]
3041    fn repeating_subform_respects_occur_min() {
3042        let template = r#"<?xml version="1.0"?>
3043<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
3044  <subform name="form1" layout="tb">
3045    <subform name="Orders" layout="tb">
3046      <subform name="Order" layout="position" w="200pt" h="20pt">
3047        <occur min="2" max="-1"/>
3048        <field name="Item" w="100pt" h="20pt"/>
3049      </subform>
3050    </subform>
3051  </subform>
3052</template>"#;
3053
3054        let data_xml = r#"<?xml version="1.0"?>
3055<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
3056  <xfa:data>
3057    <form1/>
3058  </xfa:data>
3059</xfa:datasets>"#;
3060
3061        let data_dom = DataDom::from_xml(data_xml).unwrap();
3062        let merger = FormMerger::new(&data_dom);
3063        let (tree, _root_id) = merger.merge(template).unwrap();
3064
3065        let orders_id = tree
3066            .nodes
3067            .iter()
3068            .enumerate()
3069            .find(|(_, n)| n.name == "Orders")
3070            .map(|(i, _)| FormNodeId(i))
3071            .unwrap();
3072        let orders = tree.get(orders_id);
3073        assert_eq!(orders.children.len(), 2);
3074        assert!(orders
3075            .children
3076            .iter()
3077            .all(|&id| tree.get(id).name == "Order"));
3078    }
3079
3080    /// Root subform binds to data root when names match (XFA §4.7.2).
3081    #[test]
3082    fn root_subform_binds_to_matching_data_root() {
3083        let template = r#"<?xml version="1.0"?>
3084<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
3085  <subform name="form" layout="tb">
3086    <pageSet>
3087      <pageArea name="Page1">
3088        <contentArea w="595pt" h="842pt"/>
3089        <medium short="595pt" long="842pt"/>
3090      </pageArea>
3091    </pageSet>
3092    <field name="title" w="200pt" h="20pt" x="0pt" y="0pt"/>
3093    <field name="code" w="200pt" h="20pt" x="0pt" y="20pt"/>
3094  </subform>
3095</template>"#;
3096
3097        let data_xml = r#"<?xml version="1.0"?>
3098<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
3099  <xfa:data>
3100    <form>
3101      <title>Hello World</title>
3102      <code>42</code>
3103    </form>
3104  </xfa:data>
3105</xfa:datasets>"#;
3106
3107        let data_dom = DataDom::from_xml(data_xml).unwrap();
3108        let merger = FormMerger::new(&data_dom);
3109        let (tree, _root_id) = merger.merge(template).unwrap();
3110
3111        let title_node = tree
3112            .nodes
3113            .iter()
3114            .find(|n| n.name == "title")
3115            .expect("title field must exist");
3116        match &title_node.node_type {
3117            FormNodeType::Field { value } => {
3118                assert_eq!(value, "Hello World", "title should bind to data root");
3119            }
3120            _ => panic!("title should be a field"),
3121        }
3122    }
3123
3124    #[test]
3125    fn check_button_mark_parsed_into_style() {
3126        let template = r#"<?xml version="1.0"?>
3127<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
3128  <subform name="form" layout="tb">
3129    <pageSet>
3130      <pageArea name="Page1">
3131        <contentArea w="595pt" h="842pt"/>
3132        <medium short="595pt" long="842pt"/>
3133      </pageArea>
3134    </pageSet>
3135    <field name="agree" w="20pt" h="20pt">
3136      <ui><checkButton mark="circle"/></ui>
3137      <value><text>1</text></value>
3138      <items><text>1</text><text>0</text><text>2</text></items>
3139    </field>
3140  </subform>
3141</template>"#;
3142
3143        let data_xml = r#"<?xml version="1.0"?>
3144<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
3145  <xfa:data>
3146    <form>
3147      <agree>1</agree>
3148    </form>
3149  </xfa:data>
3150</xfa:datasets>"#;
3151
3152        let data_dom = DataDom::from_xml(data_xml).unwrap();
3153        let merger = FormMerger::new(&data_dom);
3154        let (tree, _root_id) = merger.merge(template).unwrap();
3155
3156        let agree_id = tree
3157            .nodes
3158            .iter()
3159            .enumerate()
3160            .find(|(_, n)| n.name == "agree")
3161            .map(|(i, _)| FormNodeId(i))
3162            .expect("agree field must exist");
3163        let meta = tree.meta(agree_id);
3164        assert_eq!(meta.field_kind, FieldKind::Checkbox);
3165        assert_eq!(meta.style.check_button_mark.as_deref(), Some("circle"));
3166        assert_eq!(meta.style.check_button_on_value.as_deref(), Some("1"));
3167        assert_eq!(meta.style.check_button_off_value.as_deref(), Some("0"));
3168        assert_eq!(meta.style.check_button_neutral_value.as_deref(), Some("2"));
3169    }
3170
3171    #[test]
3172    fn nested_check_button_items_are_used_for_checkbox_values() {
3173        let template = r#"<?xml version="1.0"?>
3174<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
3175  <subform name="form" layout="tb">
3176    <pageSet>
3177      <pageArea name="Page1">
3178        <contentArea w="595pt" h="842pt"/>
3179        <medium short="595pt" long="842pt"/>
3180      </pageArea>
3181    </pageSet>
3182    <field name="agree" w="20pt" h="20pt">
3183      <ui>
3184        <checkButton>
3185          <items><text>Yes</text><text>No</text></items>
3186        </checkButton>
3187      </ui>
3188      <value><text>Yes</text></value>
3189    </field>
3190  </subform>
3191</template>"#;
3192
3193        let data_xml = r#"<?xml version="1.0"?>
3194<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
3195  <xfa:data>
3196    <form>
3197      <agree>Yes</agree>
3198    </form>
3199  </xfa:data>
3200</xfa:datasets>"#;
3201
3202        let data_dom = DataDom::from_xml(data_xml).unwrap();
3203        let merger = FormMerger::new(&data_dom);
3204        let (tree, _root_id) = merger.merge(template).unwrap();
3205
3206        let agree_id = tree
3207            .nodes
3208            .iter()
3209            .enumerate()
3210            .find(|(_, n)| n.name == "agree")
3211            .map(|(i, _)| FormNodeId(i))
3212            .expect("agree field must exist");
3213        let meta = tree.meta(agree_id);
3214        assert_eq!(meta.field_kind, FieldKind::Checkbox);
3215        assert_eq!(meta.style.check_button_on_value.as_deref(), Some("Yes"));
3216        assert_eq!(meta.style.check_button_off_value.as_deref(), Some("No"));
3217        match &tree.get(agree_id).node_type {
3218            FormNodeType::Field { value } => assert_eq!(value, "Yes"),
3219            _ => panic!("agree should be a field"),
3220        }
3221    }
3222
3223    #[test]
3224    fn border_fill_solid_color_populates_background_color() {
3225        let template = r#"<?xml version="1.0"?>
3226<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
3227  <subform name="form" layout="tb">
3228    <pageSet>
3229      <pageArea name="Page1">
3230        <contentArea w="595pt" h="842pt"/>
3231        <medium short="595pt" long="842pt"/>
3232      </pageArea>
3233    </pageSet>
3234    <field name="name" w="200pt" h="20pt">
3235      <ui><textEdit/></ui>
3236      <border>
3237        <fill>
3238          <solid>
3239            <color value="240,240,240"/>
3240          </solid>
3241        </fill>
3242      </border>
3243    </field>
3244  </subform>
3245</template>"#;
3246
3247        let data_xml = r#"<?xml version="1.0"?>
3248<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
3249  <xfa:data>
3250    <form>
3251      <name>Jane Doe</name>
3252    </form>
3253  </xfa:data>
3254</xfa:datasets>"#;
3255
3256        let data_dom = DataDom::from_xml(data_xml).unwrap();
3257        let merger = FormMerger::new(&data_dom);
3258        let (tree, _root_id) = merger.merge(template).unwrap();
3259
3260        let name_id = tree
3261            .nodes
3262            .iter()
3263            .enumerate()
3264            .find(|(_, n)| n.name == "name")
3265            .map(|(i, _)| FormNodeId(i))
3266            .expect("name field must exist");
3267        assert_eq!(tree.meta(name_id).style.bg_color, Some((240, 240, 240)));
3268    }
3269
3270    #[test]
3271    fn border_fill_direct_color_populates_button_bg_color() {
3272        // Mirrors 053ecab3's E-Mail submit button: <border><fill><color value=...>
3273        // without the <solid> wrapper. Parser must pick the color up so
3274        // render_button() draws the yellow fill instead of the gray fallback.
3275        let template = r#"<?xml version="1.0"?>
3276<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
3277  <subform name="form" layout="tb">
3278    <pageSet>
3279      <pageArea name="Page1">
3280        <contentArea w="595pt" h="842pt"/>
3281        <medium short="595pt" long="842pt"/>
3282      </pageArea>
3283    </pageSet>
3284    <field name="Btn" w="25.4mm" h="9.525mm">
3285      <ui><button highlight="inverted"/></ui>
3286      <caption><value><text>E-Mail</text></value></caption>
3287      <border hand="right">
3288        <edge stroke="raised"/>
3289        <fill><color value="255,255,153"/></fill>
3290      </border>
3291    </field>
3292  </subform>
3293</template>"#;
3294        let data_xml = r#"<?xml version="1.0"?>
3295<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
3296  <xfa:data><form/></xfa:data>
3297</xfa:datasets>"#;
3298        let data_dom = DataDom::from_xml(data_xml).unwrap();
3299        let merger = FormMerger::new(&data_dom);
3300        let (tree, _root_id) = merger.merge(template).unwrap();
3301        let btn_id = tree
3302            .nodes
3303            .iter()
3304            .enumerate()
3305            .find(|(_, n)| n.name == "Btn")
3306            .map(|(i, _)| FormNodeId(i))
3307            .expect("button field must exist");
3308        assert_eq!(tree.meta(btn_id).style.bg_color, Some((255, 255, 153)));
3309    }
3310
3311    #[test]
3312    fn rich_text_exdata_captions_are_collapsed_to_plain_text() {
3313        let template = r#"<?xml version="1.0"?>
3314<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
3315  <subform name="form" layout="tb">
3316    <pageSet>
3317      <pageArea name="Page1">
3318        <contentArea w="595pt" h="842pt"/>
3319        <medium short="595pt" long="842pt"/>
3320      </pageArea>
3321    </pageSet>
3322    <field name="patient_name" w="200pt" h="20pt">
3323      <ui><textEdit/></ui>
3324      <caption placement="top" reserve="0.230972in">
3325        <value>
3326          <exData contentType="text/html" maxLength="0">
3327            <body xmlns="http://www.w3.org/1999/xhtml"
3328                  xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"
3329                  xfa:APIVersion="1.4.4136.0">
3330              <p>NAME<span style="xfa-spacerun:yes"> </span><span style="font-style:italic">(Last, First, Middle Initial)</span></p>
3331            </body>
3332          </exData>
3333        </value>
3334      </caption>
3335    </field>
3336    <field name="telephone" w="200pt" h="20pt">
3337      <ui><textEdit/></ui>
3338      <caption placement="top" reserve="0.230972in">
3339        <value>
3340          <exData contentType="text/html" maxLength="0">
3341            <body xmlns="http://www.w3.org/1999/xhtml"
3342                  xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"
3343                  xfa:APIVersion="1.4.4136.0">
3344              <p>TELEPHONE<span style="font-style:italic"> (Include area code)</span></p>
3345            </body>
3346          </exData>
3347        </value>
3348      </caption>
3349    </field>
3350  </subform>
3351</template>"#;
3352
3353        let data_dom = DataDom::new();
3354        let merger = FormMerger::new(&data_dom);
3355        let (tree, _root_id) = merger.merge(template).unwrap();
3356
3357        let patient_name_id = tree
3358            .nodes
3359            .iter()
3360            .enumerate()
3361            .find(|(_, n)| n.name == "patient_name")
3362            .map(|(i, _)| FormNodeId(i))
3363            .expect("patient_name field must exist");
3364        let patient_name = tree.meta(patient_name_id);
3365        assert_eq!(
3366            patient_name.style.caption_text.as_deref(),
3367            Some("NAME (Last, First, Middle Initial)")
3368        );
3369        assert_eq!(patient_name.style.caption_placement.as_deref(), Some("top"));
3370        assert_eq!(
3371            tree.get(patient_name_id)
3372                .box_model
3373                .caption
3374                .as_ref()
3375                .map(|caption| caption.text.as_str()),
3376            Some("NAME (Last, First, Middle Initial)")
3377        );
3378
3379        let telephone_id = tree
3380            .nodes
3381            .iter()
3382            .enumerate()
3383            .find(|(_, n)| n.name == "telephone")
3384            .map(|(i, _)| FormNodeId(i))
3385            .expect("telephone field must exist");
3386        let telephone = tree.meta(telephone_id);
3387        assert_eq!(
3388            telephone.style.caption_text.as_deref(),
3389            Some("TELEPHONE (Include area code)")
3390        );
3391        assert_eq!(telephone.style.caption_placement.as_deref(), Some("top"));
3392        assert_eq!(
3393            tree.get(telephone_id)
3394                .box_model
3395                .caption
3396                .as_ref()
3397                .map(|caption| caption.text.as_str()),
3398            Some("TELEPHONE (Include area code)")
3399        );
3400    }
3401
3402    #[test]
3403    fn border_edge_order_keeps_bottom_only_underline() {
3404        let template = r#"<?xml version="1.0"?>
3405<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
3406  <subform name="form" layout="tb">
3407    <pageSet>
3408      <pageArea name="Page1">
3409        <contentArea w="595pt" h="842pt"/>
3410        <medium short="595pt" long="842pt"/>
3411      </pageArea>
3412    </pageSet>
3413    <field name="underlined" w="200pt" h="20pt">
3414      <ui><textEdit/></ui>
3415      <border>
3416        <edge stroke="none" thickness="1pt"/>
3417        <edge stroke="none" thickness="2pt"/>
3418        <edge stroke="solid" thickness="3pt"/>
3419        <edge stroke="none" thickness="4pt"/>
3420      </border>
3421    </field>
3422  </subform>
3423</template>"#;
3424
3425        let data_dom = DataDom::new();
3426        let merger = FormMerger::new(&data_dom);
3427        let (tree, _root_id) = merger.merge(template).unwrap();
3428
3429        let underlined_id = tree
3430            .nodes
3431            .iter()
3432            .enumerate()
3433            .find(|(_, n)| n.name == "underlined")
3434            .map(|(i, _)| FormNodeId(i))
3435            .expect("underlined field must exist");
3436        let style = &tree.meta(underlined_id).style;
3437
3438        // Edges are interpreted in XFA order top/right/bottom/left.
3439        assert_eq!(style.border_edges, [false, false, true, false]);
3440        assert_eq!(style.border_widths, Some([1.0, 2.0, 3.0, 4.0]));
3441        assert_eq!(style.border_width_pt, Some(3.0));
3442    }
3443
3444    #[test]
3445    fn round_check_button_parsed_as_radio() {
3446        let template = r#"<?xml version="1.0"?>
3447<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
3448  <subform name="form" layout="tb">
3449    <pageSet>
3450      <pageArea name="Page1">
3451        <contentArea w="595pt" h="842pt"/>
3452        <medium short="595pt" long="842pt"/>
3453      </pageArea>
3454    </pageSet>
3455    <field name="choice" w="20pt" h="20pt">
3456      <ui><checkButton shape="round"/></ui>
3457      <items><text>Y</text><text>N</text></items>
3458    </field>
3459  </subform>
3460</template>"#;
3461
3462        let data_dom = DataDom::new();
3463        let merger = FormMerger::new(&data_dom);
3464        let (tree, _root_id) = merger.merge(template).unwrap();
3465
3466        let choice_id = tree
3467            .nodes
3468            .iter()
3469            .enumerate()
3470            .find(|(_, n)| n.name == "choice")
3471            .map(|(i, _)| FormNodeId(i))
3472            .expect("choice field must exist");
3473        assert_eq!(tree.meta(choice_id).field_kind, FieldKind::Radio);
3474    }
3475
3476    #[test]
3477    fn excl_group_value_selects_matching_child() {
3478        let template = r#"<?xml version="1.0"?>
3479<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
3480  <subform name="form" layout="tb">
3481    <pageSet>
3482      <pageArea name="Page1">
3483        <contentArea w="595pt" h="842pt"/>
3484        <medium short="595pt" long="842pt"/>
3485      </pageArea>
3486    </pageSet>
3487    <exclGroup name="choice">
3488      <field name="yes" w="20pt" h="20pt">
3489        <ui><checkButton shape="round"/></ui>
3490        <items><text>Y</text><text>N</text></items>
3491      </field>
3492      <field name="no" w="20pt" h="20pt">
3493        <ui><checkButton shape="round"/></ui>
3494        <items><text>N</text><text>Y</text></items>
3495      </field>
3496    </exclGroup>
3497  </subform>
3498</template>"#;
3499
3500        let data_xml = r#"<?xml version="1.0"?>
3501<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
3502  <xfa:data>
3503    <form>
3504      <choice>Y</choice>
3505    </form>
3506  </xfa:data>
3507</xfa:datasets>"#;
3508
3509        let data_dom = DataDom::from_xml(data_xml).unwrap();
3510        let merger = FormMerger::new(&data_dom);
3511        let (tree, _root_id) = merger.merge(template).unwrap();
3512
3513        let yes_node = tree
3514            .nodes
3515            .iter()
3516            .find(|n| n.name == "yes")
3517            .expect("yes field must exist");
3518        let no_node = tree
3519            .nodes
3520            .iter()
3521            .find(|n| n.name == "no")
3522            .expect("no field must exist");
3523
3524        match (&yes_node.node_type, &no_node.node_type) {
3525            (FormNodeType::Field { value: yes }, FormNodeType::Field { value: no }) => {
3526                assert_eq!(yes, "Y");
3527                assert!(
3528                    no.is_empty(),
3529                    "non-selected exclGroup child should be cleared"
3530                );
3531            }
3532            _ => panic!("exclGroup children must be fields"),
3533        }
3534    }
3535
3536    #[test]
3537    fn draw_with_image_creates_image_node() {
3538        // Base64-encoded 1x1 red PNG
3539        let image_b64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==";
3540        let template = format!(
3541            r#"<?xml version="1.0"?>
3542<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
3543  <subform name="form1" layout="tb">
3544    <pageSet>
3545      <pageArea name="Page1">
3546        <contentArea w="595pt" h="842pt"/>
3547        <medium short="595pt" long="842pt"/>
3548      </pageArea>
3549    </pageSet>
3550    <draw name="logo" x="0pt" y="0pt" w="100pt" h="100pt">
3551      <value>
3552        <image contentType="image/png">{}</image>
3553      </value>
3554    </draw>
3555  </subform>
3556</template>"#,
3557            image_b64
3558        );
3559
3560        let data_dom = DataDom::new();
3561        let merger = FormMerger::new(&data_dom);
3562        let (tree, _root_id) = merger.merge(&template).unwrap();
3563
3564        let logo_node = tree
3565            .nodes
3566            .iter()
3567            .find(|n| n.name == "logo")
3568            .expect("logo draw must exist");
3569
3570        match &logo_node.node_type {
3571            FormNodeType::Image { data, mime_type } => {
3572                assert_eq!(mime_type, "image/png");
3573                assert!(!data.is_empty(), "image data should not be empty");
3574                // Verify it's valid PNG by checking magic bytes
3575                assert!(
3576                    data.starts_with(&[0x89, 0x50, 0x4E, 0x47]),
3577                    "decoded data should be PNG"
3578                );
3579            }
3580            _ => panic!(
3581                "draw with image should create Image node, got {:?}",
3582                logo_node.node_type
3583            ),
3584        }
3585    }
3586
3587    #[test]
3588    fn draw_with_rectangle_creates_draw_node() {
3589        let template = r#"<?xml version="1.0"?>
3590<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
3591  <subform name="form1" layout="tb">
3592    <pageSet>
3593      <pageArea name="Page1">
3594        <contentArea w="595pt" h="842pt"/>
3595        <medium short="595pt" long="842pt"/>
3596      </pageArea>
3597    </pageSet>
3598    <draw name="box" x="0pt" y="0pt" w="100pt" h="100pt">
3599      <value>
3600        <rectangle/>
3601      </value>
3602    </draw>
3603  </subform>
3604</template>"#;
3605
3606        let data_dom = DataDom::new();
3607        let merger = FormMerger::new(&data_dom);
3608        let (tree, _root_id) = merger.merge(template).unwrap();
3609
3610        let box_node = tree
3611            .nodes
3612            .iter()
3613            .find(|n| n.name == "box")
3614            .expect("box draw must exist");
3615
3616        match &box_node.node_type {
3617            FormNodeType::Draw(DrawContent::Rectangle { .. }) => {}
3618            other => panic!(
3619                "draw with rectangle should create Rectangle draw, got {:?}",
3620                other
3621            ),
3622        }
3623    }
3624
3625    #[test]
3626    fn field_anchor_type_is_preserved_in_merged_meta() {
3627        let template = r#"<?xml version="1.0"?>
3628<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
3629  <subform name="form1" layout="tb">
3630    <pageSet>
3631      <pageArea name="Page1">
3632        <contentArea w="595pt" h="842pt"/>
3633        <medium short="595pt" long="842pt"/>
3634      </pageArea>
3635    </pageSet>
3636    <field name="centeredBox" layout="position" anchorType="middleCenter" w="20pt" h="10pt" x="100pt" y="50pt"/>
3637  </subform>
3638</template>"#;
3639
3640        let data_dom = DataDom::new();
3641        let merger = FormMerger::new(&data_dom);
3642        let (tree, _root_id) = merger.merge(template).unwrap();
3643
3644        let centered_id = tree
3645            .nodes
3646            .iter()
3647            .enumerate()
3648            .find(|(_, n)| n.name == "centeredBox")
3649            .map(|(i, _)| FormNodeId(i))
3650            .expect("centeredBox field must exist");
3651
3652        assert_eq!(tree.meta(centered_id).anchor_type, AnchorType::MiddleCenter);
3653    }
3654
3655    #[test]
3656    fn area_without_layout_uses_positioned_when_children_have_coordinates() {
3657        let template = r#"<?xml version="1.0"?>
3658<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
3659  <subform name="form1" layout="tb">
3660    <pageSet>
3661      <pageArea name="Page1">
3662        <contentArea w="595pt" h="842pt"/>
3663        <medium short="595pt" long="842pt"/>
3664      </pageArea>
3665    </pageSet>
3666    <area name="Nagl">
3667      <draw name="headerLine" x="10pt" y="12pt" w="100pt" h="1pt">
3668        <value><rectangle/></value>
3669      </draw>
3670    </area>
3671  </subform>
3672</template>"#;
3673
3674        let data_dom = DataDom::new();
3675        let merger = FormMerger::new(&data_dom);
3676        let (tree, _root_id) = merger.merge(template).unwrap();
3677
3678        let nagl = tree
3679            .nodes
3680            .iter()
3681            .find(|n| n.name == "Nagl")
3682            .expect("Nagl area must exist");
3683
3684        assert_eq!(nagl.layout, LayoutStrategy::Positioned);
3685    }
3686
3687    // ─── #1092: matchTemplate merge mode ────────────────────────────────────
3688
3689    /// Default mode (no `matchTemplate` keyword in XML) must be ConsumeData.
3690    #[test]
3691    fn merge_mode_defaults_to_consume_data() {
3692        let template = r#"<?xml version="1.0"?>
3693<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
3694  <subform name="form1" layout="tb"/>
3695</template>"#;
3696        assert_eq!(detect_merge_mode(template), MergeMode::ConsumeData);
3697    }
3698
3699    /// When the XML contains `matchTemplate`, the mode is detected.
3700    #[test]
3701    fn merge_mode_detects_match_template() {
3702        let template = r#"<?xml version="1.0"?>
3703<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
3704  <!-- config: matchTemplate -->
3705  <subform name="form1" layout="tb"/>
3706</template>"#;
3707        assert_eq!(detect_merge_mode(template), MergeMode::MatchTemplate);
3708    }
3709
3710    /// In matchTemplate mode a flat data structure binds to named fields
3711    /// regardless of template hierarchy.
3712    #[test]
3713    fn match_template_binds_flat_data_to_named_fields() {
3714        // Template has a nested subform but data is flat
3715        let template = r#"<?xml version="1.0"?>
3716<!-- matchTemplate -->
3717<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
3718  <subform name="form1" layout="tb">
3719    <pageSet>
3720      <pageArea name="Page1">
3721        <contentArea w="595pt" h="842pt"/>
3722        <medium short="595pt" long="842pt"/>
3723      </pageArea>
3724    </pageSet>
3725    <subform name="section" layout="tb">
3726      <field name="firstName" w="200pt" h="20pt" x="0pt" y="0pt"/>
3727      <field name="lastName" w="200pt" h="20pt" x="0pt" y="20pt"/>
3728    </subform>
3729  </subform>
3730</template>"#;
3731
3732        // Flat data — no "section" group, fields at root level
3733        let data_xml = r#"<?xml version="1.0"?>
3734<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
3735  <xfa:data>
3736    <form1>
3737      <firstName>John</firstName>
3738      <lastName>Doe</lastName>
3739    </form1>
3740  </xfa:data>
3741</xfa:datasets>"#;
3742
3743        let data_dom = DataDom::from_xml(data_xml).unwrap();
3744        let merger = FormMerger::new(&data_dom);
3745        let (tree, _root_id) = merger.merge(template).unwrap();
3746
3747        let first = tree
3748            .nodes
3749            .iter()
3750            .find(|n| n.name == "firstName")
3751            .expect("firstName must exist");
3752        let last = tree
3753            .nodes
3754            .iter()
3755            .find(|n| n.name == "lastName")
3756            .expect("lastName must exist");
3757
3758        match &first.node_type {
3759            FormNodeType::Field { value } => assert_eq!(value, "John"),
3760            _ => panic!("firstName must be a field"),
3761        }
3762        match &last.node_type {
3763            FormNodeType::Field { value } => assert_eq!(value, "Doe"),
3764            _ => panic!("lastName must be a field"),
3765        }
3766    }
3767
3768    // ─── #1093: scope matching ────────────────────────────────────────────────
3769
3770    /// Sibling resolution: template `a.b.city` finds data `a.city` when `b`
3771    /// has no `city` child.
3772    #[test]
3773    fn field_binds_via_sibling_scope() {
3774        let template = r#"<?xml version="1.0"?>
3775<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
3776  <subform name="form1" layout="tb">
3777    <pageSet>
3778      <pageArea name="Page1">
3779        <contentArea w="595pt" h="842pt"/>
3780        <medium short="595pt" long="842pt"/>
3781      </pageArea>
3782    </pageSet>
3783    <subform name="address" layout="tb">
3784      <subform name="details" layout="tb">
3785        <field name="city" w="200pt" h="20pt" x="0pt" y="0pt"/>
3786      </subform>
3787    </subform>
3788  </subform>
3789</template>"#;
3790
3791        // city is a sibling of details (at the address level), not inside details
3792        let data_xml = r#"<?xml version="1.0"?>
3793<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
3794  <xfa:data>
3795    <form1>
3796      <address>
3797        <city>Rotterdam</city>
3798        <details/>
3799      </address>
3800    </form1>
3801  </xfa:data>
3802</xfa:datasets>"#;
3803
3804        let data_dom = DataDom::from_xml(data_xml).unwrap();
3805        let merger = FormMerger::new(&data_dom);
3806        let (tree, _root_id) = merger.merge(template).unwrap();
3807
3808        let city = tree
3809            .nodes
3810            .iter()
3811            .find(|n| n.name == "city")
3812            .expect("city field must exist");
3813        match &city.node_type {
3814            FormNodeType::Field { value } => assert_eq!(
3815                value, "Rotterdam",
3816                "city should be found via sibling scope resolution"
3817            ),
3818            _ => panic!("city should be a field"),
3819        }
3820    }
3821
3822    // ─── #1094: transparent subforms ─────────────────────────────────────────
3823
/// An unnamed subform adds no data scope of its own: fields nested inside it
/// must bind against the data context of the enclosing subform instead of a
/// dedicated child data group.
#[test]
fn transparent_subform_passes_data_context_through() {
    // The inner subform has no `name` attribute, which makes it transparent.
    let template_xml = r#"<?xml version="1.0"?>
<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
  <subform name="form1" layout="tb">
    <pageSet>
      <pageArea name="Page1">
        <contentArea w="595pt" h="842pt"/>
        <medium short="595pt" long="842pt"/>
      </pageArea>
    </pageSet>
    <subform layout="tb">
      <field name="country" w="200pt" h="20pt" x="0pt" y="0pt"/>
    </subform>
  </subform>
</template>"#;

    // `country` sits directly under `form1`; the data contains no group that
    // would correspond to the anonymous subform.
    let datasets_xml = r#"<?xml version="1.0"?>
<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
  <xfa:data>
    <form1>
      <country>Netherlands</country>
    </form1>
  </xfa:data>
</xfa:datasets>"#;

    let dom = DataDom::from_xml(datasets_xml).unwrap();
    let merger = FormMerger::new(&dom);
    let (form, _) = merger.merge(template_xml).unwrap();

    let field = form
        .nodes
        .iter()
        .find(|node| node.name == "country")
        .expect("country field must exist");

    if let FormNodeType::Field { value } = &field.node_type {
        assert_eq!(
            value, "Netherlands",
            "transparent subform should pass context through"
        );
    } else {
        panic!("country should be a field");
    }
}
3870
/// Two levels of unnamed subforms are both transparent: a field inside the
/// innermost one still binds to data located directly under `form1`.
#[test]
fn nested_transparent_subforms_bind_correctly() {
    let template_xml = r#"<?xml version="1.0"?>
<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
  <subform name="form1" layout="tb">
    <pageSet>
      <pageArea name="Page1">
        <contentArea w="595pt" h="842pt"/>
        <medium short="595pt" long="842pt"/>
      </pageArea>
    </pageSet>
    <subform layout="tb">
      <subform layout="tb">
        <field name="postalCode" w="100pt" h="20pt" x="0pt" y="0pt"/>
      </subform>
    </subform>
  </subform>
</template>"#;

    // The value lives at the `form1` level, two anonymous levels above the
    // field in the template hierarchy.
    let datasets_xml = r#"<?xml version="1.0"?>
<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
  <xfa:data>
    <form1>
      <postalCode>1234AB</postalCode>
    </form1>
  </xfa:data>
</xfa:datasets>"#;

    let dom = DataDom::from_xml(datasets_xml).unwrap();
    let merger = FormMerger::new(&dom);
    let (form, _) = merger.merge(template_xml).unwrap();

    let field = form
        .nodes
        .iter()
        .find(|node| node.name == "postalCode")
        .expect("postalCode field must exist");

    if let FormNodeType::Field { value } = &field.node_type {
        assert_eq!(value, "1234AB");
    } else {
        panic!("postalCode should be a field");
    }
}
3914
3915    // ─── #1095: occur expansion ───────────────────────────────────────────────
3916
/// A repeating subform with three data items expands into three form
/// instances, each bound to its own data record.
///
/// The bare expansion is already covered by
/// `repeating_subform_expands_from_data`; this variant additionally checks
/// that the `label` field of every instance carries the value from its own
/// `<row>` data group.
#[test]
fn repeating_subform_three_items_creates_three_instances() {
    // `row` may occur any number of times (min=0, max=-1) and has no initial
    // instances, so the instance count is driven purely by the data.
    let template = r#"<?xml version="1.0"?>
<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
  <subform name="form1" layout="tb">
    <pageSet>
      <pageArea name="Page1">
        <contentArea w="595pt" h="842pt"/>
        <medium short="595pt" long="842pt"/>
      </pageArea>
    </pageSet>
    <subform name="row" layout="position" w="500pt" h="30pt">
      <occur min="0" max="-1" initial="0"/>
      <field name="label" w="200pt" h="20pt" x="0pt" y="0pt"/>
    </subform>
  </subform>
</template>"#;

    let data_xml = r#"<?xml version="1.0"?>
<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
  <xfa:data>
    <form1>
      <row><label>Alpha</label></row>
      <row><label>Beta</label></row>
      <row><label>Gamma</label></row>
    </form1>
  </xfa:data>
</xfa:datasets>"#;

    let data_dom = DataDom::from_xml(data_xml).unwrap();
    let merger = FormMerger::new(&data_dom);
    let (tree, _root_id) = merger.merge(template).unwrap();

    let row_count = tree.nodes.iter().filter(|n| n.name == "row").count();
    assert_eq!(row_count, 3, "must have exactly 3 row instances");

    // Gather the bound value of every `label` field produced by the expansion.
    let labels: Vec<_> = tree
        .nodes
        .iter()
        .filter(|n| n.name == "label")
        .map(|n| match &n.node_type {
            FormNodeType::Field { value } => value,
            _ => panic!("label should be a field"),
        })
        .collect();
    assert_eq!(
        labels.len(),
        3,
        "each row instance must contain exactly one label field"
    );

    // Checked without relying on node order: only the set of bound values
    // matters here, not where the instances sit in `tree.nodes`.
    for expected in &["Alpha", "Beta", "Gamma"] {
        assert!(
            labels.iter().any(|value| *value == *expected),
            "no row instance is bound to label {:?}",
            expected
        );
    }
}
3956
/// With `<occur min="1" max="1"/>` the merge yields exactly one instance of
/// the subform, even though the data offers several matching groups.
#[test]
fn occur_min1_max1_limits_to_single_instance() {
    let template_xml = r#"<?xml version="1.0"?>
<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
  <subform name="form1" layout="tb">
    <pageSet>
      <pageArea name="Page1">
        <contentArea w="595pt" h="842pt"/>
        <medium short="595pt" long="842pt"/>
      </pageArea>
    </pageSet>
    <subform name="item" layout="position" w="500pt" h="30pt">
      <occur min="1" max="1"/>
      <field name="val" w="100pt" h="20pt" x="0pt" y="0pt"/>
    </subform>
  </subform>
</template>"#;

    // Three candidate `<item>` groups compete for the single allowed slot.
    let datasets_xml = r#"<?xml version="1.0"?>
<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
  <xfa:data>
    <form1>
      <item><val>X</val></item>
      <item><val>Y</val></item>
      <item><val>Z</val></item>
    </form1>
  </xfa:data>
</xfa:datasets>"#;

    let dom = DataDom::from_xml(datasets_xml).unwrap();
    let merger = FormMerger::new(&dom);
    let (form, _) = merger.merge(template_xml).unwrap();

    let instance_count = form
        .nodes
        .iter()
        .filter(|node| node.name == "item")
        .count();
    assert_eq!(instance_count, 1, "occur max=1 must limit to one instance");
}
3995
3996    // ─── #1096: presence binding ──────────────────────────────────────────────
3997
/// A field declared with `presence="hidden"` in the template keeps
/// `Presence::Hidden` in its node metadata after the merge when the data
/// supplies nothing that could override it.
#[test]
fn field_with_presence_hidden_stays_hidden() {
    let template_xml = r#"<?xml version="1.0"?>
<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
  <subform name="form1" layout="tb">
    <pageSet>
      <pageArea name="Page1">
        <contentArea w="595pt" h="842pt"/>
        <medium short="595pt" long="842pt"/>
      </pageArea>
    </pageSet>
    <field name="secretField" presence="hidden" w="100pt" h="20pt" x="0pt" y="0pt"/>
  </subform>
</template>"#;

    // Merge against a freshly constructed data DOM (no XML is loaded).
    let dom = DataDom::new();
    let merger = FormMerger::new(&dom);
    let (form, _) = merger.merge(template_xml).unwrap();

    // A node's id is its index within `nodes`.
    let hidden_id = form
        .nodes
        .iter()
        .position(|node| node.name == "secretField")
        .map(FormNodeId)
        .expect("secretField must exist");

    assert_eq!(
        form.meta(hidden_id).presence,
        Presence::Hidden,
        "presence=hidden in template must be preserved"
    );
}
4033
/// A field whose template declares no `presence` attribute, and whose data
/// provides only a value, ends up with `Presence::Visible` after the merge.
#[test]
fn field_without_presence_data_defaults_to_visible() {
    let template_xml = r#"<?xml version="1.0"?>
<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
  <subform name="form1" layout="tb">
    <pageSet>
      <pageArea name="Page1">
        <contentArea w="595pt" h="842pt"/>
        <medium short="595pt" long="842pt"/>
      </pageArea>
    </pageSet>
    <field name="normalField" w="100pt" h="20pt" x="0pt" y="0pt"/>
  </subform>
</template>"#;

    let datasets_xml = r#"<?xml version="1.0"?>
<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
  <xfa:data>
    <form1>
      <normalField>hello</normalField>
    </form1>
  </xfa:data>
</xfa:datasets>"#;

    let dom = DataDom::from_xml(datasets_xml).unwrap();
    let merger = FormMerger::new(&dom);
    let (form, _) = merger.merge(template_xml).unwrap();

    // A node's id is its index within `nodes`.
    let visible_id = form
        .nodes
        .iter()
        .position(|node| node.name == "normalField")
        .map(FormNodeId)
        .expect("normalField must exist");

    assert_eq!(
        form.meta(visible_id).presence,
        Presence::Visible,
        "field with no presence data must default to Visible"
    );
}
4077
4078    // ─── #1092 (existing test renamed for clarity) ────────────────────────────
4079
/// Ancestor scope resolution: when a field's own data context holds no
/// matching node, the value is picked up from an enclosing (ancestor) data
/// level instead.
#[test]
fn field_binds_via_ancestor_scope_walk() {
    let template_xml = r#"<?xml version="1.0"?>
<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
  <subform name="form1" layout="tb">
    <pageSet>
      <pageArea name="Page1">
        <contentArea w="595pt" h="842pt"/>
        <medium short="595pt" long="842pt"/>
      </pageArea>
    </pageSet>
    <subform name="details" layout="tb">
      <field name="city" w="200pt" h="20pt" x="0pt" y="0pt"/>
    </subform>
  </subform>
</template>"#;

    // `<details/>` is present but empty, and `city` is its sibling under
    // `form1` — so the lookup has to climb from `details` to `form1`.
    let datasets_xml = r#"<?xml version="1.0"?>
<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
  <xfa:data>
    <form1>
      <city>Amsterdam</city>
      <details/>
    </form1>
  </xfa:data>
</xfa:datasets>"#;

    let dom = DataDom::from_xml(datasets_xml).unwrap();
    let merger = FormMerger::new(&dom);
    let (form, _) = merger.merge(template_xml).unwrap();

    let field = form
        .nodes
        .iter()
        .find(|node| node.name == "city")
        .expect("city field must exist");

    if let FormNodeType::Field { value } = &field.node_type {
        assert_eq!(
            value, "Amsterdam",
            "city should be found via ancestor scope walk"
        );
    } else {
        panic!("city should be a field");
    }
}
4130}