Skip to main content

pdf_xfa/
session.rs

1//! `XfaSession` — parse-once interactive fill session over an XFA PDF.
2//!
3//! Phase 1 SDK foundation: enumerate the currently visible/layouted fields,
4//! set values (with write-through to the bound data nodes), and save a PDF
5//! whose datasets packet carries the filled values so Adobe Acrobat/Reader
6//! reopens the form with them.
7//!
8//! Explicit non-goals at this phase (see the UEA gap analysis): no
9//! change/click event execution, no dynamic re-layout/reflow after a value
10//! write, no instanceManager add/remove, no form-packet *structural* writes.
11//!
12//! ```no_run
13//! use pdf_xfa::session::{XfaSession, XfaWriteValue};
14//!
15//! let bytes = std::fs::read("form.pdf").unwrap();
16//! let mut session = XfaSession::open(&bytes).unwrap();
17//! println!("pages: {}", session.page_count());
18//! for f in session.fields() {
19//!     println!("{} = {:?}", f.name, f.value);
20//! }
21//! session
22//!     .set_value("form1.applicant.name", XfaWriteValue::Text("Alice"))
23//!     .unwrap();
24//! let filled = session.save_to_bytes().unwrap();
25//! std::fs::write("filled.pdf", filled).unwrap();
26//! ```
27
28use std::collections::HashMap;
29
30use lopdf::Document;
31use xfa_dom_resolver::data_dom::{DataDom, DataNode, DataNodeId};
32use xfa_layout_engine::form::{
33    Access, FieldKind, FormNodeId, FormNodeType, FormTree, GroupKind, Presence,
34};
35use xfa_layout_engine::layout::{LayoutContent, LayoutDom, LayoutEngine, LayoutNode};
36
37use crate::datasets_writeback::{self, DatasetsEdit, FormPacketEdit};
38use crate::dynamic::apply_dynamic_scripts;
39use crate::error::{Result, XfaError};
40use crate::extract::{extract_xfa_from_bytes, XfaPackets};
41use crate::flatten::{
42    apply_form_dom_presence, extract_embedded_images, inject_resolved_metrics,
43    resolve_template_fonts, XfaRenderingPolicy,
44};
45use crate::merger::FormMerger;
46
/// A value to write into an XFA field via [`XfaSession::set_value`].
///
/// The variant must be compatible with the target field's type; an
/// incompatible pairing (e.g. `Checkbox` on a text field, or `Radio` on a
/// non-radio field) is rejected with [`XfaError::InvalidFieldValue`].
#[derive(Debug, Clone, Copy)]
pub enum XfaWriteValue<'a> {
    /// Text-like fields: text, multiline text, numeric, date/time, password,
    /// and choice lists (pass the save value or the display value).
    ///
    /// Also accepted for checkboxes (the field's on/off value, `true` /
    /// `false` case-insensitively, `1` / `0`, or the empty string for
    /// "off") and for radio groups (a member's on-value).
    Text(&'a str),
    /// Checkbox: `true` selects the on-value, `false` the off-value.
    Checkbox(bool),
    /// Radio group (exclGroup): the on-value of the member to select.
    Radio(&'a str),
}
58
/// Public field type, mirroring [`FieldKind`] without leaking the
/// layout-engine type into the SDK surface.
///
/// Use [`XfaFieldType::is_fillable`] to test whether
/// [`XfaSession::set_value`] accepts writes for a given type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum XfaFieldType {
    /// Single-line or multiline text edit.
    Text,
    /// Standalone checkbox.
    Checkbox,
    /// Exclusive radio group (exclGroup with checkButton members).
    RadioGroup,
    /// Push button (not fillable).
    Button,
    /// Dropdown / choice list.
    Dropdown,
    /// Signature field (not fillable in Phase 1).
    Signature,
    /// Date/time picker (accepts text in Phase 1).
    DateTime,
    /// Numeric edit (accepts text in Phase 1).
    Numeric,
    /// Password edit.
    Password,
    /// Image edit (not fillable in Phase 1).
    Image,
    /// Barcode (not fillable).
    Barcode,
}
86
87impl XfaFieldType {
88    fn from_kind(kind: FieldKind) -> Self {
89        match kind {
90            FieldKind::Text => XfaFieldType::Text,
91            FieldKind::Checkbox => XfaFieldType::Checkbox,
92            FieldKind::Radio => XfaFieldType::RadioGroup,
93            FieldKind::Button => XfaFieldType::Button,
94            FieldKind::Dropdown => XfaFieldType::Dropdown,
95            FieldKind::Signature => XfaFieldType::Signature,
96            FieldKind::DateTimePicker => XfaFieldType::DateTime,
97            FieldKind::NumericEdit => XfaFieldType::Numeric,
98            FieldKind::PasswordEdit => XfaFieldType::Password,
99            FieldKind::ImageEdit => XfaFieldType::Image,
100            FieldKind::Barcode => XfaFieldType::Barcode,
101        }
102    }
103
104    /// Whether [`XfaSession::set_value`] accepts writes for this type.
105    pub fn is_fillable(self) -> bool {
106        !matches!(
107            self,
108            XfaFieldType::Button
109                | XfaFieldType::Signature
110                | XfaFieldType::Image
111                | XfaFieldType::Barcode
112        )
113    }
114}
115
/// Axis-aligned rectangle in page space, points, top-left origin (XFA
/// coordinate convention: y grows downward from the top of the page).
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct XfaRect {
    /// Left edge, in points from the left side of the page.
    pub x: f64,
    /// Top edge, in points from the top of the page (grows downward).
    pub y: f64,
    /// Width in points.
    pub width: f64,
    /// Height in points.
    pub height: f64,
}
129
/// One layouted widget occurrence of a field, as listed in
/// [`XfaFieldModel::widgets`].
#[derive(Debug, Clone)]
pub struct XfaWidget {
    /// 0-based page index in the layout result.
    pub page: usize,
    /// Widget rectangle (page space, top-left origin, points).
    pub rect: XfaRect,
    /// For radio groups: the on-value asserted by this member widget.
    pub on_value: Option<String>,
}
140
/// A selectable option of a dropdown / list field.
///
/// [`XfaSession::set_value`] accepts either the `save` or the `display`
/// text of an option for a dropdown and stores the option's `save` value.
#[derive(Debug, Clone)]
pub struct XfaFieldOption {
    /// User-visible text.
    pub display: String,
    /// Persisted save value (equals `display` when the template defines no
    /// separate save items).
    pub save: String,
}
150
/// SDK-level model of one fillable XFA form field, derived from the merged
/// form tree and the current layout.
///
/// Models are returned by [`XfaSession::fields`] / [`XfaSession::field`];
/// the `value` member is refreshed after each successful
/// [`XfaSession::set_value`].
#[derive(Debug, Clone)]
pub struct XfaFieldModel {
    /// Fully-qualified dotted name with `[n]` indices omitted when 0
    /// (Adobe display-SOM style), e.g. `form1.applicant.name` or
    /// `form1.rows.row[2].amount`.
    pub name: String,
    /// Fully-qualified SOM path with explicit `[n]` on every segment,
    /// e.g. `form1[0].rows[0].row[2].amount[0]`.
    pub som_path: String,
    /// Field type.
    pub field_type: XfaFieldType,
    /// Current value (the form-tree raw value; radio groups report the
    /// selected member's on-value, empty when none selected).
    pub value: String,
    /// Effective read-only state: the field or an ancestor container sets
    /// `access="readOnly" | "protected" | "nonInteractive"`.
    pub read_only: bool,
    /// Mandatory field (`<validate nullTest="error">`).
    pub required: bool,
    /// Multiline text edit (`<ui><textEdit multiLine="1">`).
    pub multiline: bool,
    /// Hidden/invisible/inactive via XFA presence (template, saved form
    /// state, or script-derived).
    pub hidden: bool,
    /// Dropdown/list options. For radio groups: one option per member
    /// (display = member on-value).
    pub options: Vec<XfaFieldOption>,
    /// Checkbox on-value (`<items>` / checkButton binding), when known.
    /// When `None`, checkbox writes fall back to `"1"`.
    pub on_value: Option<String>,
    /// Checkbox off-value, when known. When `None`, checkbox writes fall
    /// back to `"0"`.
    pub off_value: Option<String>,
    /// First page this field appears on (0-based), `None` when the field is
    /// not part of the current layout (e.g. hidden).
    pub page: Option<usize>,
    /// Rectangle of the first widget occurrence.
    pub rect: Option<XfaRect>,
    /// All layouted widget occurrences (radio groups: one per member).
    pub widgets: Vec<XfaWidget>,
    /// Whether the field has a bound data node in the datasets DOM. Values
    /// written to unbound fields are persisted by creating the data node on
    /// demand (default binding), except for `bind match="none"` fields.
    pub bound_to_data: bool,
    /// `<bind match="none">` — the field deliberately does not participate
    /// in data binding; Adobe persists such values only in the form packet.
    pub bind_none: bool,
}
199
/// Outcome of a successful [`XfaSession::set_value`] call.
#[derive(Debug, Clone)]
pub struct XfaSetOutcome {
    /// The normalized value written to the form tree (e.g. the resolved
    /// save value of a dropdown, or a checkbox on/off value).
    pub raw_value: String,
    /// The value was written through to a datasets node (existing or
    /// created on demand) and will be persisted by [`XfaSession::save_to_bytes`].
    ///
    /// `false` for `bind match="none"` fields and whenever no data node
    /// could be located or created for the field.
    pub persisted_to_datasets: bool,
}
210
/// Report from a save/writeback, returned by
/// [`XfaSession::write_into_document`].
#[derive(Debug, Clone, Default)]
pub struct XfaWritebackReport {
    /// Datasets packet rewritten by surgical splice (original XML preserved
    /// except for the changed values). `false` = lossless splice failed and
    /// the data section was regenerated from the data DOM instead; also
    /// `false` when the PDF had no datasets packet to splice into.
    pub datasets_spliced: bool,
    /// Number of `<value>` updates applied to the saved form packet (kept in
    /// sync so Adobe's `restoreState` does not resurrect stale values).
    /// Stays `0` when there is no form packet or nothing was recorded.
    pub form_packet_values_updated: usize,
}
222
/// Internal per-field bookkeeping. Entries share their index with the
/// public models: `set_value` resolves one index from the name map and uses
/// it for both the entry and the model.
struct FieldEntry {
    /// The field node (for radio groups: the exclGroup node).
    node: FormNodeId,
    /// Member fields of a radio group, with their on-values. Empty for
    /// every non-radio field.
    members: Vec<(FormNodeId, String)>,
    /// Effective access at enumeration time.
    read_only: bool,
    /// Path of named segments from the root, with same-name sibling indices.
    segments: Vec<(String, usize)>,
}
233
/// Parse-once session over a single XFA PDF.
///
/// Holds the extracted packets, the parsed datasets DOM, the merged form
/// tree, and the layout result. Field enumeration and value writes operate
/// on the cached state; no re-parse or re-layout happens per call (Phase 1:
/// value writes do not reflow the layout).
pub struct XfaSession {
    /// Original PDF bytes; reloaded as the base document at save time.
    pdf_bytes: Vec<u8>,
    /// XFA packets extracted from the PDF (template, datasets, form, …).
    packets: XfaPackets,
    /// Parsed datasets DOM; value writes are applied here.
    data_dom: DataDom,
    /// Merged form tree.
    tree: FormTree,
    /// Cached layout result.
    layout: LayoutDom,
    /// Internal field entries, index-aligned with `models`.
    fields: Vec<FieldEntry>,
    /// Public field models, index-aligned with `fields`.
    models: Vec<XfaFieldModel>,
    /// Field name → index into `fields` / `models`.
    by_name: HashMap<String, usize>,
    /// Data nodes whose values changed (existing nodes).
    dirty_data: Vec<DataNodeId>,
    /// Ids of data nodes created on demand during this session.
    created_data: Vec<DataNodeId>,
    /// Form-packet value syncs to apply at save.
    form_packet_sync: Vec<FormPacketEdit>,
}
256
257impl XfaSession {
258    /// Open a session over the given PDF bytes.
259    ///
260    /// Mirrors the flatten pipeline's Extract → Bind → (static scripts) →
261    /// saved-form-state → Layout stages, then caches everything.
262    ///
263    /// Returns [`XfaError::PacketNotFound`] when the PDF has no XFA template
264    /// packet (i.e. it is not an XFA form).
265    pub fn open(pdf_bytes: &[u8]) -> Result<XfaSession> {
266        let packets = extract_xfa_from_bytes(pdf_bytes.to_vec())?;
267        let template_xml = packets
268            .template()
269            .ok_or_else(|| XfaError::PacketNotFound("template".to_string()))?
270            .to_string();
271
272        // A malformed or empty datasets packet must not block the session —
273        // the form is still fillable; the writeback regenerates a canonical
274        // packet from scratch (Adobe tolerates both).
275        let data_dom = match packets.datasets() {
276            Some(ds) => DataDom::from_xml(ds).unwrap_or_else(|e| {
277                log::warn!("XfaSession: datasets packet unparseable ({e}); starting empty");
278                DataDom::new()
279            }),
280            None => DataDom::new(),
281        };
282
283        let image_files = match Document::load_mem(pdf_bytes) {
284            Ok(doc) => extract_embedded_images(&doc),
285            Err(_) => HashMap::new(),
286        };
287
288        let merger = FormMerger::new(&data_dom).with_image_files(image_files);
289        let (mut tree, root_id) = merger
290            .merge(&template_xml)
291            .map_err(|e| XfaError::ParseFailed(format!("template merge: {e}")))?;
292
293        // Static-analysis script pass only (BestEffortStatic): presence
294        // toggles and FormCalc the engine can resolve without a JS runtime.
295        // Phase 1 deliberately runs no change/click events.
296        let _ = apply_dynamic_scripts(&mut tree, root_id)?;
297
298        // Saved form state (Adobe-Reader-saved PDFs): presence overrides and
299        // saved field values, exactly as the flatten pipeline applies them.
300        if let Some(form_xml) = packets.get_packet("form") {
301            let _ = apply_form_dom_presence(
302                &mut tree,
303                root_id,
304                form_xml,
305                XfaRenderingPolicy::SavedStateFaithful,
306                true,
307            );
308            apply_form_dom_access(&mut tree, root_id, form_xml);
309        }
310
311        // Resolved font metrics make the layout geometry match the flatten
312        // output (widths drive wrapping and therefore field positions).
313        let resolved = resolve_template_fonts(&template_xml, pdf_bytes);
314        inject_resolved_metrics(&mut tree, &resolved);
315
316        let layout = LayoutEngine::new(&tree)
317            .layout(root_id)
318            .map_err(|e| XfaError::LayoutFailed(e.to_string()))?;
319
320        let mut session = XfaSession {
321            pdf_bytes: pdf_bytes.to_vec(),
322            packets,
323            data_dom,
324            tree,
325            layout,
326            fields: Vec::new(),
327            models: Vec::new(),
328            by_name: HashMap::new(),
329            dirty_data: Vec::new(),
330            created_data: Vec::new(),
331            form_packet_sync: Vec::new(),
332        };
333        session.enumerate_fields(root_id);
334        Ok(session)
335    }
336
337    /// Number of pages in the cached layout result.
338    pub fn page_count(&self) -> usize {
339        self.layout.pages.len()
340    }
341
342    /// Page dimensions `(width, height)` in points for a 0-based page index.
343    pub fn page_size(&self, page: usize) -> Option<(f64, f64)> {
344        self.layout.pages.get(page).map(|p| (p.width, p.height))
345    }
346
347    /// The enumerated field models, in document order.
348    pub fn fields(&self) -> &[XfaFieldModel] {
349        &self.models
350    }
351
352    /// Look up one field model by fully-qualified name. Accepts both the
353    /// display name (`a.b[2].c`) and the explicit SOM path (`a[0].b[2].c[0]`).
354    pub fn field(&self, name: &str) -> Option<&XfaFieldModel> {
355        self.by_name.get(name).map(|&i| &self.models[i])
356    }
357
358    /// Set a field's value.
359    ///
360    /// Updates the form-tree raw value, writes through to the bound
361    /// datasets node (creating it on demand for default-bound fields), and
362    /// records the change for [`save_to_bytes`](Self::save_to_bytes).
363    ///
364    /// Rejects writes to read-only fields ([`XfaError::FieldReadOnly`]) and
365    /// to non-fillable kinds (buttons, signatures, …). No change/click
366    /// scripts run and the layout is not recomputed (Phase 1).
367    pub fn set_value(&mut self, name: &str, value: XfaWriteValue<'_>) -> Result<XfaSetOutcome> {
368        let idx = *self
369            .by_name
370            .get(name)
371            .ok_or_else(|| XfaError::FieldNotFound(name.to_string()))?;
372
373        if self.fields[idx].read_only {
374            return Err(XfaError::FieldReadOnly(name.to_string()));
375        }
376        let field_type = self.models[idx].field_type;
377        if !field_type.is_fillable() {
378            return Err(XfaError::InvalidFieldValue {
379                name: name.to_string(),
380                reason: format!("{field_type:?} fields are not fillable"),
381            });
382        }
383
384        let raw = self.normalize_value(idx, value, name)?;
385
386        // 1. Form-tree raw value.
387        self.write_tree_value(idx, &raw);
388
389        // 2. Datasets write-through.
390        let persisted = self.write_data_value(idx, &raw)?;
391
392        // 3. Saved-form-packet value sync (recorded; spliced at save time).
393        if self.packets.get_packet("form").is_some() {
394            let segments = self.fields[idx].segments.clone();
395            // Drop earlier edits for this field (group prefix covers the
396            // radio-member edits too).
397            self.form_packet_sync
398                .retain(|e| !e.segments.starts_with(&segments));
399            if self.fields[idx].members.is_empty() {
400                self.form_packet_sync.push(FormPacketEdit {
401                    segments,
402                    value: raw.clone(),
403                    insert_if_missing: true,
404                });
405            } else {
406                // Radio group: the saved form DOM stores the selection on
407                // the member fields — the selected member carries the
408                // on-value, deselected members are cleared. Also refresh a
409                // group-level <value> when the producer wrote one.
410                for (member, on) in self.fields[idx].members.clone() {
411                    let mut member_segments = segments.clone();
412                    member_segments.push((self.tree.get(member).name.clone(), 0));
413                    let selected = on == raw;
414                    self.form_packet_sync.push(FormPacketEdit {
415                        segments: member_segments,
416                        value: if selected { raw.clone() } else { String::new() },
417                        insert_if_missing: selected,
418                    });
419                }
420                self.form_packet_sync.push(FormPacketEdit {
421                    segments,
422                    value: raw.clone(),
423                    insert_if_missing: false,
424                });
425            }
426        }
427
428        // 4. Refresh the public model.
429        self.models[idx].value = raw.clone();
430
431        Ok(XfaSetOutcome {
432            raw_value: raw,
433            persisted_to_datasets: persisted,
434        })
435    }
436
437    /// Serialize the updated datasets (and form-packet value syncs) into a
438    /// copy of the original PDF and return the new bytes.
439    pub fn save_to_bytes(&self) -> Result<Vec<u8>> {
440        let mut doc = Document::load_mem(&self.pdf_bytes)
441            .map_err(|e| XfaError::LoadFailed(format!("reload for writeback: {e}")))?;
442        self.write_into_document(&mut doc)?;
443        let mut out = Vec::new();
444        doc.save_to(&mut out)
445            .map_err(|e| XfaError::WritebackFailed(format!("PDF save: {e}")))?;
446        Ok(out)
447    }
448
449    /// Apply the datasets/form-packet writeback to an already-loaded
450    /// `lopdf::Document` (the SDK facade path: the caller owns the document
451    /// and serializes it itself).
452    pub fn write_into_document(&self, doc: &mut Document) -> Result<XfaWritebackReport> {
453        let mut report = XfaWritebackReport::default();
454
455        // Build the new datasets packet: surgical splice of the original
456        // packet text, falling back to data-section regeneration.
457        let edits = self.collect_datasets_edits();
458        let original_datasets = self.packets.datasets();
459        let new_datasets = match original_datasets {
460            Some(orig) => match datasets_writeback::splice_datasets(orig, &edits) {
461                Some(spliced) => {
462                    report.datasets_spliced = true;
463                    spliced
464                }
465                None => datasets_writeback::regenerate_datasets(orig, &self.data_dom),
466            },
467            None => datasets_writeback::regenerate_datasets("", &self.data_dom),
468        };
469
470        // Saved form packet: keep recorded field values in sync.
471        let new_form = self.packets.get_packet("form").and_then(|orig| {
472            if self.form_packet_sync.is_empty() {
473                return None;
474            }
475            let (xml, updated) =
476                datasets_writeback::splice_form_packet(orig, &self.form_packet_sync);
477            report.form_packet_values_updated = updated;
478            if updated > 0 {
479                Some(xml)
480            } else {
481                None
482            }
483        });
484
485        datasets_writeback::write_packets_into_pdf(doc, &new_datasets, new_form.as_deref())?;
486        Ok(report)
487    }
488
489    /// True when any value was changed since the session opened.
490    pub fn is_dirty(&self) -> bool {
491        !self.dirty_data.is_empty()
492            || !self.created_data.is_empty()
493            || !self.form_packet_sync.is_empty()
494    }
495
496    // ── internals ──────────────────────────────────────────────────────
497
498    fn normalize_value(&self, idx: usize, value: XfaWriteValue<'_>, name: &str) -> Result<String> {
499        let model = &self.models[idx];
500        match (model.field_type, value) {
501            (XfaFieldType::Checkbox, XfaWriteValue::Checkbox(on)) => Ok(if on {
502                model.on_value.clone().unwrap_or_else(|| "1".to_string())
503            } else {
504                model.off_value.clone().unwrap_or_else(|| "0".to_string())
505            }),
506            (XfaFieldType::Checkbox, XfaWriteValue::Text(s)) => {
507                let on_v = model.on_value.clone().unwrap_or_else(|| "1".to_string());
508                let off_v = model.off_value.clone().unwrap_or_else(|| "0".to_string());
509                if s == on_v || s.eq_ignore_ascii_case("true") || s == "1" {
510                    Ok(on_v)
511                } else if s == off_v || s.eq_ignore_ascii_case("false") || s == "0" || s.is_empty()
512                {
513                    Ok(off_v)
514                } else {
515                    Err(XfaError::InvalidFieldValue {
516                        name: name.to_string(),
517                        reason: format!("checkbox accepts {on_v:?} or {off_v:?}, got {s:?}"),
518                    })
519                }
520            }
521            (XfaFieldType::RadioGroup, XfaWriteValue::Radio(s))
522            | (XfaFieldType::RadioGroup, XfaWriteValue::Text(s)) => {
523                let known = self.fields[idx].members.iter().any(|(_, on)| on == s);
524                if known {
525                    Ok(s.to_string())
526                } else {
527                    let opts: Vec<&str> = self.fields[idx]
528                        .members
529                        .iter()
530                        .map(|(_, on)| on.as_str())
531                        .collect();
532                    Err(XfaError::InvalidFieldValue {
533                        name: name.to_string(),
534                        reason: format!("radio accepts one of {opts:?}, got {s:?}"),
535                    })
536                }
537            }
538            (XfaFieldType::Dropdown, XfaWriteValue::Text(s)) => {
539                if model.options.is_empty() {
540                    return Ok(s.to_string());
541                }
542                if let Some(opt) = model.options.iter().find(|o| o.save == s) {
543                    return Ok(opt.save.clone());
544                }
545                if let Some(opt) = model.options.iter().find(|o| o.display == s) {
546                    return Ok(opt.save.clone());
547                }
548                // XFA choice lists may allow free entry; the template parser
549                // does not currently surface `open="..."`, so accept unknown
550                // values rather than over-rejecting.
551                Ok(s.to_string())
552            }
553            (_, XfaWriteValue::Text(s)) => Ok(s.to_string()),
554            (ft, v) => Err(XfaError::InvalidFieldValue {
555                name: name.to_string(),
556                reason: format!("{v:?} is not assignable to a {ft:?} field"),
557            }),
558        }
559    }
560
561    fn write_tree_value(&mut self, idx: usize, raw: &str) {
562        let entry = &self.fields[idx];
563        if entry.members.is_empty() {
564            let id = entry.node;
565            if let FormNodeType::Field { .. } = self.tree.get(id).node_type {
566                self.tree.get_mut(id).node_type = FormNodeType::Field {
567                    value: raw.to_string(),
568                };
569            }
570        } else {
571            // exclGroup: the member whose on-value matches gets the value,
572            // every other member is cleared (mirrors the merger's
573            // apply_exclusive_choice_value semantics).
574            let members = entry.members.clone();
575            for (member, on_value) in members {
576                let new_value = if on_value == raw {
577                    raw.to_string()
578                } else {
579                    String::new()
580                };
581                if let FormNodeType::Field { .. } = self.tree.get(member).node_type {
582                    self.tree.get_mut(member).node_type = FormNodeType::Field { value: new_value };
583                }
584            }
585        }
586    }
587
588    /// Write through to the datasets DOM. Returns `true` when the value is
589    /// now held by a data node (existing or created).
590    fn write_data_value(&mut self, idx: usize, raw: &str) -> Result<bool> {
591        let entry_node = self.fields[idx].node;
592        if self.tree.meta(entry_node).data_bind_none {
593            // `bind match="none"` — by design not persisted in datasets.
594            return Ok(false);
595        }
596
597        if let Some(raw_id) = self.tree.meta(entry_node).bound_data_node {
598            let id = DataNodeId::from_raw(raw_id);
599            match self.data_dom.get(id) {
600                Some(DataNode::DataValue { .. }) => {
601                    self.data_dom
602                        .set_value(id, raw.to_string())
603                        .map_err(|e| XfaError::WritebackFailed(e.to_string()))?;
604                    if !self.dirty_data.contains(&id) {
605                        self.dirty_data.push(id);
606                    }
607                    return Ok(true);
608                }
609                Some(DataNode::DataGroup { .. }) => {
610                    // Container-bound (e.g. an exclGroup bound to a group):
611                    // fall through to create/find a value child below.
612                }
613                None => return Ok(false),
614            }
615        }
616
617        // No directly bound value node: create one on demand under the
618        // nearest bound ancestor (XFA default binding by name).
619        let created = self.create_data_node_for(idx, raw)?;
620        Ok(created)
621    }
622
623    fn create_data_node_for(&mut self, idx: usize, raw: &str) -> Result<bool> {
624        let field_node = self.fields[idx].node;
625        let data_name = self.tree.get(field_node).name.clone();
626        if data_name.is_empty() {
627            return Ok(false);
628        }
629
630        // Find the nearest ancestor (or the node itself when group-bound)
631        // with a bound data *group*.
632        let mut parent_group: Option<DataNodeId> = None;
633        // The node itself may be bound to a group (exclGroup case).
634        if let Some(raw_id) = self.tree.meta(field_node).bound_data_node {
635            let id = DataNodeId::from_raw(raw_id);
636            if matches!(self.data_dom.get(id), Some(DataNode::DataGroup { .. })) {
637                // The group IS the field's data home: a value child named
638                // like the field is the XFA §4.4.5 group-value convention —
639                // but Designer datasets normally store the exclGroup value
640                // directly on the group-named *value* node. Prefer an
641                // existing value child named like the field, else write a
642                // value child with the field's name.
643                parent_group = Some(id);
644            }
645        }
646        if parent_group.is_none() {
647            let mut cursor = self.parent_of(field_node);
648            while let Some(p) = cursor {
649                if let Some(raw_id) = self.tree.meta(p).bound_data_node {
650                    let id = DataNodeId::from_raw(raw_id);
651                    if matches!(self.data_dom.get(id), Some(DataNode::DataGroup { .. })) {
652                        parent_group = Some(id);
653                        break;
654                    }
655                }
656                cursor = self.parent_of(p);
657            }
658        }
659        if parent_group.is_none() {
660            // No bound ancestor at all (empty or absent datasets): synthesize
661            // the group chain from the data root along the field's named
662            // path — the same tree Adobe Reader creates on first save.
663            // Explicit `<bind ref>` fields are skipped: their data home is
664            // not the name chain.
665            if self.tree.meta(field_node).data_bind_ref.is_none() {
666                parent_group = self.synthesize_group_chain(idx)?;
667            }
668        }
669        let Some(group) = parent_group else {
670            return Ok(false);
671        };
672
673        // Reuse an existing same-name child value node when present.
674        let existing = self
675            .data_dom
676            .children_by_name(group, &data_name)
677            .into_iter()
678            .find(|&c| matches!(self.data_dom.get(c), Some(DataNode::DataValue { .. })));
679        let id = match existing {
680            Some(c) => {
681                self.data_dom
682                    .set_value(c, raw.to_string())
683                    .map_err(|e| XfaError::WritebackFailed(e.to_string()))?;
684                if !self.dirty_data.contains(&c) {
685                    self.dirty_data.push(c);
686                }
687                self.tree.meta_mut(self.fields[idx].node).bound_data_node = Some(c.as_raw());
688                return Ok(true);
689            }
690            None => self
691                .data_dom
692                .create_value(group, &data_name, raw)
693                .map_err(|e| XfaError::WritebackFailed(e.to_string()))?,
694        };
695        self.created_data.push(id);
696        self.tree.meta_mut(self.fields[idx].node).bound_data_node = Some(id.as_raw());
697        Ok(true)
698    }
699
700    /// Create (or find) the data-group chain `root → seg[0] → … → seg[n-2]`
701    /// for the field at `idx`, creating a `<data>` root when the DataDom is
702    /// empty. Returns the deepest group, or `None` when an indexed segment
703    /// (`name[i>0]`) is missing — repeated-instance synthesis is out of
704    /// Phase-1 scope.
705    fn synthesize_group_chain(&mut self, idx: usize) -> Result<Option<DataNodeId>> {
706        let segments = self.fields[idx].segments.clone();
707        if segments.len() < 2 {
708            // A root-level field still needs a root group to live under.
709            if segments.is_empty() {
710                return Ok(None);
711            }
712        }
713
714        let root = match self.data_dom.root() {
715            Some(r)
716                if self
717                    .data_dom
718                    .get(r)
719                    .map(DataNode::is_group)
720                    .unwrap_or(false) =>
721            {
722                r
723            }
724            _ => {
725                let r = self.data_dom.alloc(DataNode::DataGroup {
726                    name: "data".to_string(),
727                    namespace: None,
728                    children: Vec::new(),
729                    is_record: false,
730                    parent: None,
731                });
732                self.data_dom.set_root(r);
733                r
734            }
735        };
736
737        let mut cursor = root;
738        for (name, index) in &segments[..segments.len() - 1] {
739            let groups: Vec<DataNodeId> = self
740                .data_dom
741                .children_by_name(cursor, name)
742                .into_iter()
743                .filter(|&c| {
744                    self.data_dom
745                        .get(c)
746                        .map(DataNode::is_group)
747                        .unwrap_or(false)
748                })
749                .collect();
750            cursor = match groups.get(*index) {
751                Some(&g) => g,
752                None if *index == 0 => self
753                    .data_dom
754                    .create_group(cursor, name)
755                    .map_err(|e| XfaError::WritebackFailed(e.to_string()))?,
756                None => return Ok(None),
757            };
758        }
759        Ok(Some(cursor))
760    }
761
762    fn parent_of(&self, node: FormNodeId) -> Option<FormNodeId> {
763        // FormTree stores no parent links; derive lazily from the arena.
764        for (i, n) in self.tree.nodes.iter().enumerate() {
765            if n.children.contains(&node) {
766                return Some(FormNodeId(i));
767            }
768        }
769        None
770    }
771
772    fn collect_datasets_edits(&self) -> Vec<DatasetsEdit> {
773        let mut edits = Vec::new();
774        for &id in &self.dirty_data {
775            if let Some(path) = self.data_path_of(id) {
776                if let Ok(value) = self.data_dom.value(id) {
777                    edits.push(DatasetsEdit::SetValue {
778                        path,
779                        value: value.to_string(),
780                    });
781                }
782            }
783        }
784        for &id in &self.created_data {
785            let Some(node) = self.data_dom.get(id) else {
786                continue;
787            };
788            let name = node.name().to_string();
789            let Some(parent) = node.parent() else {
790                continue;
791            };
792            let Some(parent_path) = self.data_path_of(parent) else {
793                continue;
794            };
795            if let Ok(value) = self.data_dom.value(id) {
796                edits.push(DatasetsEdit::CreateValue {
797                    parent_path,
798                    name,
799                    value: value.to_string(),
800                });
801            }
802        }
803        edits
804    }
805
806    /// Path of `(name, index-among-same-name-element-siblings)` segments
807    /// from the data root down to `id`. The root itself is segment 0; the
808    /// upward walk stops there (the parse may keep wrapper parents like
809    /// `<xfa:datasets>` linked above the effective root).
810    fn data_path_of(&self, id: DataNodeId) -> Option<Vec<(String, usize)>> {
811        let root = self.data_dom.root()?;
812        let mut rev = Vec::new();
813        let mut cursor = Some(id);
814        while let Some(c) = cursor {
815            let node = self.data_dom.get(c)?;
816            let name = node.name().to_string();
817            let at_root = c == root;
818            let parent = if at_root { None } else { node.parent() };
819            let index = match parent {
820                Some(p) => {
821                    // Index among same-name siblings, skipping nodes the
822                    // splicer cannot see as XML elements (attribute-derived
823                    // metadata values).
824                    let mut k = 0usize;
825                    let mut found = None;
826                    for &sib in self.data_dom.children(p) {
827                        let Some(sn) = self.data_dom.get(sib) else {
828                            continue;
829                        };
830                        if sn.name() != name {
831                            continue;
832                        }
833                        if is_metadata_value(sn) {
834                            continue;
835                        }
836                        if sib == c {
837                            found = Some(k);
838                            break;
839                        }
840                        k += 1;
841                    }
842                    found?
843                }
844                None => 0,
845            };
846            rev.push((name, index));
847            if at_root {
848                cursor = None;
849            } else {
850                // A node that never reaches the effective root (e.g. it
851                // lives under a wrapper sibling) cannot be spliced by a
852                // root-relative path.
853                cursor = Some(node.parent()?);
854            }
855        }
856        rev.reverse();
857        Some(rev)
858    }
859
860    // ── enumeration ─────────────────────────────────────────────────────
861
862    fn enumerate_fields(&mut self, root: FormNodeId) {
863        // Geometry first: form node → widget occurrences.
864        let mut geometry: HashMap<usize, Vec<(usize, XfaRect)>> = HashMap::new();
865        for (page_idx, page) in self.layout.pages.iter().enumerate() {
866            for node in &page.nodes {
867                collect_geometry(node, 0.0, 0.0, page_idx, &mut geometry);
868            }
869        }
870
871        let mut entries = Vec::new();
872        let mut segments: Vec<(String, usize)> = Vec::new();
873        let mut sibling_counters: Vec<HashMap<String, usize>> = vec![HashMap::new()];
874        // The merged tree's root is a synthetic container (named "root");
875        // Adobe SOM names start at the template's root subform, so walk the
876        // root's children instead of the root itself.
877        let root_access = self.tree.meta(root).access.unwrap_or(Access::Open);
878        for &child in &self.tree.get(root).children.clone() {
879            self.walk_enumerate(
880                child,
881                root_access,
882                &mut segments,
883                &mut sibling_counters,
884                &mut entries,
885            );
886        }
887
888        let mut models = Vec::new();
889        let mut by_name: HashMap<String, usize> = HashMap::new();
890        for entry in entries {
891            let model = self.build_model(&entry, &geometry);
892            let idx = models.len();
893            // First registration wins; later duplicates stay addressable via
894            // their explicit SOM path.
895            by_name.entry(model.name.clone()).or_insert(idx);
896            by_name.entry(model.som_path.clone()).or_insert(idx);
897            models.push(model);
898            self.fields.push(entry);
899        }
900        self.models = models;
901        self.by_name = by_name;
902    }
903
904    #[allow(clippy::too_many_arguments)]
905    fn walk_enumerate(
906        &self,
907        node_id: FormNodeId,
908        inherited_access: Access,
909        segments: &mut Vec<(String, usize)>,
910        sibling_counters: &mut Vec<HashMap<String, usize>>,
911        out: &mut Vec<FieldEntry>,
912    ) {
913        let node = self.tree.get(node_id);
914        let meta = self.tree.meta(node_id);
915        let access = meta.access.unwrap_or(inherited_access);
916
917        let named = !node.name.is_empty();
918        if named {
919            let counter = sibling_counters
920                .last_mut()
921                .expect("sibling counter scope present");
922            let idx = *counter
923                .entry(node.name.clone())
924                .and_modify(|c| *c += 1)
925                .or_insert(0);
926            segments.push((node.name.clone(), idx));
927        }
928
929        let is_radio_group = matches!(node.node_type, FormNodeType::ExclGroup)
930            || meta.group_kind == GroupKind::ExclusiveChoice;
931        let is_field = matches!(node.node_type, FormNodeType::Field { .. });
932
933        if is_radio_group {
934            let mut members = Vec::new();
935            for &child in &node.children {
936                if let FormNodeType::Field { .. } = self.tree.get(child).node_type {
937                    let cmeta = self.tree.meta(child);
938                    let on = cmeta
939                        .item_value
940                        .clone()
941                        .or_else(|| cmeta.style.check_button_on_value.clone())
942                        .unwrap_or_else(|| self.tree.get(child).name.clone());
943                    members.push((child, on));
944                }
945            }
946            if !members.is_empty() {
947                out.push(FieldEntry {
948                    node: node_id,
949                    members,
950                    read_only: access.denies_writes(),
951                    segments: segments.clone(),
952                });
953            }
954        } else if is_field {
955            out.push(FieldEntry {
956                node: node_id,
957                members: Vec::new(),
958                read_only: access.denies_writes(),
959                segments: segments.clone(),
960            });
961        }
962
963        // Recurse into containers (not into exclGroup members or field
964        // internals — members are folded into the group entry above).
965        if !is_field && !is_radio_group {
966            sibling_counters.push(HashMap::new());
967            for &child in &node.children {
968                self.walk_enumerate(child, access, segments, sibling_counters, out);
969            }
970            sibling_counters.pop();
971        }
972
973        if named {
974            segments.pop();
975        }
976    }
977
978    fn build_model(
979        &self,
980        entry: &FieldEntry,
981        geometry: &HashMap<usize, Vec<(usize, XfaRect)>>,
982    ) -> XfaFieldModel {
983        let node = self.tree.get(entry.node);
984        let meta = self.tree.meta(entry.node);
985
986        let name = segments_to_display_name(&entry.segments);
987        let som_path = segments_to_som_path(&entry.segments);
988
989        let field_type = if entry.members.is_empty() {
990            XfaFieldType::from_kind(meta.field_kind)
991        } else {
992            XfaFieldType::RadioGroup
993        };
994
995        // Value: field raw value, or selected member's on-value for groups.
996        let value = if entry.members.is_empty() {
997            match &node.node_type {
998                FormNodeType::Field { value } => value.clone(),
999                _ => String::new(),
1000            }
1001        } else {
1002            entry
1003                .members
1004                .iter()
1005                .find_map(|(m, on)| match &self.tree.get(*m).node_type {
1006                    FormNodeType::Field { value } if !value.is_empty() && value == on => {
1007                        Some(on.clone())
1008                    }
1009                    _ => None,
1010                })
1011                .unwrap_or_default()
1012        };
1013
1014        // Options.
1015        let options = if !entry.members.is_empty() {
1016            entry
1017                .members
1018                .iter()
1019                .map(|(_, on)| XfaFieldOption {
1020                    display: on.clone(),
1021                    save: on.clone(),
1022                })
1023                .collect()
1024        } else {
1025            let displays = &meta.display_items;
1026            let saves = &meta.save_items;
1027            displays
1028                .iter()
1029                .enumerate()
1030                .map(|(i, d)| XfaFieldOption {
1031                    display: d.clone(),
1032                    save: saves.get(i).cloned().unwrap_or_else(|| d.clone()),
1033                })
1034                .collect()
1035        };
1036
1037        let on_value = meta
1038            .style
1039            .check_button_on_value
1040            .clone()
1041            .or_else(|| meta.item_value.clone());
1042        let off_value = meta.style.check_button_off_value.clone();
1043
1044        // Geometry: the group node's own rect plus member rects.
1045        let mut widgets: Vec<XfaWidget> = Vec::new();
1046        if let Some(occ) = geometry.get(&entry.node.0) {
1047            for (page, rect) in occ {
1048                widgets.push(XfaWidget {
1049                    page: *page,
1050                    rect: *rect,
1051                    on_value: None,
1052                });
1053            }
1054        }
1055        for (member, on) in &entry.members {
1056            if let Some(occ) = geometry.get(&member.0) {
1057                for (page, rect) in occ {
1058                    widgets.push(XfaWidget {
1059                        page: *page,
1060                        rect: *rect,
1061                        on_value: Some(on.clone()),
1062                    });
1063                }
1064            }
1065        }
1066        widgets.sort_by(|a, b| {
1067            a.page.cmp(&b.page).then(
1068                a.rect
1069                    .y
1070                    .partial_cmp(&b.rect.y)
1071                    .unwrap_or(std::cmp::Ordering::Equal),
1072            )
1073        });
1074        let page = widgets.first().map(|w| w.page);
1075        let rect = widgets.first().map(|w| w.rect);
1076
1077        let bound = meta
1078            .bound_data_node
1079            .map(|raw| self.data_dom.get(DataNodeId::from_raw(raw)).is_some())
1080            .unwrap_or(false);
1081
1082        XfaFieldModel {
1083            name,
1084            som_path,
1085            field_type,
1086            value,
1087            read_only: entry.read_only,
1088            required: meta.required,
1089            multiline: meta.multiline,
1090            hidden: meta.presence != Presence::Visible,
1091            options,
1092            on_value,
1093            off_value,
1094            page,
1095            rect,
1096            widgets,
1097            bound_to_data: bound,
1098            bind_none: meta.data_bind_none,
1099        }
1100    }
1101}
1102
1103fn is_metadata_value(node: &DataNode) -> bool {
1104    match node {
1105        DataNode::DataValue { contains, .. } => {
1106            *contains == xfa_dom_resolver::data_dom::DataContains::MetaData
1107        }
1108        DataNode::DataGroup { .. } => false,
1109    }
1110}
1111
/// Render `(name, index)` segments as a dotted display name.
///
/// A segment with index 0 is written as its bare name; any other index is
/// written as `name[index]`. Segments are separated by `.`; an empty
/// slice yields an empty string.
fn segments_to_display_name(segments: &[(String, usize)]) -> String {
    let mut rendered = String::new();
    for (position, (name, index)) in segments.iter().enumerate() {
        if position > 0 {
            rendered.push('.');
        }
        rendered.push_str(name);
        if *index != 0 {
            rendered.push('[');
            rendered.push_str(&index.to_string());
            rendered.push(']');
        }
    }
    rendered
}
1125
/// Render `(name, index)` segments as a fully indexed SOM path.
///
/// Every segment is written as `name[index]`, including index 0, and
/// segments are separated by `.`; an empty slice yields an empty string.
fn segments_to_som_path(segments: &[(String, usize)]) -> String {
    let mut rendered = String::new();
    for (position, (name, index)) in segments.iter().enumerate() {
        if position > 0 {
            rendered.push('.');
        }
        rendered.push_str(name);
        rendered.push('[');
        rendered.push_str(&index.to_string());
        rendered.push(']');
    }
    rendered
}
1133
1134/// Accumulate absolute widget rectangles per form node.
1135///
1136/// Layout coordinates are parent-relative (the render bridge accumulates
1137/// them the same way); rectangles are reported in XFA page space (top-left
1138/// origin, points).
1139fn collect_geometry(
1140    node: &LayoutNode,
1141    parent_x: f64,
1142    parent_y: f64,
1143    page_idx: usize,
1144    out: &mut HashMap<usize, Vec<(usize, XfaRect)>>,
1145) {
1146    let abs_x = node.rect.x + parent_x;
1147    let abs_y = node.rect.y + parent_y;
1148
1149    let record = matches!(node.content, LayoutContent::Field { .. })
1150        || matches!(
1151            node.content,
1152            LayoutContent::WrappedText {
1153                from_field: true,
1154                ..
1155            }
1156        );
1157    if record {
1158        out.entry(node.form_node.0).or_default().push((
1159            page_idx,
1160            XfaRect {
1161                x: abs_x,
1162                y: abs_y,
1163                width: node.rect.width,
1164                height: node.rect.height,
1165            },
1166        ));
1167    }
1168
1169    for child in &node.children {
1170        collect_geometry(child, abs_x, abs_y, page_idx, out);
1171    }
1172}
1173
1174/// Apply `access="…"` attributes from the saved form packet onto matching
1175/// form-tree nodes (session-only pass; the flatten pipeline ignores access).
1176///
1177/// Adobe Reader records interactive locks (e.g. fields disabled after
1178/// submission) as `access="readOnly"` in the form packet. Without this pass
1179/// the session would let callers write fields Acrobat refuses to edit.
1180fn apply_form_dom_access(tree: &mut FormTree, root_id: FormNodeId, form_xml: &str) {
1181    let Ok(doc) = roxmltree::Document::parse(form_xml) else {
1182        return;
1183    };
1184
1185    fn matches_node(tree: &FormTree, fid: FormNodeId, tag: &str, name: &str) -> bool {
1186        let node = tree.get(fid);
1187        match (tag, &node.node_type) {
1188            ("subform", FormNodeType::Subform | FormNodeType::Area) => node.name == name,
1189            ("exclGroup", FormNodeType::ExclGroup) => node.name == name,
1190            // Some producers serialize exclGroups as <field> in the form DOM.
1191            ("field", FormNodeType::Field { .. } | FormNodeType::ExclGroup) => node.name == name,
1192            _ => false,
1193        }
1194    }
1195
1196    fn walk(tree: &mut FormTree, fid: FormNodeId, xml: roxmltree::Node<'_, '_>) {
1197        if let Some(access) = xml.attribute("access") {
1198            tree.meta_mut(fid).access = Some(Access::parse(access));
1199        }
1200
1201        let xml_children: Vec<roxmltree::Node<'_, '_>> = xml
1202            .children()
1203            .filter(|c| {
1204                c.is_element() && matches!(c.tag_name().name(), "subform" | "field" | "exclGroup")
1205            })
1206            .collect();
1207        if xml_children.is_empty() {
1208            return;
1209        }
1210
1211        let tree_children = tree.get(fid).children.clone();
1212        let mut used = vec![false; tree_children.len()];
1213        for xc in xml_children {
1214            let tag = xc.tag_name().name();
1215            let name = xc.attribute("name").unwrap_or("");
1216            if name.is_empty() {
1217                continue;
1218            }
1219            for (i, &cid) in tree_children.iter().enumerate() {
1220                if used[i] || !matches_node(tree, cid, tag, name) {
1221                    continue;
1222                }
1223                used[i] = true;
1224                walk(tree, cid, xc);
1225                break;
1226            }
1227        }
1228    }
1229
1230    // The form packet root is <form> (sometimes namespaced); its element
1231    // children mirror the template root subform's children. The template
1232    // FormTree root is the <template> wrapper whose first child is the root
1233    // subform — align by descending one level when the names don't match.
1234    let Some(form_root) = doc.root().children().find(|c| c.is_element()) else {
1235        return;
1236    };
1237    // Try matching the root subform: the form packet's first subform child
1238    // corresponds to the tree root's subform child of the same name.
1239    let root_children = tree.get(root_id).children.clone();
1240    let packet_subforms: Vec<roxmltree::Node<'_, '_>> = form_root
1241        .children()
1242        .filter(|c| c.is_element() && c.tag_name().name() == "subform")
1243        .collect();
1244    for xc in packet_subforms {
1245        let name = xc.attribute("name").unwrap_or("");
1246        for &cid in &root_children {
1247            if matches_node(tree, cid, "subform", name) {
1248                walk(tree, cid, xc);
1249                break;
1250            }
1251        }
1252    }
1253}