pdf-xfa 1.0.0-beta.7

XFA engine — extraction, layout rendering, font resolution. Experimental and under active development.
Documentation
//! Tier A wave-3 regression: each clamped instance of a repeating
//! subform must bind to its own data record, not collapse to the
//! first record.
//!
//! Before the fix, `FormMerger::merge` produced two `Order`
//! instances (correct per the M5.3 occur-clamp rule) but both bound
//! their `<Item>` field to data record 0 — rendered output read
//! `"A A"` instead of `"A B"`.
//!
//! Three tests:
//!
//! 1. Direct merger-level: each clamped instance's field value
//!    matches its own data record.
//! 2. End-to-end flatten + extracted-text: rendered page 1 contains
//!    each clamped instance's value.
//! 3. Anti-regression: a non-repeating sibling subform with no
//!    `<occur max=…>` is unaffected (parent context still drives
//!    its child lookup).

use std::sync::{Arc, Mutex, OnceLock};

use lopdf::{dictionary, Document, Object, Stream};
use pdf_xfa::dom_resolver::data_dom::DataDom;
use pdf_xfa::flatten_xfa_to_pdf;
use pdf_xfa::layout::form::FormNodeType;
use pdf_xfa::layout::trace::{with_global_sink, RecordingSink};
use pdf_xfa::merger::FormMerger;

/// Returns the process-wide lock that tests take before installing a
/// global trace sink.
///
/// The mutex is created lazily on the first call and the very same
/// instance is handed back on every later call, so holding its guard
/// keeps sink-installing tests from overlapping when the test harness
/// runs them on several threads.
fn global_sink_serializer() -> &'static Mutex<()> {
    static SINK_GATE: OnceLock<Mutex<()>> = OnceLock::new();
    SINK_GATE.get_or_init(Mutex::default)
}

/// XFA template fixture for the merger-level test.
///
/// Root subform `form1` declares one page area and contains
/// `Orders` > `Order`; `Order` carries `<occur min="0" max="2"/>` and
/// holds a single `Item` text field.
const TEMPLATE_OCCUR_MAX_2: &str = r#"<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
    <subform name="form1" layout="tb" w="8.5in" h="11in">
      <pageSet><pageArea name="Page1"><contentArea x="36pt" y="36pt" w="540pt" h="720pt"/><medium stock="default" short="612pt" long="792pt"/></pageArea></pageSet>
      <subform name="Orders" layout="tb" w="540pt">
        <subform name="Order" layout="tb" w="540pt">
          <occur min="0" max="2"/>
          <field name="Item" w="200pt" h="18pt"><ui><textEdit/></ui></field>
        </subform>
      </subform>
    </subform>
  </template>"#;

/// Datasets fixture for the merger-level test: five `<Order>` records
/// placed directly under `<form1>`, whose `<Item>` values are `A`
/// through `E` in document order.
const DATASETS_FIVE_ORDERS: &str = r#"<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
    <xfa:data><form1>
      <Order><Item>A</Item></Order><Order><Item>B</Item></Order>
      <Order><Item>C</Item></Order><Order><Item>D</Item></Order>
      <Order><Item>E</Item></Order>
    </form1></xfa:data>
  </xfa:datasets>"#;

/// Merger-level check of the occur clamp.
///
/// The template caps `Order` at two instances while the data carries
/// five records, so the merged tree must hold exactly two `Item`
/// fields, bound to `A` and `B` in that order. Before the fix both
/// instances bound to `A`.
#[test]
fn occur_max_two_each_instance_binds_its_own_record() {
    let records = DataDom::from_xml(DATASETS_FIVE_ORDERS).expect("ds parse");
    let form_merger = FormMerger::new(&records);
    let (tree, _root_id) = form_merger.merge(TEMPLATE_OCCUR_MAX_2).expect("merge");

    // Walk every merged node and keep the value of each field named `Item`,
    // preserving tree order.
    let mut bound_items: Vec<&str> = Vec::new();
    for node in tree.nodes.iter() {
        if node.name.as_str() != "Item" {
            continue;
        }
        if let FormNodeType::Field { value } = &node.node_type {
            bound_items.push(value.as_str());
        }
    }

    // Check the instance count first so a clamp failure is reported as such
    // rather than as a value mismatch.
    assert_eq!(
        bound_items.len(),
        2,
        "occur max=2 must produce 2 Item fields"
    );
    assert_eq!(
        bound_items,
        ["A", "B"],
        "each clamped Order instance must bind to its own data record; \
         pre-wave-3 the second instance collapsed to record 0"
    );
}

// -----------------------------------------------------------------
// End-to-end flatten test
// -----------------------------------------------------------------

/// Complete XDP packet for the end-to-end flatten test.
///
/// Bundles a template whose `Order` subform declares
/// `<occur min="0" max="2"/>` with a datasets section holding five
/// `<Order>` records (`Item` = `A` through `E`).
const XDP_OCCUR_CLAMP_MAX: &str = r#"<?xml version="1.0"?>
<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/" xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
  <template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
    <subform name="form1" layout="tb" w="8.5in" h="11in">
      <pageSet><pageArea name="Page1"><contentArea x="36pt" y="36pt" w="540pt" h="720pt"/><medium stock="default" short="612pt" long="792pt"/></pageArea></pageSet>
      <subform name="Orders" layout="tb" w="540pt">
        <subform name="Order" layout="tb" w="540pt">
          <occur min="0" max="2"/>
          <field name="Item" w="200pt" h="18pt"><ui><textEdit/></ui></field>
        </subform>
      </subform>
    </subform>
  </template>
  <xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
    <xfa:data><form1>
      <Order><Item>A</Item></Order><Order><Item>B</Item></Order>
      <Order><Item>C</Item></Order><Order><Item>D</Item></Order>
      <Order><Item>E</Item></Order>
    </form1></xfa:data>
  </xfa:datasets>
</xdp:xdp>"#;

fn wrap_in_host_pdf(xdp: &str) -> Vec<u8> {
    let mut doc = Document::with_version("1.7");
    let xfa_id = doc.add_object(Object::Stream(Stream::new(
        dictionary! {},
        xdp.as_bytes().to_vec(),
    )));
    let pages_id = doc.new_object_id();
    let content_id = doc.add_object(Object::Stream(Stream::new(
        dictionary! { "Length" => Object::Integer(0_i64) },
        vec![],
    )));
    let page_id = doc.add_object(Object::Dictionary(dictionary! {
        "Type" => Object::Name(b"Page".to_vec()),
        "Parent" => Object::Reference(pages_id),
        "MediaBox" => Object::Array(vec![
            Object::Integer(0), Object::Integer(0),
            Object::Integer(612), Object::Integer(792),
        ]),
        "Contents" => Object::Reference(content_id),
    }));
    doc.objects.insert(
        pages_id,
        Object::Dictionary(dictionary! {
            "Type" => Object::Name(b"Pages".to_vec()),
            "Kids" => Object::Array(vec![Object::Reference(page_id)]),
            "Count" => Object::Integer(1),
        }),
    );
    let acroform_id = doc.add_object(Object::Dictionary(dictionary! {
        "XFA" => Object::Reference(xfa_id),
    }));
    let catalog_id = doc.add_object(Object::Dictionary(dictionary! {
        "Type" => Object::Name(b"Catalog".to_vec()),
        "Pages" => Object::Reference(pages_id),
        "AcroForm" => Object::Reference(acroform_id),
    }));
    doc.trailer.set("Root", Object::Reference(catalog_id));
    let mut out = Vec::new();
    doc.save_to(&mut out).expect("save");
    out
}

/// End-to-end check: flattening the occur-clamped XDP must render the
/// values of both clamped `Order` instances on page 1.
///
/// The flatten call runs with a recording trace sink installed so the
/// test can also assert that the static fallback was not taken and
/// that the occur-max clamp event was still emitted.
#[test]
fn flattened_output_renders_both_clamped_instance_values() {
    // Hold the shared lock while the global sink is installed. A lock
    // poisoned by another failing test is recovered rather than
    // cascading the failure into this test.
    let _guard = global_sink_serializer()
        .lock()
        .unwrap_or_else(|e| e.into_inner());
    let pdf = wrap_in_host_pdf(XDP_OCCUR_CLAMP_MAX);
    let sink: Arc<RecordingSink> = Arc::new(RecordingSink::new());
    let out = with_global_sink(sink.clone() as _, || flatten_xfa_to_pdf(&pdf)).expect("flatten ok");

    // Reduce every recorded event to a "phase/reason" tag for matching.
    let tags: Vec<String> = sink
        .events()
        .iter()
        .map(|e| format!("{}/{}", e.phase.tag(), e.reason.tag()))
        .collect();
    assert!(
        !tags.iter().any(|t| t == "fallback/static_fallback_taken"),
        "Tier A static-template XDP must not take the static fallback; got {:?}",
        tags
    );
    assert!(
        tags.iter()
            .any(|t| t == "occur/data_count_clamped_by_occur_max"),
        "M5.3 occur clamp rule must still fire after the fix; got {:?}",
        tags
    );

    let doc = Document::load_mem(&out).expect("load out");
    assert_eq!(doc.get_pages().len(), 1, "expected 1 rendered page");
    // Surface an extraction failure directly instead of degrading it to an
    // empty string, which would show up below as a misleading
    // "values not rendered" assertion failure.
    let text = doc
        .extract_text(&[1])
        .expect("extract text from rendered page 1");
    assert!(
        text.contains('A') && text.contains('B'),
        "page 1 must render both clamped Order values; got text={:?}",
        text
    );
    // Pre-fix, the rendered string was "A A" — assert the second
    // instance is no longer a duplicate of the first. Joining the
    // whitespace-split tokens already yields a trimmed string.
    let collapsed: String = text.split_whitespace().collect::<Vec<_>>().join(" ");
    assert_ne!(
        collapsed.as_str(),
        "A A",
        "second clamped instance must not collapse to the first record"
    );
}

// -----------------------------------------------------------------
// Anti-regression: non-repeating sibling with parent context
// -----------------------------------------------------------------

/// Anti-regression guard for the *normal-parse* binding path.
///
/// `Header` has no `<occur>` element and its name does not collide
/// with a pre-bound instance, so its parent context is the natural
/// parent (form1_data). Its `Title` child must still resolve to the
/// value stored in the data.
#[test]
fn non_repeating_sibling_still_binds_via_parent_context() {
    const TEMPLATE: &str = r#"<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
        <subform name="form1" layout="tb" w="8.5in" h="11in">
          <pageSet><pageArea name="Page1"><contentArea x="36pt" y="36pt" w="540pt" h="720pt"/><medium stock="default" short="612pt" long="792pt"/></pageArea></pageSet>
          <subform name="Header" layout="tb" w="540pt">
            <field name="Title" w="300pt" h="18pt"><ui><textEdit/></ui></field>
          </subform>
        </subform>
      </template>"#;
    const DATASETS: &str = r#"<xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
        <xfa:data><form1>
          <Header><Title>Receipt</Title></Header>
        </form1></xfa:data>
      </xfa:datasets>"#;

    let records = DataDom::from_xml(DATASETS).expect("ds parse");
    let form_merger = FormMerger::new(&records);
    let (tree, _root_id) = form_merger.merge(TEMPLATE).expect("merge");

    // Narrow to nodes named `Title` first, then keep only those that are
    // fields and take their bound value.
    let bound_titles: Vec<&str> = tree
        .nodes
        .iter()
        .filter(|node| node.name.as_str() == "Title")
        .filter_map(|node| match &node.node_type {
            FormNodeType::Field { value } => Some(value.as_str()),
            _ => None,
        })
        .collect();

    assert_eq!(
        bound_titles,
        ["Receipt"],
        "non-repeating subform's child must still bind via parent context"
    );
}