pdf-xfa 1.0.0-beta.7

XFA engine — extraction, layout rendering, font resolution. Experimental and under active development.
Documentation
//! Tier A Wave 1 fix: when `pdf-xfa::flatten` punts to the static
//! fallback, it must emit `(Phase::Fallback, Reason::StaticFallbackTaken)`
//! so wave/gate harnesses can distinguish a real XFA flatten from a
//! passthrough.
//!
//! Two scenarios are covered (must-fire + must-not-fire):
//!
//! - corrupt/minimal XFA template (tiny PDF with stub `<template>`)
//!   → fallback tag MUST appear,
//! - healthy XFA 3.3 template with one field
//!   → fallback tag MUST NOT appear.
//!
//! The "AcroForm-only / no XFA packet" pre-XFA branch in
//! `flatten.rs:470` is deliberately silent — it is legitimate static
//! handling, not a failed-XFA fallback — and is therefore not
//! exercised here.

use std::sync::{Arc, Mutex, OnceLock};

use lopdf::{dictionary, Document, Object, Stream};
use pdf_xfa::flatten_xfa_to_pdf;
use pdf_xfa::layout::trace::{with_global_sink, RecordingSink};

/// Returns the single mutex that serializes the tests in this file.
///
/// `with_global_sink` installs its sink into a process-wide slot, and
/// Cargo runs `#[test]` functions in parallel, so unsynchronized tests
/// would record each other's events. Every call hands back the same
/// lazily-created lock, which keeps the assertions deterministic.
fn global_sink_serializer() -> &'static Mutex<()> {
    static SERIALIZER: OnceLock<Mutex<()>> = OnceLock::new();
    SERIALIZER.get_or_init(Mutex::default)
}

fn wrap_in_host_pdf(xfa_bytes: &[u8]) -> Vec<u8> {
    let mut doc = Document::with_version("1.7");
    let xfa_id = doc.add_object(Object::Stream(Stream::new(
        dictionary! {},
        xfa_bytes.to_vec(),
    )));
    let pages_id = doc.new_object_id();
    let content_id = doc.add_object(Object::Stream(Stream::new(
        dictionary! { "Length" => Object::Integer(0_i64) },
        vec![],
    )));
    let page_id = doc.add_object(Object::Dictionary(dictionary! {
        "Type" => Object::Name(b"Page".to_vec()),
        "Parent" => Object::Reference(pages_id),
        "MediaBox" => Object::Array(vec![
            Object::Integer(0), Object::Integer(0),
            Object::Integer(612), Object::Integer(792),
        ]),
        "Contents" => Object::Reference(content_id),
    }));
    doc.objects.insert(
        pages_id,
        Object::Dictionary(dictionary! {
            "Type" => Object::Name(b"Pages".to_vec()),
            "Kids" => Object::Array(vec![Object::Reference(page_id)]),
            "Count" => Object::Integer(1),
        }),
    );
    let acroform_id = doc.add_object(Object::Dictionary(dictionary! {
        "XFA" => Object::Reference(xfa_id),
    }));
    let catalog_id = doc.add_object(Object::Dictionary(dictionary! {
        "Type" => Object::Name(b"Catalog".to_vec()),
        "Pages" => Object::Reference(pages_id),
        "AcroForm" => Object::Reference(acroform_id),
    }));
    doc.trailer.set("Root", Object::Reference(catalog_id));
    let mut out = Vec::new();
    doc.save_to(&mut out).expect("save lopdf");
    out
}

/// Runs `flatten_xfa_to_pdf` over `pdf` with a fresh `RecordingSink`
/// installed through `with_global_sink`, then returns every recorded
/// event rendered as `"<phase tag>/<reason tag>"`, in recorded order.
///
/// The flatten result itself is discarded; only the trace matters here.
/// The file-wide serializer lock is held for the whole call, and a
/// poisoned lock (an earlier test panicked while holding it) is reused
/// rather than treated as an error.
fn captured_tag_sequence(pdf: &[u8]) -> Vec<String> {
    let _exclusive = match global_sink_serializer().lock() {
        Ok(guard) => guard,
        Err(poisoned) => poisoned.into_inner(),
    };

    let recorder: Arc<RecordingSink> = Arc::new(RecordingSink::new());
    let installed = recorder.clone();
    let _ = with_global_sink(installed as _, || flatten_xfa_to_pdf(pdf));

    let mut tags = Vec::new();
    for event in recorder.events().iter() {
        tags.push(format!("{}/{}", event.phase.tag(), event.reason.tag()));
    }
    tags
}

/// Smallest XDP packet used as the "corrupt" fixture: an `xdp:xdp` root
/// wrapping a single self-closing XFA 3.3 `<template/>` element with no
/// children at all.
const CORRUPT_TINY_XDP: &str = r#"<?xml version="1.0"?>
<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/">
<template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"/>
</xdp:xdp>"#;

/// Must-fire case: the host PDF's template carries no `<subform>` or
/// `<pageSet>` content, which is expected to be caught by
/// `is_corrupt_xfa_template` and routed to the static fallback, so the
/// fallback tag has to show up in the recorded trace.
#[test]
fn corrupt_minimal_template_emits_static_fallback_tag() {
    let host_pdf = wrap_in_host_pdf(CORRUPT_TINY_XDP.as_bytes());
    let recorded = captured_tag_sequence(&host_pdf);
    let fallback_seen = recorded
        .iter()
        .map(String::as_str)
        .any(|tag| tag == "fallback/static_fallback_taken");
    assert!(
        fallback_seen,
        "expected fallback/static_fallback_taken in tag sequence; got {:?}",
        recorded
    );
}

/// Must-not-fire case: a synthetic XFA 3.3 template that declares a page
/// set, one text field and matching data is well-formed, so flattening it
/// should never record the static-fallback tag.
#[test]
fn healthy_xfa_does_not_emit_fallback_tag() {
    let healthy_packet = br#"<?xml version="1.0"?>
<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/" xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
  <template xmlns="http://www.xfa.org/schema/xfa-template/3.3/">
    <subform name="form1" layout="paginate" w="8.5in" h="11in">
      <pageSet><pageArea name="P1"><contentArea x="36pt" y="36pt" w="540pt" h="720pt"/><medium stock="default" short="612pt" long="792pt"/></pageArea></pageSet>
      <subform name="s1" layout="tb" w="540pt">
        <field name="Name" w="300pt" h="18pt"><ui><textEdit/></ui></field>
      </subform>
    </subform>
  </template>
  <xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/">
    <xfa:data><form1><Name>Alice</Name></form1></xfa:data>
  </xfa:datasets>
</xdp:xdp>"#;
    let host_pdf = wrap_in_host_pdf(healthy_packet);
    let recorded = captured_tag_sequence(&host_pdf);
    let fallback_absent = recorded
        .iter()
        .all(|tag| tag != "fallback/static_fallback_taken");
    assert!(
        fallback_absent,
        "healthy XFA should not emit fallback; got tags {:?}",
        recorded
    );
}