Skip to main content

pdf_xfa/
classify.rs

1//! XFA form type classification — static vs dynamic vs none.
2//!
3//! XFA Spec 3.3 §9.1 distinguishes two XFA form profiles:
4//!
5//! - **Static (XFAF)**: `baseProfile="interactiveForms"` on `<template>`.
6//!   Layout is fixed; form fields are baked in as AcroForm widgets.
7//! - **Dynamic**: full XFA grammar; layout is re-computed from data at render
8//!   time.  No `baseProfile` restriction.
9//!
10//! Use [`detect_xfa_type_from_packets`] when you already have [`XfaPackets`],
11//! or [`detect_xfa_type`] to parse raw PDF bytes end-to-end.
12//!
13//! # Example
14//! ```rust,ignore
15//! use pdf_xfa::classify::{detect_xfa_type, XfaType};
16//! match detect_xfa_type(&pdf_bytes) {
17//!     XfaType::Static  => println!("XFAF / static form"),
18//!     XfaType::Dynamic => println!("full dynamic XFA"),
19//!     XfaType::None    => println!("not an XFA PDF"),
20//! }
21//! ```
22
23use crate::extract::{extract_xfa_from_bytes, XfaPackets};
24
/// The kind of XFA content a PDF carries, as reported by this module's detectors.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum XfaType {
    /// No XFA content was found, so the document is not an XFA PDF.
    None,
    /// Static XFA form: the template declares `baseProfile="interactiveForms"`.
    /// The layout is fixed and the content is pre-rendered in the PDF page streams.
    Static,
    /// Dynamic XFA form: full XFA grammar, with the layout computed at runtime.
    Dynamic,
}
36
37impl std::fmt::Display for XfaType {
38    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
39        match self {
40            XfaType::None => write!(f, "None"),
41            XfaType::Static => write!(f, "Static"),
42            XfaType::Dynamic => write!(f, "Dynamic"),
43        }
44    }
45}
46
47/// Detect XFA type from already-extracted [`XfaPackets`].
48///
49/// Returns [`XfaType::None`] when there are no packets and no full XML.
50pub fn detect_xfa_type_from_packets(packets: &XfaPackets) -> XfaType {
51    // No packets and no monolithic XML → not an XFA document.
52    if packets.packets.is_empty() && packets.full_xml.is_none() {
53        return XfaType::None;
54    }
55
56    // Look for baseProfile="interactiveForms" in the template packet first,
57    // then fall back to searching the full XDP XML blob.
58    let template_xml: Option<&str> = packets.template();
59    let full_xml: Option<&str> = packets.full_xml.as_deref();
60
61    let search_text: &str = template_xml.or(full_xml).unwrap_or("");
62
63    if search_text.contains(r#"baseProfile="interactiveForms""#) {
64        XfaType::Static
65    } else if !search_text.is_empty() || packets.packets.iter().any(|(name, _)| name == "template")
66    {
67        XfaType::Dynamic
68    } else {
69        // We have something (datasets-only, config-only, etc.) but no template.
70        // Treat as Dynamic — it is at minimum an XFA artefact.
71        XfaType::Dynamic
72    }
73}
74
75/// Detect XFA type from raw PDF bytes.
76///
77/// Parses and extracts XFA packets, then classifies the form.
78/// Returns [`XfaType::None`] when the PDF has no XFA content.
79pub fn detect_xfa_type(pdf_bytes: &[u8]) -> XfaType {
80    // extract_xfa_from_bytes requires Into<PdfData>; Vec<u8> satisfies that.
81    match extract_xfa_from_bytes(pdf_bytes.to_vec()) {
82        Ok(packets) => detect_xfa_type_from_packets(&packets),
83        Err(_) => XfaType::None,
84    }
85}
86
87// ─── unit tests ─────────────────────────────────────────────────────────────
88
#[cfg(test)]
mod tests {
    use super::*;
    use crate::extract::XfaPackets;

    /// Hand-builds [`XfaPackets`] from an XDP string so the tests stay self-contained.
    ///
    /// `full_xml` always holds the whole input. When the input contains a
    /// `<template` element, a `template` packet is pushed as well, spanning from
    /// the opening tag through `</template>` (or to the end of the input when
    /// no closing tag is present).
    fn packets_from_xml(xml: &str) -> XfaPackets {
        const CLOSE_TAG: &str = "</template>";

        let mut built = XfaPackets::default();
        built.full_xml = Some(xml.to_string());

        if let Some(start) = xml.find("<template") {
            // Self-closing or otherwise unterminated template: take the rest of the input.
            let end = xml
                .find(CLOSE_TAG)
                .map_or(xml.len(), |pos| pos + CLOSE_TAG.len());
            let template = xml[start..end].to_string();
            built.packets.push(("template".to_string(), template));
        }

        built
    }

    #[test]
    fn empty_packets_returns_none() {
        let empty = XfaPackets::default();
        assert_eq!(detect_xfa_type_from_packets(&empty), XfaType::None);
    }

    #[test]
    fn static_form_detected_via_base_profile() {
        let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/" baseProfile="interactiveForms"><subform name="root"/></template></xdp:xdp>"#;
        let detected = detect_xfa_type_from_packets(&packets_from_xml(xml));
        assert_eq!(detected, XfaType::Static);
    }

    #[test]
    fn dynamic_form_detected_when_no_base_profile() {
        let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"><occur min="0" max="-1"/></subform></template></xdp:xdp>"#;
        let detected = detect_xfa_type_from_packets(&packets_from_xml(xml));
        assert_eq!(detected, XfaType::Dynamic);
    }

    #[test]
    fn xfa_type_none_on_empty_pdf_bytes() {
        // An empty slice cannot be a valid PDF, so classification falls back to None.
        let no_bytes: &[u8] = &[];
        assert_eq!(detect_xfa_type(no_bytes), XfaType::None);
    }

    #[test]
    fn xfa_type_display() {
        let expected = [
            (XfaType::None, "None"),
            (XfaType::Static, "Static"),
            (XfaType::Dynamic, "Dynamic"),
        ];
        for (kind, text) in expected.iter() {
            assert_eq!(kind.to_string(), *text);
        }
    }

    #[test]
    fn packets_with_only_full_xml_static() {
        // Monolithic XDP stream: `full_xml` is set while the packet list stays empty,
        // so detection has to come from the full-XML fallback.
        let xml = r#"<?xml version="1.0"?><xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template baseProfile="interactiveForms"><subform/></template></xdp:xdp>"#;
        let mut monolithic = XfaPackets::default();
        monolithic.full_xml = Some(xml.to_string());
        assert_eq!(detect_xfa_type_from_packets(&monolithic), XfaType::Static);
    }

    #[test]
    fn datasets_only_packet_treated_as_dynamic() {
        // A datasets packet alone is still an XFA artefact, hence Dynamic.
        let mut datasets_only = XfaPackets::default();
        let packet = ("datasets".to_string(), "<xfa:datasets/>".to_string());
        datasets_only.packets.push(packet);
        assert_eq!(
            detect_xfa_type_from_packets(&datasets_only),
            XfaType::Dynamic
        );
    }
}