use crate::error::{Result, XfaError};
use pdf_syntax::object::dict::keys::{ACRO_FORM, XFA};
use pdf_syntax::object::{Array, Dict, Object, Stream};
use pdf_syntax::Pdf;
/// XFA packets collected from a PDF document.
#[derive(Debug, Clone, Default)]
pub struct XfaPackets {
/// The complete XFA XML text when the packets were split out of a single
/// XML document; `None` for a default-constructed value.
pub full_xml: Option<String>,
/// `(name, content)` pairs in the order they were collected. The same name
/// may occur more than once (for example several `datasets` entries).
pub packets: Vec<(String, String)>,
}
impl XfaPackets {
    /// Returns the content of the first packet whose name is exactly `name`,
    /// or `None` when no packet carries that name.
    pub fn get_packet(&self, name: &str) -> Option<&str> {
        self.packets
            .iter()
            .find_map(|(label, body)| (label.as_str() == name).then_some(body.as_str()))
    }

    /// Returns the first `template` packet, if any.
    pub fn template(&self) -> Option<&str> {
        self.get_packet("template")
    }

    /// Returns the longest `datasets` packet, if any.
    ///
    /// Several `datasets` entries may coexist; the one with the greatest byte
    /// length wins, and on a tie the entry stored last is returned.
    pub fn datasets(&self) -> Option<&str> {
        self.packets
            .iter()
            .filter_map(|(label, body)| (label.as_str() == "datasets").then_some(body.as_str()))
            .max_by_key(|body| body.len())
    }

    /// Returns the first `config` packet, if any.
    pub fn config(&self) -> Option<&str> {
        self.get_packet("config")
    }

    /// Returns the first `localeSet` packet, if any.
    pub fn locale_set(&self) -> Option<&str> {
        self.get_packet("localeSet")
    }
}
/// Extracts the XFA packets from `pdf`.
///
/// The AcroForm entry is tried first. When it yields something (packets or a
/// full XML document) and the datasets packet it contains is missing or
/// shorter than 200 bytes, every stream of the document is scanned for a
/// longer datasets packet, which is then appended as an extra `datasets`
/// entry. When the AcroForm yields nothing, the whole document is scanned for
/// an XDP stream instead.
///
/// # Errors
///
/// Returns whatever `scan_for_xfa` reports when the fallback scan finds no
/// XFA content.
pub fn extract_xfa(pdf: &Pdf) -> Result<XfaPackets> {
    // Below this byte length the stored datasets packet triggers a rescan.
    const DATASETS_RESCAN_BELOW: usize = 200;

    let from_acroform = extract_xfa_from_acroform(pdf)
        .filter(|found| !found.packets.is_empty() || found.full_xml.is_some());
    let Some(mut result) = from_acroform else {
        return scan_for_xfa(pdf);
    };

    let known_len = result.datasets().map_or(0, str::len);
    if known_len < DATASETS_RESCAN_BELOW {
        if let Some(recovered) = scan_for_datasets(pdf, known_len) {
            result.packets.push(("datasets".to_string(), recovered));
        }
    }
    Ok(result)
}
/// Scans every stream object of `pdf` for a standalone datasets packet.
///
/// Only streams that decode to UTF-8, are strictly longer than `min_len`
/// bytes and pass `looks_like_datasets_packet` are considered. The longest
/// candidate is returned; among equally long candidates the first one seen
/// is kept.
fn scan_for_datasets(pdf: &Pdf, min_len: usize) -> Option<String> {
    let mut winner: Option<String> = None;
    for obj in pdf.objects() {
        let Object::Stream(stream) = obj else {
            continue;
        };
        let Some(text) = decode_stream(&stream) else {
            continue;
        };
        if text.len() <= min_len || !looks_like_datasets_packet(&text) {
            continue;
        }
        let beats_current = winner.as_ref().is_none_or(|held| text.len() > held.len());
        if beats_current {
            winner = Some(text);
        }
    }
    winner
}
/// Collects every `xmlns` / `xmlns:prefix` declaration found in `opener`
/// (the text of an opening tag without its closing `>`).
///
/// Each returned string is the declaration exactly as written, from the
/// `xmlns` keyword through the closing quote of its value. A hit only counts
/// when `xmlns` starts the string or follows whitespace or `/`, and is
/// followed (after an optional `:prefix` and optional whitespace) by `=` and
/// a quoted value. Scanning stops at a value whose closing quote is missing.
fn extract_xmlns_decls(opener: &str) -> Vec<String> {
    const KEYWORD: &str = "xmlns";

    fn is_ws(b: u8) -> bool {
        matches!(b, b' ' | b'\t' | b'\r' | b'\n')
    }

    let raw = opener.as_bytes();
    let skip_ws = |mut at: usize| {
        while at < raw.len() && is_ws(raw[at]) {
            at += 1;
        }
        at
    };

    let mut decls = Vec::new();
    let mut cursor = 0;
    while cursor < raw.len() {
        let Some(offset) = opener[cursor..].find(KEYWORD) else {
            break;
        };
        let start = cursor + offset;
        // Unless a full declaration is recognised below, resume the search
        // one byte past this hit.
        cursor = start + 1;

        let at_boundary = start == 0 || is_ws(raw[start - 1]) || raw[start - 1] == b'/';
        if !at_boundary {
            continue;
        }

        // Optional `:prefix` part.
        let mut scan = start + KEYWORD.len();
        if raw.get(scan) == Some(&b':') {
            scan += 1;
            while scan < raw.len() && raw[scan] != b'=' && !is_ws(raw[scan]) {
                scan += 1;
            }
        }

        // `=` with optional whitespace on either side.
        scan = skip_ws(scan);
        if raw.get(scan) != Some(&b'=') {
            continue;
        }
        scan = skip_ws(scan + 1);

        // Quoted value; either quote style is accepted.
        let delim = match raw.get(scan) {
            Some(&q) if q == b'"' || q == b'\'' => q,
            _ => continue,
        };
        let Some(value_len) = raw[scan + 1..].iter().position(|&b| b == delim) else {
            break;
        };
        let close = scan + 1 + value_len;
        decls.push(opener[start..=close].to_string());
        cursor = close + 1;
    }
    decls
}
/// Copies onto the root element of `raw` every namespace declaration from
/// `wrapper_xmlns` that the element does not already declare itself.
///
/// `raw` is expected to start with an opening tag; everything up to the first
/// `>` is treated as that tag. Each entry of `wrapper_xmlns` is a declaration
/// such as `xmlns:xfa="…"`; the part before `=` is the attribute name used to
/// detect an existing declaration. Missing declarations are inserted just
/// before the tag's closing `>` (or `/>` for a self-closing element).
///
/// `raw` is returned unchanged when there is nothing to add or when it
/// contains no `>` at all.
fn inject_missing_xmlns(raw: &str, wrapper_xmlns: &[String]) -> String {
    if wrapper_xmlns.is_empty() {
        return raw.to_string();
    }
    let Some(open_end) = raw.find('>') else {
        return raw.to_string();
    };
    let opener = &raw[..open_end];

    let mut additions = String::new();
    for decl in wrapper_xmlns {
        let attr_name = decl.split('=').next().unwrap_or("").trim_end();
        if attr_name.is_empty() {
            continue;
        }
        if !opener_declares_attr(opener, attr_name) {
            additions.push(' ');
            additions.push_str(decl);
        }
    }
    if additions.is_empty() {
        return raw.to_string();
    }

    // For `<tag …/>` the new attributes must go before the trailing slash.
    let insert_at = match opener.trim_end().strip_suffix('/') {
        Some(head) => head.len(),
        None => open_end,
    };

    let mut out = String::with_capacity(raw.len() + additions.len());
    out.push_str(&raw[..insert_at]);
    out.push_str(&additions);
    out.push_str(&raw[insert_at..]);
    out
}

/// Reports whether `opener` already carries an attribute called `attr_name`.
///
/// The name must start the string or follow whitespace, and must be followed
/// by `=`, optionally separated from it by whitespace — XML permits
/// `name = "value"`, and missing that form would inject a duplicate
/// attribute.
fn opener_declares_attr(opener: &str, attr_name: &str) -> bool {
    opener.match_indices(attr_name).any(|(at, _)| {
        let starts_token = opener[..at]
            .chars()
            .next_back()
            .map_or(true, char::is_whitespace);
        let followed_by_eq = opener[at + attr_name.len()..]
            .trim_start()
            .starts_with('=');
        starts_token && followed_by_eq
    })
}
/// Reports whether `s` is a document whose root element is `<xfa:datasets>`.
///
/// A leading byte-order mark, whitespace, processing instructions (`<?…?>`)
/// and comments (`<!--…-->`) are skipped first. The remaining text must start
/// with `<xfa:datasets` immediately followed by whitespace, `/` or `>`, so
/// longer element names with the same prefix are rejected. An unterminated
/// leading processing instruction or comment yields `false`.
fn looks_like_datasets_packet(s: &str) -> bool {
    const ROOT_OPEN: &str = "<xfa:datasets";

    let mut cursor = s.trim_start_matches('\u{FEFF}').trim_start();
    loop {
        let (body, terminator) = if let Some(body) = cursor.strip_prefix("<?") {
            (body, "?>")
        } else if let Some(body) = cursor.strip_prefix("<!--") {
            (body, "-->")
        } else {
            break;
        };
        let Some(end) = body.find(terminator) else {
            return false;
        };
        cursor = body[end + terminator.len()..].trim_start();
    }

    let Some(after_name) = cursor.strip_prefix(ROOT_OPEN) else {
        return false;
    };
    matches!(
        after_name.bytes().next(),
        Some(b' ' | b'\t' | b'\r' | b'\n' | b'/' | b'>')
    )
}
/// Loads a PDF from `data` and extracts its XFA packets.
///
/// # Errors
///
/// Returns `XfaError::LoadFailed` carrying the debug rendering of the loader
/// error when the PDF cannot be opened, and otherwise whatever `extract_xfa`
/// returns.
pub fn extract_xfa_from_bytes(data: impl Into<pdf_syntax::PdfData>) -> Result<XfaPackets> {
    match Pdf::new(data) {
        Ok(pdf) => extract_xfa(&pdf),
        Err(e) => Err(XfaError::LoadFailed(format!("{e:?}"))),
    }
}
/// Reads the XFA entry of the document catalog's AcroForm dictionary.
///
/// The entry is handled in one of two shapes:
/// * a single stream, which is decoded and split with `parse_xfa_xml`;
/// * an array, which is handed to `extract_from_array`.
///
/// Returns `None` when the catalog, the AcroForm dictionary or the XFA entry
/// is missing, or when the XFA entry is a stream that cannot be decoded as
/// UTF-8 text (the array form is not tried in that case).
pub fn extract_xfa_from_acroform(pdf: &Pdf) -> Option<XfaPackets> {
    let xref = pdf.xref();
    let catalog: Dict<'_> = xref.get(xref.root_id())?;
    let acroform = catalog.get::<Dict<'_>>(ACRO_FORM)?;

    match acroform.get::<Stream<'_>>(XFA) {
        Some(stream) => decode_stream(&stream).map(|xml| parse_xfa_xml(&xml)),
        None => acroform
            .get::<Array<'_>>(XFA)
            .map(|entries| extract_from_array(&entries)),
    }
}
/// Builds the packet list from the array form of the XFA entry, which is
/// read as alternating name / content items.
///
/// A name may be a string or a name object; content may be a stream (decoded
/// as UTF-8) or a string (converted lossily). When an item in name position
/// is neither, it is skipped on its own so the following items can pair up
/// again. A pair whose content cannot be turned into text is dropped.
/// `full_xml` is left unset.
fn extract_from_array(array: &Array<'_>) -> XfaPackets {
    let entries: Vec<Object<'_>> = array.iter::<Object<'_>>().collect();
    let mut collected = XfaPackets::default();

    let mut cursor = 0;
    // Keep going while at least a (name, content) pair remains.
    while let Some([key, value, ..]) = entries.get(cursor..) {
        let label = match key {
            Object::String(s) => std::string::String::from_utf8_lossy(s.as_bytes()).into_owned(),
            Object::Name(n) => std::string::String::from_utf8_lossy(n.as_ref()).into_owned(),
            _ => {
                cursor += 1;
                continue;
            }
        };
        let content = match value {
            Object::Stream(s) => decode_stream(s),
            Object::String(s) => {
                Some(std::string::String::from_utf8_lossy(s.as_bytes()).into_owned())
            }
            _ => None,
        };
        if let Some(body) = content {
            collected.packets.push((label, body));
        }
        cursor += 2;
    }
    collected
}
/// Fallback search: looks through the document's stream objects for one whose
/// decoded text contains `<xdp:xdp` and parses the first such stream.
///
/// At most 2000 streams are inspected; streams that do not decode to UTF-8
/// count towards that limit but are otherwise ignored.
///
/// # Errors
///
/// Returns `XfaError::PacketNotFound` when no inspected stream contains an
/// XDP document.
fn scan_for_xfa(pdf: &Pdf) -> Result<XfaPackets> {
    const MAX_STREAMS: usize = 2000;

    pdf.objects()
        .into_iter()
        .filter_map(|obj| match obj {
            Object::Stream(stream) => Some(stream),
            _ => None,
        })
        .take(MAX_STREAMS)
        .filter_map(|stream| decode_stream(&stream))
        .find(|text| text.contains("<xdp:xdp"))
        .map(|text| parse_xfa_xml(&text))
        .ok_or_else(|| XfaError::PacketNotFound("no XFA content found".to_string()))
}
/// Decodes `stream` and interprets the result as UTF-8 text.
///
/// Returns `None` when decoding fails or the decoded bytes are not valid
/// UTF-8.
fn decode_stream(stream: &Stream<'_>) -> Option<String> {
    let raw = stream.decoded().ok()?;
    std::string::String::from_utf8(raw).ok()
}
/// Splits an XDP document into its top-level packets.
///
/// The input is stored verbatim in `full_xml`. Leading byte-order mark,
/// whitespace, processing instructions and comments are skipped, the first
/// element is taken to be the wrapper, and each child element of the wrapper
/// becomes one `(local_name, xml)` entry, in document order. Namespace
/// declarations found on the wrapper are copied onto every packet that does
/// not declare them itself, so a packet remains well-formed on its own.
///
/// Parsing is deliberately lenient and string-based:
/// * only processing instructions / comments at the very *start* of the
///   document are stripped — a `?>` further inside (for instance a processing
///   instruction within the template) must not truncate the document;
/// * a tag name ends at any whitespace, `/` or `>`, so attributes placed on a
///   following line are handled;
/// * a packet ends at the first `</name>` (or `</xfa:local>`) after its start;
/// * self-closing children and elements without a matching closing tag are
///   skipped rather than reported, and a self-closing child is never paired
///   with the closing tag of a later sibling of the same name.
fn parse_xfa_xml(xml: &str) -> XfaPackets {
    let mut packets = XfaPackets {
        full_xml: Some(xml.to_string()),
        packets: Vec::new(),
    };

    // Strip the prolog: only constructs that really sit in front of the root.
    let mut doc = xml.trim_start_matches('\u{FEFF}').trim();
    loop {
        let (body, terminator) = if let Some(body) = doc.strip_prefix("<?") {
            (body, "?>")
        } else if let Some(body) = doc.strip_prefix("<!--") {
            (body, "-->")
        } else {
            break;
        };
        match body.find(terminator) {
            Some(end) => doc = body[end + terminator.len()..].trim_start(),
            None => break,
        }
    }

    let Some(opener_end) = doc.find('>') else {
        return packets;
    };
    let wrapper_xmlns = extract_xmlns_decls(&doc[..opener_end]);
    let rest = &doc[opener_end + 1..];
    // Content between the wrapper's opening tag and its last closing tag; if
    // no closing tag is present everything after the opener is used.
    let inner = rest
        .rfind("</xdp:xdp>")
        .or_else(|| rest.rfind("</xdp>"))
        .map_or(rest, |end| &rest[..end]);

    let bytes = inner.as_bytes();
    let mut pos = 0;
    while pos < bytes.len() {
        // Jump to the next markup start; text between packets is ignored.
        let Some(skip) = bytes[pos..].iter().position(|&b| b == b'<') else {
            break;
        };
        let tag_start = pos + skip;
        let tail = &inner[tag_start..];

        if tail.starts_with("<!--") {
            if let Some(end) = tail.find("-->") {
                pos = tag_start + end + 3;
                continue;
            }
        }
        if tail.starts_with("<?") {
            if let Some(end) = tail.find("?>") {
                pos = tag_start + end + 2;
                continue;
            }
        }

        // Qualified tag name: up to the first whitespace, `/` or `>`.
        let name_start = tag_start + 1;
        let name_end = bytes[name_start..]
            .iter()
            .position(|&b| b == b'>' || b == b'/' || b.is_ascii_whitespace())
            .map_or(bytes.len(), |len| name_start + len);
        let qualified = &inner[name_start..name_end];
        let local = qualified.rsplit(':').next().unwrap_or(qualified);

        // Where to resume when this element is not reported as a packet:
        // right after the `>` that ends its opening tag.
        let tag_end = find_open_tag_end(bytes, name_end);
        let after_open_tag = tag_end
            .or_else(|| {
                bytes[name_end..]
                    .iter()
                    .position(|&b| b == b'>')
                    .map(|off| name_end + off)
            })
            .map_or(bytes.len(), |end| end + 1);

        let self_closing = matches!(tag_end, Some(end) if bytes[end - 1] == b'/');
        if qualified.is_empty() || self_closing {
            pos = after_open_tag;
            continue;
        }

        let close = format!("</{qualified}>");
        let alt_close = format!("</xfa:{local}>");
        let packet_len = tail
            .find(close.as_str())
            .map(|at| at + close.len())
            .or_else(|| tail.find(alt_close.as_str()).map(|at| at + alt_close.len()));

        match packet_len {
            Some(len) => {
                let raw = &tail[..len];
                packets
                    .packets
                    .push((local.to_string(), inject_missing_xmlns(raw, &wrapper_xmlns)));
                pos = tag_start + len;
            }
            None => pos = after_open_tag,
        }
    }
    packets
}

/// Returns the index of the `>` that ends the opening tag being scanned from
/// `from`, ignoring any `>` inside a quoted attribute value.
fn find_open_tag_end(bytes: &[u8], from: usize) -> Option<usize> {
    let mut quote: Option<u8> = None;
    for (off, &b) in bytes[from..].iter().enumerate() {
        match quote {
            Some(q) if b == q => quote = None,
            Some(_) => {}
            None if b == b'"' || b == b'\'' => quote = Some(b),
            None if b == b'>' => return Some(from + off),
            None => {}
        }
    }
    None
}
/// Summary produced by `validate_xfa_packets` for one `XfaPackets` value.
#[derive(Debug, Clone, Default)]
pub struct PacketValidation {
/// A `template` packet is present.
pub has_template: bool,
/// A `datasets` packet is present.
pub has_datasets: bool,
/// A `config` packet is present.
pub has_config: bool,
/// Byte length of the template packet, `0` when absent.
pub template_bytes: usize,
/// Byte length of the selected datasets packet, `0` when absent.
pub datasets_bytes: usize,
/// Names of all packets, in stored order (duplicates included).
pub packet_names: Vec<String>,
/// Human-readable notes about missing or unusually small packets.
pub warnings: Vec<String>,
}
/// Inspects `packets` and reports which of the main packets are present,
/// how large they are, and any warnings.
///
/// Warnings are emitted, in this order, for:
/// * a missing template, or a template shorter than 100 bytes;
/// * a missing datasets packet, or one shorter than 50 bytes.
///
/// The config packet is only reported as present or absent.
pub fn validate_xfa_packets(packets: &XfaPackets) -> PacketValidation {
    let template = packets.template();
    let datasets = packets.datasets();
    let template_bytes = template.map_or(0, str::len);
    let datasets_bytes = datasets.map_or(0, str::len);

    let mut warnings = Vec::new();
    match template {
        None => warnings.push("No template packet found".to_string()),
        Some(_) if template_bytes < 100 => warnings.push(format!(
            "Template packet is empty (< 100 bytes) — only {template_bytes} bytes"
        )),
        Some(_) => {}
    }
    match datasets {
        None => warnings.push("No datasets packet".to_string()),
        Some(_) if datasets_bytes < 50 => warnings.push(format!(
            "Datasets packet is suspiciously small (< 50 bytes) — only {datasets_bytes} bytes"
        )),
        Some(_) => {}
    }

    PacketValidation {
        has_template: template.is_some(),
        has_datasets: datasets.is_some(),
        has_config: packets.config().is_some(),
        template_bytes,
        datasets_bytes,
        packet_names: packets
            .packets
            .iter()
            .map(|(name, _)| name.clone())
            .collect(),
        warnings,
    }
}
/// Collects the embedded font programs referenced by the document's font
/// descriptors.
///
/// Every dictionary (or stream dictionary) whose `Type` is `FontDescriptor`
/// contributes at most one `(font_name, decoded_bytes)` entry: the keys
/// `FONT_FILE2`, `FONT_FILE` and `FONT_FILE3` are tried in that order and the
/// first stream that decodes to a non-empty buffer is taken. The name is the
/// descriptor's `FontName`, or an empty string when it is absent.
pub fn extract_embedded_fonts(pdf: &Pdf) -> Vec<(String, Vec<u8>)> {
    use pdf_syntax::object::dict::keys::{FONT_FILE, FONT_FILE2, FONT_FILE3, FONT_NAME, TYPE};
    use pdf_syntax::object::Name;

    let mut fonts = Vec::new();
    for obj in pdf.objects() {
        let dict = match &obj {
            Object::Dict(d) => d.clone(),
            Object::Stream(s) => s.dict().clone(),
            _ => continue,
        };

        let is_descriptor = dict
            .get::<Name>(TYPE)
            .is_some_and(|n| n.as_ref() == b"FontDescriptor");
        if !is_descriptor {
            continue;
        }

        let name = match dict.get::<Name>(FONT_NAME) {
            Some(n) => std::string::String::from_utf8_lossy(n.as_ref()).into_owned(),
            None => Default::default(),
        };

        for key in [FONT_FILE2, FONT_FILE, FONT_FILE3] {
            let Some(stream) = dict.get::<Stream<'_>>(key) else {
                continue;
            };
            let Ok(program) = stream.decoded() else {
                continue;
            };
            if program.is_empty() {
                continue;
            }
            fonts.push((name.clone(), program));
            break;
        }
    }
    fonts
}
// Unit tests for XDP packet splitting (`parse_xfa_xml`), the `XfaPackets`
// accessors and `validate_xfa_packets`. The `corpus_NN` tests use small
// inline fixtures; several of them also exercise `crate::classify`.
#[cfg(test)]
mod tests {
use super::*;
// A declaration, a template and a datasets packet yield two packets.
#[test]
fn parse_xfa_packets() {
let xml = r#"<?xml version="1.0"?><xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="f1"><field name="T1"/></subform></template><xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"><xfa:data><f1><T1>Hi</T1></f1></xfa:data></xfa:datasets></xdp:xdp>"#;
let p = parse_xfa_xml(xml);
assert_eq!(p.packets.len(), 2);
assert!(p.template().is_some());
assert!(p.datasets().is_some());
}
// An empty wrapper yields no packets.
#[test]
fn empty_xfa() {
let p = parse_xfa_xml(r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"></xdp:xdp>"#);
assert_eq!(p.packets.len(), 0);
}
// Every accessor returns `None` when the packet is absent.
#[test]
fn get_packet_missing_returns_none() {
let p = parse_xfa_xml(r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"></xdp:xdp>"#);
assert!(p.get_packet("template").is_none());
assert!(p.get_packet("nonexistent").is_none());
assert!(p.config().is_none());
assert!(p.locale_set().is_none());
}
// The original XML text is kept in `full_xml`.
#[test]
fn full_xml_preserved() {
let xml =
r#"<?xml version="1.0"?><xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"></xdp:xdp>"#;
let p = parse_xfa_xml(xml);
let stored = p.full_xml.as_deref().unwrap_or("");
assert!(stored.contains("xdp:xdp"));
}
// A lone config packet is found even though it contains a nested `<xdp>` element.
#[test]
fn config_packet_parsed() {
let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><config xmlns="http://www.xfa.org/schema/xci/3.1/"><present><xdp><packets>*</packets></xdp></present></config></xdp:xdp>"#;
let p = parse_xfa_xml(xml);
assert_eq!(p.packets.len(), 1);
assert!(p.config().is_some());
assert!(p.template().is_none());
}
// Packets are stored in document order.
#[test]
fn multiple_packets_order_preserved() {
let xml = r#"<?xml version="1.0"?><xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"/></template><xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"><xfa:data/></xfa:datasets></xdp:xdp>"#;
let p = parse_xfa_xml(xml);
assert_eq!(p.packets.len(), 2);
assert_eq!(p.packets[0].0, "template");
assert_eq!(p.packets[1].0, "datasets");
assert!(p.template().is_some());
assert!(p.datasets().is_some());
}
// A template and datasets above the size thresholds produce no warnings.
#[test]
fn validate_complete_packets_no_warnings() {
let xml = r#"<?xml version="1.0"?><xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"><field name="firstName" xmlns:ui="http://www.xfa.org/schema/xfa-template/3.3/"><ui><textEdit/></ui></field></subform></template><xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"><xfa:data><root><firstName>Alice</firstName></root></xfa:data></xfa:datasets></xdp:xdp>"#;
let p = parse_xfa_xml(xml);
let v = validate_xfa_packets(&p);
assert!(v.has_template);
assert!(v.has_datasets);
assert!(v.template_bytes > 0);
assert!(v.datasets_bytes > 0);
assert!(
v.warnings.is_empty(),
"expected no warnings, got: {:?}",
v.warnings
);
}
// A missing template is reported as a warning.
#[test]
fn validate_missing_template_produces_warning() {
let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"><xfa:data/></xfa:datasets></xdp:xdp>"#;
let p = parse_xfa_xml(xml);
let v = validate_xfa_packets(&p);
assert!(!v.has_template);
assert!(v
.warnings
.iter()
.any(|w| w.contains("No template packet found")));
}
// A missing datasets packet is reported as a warning.
#[test]
fn validate_missing_datasets_produces_warning() {
let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"><field name="x"/><field name="y"/><field name="z"/><field name="w"/></subform></template></xdp:xdp>"#;
let p = parse_xfa_xml(xml);
let v = validate_xfa_packets(&p);
assert!(!v.has_datasets);
assert!(v.warnings.iter().any(|w| w.contains("No datasets packet")));
}
// A template shorter than 100 bytes triggers the size warning.
#[test]
fn validate_tiny_template_produces_warning() {
let mut p = XfaPackets::default();
p.packets.push(("template".to_string(), "<t/>".to_string()));
p.packets.push((
"datasets".to_string(),
"<xfa:datasets xmlns:xfa=\"http://www.xfa.org/schema/xfa-data/1.0/\"><xfa:data/></xfa:datasets>".to_string(),
));
let v = validate_xfa_packets(&p);
assert!(v.warnings.iter().any(|w| w.contains("< 100 bytes")));
}
// A datasets packet shorter than 50 bytes triggers the size warning.
#[test]
fn validate_tiny_datasets_produces_warning() {
let mut p = XfaPackets::default();
p.packets.push((
"template".to_string(),
"<template xmlns=\"http://www.xfa.org/schema/xfa-template/3.3/\"><subform name=\"root\"><field name=\"a\"/><field name=\"b\"/><field name=\"c\"/></subform></template>".to_string(),
));
p.packets
.push(("datasets".to_string(), "<ds/>".to_string()));
let v = validate_xfa_packets(&p);
assert!(v.warnings.iter().any(|w| w.contains("< 50 bytes")));
}
// `packet_names` lists every packet that was parsed.
#[test]
fn validate_packet_names_list() {
let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><config xmlns="http://www.xfa.org/schema/xci/3.1/"><present/></config><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"><field name="f1"/><field name="f2"/><field name="f3"/></subform></template></xdp:xdp>"#;
let p = parse_xfa_xml(xml);
let v = validate_xfa_packets(&p);
assert!(v.packet_names.contains(&"config".to_string()));
assert!(v.packet_names.contains(&"template".to_string()));
assert!(v.has_config);
}
// This fixture (template with `baseProfile="interactiveForms"`) is expected
// to be classified as `XfaType::Static`.
#[test]
fn corpus_01_static_xfa_form_detection() {
use crate::classify::{detect_xfa_type_from_packets, XfaType};
let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/" baseProfile="interactiveForms"><subform name="Page1"><field name="LastName"/><field name="FirstName"/></subform></template></xdp:xdp>"#;
let p = parse_xfa_xml(xml);
assert_eq!(detect_xfa_type_from_packets(&p), XfaType::Static);
}
// This fixture (template containing `<occur min="0" max="-1"/>`) is expected
// to be classified as `XfaType::Dynamic`.
#[test]
fn corpus_02_dynamic_xfa_form_detection() {
use crate::classify::{detect_xfa_type_from_packets, XfaType};
let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"><occur min="0" max="-1"/><field name="item"/></subform></template></xdp:xdp>"#;
let p = parse_xfa_xml(xml);
assert_eq!(detect_xfa_type_from_packets(&p), XfaType::Dynamic);
}
// Config, template and datasets together yield three packets.
#[test]
fn corpus_03_multiple_packets() {
let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><config xmlns="http://www.xfa.org/schema/xci/3.1/"><present><xdp><packets>*</packets></xdp></present></config><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"/></template><xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"><xfa:data/></xfa:datasets></xdp:xdp>"#;
let p = parse_xfa_xml(xml);
assert_eq!(p.packets.len(), 3, "should have config, template, datasets");
assert!(p.config().is_some());
assert!(p.template().is_some());
assert!(p.datasets().is_some());
}
// A template-only document has no datasets and validation says so.
#[test]
fn corpus_04_template_only_no_datasets() {
let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"><field name="LastName"/><field name="FirstName"/><field name="DOB"/></subform></template></xdp:xdp>"#;
let p = parse_xfa_xml(xml);
assert!(p.template().is_some());
assert!(p.datasets().is_none());
let v = validate_xfa_packets(&p);
assert!(!v.has_datasets);
assert!(v.warnings.iter().any(|w| w.contains("No datasets")));
}
// Base64 payloads inside datasets survive packet extraction unchanged.
#[test]
fn corpus_05_xfa_with_image_data_in_datasets() {
let b64_image = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==";
let xml = format!(
r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"><field name="photo"><ui><imageEdit/></ui></field></subform></template><xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"><xfa:data><root><photo contentType="image/png" href="">{b64_image}</photo></root></xfa:data></xfa:datasets></xdp:xdp>"#
);
let p = parse_xfa_xml(&xml);
assert!(p.template().is_some());
assert!(p.datasets().is_some());
let ds = p.datasets().unwrap();
assert!(ds.contains(b64_image), "datasets should contain image data");
}
// Bytes that contain no XFA are classified as `XfaType::None`.
#[test]
fn corpus_06_non_xfa_pdf_returns_none() {
use crate::classify::{detect_xfa_type, XfaType};
let not_xfa: &[u8] = b"%PDF-1.4\n%%EOF";
assert_eq!(detect_xfa_type(not_xfa), XfaType::None);
}
// The config packet keeps its nested content and is reported by validation.
#[test]
fn corpus_07_xfa_with_config_packet() {
let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><config xmlns="http://www.xfa.org/schema/xci/3.1/"><present><xdp><packets>*</packets></xdp></present><pdf><version>1.6</version></pdf></config><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"/></template></xdp:xdp>"#;
let p = parse_xfa_xml(xml);
assert!(p.config().is_some());
let cfg = p.config().unwrap();
assert!(cfg.contains("packets"));
let v = validate_xfa_packets(&p);
assert!(v.has_config);
}
// With two datasets entries, `datasets()` returns the longer (filled) one.
#[test]
fn corpus_08_empty_datasets_incremental_save_pattern() {
let mut p = XfaPackets::default();
p.packets.push((
"template".to_string(),
"<template xmlns=\"http://www.xfa.org/schema/xfa-template/3.3/\"><subform name=\"root\"><field name=\"qty\"/><field name=\"price\"/><field name=\"total\"/></subform></template>".to_string(),
));
p.packets.push((
"datasets".to_string(),
"<xfa:datasets xmlns:xfa=\"http://www.xfa.org/schema/xfa-data/1.0/\"/>".to_string(),
));
p.packets.push(("datasets".to_string(), "<xfa:datasets xmlns:xfa=\"http://www.xfa.org/schema/xfa-data/1.0/\"><xfa:data><root><qty>3</qty><price>9.99</price><total>29.97</total></root></xfa:data></xfa:datasets>".to_string()));
let ds = p.datasets().expect("datasets should exist");
assert!(
ds.contains("29.97"),
"should return the larger/filled datasets"
);
}
// A generated 20-field template and matching data validate without warnings.
#[test]
fn corpus_09_large_template_many_fields() {
let fields: String = (1..=20)
.map(|i| format!("<field name=\"field{i}\"><ui><textEdit/></ui></field>"))
.collect();
let xml = format!(
r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root">{fields}</subform></template><xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"><xfa:data><root>{}</root></xfa:data></xfa:datasets></xdp:xdp>"#,
(1..=20)
.map(|i| format!("<field{i}>val{i}</field{i}>"))
.collect::<String>()
);
let p = parse_xfa_xml(&xml);
let v = validate_xfa_packets(&p);
assert!(v.has_template);
assert!(v.has_datasets);
assert!(
v.template_bytes >= 100,
"large template should exceed 100 bytes"
);
assert!(
v.warnings.is_empty(),
"no warnings expected: {:?}",
v.warnings
);
}
// The `localeSet` packet is reachable through `locale_set()`.
#[test]
fn corpus_10_xfa_with_locale_set_packet() {
let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><localeSet xmlns="http://www.xfa.org/schema/xfa-locale-set/2.7/"><locale name="en_US" desc="English (United States)"><calendarSymbols name="gregorian"/></locale></localeSet><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"><field name="date"/></subform></template></xdp:xdp>"#;
let p = parse_xfa_xml(xml);
assert!(
p.locale_set().is_some(),
"localeSet packet should be accessible"
);
assert!(p.template().is_some());
let ls = p.locale_set().unwrap();
assert!(ls.contains("en_US"));
}
}