1use crate::error::{Result, XfaError};
3use pdf_syntax::object::dict::keys::{ACRO_FORM, XFA};
4use pdf_syntax::object::{Array, Dict, Object, Stream};
5use pdf_syntax::Pdf;
/// XFA packets recovered from a PDF document.
#[derive(Debug, Clone, Default)]
pub struct XfaPackets {
    /// The complete XML document the packets were split from, when one exists.
    pub full_xml: Option<String>,
    /// `(packet name, packet XML)` pairs in discovery order; names may repeat.
    pub packets: Vec<(String, String)>,
}

impl XfaPackets {
    /// Returns the XML of the first packet whose name equals `name`.
    pub fn get_packet(&self, name: &str) -> Option<&str> {
        for (packet_name, xml) in &self.packets {
            if packet_name == name {
                return Some(xml.as_str());
            }
        }
        None
    }

    /// Returns the first `template` packet.
    pub fn template(&self) -> Option<&str> {
        self.get_packet("template")
    }

    /// Returns the longest `datasets` packet.
    ///
    /// When several candidates share the maximum length, the one stored last
    /// is returned.
    pub fn datasets(&self) -> Option<&str> {
        self.packets
            .iter()
            .filter_map(|(packet_name, xml)| (packet_name == "datasets").then_some(xml.as_str()))
            .max_by_key(|xml| xml.len())
    }

    /// Returns the first `config` packet.
    pub fn config(&self) -> Option<&str> {
        self.get_packet("config")
    }

    /// Returns the first `localeSet` packet.
    pub fn locale_set(&self) -> Option<&str> {
        self.get_packet("localeSet")
    }
}
48pub fn extract_xfa(pdf: &Pdf) -> Result<XfaPackets> {
50 if let Some(mut p) = extract_xfa_from_acroform(pdf) {
51 if !p.packets.is_empty() || p.full_xml.is_some() {
52 let current_ds_len = p.datasets().map(|s| s.len()).unwrap_or(0);
56 if current_ds_len < 200 {
57 if let Some(better_ds) = scan_for_datasets(pdf, current_ds_len) {
58 p.packets.push(("datasets".to_string(), better_ds));
59 }
60 }
61 return Ok(p);
62 }
63 }
64 scan_for_xfa(pdf)
65}
66
67fn scan_for_datasets(pdf: &Pdf, min_len: usize) -> Option<String> {
86 let mut best: Option<String> = None;
87 for obj in pdf.objects() {
88 if let Object::Stream(s) = obj {
89 if let Some(d) = decode_stream(&s) {
90 if d.len() > min_len
91 && looks_like_datasets_packet(&d)
92 && best.as_ref().is_none_or(|b| d.len() > b.len())
93 {
94 best = Some(d);
95 }
96 }
97 }
98 }
99 best
100}
101
/// Collects the namespace declarations found in an element's opening tag.
///
/// `opener` is the text of a start tag (without the closing `>`). Every
/// `xmlns="..."` / `xmlns:prefix="..."` attribute is returned verbatim, from
/// the `xmlns` keyword through the closing quote, in source order. Whitespace
/// around `=` and either quote style are accepted. Scanning stops at the first
/// declaration whose value is missing its closing quote.
fn extract_xmlns_decls(opener: &str) -> Vec<String> {
    const KEYWORD: &str = "xmlns";

    /// The whitespace bytes recognised between attribute parts.
    fn is_xml_space(b: u8) -> bool {
        matches!(b, b' ' | b'\t' | b'\r' | b'\n')
    }

    /// Returns the first index at or after `at` that is not whitespace.
    fn skip_spaces(raw: &[u8], mut at: usize) -> usize {
        while at < raw.len() && is_xml_space(raw[at]) {
            at += 1;
        }
        at
    }

    let raw = opener.as_bytes();
    let mut decls = Vec::new();
    let mut cursor = 0;
    while cursor < raw.len() {
        let Some(offset) = opener[cursor..].find(KEYWORD) else {
            break;
        };
        let start = cursor + offset;
        // Unless a full declaration is parsed below, resume one byte further on.
        cursor = start + 1;

        // The keyword must begin an attribute, not sit inside another name.
        let begins_token = start == 0 || is_xml_space(raw[start - 1]) || raw[start - 1] == b'/';
        if !begins_token {
            continue;
        }

        // Optional `:prefix` part.
        let mut at = start + KEYWORD.len();
        if raw.get(at) == Some(&b':') {
            at += 1;
            while at < raw.len() && raw[at] != b'=' && !is_xml_space(raw[at]) {
                at += 1;
            }
        }

        at = skip_spaces(raw, at);
        if raw.get(at) != Some(&b'=') {
            continue;
        }
        at = skip_spaces(raw, at + 1);

        let quote = match raw.get(at) {
            Some(&q) if q == b'"' || q == b'\'' => q,
            _ => continue,
        };
        // An unterminated value ends the scan altogether.
        let Some(value_len) = raw[at + 1..].iter().position(|&b| b == quote) else {
            break;
        };
        let closing_quote = at + 1 + value_len;
        decls.push(opener[start..=closing_quote].to_string());
        cursor = closing_quote + 1;
    }
    decls
}
169
/// Copies namespace declarations from the wrapper element onto a packet.
///
/// `raw` is the XML of one packet, starting at its opening tag, and
/// `wrapper_xmlns` holds declarations such as `xmlns:xfa="..."` taken from the
/// enclosing element. Every declaration whose attribute name is not already
/// present on the packet's opening tag is appended to that tag (before the
/// `/` of a self-closing tag), so the packet stays namespace-complete when it
/// is used on its own.
///
/// Returns `raw` unchanged when there is nothing to add or when `raw`
/// contains no `>`.
fn inject_missing_xmlns(raw: &str, wrapper_xmlns: &[String]) -> String {
    /// Reports whether `opener` already carries an attribute named `attr`.
    ///
    /// The name must start the text or follow whitespace and must be followed
    /// by `=`, optionally preceded by whitespace. XML permits whitespace around
    /// `=`, so `xmlns:a = "x"` counts as a declaration of `xmlns:a`; missing it
    /// would produce a duplicate attribute.
    fn is_declared(opener: &str, attr: &str) -> bool {
        opener.match_indices(attr).any(|(start, _)| {
            let at_token_start = match opener[..start].chars().next_back() {
                Some(prev) => prev.is_whitespace(),
                None => true,
            };
            at_token_start && opener[start + attr.len()..].trim_start().starts_with('=')
        })
    }

    if wrapper_xmlns.is_empty() {
        return raw.to_string();
    }
    let Some(open_end) = raw.find('>') else {
        return raw.to_string();
    };
    let opener = &raw[..open_end];

    let mut additions = String::new();
    for decl in wrapper_xmlns {
        // The attribute name is everything before the first `=`.
        let attr = decl.split('=').next().unwrap_or("").trim_end();
        if attr.is_empty() || is_declared(opener, attr) {
            continue;
        }
        additions.push(' ');
        additions.push_str(decl);
    }
    if additions.is_empty() {
        return raw.to_string();
    }

    // For `<tag .../>` the new attributes go in front of the trailing slash.
    let trimmed = opener.trim_end();
    let insert_at = if trimmed.ends_with('/') {
        trimmed.len() - 1
    } else {
        open_end
    };

    let mut out = String::with_capacity(raw.len() + additions.len());
    out.push_str(&raw[..insert_at]);
    out.push_str(&additions);
    out.push_str(&raw[insert_at..]);
    out
}
219
/// Reports whether `s` is an XML document whose root element is
/// `<xfa:datasets>`.
///
/// A leading byte-order mark, whitespace, processing instructions (including
/// the XML declaration) and comments are skipped first. The element name must
/// then be followed by whitespace, `/` or `>`, so longer names that merely
/// start with `xfa:datasets` are rejected. An unterminated processing
/// instruction or comment yields `false`.
fn looks_like_datasets_packet(s: &str) -> bool {
    const ROOT_TAG: &str = "<xfa:datasets";

    let mut cursor = s.trim_start_matches('\u{FEFF}').trim_start();
    loop {
        // Pick the terminator that matches whichever prolog item comes next.
        let (body, terminator) = if let Some(body) = cursor.strip_prefix("<?") {
            (body, "?>")
        } else if let Some(body) = cursor.strip_prefix("<!--") {
            (body, "-->")
        } else {
            break;
        };
        let Some(end) = body.find(terminator) else {
            return false;
        };
        cursor = body[end + terminator.len()..].trim_start();
    }

    let Some(after_name) = cursor.strip_prefix(ROOT_TAG) else {
        return false;
    };
    match after_name.as_bytes().first() {
        Some(&next) => matches!(next, b' ' | b'\t' | b'\r' | b'\n' | b'/' | b'>'),
        None => false,
    }
}
252pub fn extract_xfa_from_bytes(data: impl Into<pdf_syntax::PdfData>) -> Result<XfaPackets> {
254 let pdf = Pdf::new(data).map_err(|e| XfaError::LoadFailed(format!("{e:?}")))?;
255 extract_xfa(&pdf)
256}
257pub fn extract_xfa_from_acroform(pdf: &Pdf) -> Option<XfaPackets> {
259 let xref = pdf.xref();
260 let catalog: Dict<'_> = xref.get(xref.root_id())?;
261 let acroform: Dict<'_> = catalog.get(ACRO_FORM)?;
262 if let Some(stream) = acroform.get::<Stream<'_>>(XFA) {
263 return Some(parse_xfa_xml(&decode_stream(&stream)?));
264 }
265 if let Some(array) = acroform.get::<Array<'_>>(XFA) {
266 return Some(extract_from_array(&array));
267 }
268 None
269}
270
271fn extract_from_array(array: &Array<'_>) -> XfaPackets {
272 let mut packets = XfaPackets::default();
273 let items: Vec<Object<'_>> = array.iter::<Object<'_>>().collect();
274 let mut i = 0;
275 while i + 1 < items.len() {
276 let name = match &items[i] {
277 Object::String(s) => std::string::String::from_utf8_lossy(s.as_bytes()).to_string(),
278 Object::Name(n) => std::string::String::from_utf8_lossy(n.as_ref()).to_string(),
279 _ => {
280 i += 1;
281 continue;
282 }
283 };
284 if let Some(c) = match &items[i + 1] {
285 Object::Stream(s) => decode_stream(s),
286 Object::String(s) => {
287 Some(std::string::String::from_utf8_lossy(s.as_bytes()).to_string())
288 }
289 _ => None,
290 } {
291 packets.packets.push((name, c));
292 }
293 i += 2;
294 }
295 packets
296}
297
298fn scan_for_xfa(pdf: &Pdf) -> Result<XfaPackets> {
299 let mut streams_checked = 0u32;
303 for obj in pdf.objects() {
304 if let Object::Stream(s) = obj {
305 streams_checked += 1;
306 if streams_checked > 2000 {
307 break;
308 }
309 if let Some(d) = decode_stream(&s) {
310 if d.contains("<xdp:xdp") {
311 return Ok(parse_xfa_xml(&d));
312 }
313 }
314 }
315 }
316 Err(XfaError::PacketNotFound("no XFA content found".to_string()))
317}
318
319fn decode_stream(stream: &Stream<'_>) -> Option<String> {
320 std::string::String::from_utf8(stream.decoded().ok()?).ok()
321}
322
323fn parse_xfa_xml(xml: &str) -> XfaPackets {
324 let mut packets = XfaPackets {
325 full_xml: Some(xml.to_string()),
326 packets: Vec::new(),
327 };
328 let t = xml.trim();
329 let c = t.find("?>").map(|p| &t[p + 2..]).unwrap_or(t).trim();
330 let (inner, wrapper_xmlns) = match c.find('>') {
331 Some(s) => {
332 let opener = &c[..s]; let wrapper_xmlns = extract_xmlns_decls(opener);
334 let rest = &c[s + 1..];
335 let inner = rest
336 .rfind("</xdp:xdp>")
337 .map(|e| &rest[..e])
338 .or_else(|| rest.rfind("</xdp>").map(|e| &rest[..e]))
339 .unwrap_or(rest);
340 (inner, wrapper_xmlns)
341 }
342 None => return packets,
343 };
344 let mut pos = 0;
345 let bytes = inner.as_bytes();
346 while pos < bytes.len() {
347 while pos < bytes.len() && bytes[pos].is_ascii_whitespace() {
348 pos += 1;
349 }
350 if pos >= bytes.len() {
351 break;
352 }
353 if bytes[pos] != b'<' {
354 pos += 1;
355 continue;
356 }
357 if inner[pos..].starts_with("<!--") {
358 if let Some(e) = inner[pos..].find("-->") {
359 pos += e + 3;
360 continue;
361 }
362 }
363 if inner[pos..].starts_with("<?") {
364 if let Some(e) = inner[pos..].find("?>") {
365 pos += e + 2;
366 continue;
367 }
368 }
369 let ts = pos;
370 pos += 1;
371 let ns = pos;
372 while pos < bytes.len() && bytes[pos] != b'>' && bytes[pos] != b' ' && bytes[pos] != b'/' {
373 pos += 1;
374 }
375 let ft = &inner[ns..pos];
376 let pn = ft.split(':').next_back().unwrap_or(ft);
377 let ct = format!("</{ft}>");
378 let at = format!("</xfa:{pn}>");
379 if let Some(cp) = inner[ts..].find(ct.as_str()) {
380 let ee = ts + cp + ct.len();
381 let raw = &inner[ts..ee];
382 packets
383 .packets
384 .push((pn.to_string(), inject_missing_xmlns(raw, &wrapper_xmlns)));
385 pos = ee;
386 } else if let Some(cp) = inner[ts..].find(at.as_str()) {
387 let ee = ts + cp + at.len();
388 let raw = &inner[ts..ee];
389 packets
390 .packets
391 .push((pn.to_string(), inject_missing_xmlns(raw, &wrapper_xmlns)));
392 pos = ee;
393 } else {
394 while pos < bytes.len() && bytes[pos] != b'>' {
395 pos += 1;
396 }
397 pos += 1;
398 }
399 }
400 packets
401}
402
/// Summary of which packets an [`XfaPackets`] value contains, as produced by
/// [`validate_xfa_packets`].
#[derive(Debug, Clone, Default)]
pub struct PacketValidation {
    /// Whether a `template` packet was found.
    pub has_template: bool,
    /// Whether a `datasets` packet was found.
    pub has_datasets: bool,
    /// Whether a `config` packet was found.
    pub has_config: bool,
    /// Length in bytes of the template packet, or 0 when there is none.
    pub template_bytes: usize,
    /// Length in bytes of the selected datasets packet, or 0 when there is none.
    pub datasets_bytes: usize,
    /// Names of all packets, in stored order (duplicates included).
    pub packet_names: Vec<String>,
    /// Human-readable notes about missing or unusually small packets.
    pub warnings: Vec<String>,
}
426
427pub fn validate_xfa_packets(packets: &XfaPackets) -> PacketValidation {
432 let has_template = packets.template().is_some();
433 let has_datasets = packets.datasets().is_some();
434 let has_config = packets.config().is_some();
435
436 let template_bytes = packets.template().map(|s| s.len()).unwrap_or(0);
437 let datasets_bytes = packets.datasets().map(|s| s.len()).unwrap_or(0);
438 let packet_names = packets.packets.iter().map(|(n, _)| n.clone()).collect();
439
440 let mut warnings = Vec::new();
441
442 if !has_template {
443 warnings.push("No template packet found".to_string());
444 } else if template_bytes < 100 {
445 warnings.push(format!(
446 "Template packet is empty (< 100 bytes) — only {template_bytes} bytes"
447 ));
448 }
449
450 if !has_datasets {
451 warnings.push("No datasets packet".to_string());
452 } else if datasets_bytes < 50 {
453 warnings.push(format!(
454 "Datasets packet is suspiciously small (< 50 bytes) — only {datasets_bytes} bytes"
455 ));
456 }
457
458 PacketValidation {
459 has_template,
460 has_datasets,
461 has_config,
462 template_bytes,
463 datasets_bytes,
464 packet_names,
465 warnings,
466 }
467}
468pub fn extract_embedded_fonts(pdf: &Pdf) -> Vec<(String, Vec<u8>)> {
471 use pdf_syntax::object::dict::keys::{FONT_FILE, FONT_FILE2, FONT_FILE3, FONT_NAME, TYPE};
472 use pdf_syntax::object::Name;
473 let mut fonts = Vec::new();
474 for obj in pdf.objects() {
475 let dict = match &obj {
476 Object::Dict(d) => d.clone(),
477 Object::Stream(s) => s.dict().clone(),
478 _ => continue,
479 };
480 if dict
481 .get::<Name>(TYPE)
482 .is_none_or(|n| n.as_ref() != b"FontDescriptor")
483 {
484 continue;
485 }
486 let name = dict
487 .get::<Name>(FONT_NAME)
488 .map(|n| std::string::String::from_utf8_lossy(n.as_ref()).to_string())
489 .unwrap_or_default();
490 for key in [FONT_FILE2, FONT_FILE, FONT_FILE3] {
491 if let Some(s) = dict.get::<Stream<'_>>(key) {
492 if let Ok(d) = s.decoded() {
493 if !d.is_empty() {
494 fonts.push((name.clone(), d));
495 break;
496 }
497 }
498 }
499 }
500 }
501 fonts
502}
503
#[cfg(test)]
mod tests {
    use super::*;

    /// A document with a template and a datasets packet yields both.
    #[test]
    fn parse_xfa_packets() {
        let xml = r#"<?xml version="1.0"?><xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="f1"><field name="T1"/></subform></template><xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"><xfa:data><f1><T1>Hi</T1></f1></xfa:data></xfa:datasets></xdp:xdp>"#;
        let p = parse_xfa_xml(xml);
        assert_eq!(p.packets.len(), 2);
        assert!(p.template().is_some());
        assert!(p.datasets().is_some());
    }

    /// An empty wrapper element produces no packets.
    #[test]
    fn empty_xfa() {
        let p = parse_xfa_xml(r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"></xdp:xdp>"#);
        assert_eq!(p.packets.len(), 0);
    }

    /// Lookups on an empty packet set return `None`.
    #[test]
    fn get_packet_missing_returns_none() {
        let p = parse_xfa_xml(r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"></xdp:xdp>"#);
        assert!(p.get_packet("template").is_none());
        assert!(p.get_packet("nonexistent").is_none());
        assert!(p.config().is_none());
        assert!(p.locale_set().is_none());
    }

    /// The original document text is kept in `full_xml`.
    #[test]
    fn full_xml_preserved() {
        let xml =
            r#"<?xml version="1.0"?><xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"></xdp:xdp>"#;
        let p = parse_xfa_xml(xml);
        let stored = p.full_xml.as_deref().unwrap_or("");
        assert!(stored.contains("xdp:xdp"));
    }

    /// A lone config packet is found and no template is reported.
    #[test]
    fn config_packet_parsed() {
        let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><config xmlns="http://www.xfa.org/schema/xci/3.1/"><present><xdp><packets>*</packets></xdp></present></config></xdp:xdp>"#;
        let p = parse_xfa_xml(xml);
        assert_eq!(p.packets.len(), 1);
        assert!(p.config().is_some());
        assert!(p.template().is_none());
    }

    /// Packets are stored in document order.
    #[test]
    fn multiple_packets_order_preserved() {
        let xml = r#"<?xml version="1.0"?><xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"/></template><xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"><xfa:data/></xfa:datasets></xdp:xdp>"#;
        let p = parse_xfa_xml(xml);
        assert_eq!(p.packets.len(), 2);
        assert_eq!(p.packets[0].0, "template");
        assert_eq!(p.packets[1].0, "datasets");
        assert!(p.template().is_some());
        assert!(p.datasets().is_some());
    }

    /// A document with a sizeable template and datasets validates cleanly.
    #[test]
    fn validate_complete_packets_no_warnings() {
        let xml = r#"<?xml version="1.0"?><xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"><field name="firstName" xmlns:ui="http://www.xfa.org/schema/xfa-template/3.3/"><ui><textEdit/></ui></field></subform></template><xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"><xfa:data><root><firstName>Alice</firstName></root></xfa:data></xfa:datasets></xdp:xdp>"#;
        let p = parse_xfa_xml(xml);
        let v = validate_xfa_packets(&p);
        assert!(v.has_template);
        assert!(v.has_datasets);
        assert!(v.template_bytes > 0);
        assert!(v.datasets_bytes > 0);
        assert!(
            v.warnings.is_empty(),
            "expected no warnings, got: {:?}",
            v.warnings
        );
    }

    /// A missing template is reported as a warning.
    #[test]
    fn validate_missing_template_produces_warning() {
        let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"><xfa:data/></xfa:datasets></xdp:xdp>"#;
        let p = parse_xfa_xml(xml);
        let v = validate_xfa_packets(&p);
        assert!(!v.has_template);
        assert!(v
            .warnings
            .iter()
            .any(|w| w.contains("No template packet found")));
    }

    /// A missing datasets packet is reported as a warning.
    #[test]
    fn validate_missing_datasets_produces_warning() {
        let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"><field name="x"/><field name="y"/><field name="z"/><field name="w"/></subform></template></xdp:xdp>"#;
        let p = parse_xfa_xml(xml);
        let v = validate_xfa_packets(&p);
        assert!(!v.has_datasets);
        assert!(v.warnings.iter().any(|w| w.contains("No datasets packet")));
    }

    /// A template shorter than 100 bytes triggers the size warning.
    #[test]
    fn validate_tiny_template_produces_warning() {
        let mut p = XfaPackets::default();
        p.packets.push(("template".to_string(), "<t/>".to_string()));
        p.packets.push((
            "datasets".to_string(),
            "<xfa:datasets xmlns:xfa=\"http://www.xfa.org/schema/xfa-data/1.0/\"><xfa:data/></xfa:datasets>".to_string(),
        ));
        let v = validate_xfa_packets(&p);
        assert!(v.warnings.iter().any(|w| w.contains("< 100 bytes")));
    }

    /// A datasets packet shorter than 50 bytes triggers the size warning.
    #[test]
    fn validate_tiny_datasets_produces_warning() {
        let mut p = XfaPackets::default();
        p.packets.push((
            "template".to_string(),
            "<template xmlns=\"http://www.xfa.org/schema/xfa-template/3.3/\"><subform name=\"root\"><field name=\"a\"/><field name=\"b\"/><field name=\"c\"/></subform></template>".to_string(),
        ));
        p.packets
            .push(("datasets".to_string(), "<ds/>".to_string()));
        let v = validate_xfa_packets(&p);
        assert!(v.warnings.iter().any(|w| w.contains("< 50 bytes")));
    }

    /// The validation result lists the names of all packets found.
    #[test]
    fn validate_packet_names_list() {
        let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><config xmlns="http://www.xfa.org/schema/xci/3.1/"><present/></config><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"><field name="f1"/><field name="f2"/><field name="f3"/></subform></template></xdp:xdp>"#;
        let p = parse_xfa_xml(xml);
        let v = validate_xfa_packets(&p);
        assert!(v.packet_names.contains(&"config".to_string()));
        assert!(v.packet_names.contains(&"template".to_string()));
        assert!(v.has_config);
    }

    /// A template with plain fields is classified as a static form.
    #[test]
    fn corpus_01_static_xfa_form_detection() {
        use crate::classify::{detect_xfa_type_from_packets, XfaType};
        let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/" baseProfile="interactiveForms"><subform name="Page1"><field name="LastName"/><field name="FirstName"/></subform></template></xdp:xdp>"#;
        let p = parse_xfa_xml(xml);
        assert_eq!(detect_xfa_type_from_packets(&p), XfaType::Static);
    }

    /// A template containing an `occur` element is classified as dynamic.
    #[test]
    fn corpus_02_dynamic_xfa_form_detection() {
        use crate::classify::{detect_xfa_type_from_packets, XfaType};
        let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"><occur min="0" max="-1"/><field name="item"/></subform></template></xdp:xdp>"#;
        let p = parse_xfa_xml(xml);
        assert_eq!(detect_xfa_type_from_packets(&p), XfaType::Dynamic);
    }

    /// Config, template and datasets packets are all extracted.
    #[test]
    fn corpus_03_multiple_packets() {
        let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><config xmlns="http://www.xfa.org/schema/xci/3.1/"><present><xdp><packets>*</packets></xdp></present></config><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"/></template><xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"><xfa:data/></xfa:datasets></xdp:xdp>"#;
        let p = parse_xfa_xml(xml);
        assert_eq!(p.packets.len(), 3, "should have config, template, datasets");
        assert!(p.config().is_some());
        assert!(p.template().is_some());
        assert!(p.datasets().is_some());
    }

    /// A template-only document reports the missing datasets packet.
    #[test]
    fn corpus_04_template_only_no_datasets() {
        let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"><field name="LastName"/><field name="FirstName"/><field name="DOB"/></subform></template></xdp:xdp>"#;
        let p = parse_xfa_xml(xml);
        assert!(p.template().is_some());
        assert!(p.datasets().is_none());
        let v = validate_xfa_packets(&p);
        assert!(!v.has_datasets);
        assert!(v.warnings.iter().any(|w| w.contains("No datasets")));
    }

    /// Base64 image data inside the datasets packet survives extraction.
    #[test]
    fn corpus_05_xfa_with_image_data_in_datasets() {
        let b64_image = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==";
        let xml = format!(
            r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"><field name="photo"><ui><imageEdit/></ui></field></subform></template><xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"><xfa:data><root><photo contentType="image/png" href="">{b64_image}</photo></root></xfa:data></xfa:datasets></xdp:xdp>"#
        );
        let p = parse_xfa_xml(&xml);
        assert!(p.template().is_some());
        assert!(p.datasets().is_some());
        let ds = p.datasets().unwrap();
        assert!(ds.contains(b64_image), "datasets should contain image data");
    }

    /// Bytes without any XFA content are classified as `XfaType::None`.
    #[test]
    fn corpus_06_non_xfa_pdf_returns_none() {
        use crate::classify::{detect_xfa_type, XfaType};
        let not_xfa: &[u8] = b"%PDF-1.4\n%%EOF";
        assert_eq!(detect_xfa_type(not_xfa), XfaType::None);
    }

    /// The config packet keeps its nested content and is seen by validation.
    #[test]
    fn corpus_07_xfa_with_config_packet() {
        let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><config xmlns="http://www.xfa.org/schema/xci/3.1/"><present><xdp><packets>*</packets></xdp></present><pdf><version>1.6</version></pdf></config><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"/></template></xdp:xdp>"#;
        let p = parse_xfa_xml(xml);
        assert!(p.config().is_some());
        let cfg = p.config().unwrap();
        assert!(cfg.contains("packets"));
        let v = validate_xfa_packets(&p);
        assert!(v.has_config);
    }

    /// With an empty and a filled datasets packet, the longer one is chosen.
    #[test]
    fn corpus_08_empty_datasets_incremental_save_pattern() {
        let mut p = XfaPackets::default();
        p.packets.push((
            "template".to_string(),
            "<template xmlns=\"http://www.xfa.org/schema/xfa-template/3.3/\"><subform name=\"root\"><field name=\"qty\"/><field name=\"price\"/><field name=\"total\"/></subform></template>".to_string(),
        ));
        p.packets.push((
            "datasets".to_string(),
            "<xfa:datasets xmlns:xfa=\"http://www.xfa.org/schema/xfa-data/1.0/\"/>".to_string(),
        ));
        p.packets.push(("datasets".to_string(), "<xfa:datasets xmlns:xfa=\"http://www.xfa.org/schema/xfa-data/1.0/\"><xfa:data><root><qty>3</qty><price>9.99</price><total>29.97</total></root></xfa:data></xfa:datasets>".to_string()));
        let ds = p.datasets().expect("datasets should exist");
        assert!(
            ds.contains("29.97"),
            "should return the larger/filled datasets"
        );
    }

    /// A template with twenty fields validates without warnings.
    #[test]
    fn corpus_09_large_template_many_fields() {
        let fields: String = (1..=20)
            .map(|i| format!("<field name=\"field{i}\"><ui><textEdit/></ui></field>"))
            .collect();
        let xml = format!(
            r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root">{fields}</subform></template><xfa:datasets xmlns:xfa="http://www.xfa.org/schema/xfa-data/1.0/"><xfa:data><root>{}</root></xfa:data></xfa:datasets></xdp:xdp>"#,
            (1..=20)
                .map(|i| format!("<field{i}>val{i}</field{i}>"))
                .collect::<String>()
        );
        let p = parse_xfa_xml(&xml);
        let v = validate_xfa_packets(&p);
        assert!(v.has_template);
        assert!(v.has_datasets);
        assert!(
            v.template_bytes >= 100,
            "large template should exceed 100 bytes"
        );
        assert!(
            v.warnings.is_empty(),
            "no warnings expected: {:?}",
            v.warnings
        );
    }

    /// The `localeSet` packet is reachable through `locale_set()`.
    #[test]
    fn corpus_10_xfa_with_locale_set_packet() {
        let xml = r#"<xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/"><localeSet xmlns="http://www.xfa.org/schema/xfa-locale-set/2.7/"><locale name="en_US" desc="English (United States)"><calendarSymbols name="gregorian"/></locale></localeSet><template xmlns="http://www.xfa.org/schema/xfa-template/3.3/"><subform name="root"><field name="date"/></subform></template></xdp:xdp>"#;
        let p = parse_xfa_xml(xml);
        assert!(
            p.locale_set().is_some(),
            "localeSet packet should be accessible"
        );
        assert!(p.template().is_some());
        let ls = p.locale_set().unwrap();
        assert!(ls.contains("en_US"));
    }
}