1use std::{collections::BTreeMap, num::NonZeroU32, sync::Arc};
4
5use quick_xml::{Reader, events::Event};
6use serde::{Deserialize, Serialize};
7
8use crate::{
9 BoundedText, CosObject, Identifier, ObjectKey, ParseError, ParseFact, PdfvError,
10 ProfileRepository, ResourceLimits, Result, ValidationFlavour, ValidationProfile,
11 ValidationWarning, XmpFact, display_flavour,
12};
13
/// Namespace URI of the PDF/A identification schema (`part`, `conformance`, `rev`).
const PDF_A_ID_NS: &str = "http://www.aiim.org/pdfa/ns/id/";
/// Namespace URI of the PDF/UA identification schema (`part`, `rev`, `amd`, `corr`).
const PDF_UA_ID_NS: &str = "http://www.aiim.org/pdfua/ns/id/";
/// Namespace URI of the PDF Declarations schema; properties in it are scanned for WTPDF
/// declarations.
const PDF_D_NS: &str = "http://pdfa.org/declarations/";
/// Declaration URI that is mapped to the `wtpdf` flavour with `accessibility` conformance.
const WTPDF_ACCESSIBILITY_DECLARATION: &str = "http://pdfa.org/declarations/wtpdf#accessibility1.0";
/// Declaration URI that is mapped to the `wtpdf` flavour with `reuse` conformance.
const WTPDF_REUSE_DECLARATION: &str = "http://pdfa.org/declarations/wtpdf#reuse1.0";
19
/// A namespace declaration (`xmlns` / `xmlns:prefix`) collected from an XMP packet.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct NamespaceBinding {
    /// Declared prefix; empty for a default-namespace (`xmlns="…"`) declaration.
    pub prefix: Identifier,
    /// Namespace URI the prefix was bound to.
    pub uri: BoundedText,
}
30
/// The identification scheme a [`FlavourClaim`] was derived from.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum XmpIdentificationKind {
    /// Claim built from the PDF/A identification namespace.
    PdfA,
    /// Claim built from the PDF/UA identification namespace.
    PdfUa,
    /// Claim built from a WTPDF declaration URI in the PDF Declarations namespace.
    Wtpdf,
}
43
/// A validation flavour that an XMP packet claims conformance to.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct FlavourClaim {
    /// Identification scheme the claim came from.
    pub kind: XmpIdentificationKind,
    /// Flavour derived from the identification properties.
    pub flavour: ValidationFlavour,
    /// Display form of `flavour`, as produced by `display_flavour`.
    pub display_flavour: BoundedText,
    /// Namespace URI of the property the claim was read from.
    pub namespace_uri: BoundedText,
    /// Local name of the property the claim was read from.
    pub property: Identifier,
}
60
/// The result of parsing one XMP packet.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct XmpPacket {
    /// Object the packet bytes were read from.
    pub source_object: ObjectKey,
    /// Length in bytes of the packet that was parsed.
    pub bytes: u64,
    /// Namespace declarations seen anywhere in the packet, one entry per prefix (the last
    /// declaration of a prefix wins).
    pub namespaces: Vec<NamespaceBinding>,
    /// Flavour claims derived from the identification properties.
    pub identification: Vec<FlavourClaim>,
    /// Facts recorded while parsing the packet.
    pub facts: Vec<XmpFact>,
}
77
/// Stateless entry point for parsing XMP packets; see `XmpParser::parse_packet`.
#[derive(Clone, Debug, Default)]
pub struct XmpParser;
81
/// Outcome of flavour auto-detection for one document.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct DetectedFlavours {
    /// Parsed XMP packet, when one was available and parsed successfully.
    pub packet: Option<XmpPacket>,
    /// Profiles selected for validation (from XMP claims or from the fallback selection).
    pub profiles: Vec<ValidationProfile>,
    /// XMP-related parse facts gathered during detection.
    pub parse_facts: Vec<ParseFact>,
    /// Warnings raised during detection.
    pub warnings: Vec<ValidationWarning>,
}
95
/// Crate-internal result of reading and parsing the catalog's XMP metadata.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub(crate) struct XmpParseResult {
    /// Parsed packet; `None` when metadata was absent, skipped, or failed to parse.
    pub packet: Option<XmpPacket>,
    /// Parse facts describing the packet or the parse failure.
    pub parse_facts: Vec<ParseFact>,
    /// Warnings raised while locating or parsing the metadata.
    pub warnings: Vec<ValidationWarning>,
}
107
/// Detects which validation profiles apply to a document from its XMP identification.
#[derive(Clone)]
pub struct FlavourDetector {
    /// Repository used to resolve flavours into validation profiles.
    profiles: Arc<dyn ProfileRepository + Send + Sync>,
}
113
114impl std::fmt::Debug for FlavourDetector {
115 fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
116 formatter.write_str("FlavourDetector")
117 }
118}
119
120impl FlavourDetector {
    /// Creates a detector that resolves flavours through the given profile repository.
    #[must_use]
    pub fn new(profiles: Arc<dyn ProfileRepository + Send + Sync>) -> Self {
        Self { profiles }
    }
126
127 pub fn detect(
133 &self,
134 document: &crate::ParsedDocument,
135 default: Option<&ValidationFlavour>,
136 limits: &ResourceLimits,
137 ) -> Result<DetectedFlavours> {
138 let parsed_xmp = parse_document_xmp(document, limits, true)?;
139 let Some(packet) = parsed_xmp.packet else {
140 let mut fallback = self.fallback(default, "catalog metadata stream is missing")?;
141 fallback.parse_facts = parsed_xmp.parse_facts;
142 fallback.warnings = parsed_xmp.warnings;
143 return Ok(fallback);
144 };
145
146 let mut parse_facts = parsed_xmp.parse_facts;
147 let mut warnings = parsed_xmp.warnings;
148 let mut profiles = Vec::new();
149 for claim in &packet.identification {
150 match self
151 .profiles
152 .profiles_for(&crate::FlavourSelection::Explicit {
153 flavour: claim.flavour.clone(),
154 }) {
155 Ok(mut selected) => profiles.append(&mut selected),
156 Err(error) => warnings.push(ValidationWarning::IncompatibleProfile {
157 profile_id: Identifier::new(claim.display_flavour.as_str())?,
158 reason: BoundedText::new(error.to_string(), 512)?,
159 }),
160 }
161 }
162 if profiles.is_empty() {
163 let mut fallback =
164 self.fallback(default, "XMP metadata contains no supported claims")?;
165 fallback.parse_facts.append(&mut parse_facts);
166 fallback.warnings.extend(warnings);
167 fallback.packet = Some(packet);
168 return Ok(fallback);
169 }
170 let compatible_profiles = select_compatible_profiles(profiles, &mut warnings)?;
171 Ok(DetectedFlavours {
172 packet: Some(packet),
173 profiles: compatible_profiles,
174 parse_facts,
175 warnings,
176 })
177 }
178
179 fn fallback(
180 &self,
181 default: Option<&ValidationFlavour>,
182 reason: &'static str,
183 ) -> Result<DetectedFlavours> {
184 let warning = ValidationWarning::AutoDetection {
185 message: BoundedText::unchecked(reason),
186 };
187 self.fallback_with_warning(default, warning)
188 }
189
190 fn fallback_with_warning(
191 &self,
192 default: Option<&ValidationFlavour>,
193 warning: ValidationWarning,
194 ) -> Result<DetectedFlavours> {
195 let warnings = vec![warning];
196 let profiles = if let Some(flavour) = default {
197 self.profiles.profiles_for(&crate::FlavourSelection::Auto {
198 default: Some(flavour.clone()),
199 })?
200 } else {
201 self.profiles
202 .profiles_for(&crate::FlavourSelection::Auto { default: None })?
203 };
204 Ok(DetectedFlavours {
205 packet: None,
206 profiles,
207 parse_facts: Vec::new(),
208 warnings,
209 })
210 }
211}
212
213impl XmpParser {
214 pub fn parse_packet(
220 &self,
221 source_object: ObjectKey,
222 bytes: &[u8],
223 limits: &ResourceLimits,
224 ) -> Result<XmpPacket> {
225 enforce_xmp_len(bytes.len(), limits.max_xmp_bytes)?;
226 let text = std::str::from_utf8(bytes).map_err(|error| crate::ProfileError::InvalidXml {
227 reason: BoundedText::new(error.to_string(), 512)
228 .unwrap_or_else(|_| BoundedText::unchecked("XMP is not UTF-8")),
229 })?;
230 let parser = PacketBuilder::new(source_object, bytes.len(), limits);
231 parser.parse(text)
232 }
233}
234
/// Streaming state used while turning XML events into an [`XmpPacket`].
#[derive(Debug)]
struct PacketBuilder<'a> {
    /// Object the packet was read from; copied into the resulting packet.
    source_object: ObjectKey,
    /// Length of the raw packet in bytes; reported in the packet and its facts.
    byte_len: usize,
    /// Resource limits enforced during parsing.
    limits: &'a ResourceLimits,
    /// Current element nesting depth.
    depth: u32,
    /// Number of start/empty elements seen so far.
    elements: u64,
    /// Every prefix declared anywhere in the packet (the last declaration of a prefix wins).
    namespaces: BTreeMap<String, BoundedText>,
    /// Prefix bindings in scope for the element currently being processed.
    current_namespaces: BTreeMap<String, BoundedText>,
    /// Captured identification properties, keyed by `(namespace URI, local name)`.
    properties: BTreeMap<(String, String), XmpProperty>,
    /// Frames of the elements that are currently open.
    stack: Vec<ElementFrame>,
    /// Facts recorded so far.
    facts: Vec<XmpFact>,
    /// Set once an element with local name `xmpmeta` or `RDF` has been seen.
    saw_packet_wrapper: bool,
}
249
250impl<'a> PacketBuilder<'a> {
251 fn new(source_object: ObjectKey, byte_len: usize, limits: &'a ResourceLimits) -> Self {
252 Self {
253 source_object,
254 byte_len,
255 limits,
256 depth: 0,
257 elements: 0,
258 namespaces: BTreeMap::new(),
259 current_namespaces: BTreeMap::new(),
260 properties: BTreeMap::new(),
261 stack: Vec::with_capacity(usize::try_from(limits.max_xmp_depth).unwrap_or(0)),
262 facts: Vec::new(),
263 saw_packet_wrapper: false,
264 }
265 }
266
267 fn parse(mut self, text: &str) -> Result<XmpPacket> {
268 let mut reader = Reader::from_str(text);
269 reader.config_mut().trim_text(true);
270 loop {
271 match reader.read_event().map_err(|error| xmp_xml_error(&error))? {
272 Event::Start(element) => self.start(&element)?,
273 Event::Empty(element) => {
274 self.start(&element)?;
275 self.end()?;
276 }
277 Event::Text(text) => {
278 let decoded =
279 text.decode()
280 .map_err(|error| crate::ProfileError::InvalidXml {
281 reason: bounded_reason(error.to_string()),
282 })?;
283 self.text(decoded.as_ref())?;
284 }
285 Event::End(_) => self.end()?,
286 Event::Decl(_) | Event::PI(_) | Event::Comment(_) | Event::CData(_) => {}
287 Event::DocType(_) | Event::GeneralRef(_) => {
288 return Err(crate::ProfileError::InvalidXml {
289 reason: BoundedText::unchecked(
290 "XMP DTD and entity processing are forbidden",
291 ),
292 }
293 .into());
294 }
295 Event::Eof => break,
296 }
297 }
298 self.finish()
299 }
300
301 fn start(&mut self, element: &quick_xml::events::BytesStart<'_>) -> Result<()> {
302 self.depth = self.depth.checked_add(1).ok_or(ParseError::LimitExceeded {
303 limit: "max_xmp_depth",
304 })?;
305 if self.depth > self.limits.max_xmp_depth {
306 return Err(ParseError::LimitExceeded {
307 limit: "max_xmp_depth",
308 }
309 .into());
310 }
311 self.elements = self
312 .elements
313 .checked_add(1)
314 .ok_or(ParseError::LimitExceeded {
315 limit: "max_xmp_elements",
316 })?;
317 if self.elements > self.limits.max_xmp_elements {
318 return Err(ParseError::LimitExceeded {
319 limit: "max_xmp_elements",
320 }
321 .into());
322 }
323 let (prefix, local) = split_xml_name(element.name().as_ref())?;
324 if local.as_str() == "xmpmeta" || local.as_str() == "RDF" {
325 self.saw_packet_wrapper = true;
326 }
327 let previous_namespaces = self.current_namespaces.clone();
328 self.read_namespaces(element)?;
329 let namespace = self.resolve_prefix(&prefix)?;
330 let frame = ElementFrame {
331 namespace,
332 local,
333 text: String::new(),
334 previous_namespaces,
335 };
336 self.capture_attr_properties(element)?;
337 self.stack.push(frame);
338 Ok(())
339 }
340
341 fn end(&mut self) -> Result<()> {
342 let Some(frame) = self.stack.pop() else {
343 return Err(crate::ProfileError::InvalidXml {
344 reason: BoundedText::unchecked("XMP element depth underflow"),
345 }
346 .into());
347 };
348 let value = frame.text.trim().to_owned();
349 if !value.is_empty() && is_identification_property(&frame.namespace, &frame.local) {
350 self.insert_property(&frame, &value)?;
351 }
352 self.current_namespaces = frame.previous_namespaces;
353 self.depth = self.depth.checked_sub(1).ok_or(ParseError::LimitExceeded {
354 limit: "max_xmp_depth",
355 })?;
356 Ok(())
357 }
358
359 fn text(&mut self, value: &str) -> Result<()> {
360 let Some(frame) = self.stack.last_mut() else {
361 return Ok(());
362 };
363 let next_len =
364 frame
365 .text
366 .len()
367 .checked_add(value.len())
368 .ok_or(ParseError::LimitExceeded {
369 limit: "max_xmp_text_bytes",
370 })?;
371 if next_len > self.limits.max_xmp_text_bytes {
372 return Err(ParseError::LimitExceeded {
373 limit: "max_xmp_text_bytes",
374 }
375 .into());
376 }
377 frame.text.push_str(value);
378 Ok(())
379 }
380
381 fn finish(mut self) -> Result<XmpPacket> {
382 if !self.saw_packet_wrapper {
383 self.facts.push(XmpFact::MissingPacketWrapper);
384 }
385 let identification = self.claims()?;
386 self.facts.push(XmpFact::PacketParsed {
387 bytes: checked_u64_len(self.byte_len, "XMP packet length")?,
388 namespaces: checked_u64_len(self.namespaces.len(), "XMP namespace count")?,
389 claims: checked_u64_len(identification.len(), "XMP claim count")?,
390 });
391 for claim in &identification {
392 self.facts.push(XmpFact::FlavourClaim {
393 family: claim.flavour.family.clone(),
394 display_flavour: claim.display_flavour.clone(),
395 namespace_uri: claim.namespace_uri.clone(),
396 });
397 }
398 let namespaces = self
399 .namespaces
400 .into_iter()
401 .map(|(prefix, uri)| {
402 Ok(NamespaceBinding {
403 prefix: identifier_allow_empty(prefix)?,
404 uri,
405 })
406 })
407 .collect::<Result<Vec<_>>>()?;
408 Ok(XmpPacket {
409 source_object: self.source_object,
410 bytes: checked_u64_len(self.byte_len, "XMP packet length")?,
411 namespaces,
412 identification,
413 facts: self.facts,
414 })
415 }
416
417 fn read_namespaces(&mut self, element: &quick_xml::events::BytesStart<'_>) -> Result<()> {
418 let mut attributes = 0_usize;
419 for attr in element.attributes().with_checks(true) {
420 let attr = attr.map_err(|error| crate::ProfileError::InvalidXml {
421 reason: bounded_reason(error.to_string()),
422 })?;
423 attributes = attributes.checked_add(1).ok_or(ParseError::LimitExceeded {
424 limit: "max_xmp_attributes",
425 })?;
426 if attributes > self.limits.max_xmp_attributes {
427 return Err(ParseError::LimitExceeded {
428 limit: "max_xmp_attributes",
429 }
430 .into());
431 }
432 let key = attr.key.as_ref();
433 let prefix = namespace_decl_prefix(key);
434 if let Some(prefix) = prefix {
435 if self.namespaces.len() >= self.limits.max_xmp_namespaces {
436 return Err(ParseError::LimitExceeded {
437 limit: "max_xmp_namespaces",
438 }
439 .into());
440 }
441 let value = String::from_utf8_lossy(attr.value.as_ref()).into_owned();
442 let value = BoundedText::new(value, 512)?;
443 self.current_namespaces
444 .insert(prefix.clone(), value.clone());
445 self.namespaces.insert(prefix, value);
446 }
447 }
448 Ok(())
449 }
450
451 fn capture_attr_properties(
452 &mut self,
453 element: &quick_xml::events::BytesStart<'_>,
454 ) -> Result<()> {
455 for attr in element.attributes().with_checks(true) {
456 let attr = attr.map_err(|error| crate::ProfileError::InvalidXml {
457 reason: bounded_reason(error.to_string()),
458 })?;
459 let key = attr.key.as_ref();
460 if namespace_decl_prefix(key).is_some() {
461 continue;
462 }
463 let (prefix, local) = split_xml_name(key)?;
464 let namespace = self.resolve_prefix(&prefix)?;
465 if is_identification_property(&namespace, &local) {
466 let value = String::from_utf8_lossy(attr.value.as_ref()).into_owned();
467 let frame = ElementFrame {
468 namespace,
469 local,
470 text: String::new(),
471 previous_namespaces: BTreeMap::new(),
472 };
473 self.insert_property(&frame, value.trim())?;
474 }
475 }
476 Ok(())
477 }
478
479 fn insert_property(&mut self, frame: &ElementFrame, value: &str) -> Result<()> {
480 let text = BoundedText::new(value.to_owned(), self.limits.max_xmp_text_bytes)?;
481 self.properties.insert(
482 (
483 frame.namespace.as_str().to_owned(),
484 frame.local.as_str().to_owned(),
485 ),
486 XmpProperty {
487 namespace: frame.namespace.clone(),
488 local: frame.local.clone(),
489 value: text,
490 },
491 );
492 Ok(())
493 }
494
495 fn resolve_prefix(&self, prefix: &Identifier) -> Result<BoundedText> {
496 if prefix.as_str().is_empty() {
497 return Ok(self
498 .current_namespaces
499 .get("")
500 .cloned()
501 .unwrap_or_else(|| BoundedText::unchecked("")));
502 }
503 self.current_namespaces
504 .get(prefix.as_str())
505 .cloned()
506 .ok_or_else(|| {
507 crate::ProfileError::InvalidXml {
508 reason: BoundedText::new(
509 format!("unknown XMP namespace prefix {}", prefix.as_str()),
510 512,
511 )
512 .unwrap_or_else(|_| BoundedText::unchecked("unknown XMP namespace prefix")),
513 }
514 .into()
515 })
516 }
517
518 fn property(&self, namespace: &str, local: &str) -> Option<&XmpProperty> {
519 self.properties
520 .get(&(namespace.to_owned(), local.to_owned()))
521 }
522
523 fn claims(&self) -> Result<Vec<FlavourClaim>> {
524 let mut claims = Vec::new();
525 if let Some(claim) = self.pdfa_claim()? {
526 claims.push(claim);
527 }
528 if let Some(claim) = self.pdfua_claim()? {
529 claims.push(claim);
530 }
531 claims.extend(self.wtpdf_claims()?);
532 Ok(claims)
533 }
534
535 fn pdfa_claim(&self) -> Result<Option<FlavourClaim>> {
536 let Some(part) = self.property(PDF_A_ID_NS, "part") else {
537 return Ok(None);
538 };
539 let part_number = parse_nonzero_part(part.value.as_str(), "PDF/A part")?;
540 let conformance = self
541 .property(PDF_A_ID_NS, "conformance")
542 .map_or("none", |property| property.value.as_str())
543 .to_ascii_lowercase();
544 let flavour = ValidationFlavour::new("pdfa", part_number, conformance)?;
545 Ok(Some(claim_from_property(
546 XmpIdentificationKind::PdfA,
547 &flavour,
548 part,
549 )?))
550 }
551
552 fn pdfua_claim(&self) -> Result<Option<FlavourClaim>> {
553 let Some(part) = self.property(PDF_UA_ID_NS, "part") else {
554 return Ok(None);
555 };
556 let part_number = parse_nonzero_part(part.value.as_str(), "PDF/UA part")?;
557 let conformance = if part_number.get() == 2 {
558 "iso32005"
559 } else {
560 "none"
561 };
562 let flavour = ValidationFlavour::new("pdfua", part_number, conformance)?;
563 Ok(Some(claim_from_property(
564 XmpIdentificationKind::PdfUa,
565 &flavour,
566 part,
567 )?))
568 }
569
570 fn wtpdf_claims(&self) -> Result<Vec<FlavourClaim>> {
571 let mut claims = Vec::new();
572 for property in self
573 .properties
574 .values()
575 .filter(|property| property.namespace.as_str() == PDF_D_NS)
576 {
577 let conformance = match property.value.as_str() {
578 WTPDF_ACCESSIBILITY_DECLARATION => Some("accessibility"),
579 WTPDF_REUSE_DECLARATION => Some("reuse"),
580 _ => None,
581 };
582 if let Some(conformance) = conformance {
583 let flavour = ValidationFlavour::new("wtpdf", NonZeroU32::MIN, conformance)?;
584 claims.push(claim_from_property(
585 XmpIdentificationKind::Wtpdf,
586 &flavour,
587 property,
588 )?);
589 }
590 }
591 Ok(claims)
592 }
593}
594
/// Per-element state kept while the element is open.
#[derive(Clone, Debug)]
struct ElementFrame {
    /// Namespace URI the element's prefix resolved to.
    namespace: BoundedText,
    /// Local (unprefixed) element name.
    local: Identifier,
    /// Character data accumulated directly inside the element.
    text: String,
    /// Namespace scope that was in effect before this element's own declarations; restored
    /// when the element closes.
    previous_namespaces: BTreeMap<String, BoundedText>,
}
602
/// An identification property captured from the packet.
#[derive(Clone, Debug)]
struct XmpProperty {
    /// Namespace URI of the property.
    namespace: BoundedText,
    /// Local name of the property.
    local: Identifier,
    /// Trimmed property value.
    value: BoundedText,
}
609
610fn catalog_xmp_bytes(
611 document: &crate::ParsedDocument,
612 limits: &ResourceLimits,
613 warnings: &mut Vec<ValidationWarning>,
614) -> Result<Option<(ObjectKey, Vec<u8>)>> {
615 let Some(catalog_key) = document.catalog else {
616 return Ok(None);
617 };
618 let Some(catalog) = document.objects.get(&catalog_key) else {
619 return Ok(None);
620 };
621 let Some(dictionary) = catalog.object.as_dictionary() else {
622 return Ok(None);
623 };
624 let Some(CosObject::Reference(metadata_key)) = dictionary.get("Metadata") else {
625 return Ok(None);
626 };
627 let Some(metadata) = document.objects.get(metadata_key) else {
628 return Ok(None);
629 };
630 let CosObject::Stream(stream) = &metadata.object else {
631 warnings.push(ValidationWarning::AutoDetection {
632 message: BoundedText::unchecked("catalog metadata is not a stream"),
633 });
634 return Ok(None);
635 };
636 let mut xmp_limits = limits.clone();
637 xmp_limits.max_stream_decode_bytes = limits.max_stream_decode_bytes.min(limits.max_xmp_bytes);
638 let bytes = stream.decoded_bytes(&xmp_limits)?;
639 enforce_xmp_len(bytes.len(), limits.max_xmp_bytes)?;
640 Ok(Some((*metadata_key, bytes)))
641}
642
643pub(crate) fn parse_document_xmp(
649 document: &crate::ParsedDocument,
650 limits: &ResourceLimits,
651 report_absent_metadata: bool,
652) -> Result<XmpParseResult> {
653 let mut warnings = Vec::new();
654 let Some((object, bytes)) = catalog_xmp_bytes(document, limits, &mut warnings)? else {
655 if report_absent_metadata {
656 warnings.push(ValidationWarning::AutoDetection {
657 message: BoundedText::unchecked("catalog metadata stream is missing"),
658 });
659 }
660 return Ok(XmpParseResult {
661 packet: None,
662 parse_facts: Vec::new(),
663 warnings,
664 });
665 };
666 if document.is_encrypted() && !looks_like_xml(&bytes) {
667 return Ok(XmpParseResult {
668 packet: None,
669 parse_facts: Vec::new(),
670 warnings,
671 });
672 }
673 let parser = XmpParser;
674 match parser.parse_packet(object, &bytes, limits) {
675 Ok(packet) => {
676 warnings.extend(packet_warnings(&packet)?);
677 let parse_facts = xmp_parse_facts(&packet)?;
678 Ok(XmpParseResult {
679 packet: Some(packet),
680 parse_facts,
681 warnings,
682 })
683 }
684 Err(PdfvError::Parse(ParseError::LimitExceeded { limit })) => {
685 Err(ParseError::LimitExceeded { limit }.into())
686 }
687 Err(error) => {
688 let reason = BoundedText::new(error.to_string(), 512)
689 .unwrap_or_else(|_| BoundedText::unchecked("XMP parse failed"));
690 let fact = malformed_fact(reason.clone());
691 let parse_facts = vec![ParseFact::Xmp { object, fact }];
692 warnings.push(ValidationWarning::AutoDetection { message: reason });
693 Ok(XmpParseResult {
694 packet: None,
695 parse_facts,
696 warnings,
697 })
698 }
699 }
700}
701
702fn malformed_fact(reason: BoundedText) -> XmpFact {
703 if reason.as_str().contains("DTD")
704 || reason.as_str().contains("entity")
705 || reason.as_str().contains("forbidden")
706 {
707 XmpFact::HostileXmlRejected { reason }
708 } else {
709 XmpFact::Malformed { reason }
710 }
711}
712
/// Returns `true` when the first byte that is not ASCII whitespace is `<`.
///
/// Empty and all-whitespace input yields `false`.
fn looks_like_xml(bytes: &[u8]) -> bool {
    let first_significant = bytes.iter().find(|byte| !byte.is_ascii_whitespace());
    matches!(first_significant, Some(&b'<'))
}
720
721fn select_compatible_profiles(
722 profiles: Vec<ValidationProfile>,
723 warnings: &mut Vec<ValidationWarning>,
724) -> Result<Vec<ValidationProfile>> {
725 let Some(first_group) = profiles
726 .first()
727 .map(|profile| compatibility_group(&profile.flavour))
728 else {
729 return Ok(Vec::new());
730 };
731 let mut selected = Vec::new();
732 for profile in profiles {
733 if compatibility_group(&profile.flavour) == first_group {
734 selected.push(profile);
735 } else {
736 warnings.push(ValidationWarning::IncompatibleProfile {
737 profile_id: profile.identity.id,
738 reason: BoundedText::new(
739 "detected XMP claim is incompatible with the first selected PDF specification \
740 generation",
741 256,
742 )?,
743 });
744 }
745 }
746 Ok(selected)
747}
748
749fn compatibility_group(flavour: &ValidationFlavour) -> &'static str {
750 match (flavour.family.as_str(), flavour.part.get()) {
751 ("pdfa", 1..=3) | ("pdfua", 1) => "pdf-1",
752 _ => "pdf-2",
753 }
754}
755
756fn xmp_parse_facts(packet: &XmpPacket) -> Result<Vec<ParseFact>> {
757 packet
758 .facts
759 .iter()
760 .cloned()
761 .map(|fact| {
762 Ok(ParseFact::Xmp {
763 object: packet.source_object,
764 fact,
765 })
766 })
767 .collect()
768}
769
770fn packet_warnings(packet: &XmpPacket) -> Result<Vec<ValidationWarning>> {
771 packet
772 .facts
773 .iter()
774 .filter_map(|fact| match fact {
775 XmpFact::MissingPacketWrapper => Some("XMP packet wrapper is missing"),
776 XmpFact::Malformed { .. } | XmpFact::HostileXmlRejected { .. } => {
777 Some("XMP metadata has parser warnings")
778 }
779 XmpFact::PacketParsed { .. } | XmpFact::FlavourClaim { .. } => None,
780 })
781 .map(|message| {
782 Ok(ValidationWarning::AutoDetection {
783 message: BoundedText::new(message, 256)?,
784 })
785 })
786 .collect()
787}
788
789fn claim_from_property(
790 kind: XmpIdentificationKind,
791 flavour: &ValidationFlavour,
792 property: &XmpProperty,
793) -> Result<FlavourClaim> {
794 Ok(FlavourClaim {
795 kind,
796 flavour: flavour.clone(),
797 display_flavour: display_flavour(flavour)?,
798 namespace_uri: property.namespace.clone(),
799 property: property.local.clone(),
800 })
801}
802
/// Returns the prefix declared by a namespace-declaration attribute name.
///
/// `xmlns` yields the empty prefix, `xmlns:foo` yields `foo` (decoded lossily as UTF-8), and
/// any other attribute name yields `None`.
fn namespace_decl_prefix(key: &[u8]) -> Option<String> {
    match key.strip_prefix(b"xmlns")? {
        [] => Some(String::new()),
        [b':', prefix @ ..] => Some(String::from_utf8_lossy(prefix).into_owned()),
        _ => None,
    }
}
810
811fn split_xml_name(name: &[u8]) -> Result<(Identifier, Identifier)> {
812 let split = name.iter().position(|byte| *byte == b':');
813 let (prefix, local) = match split {
814 Some(index) => {
815 let prefix = name.get(..index).unwrap_or_default();
816 let local = name.get(index.saturating_add(1)..).unwrap_or_default();
817 (prefix, local)
818 }
819 None => (&[][..], name),
820 };
821 Ok((
822 identifier_allow_empty(String::from_utf8_lossy(prefix).into_owned())?,
823 Identifier::new(String::from_utf8_lossy(local).into_owned())?,
824 ))
825}
826
827fn identifier_allow_empty(value: String) -> Result<Identifier> {
828 if value.is_empty() {
829 Ok(Identifier::unchecked(""))
830 } else {
831 Identifier::new(value).map_err(Into::into)
832 }
833}
834
835fn is_identification_property(namespace: &BoundedText, local: &Identifier) -> bool {
836 matches!(
837 (namespace.as_str(), local.as_str()),
838 (PDF_A_ID_NS, "part" | "conformance" | "rev")
839 | (PDF_UA_ID_NS, "part" | "rev" | "amd" | "corr")
840 | (PDF_D_NS, "conformsTo" | "declarations" | "value" | "li")
841 )
842}
843
844fn parse_nonzero_part(value: &str, field: &'static str) -> Result<NonZeroU32> {
845 let number = value
846 .parse::<u32>()
847 .map_err(|_| crate::ProfileError::InvalidField {
848 field,
849 reason: BoundedText::unchecked("XMP identification part is not numeric"),
850 })?;
851 NonZeroU32::new(number)
852 .ok_or(crate::ProfileError::InvalidField {
853 field,
854 reason: BoundedText::unchecked("XMP identification part is zero"),
855 })
856 .map_err(Into::into)
857}
858
859fn enforce_xmp_len(len: usize, max: u64) -> Result<()> {
860 if checked_u64_len(len, "XMP byte length")? > max {
861 return Err(ParseError::LimitExceeded {
862 limit: "max_xmp_bytes",
863 }
864 .into());
865 }
866 Ok(())
867}
868
869fn checked_u64_len(len: usize, context: &'static str) -> Result<u64> {
870 u64::try_from(len)
871 .map_err(|_| ParseError::ArithmeticOverflow { context })
872 .map_err(Into::into)
873}
874
875fn xmp_xml_error(error: &quick_xml::Error) -> PdfvError {
876 crate::ProfileError::InvalidXml {
877 reason: bounded_reason(error.to_string()),
878 }
879 .into()
880}
881
882fn bounded_reason(value: String) -> BoundedText {
883 BoundedText::new(value, 512).unwrap_or_else(|_| BoundedText::unchecked("XMP XML error"))
884}
885
// Unit tests for `XmpParser::parse_packet`: claim extraction, namespace scoping, rejection of
// DOCTYPE/entity input, and the packet byte cap.
#[cfg(test)]
mod tests {
    use std::num::NonZeroU32;

    use super::XmpParser;
    use crate::{ObjectKey, ResourceLimits, XmpFact};

    // Object key used as the packet's source object in every test.
    fn key() -> ObjectKey {
        ObjectKey::new(NonZeroU32::MIN, 0)
    }

    // The PDF/A namespace is bound to the non-standard prefix `aid` and the properties are
    // written as attributes; the claim must still be recognised and lower-cased.
    #[test]
    fn test_should_parse_pdfa_claim_with_namespace_alias() -> crate::Result<()> {
        let xml = br#"<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/">
 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
 <rdf:Description xmlns:aid="http://www.aiim.org/pdfa/ns/id/" aid:part="2" aid:conformance="B"/>
 </rdf:RDF>
</x:xmpmeta>"#;

        let packet = XmpParser.parse_packet(key(), xml, &ResourceLimits::default())?;

        assert_eq!(packet.identification.len(), 1);
        assert_eq!(
            packet
                .identification
                .first()
                .map(|claim| claim.display_flavour.as_str()),
            Some("pdfa-2b")
        );
        // The claim must also be mirrored as a `FlavourClaim` fact.
        assert!(packet.facts.iter().any(|fact| matches!(
            fact,
            XmpFact::FlavourClaim {
                family,
                display_flavour,
                ..
            } if family.as_str() == "pdfa" && display_flavour.as_str() == "pdfa-2b"
        )));
        Ok(())
    }

    // A PDF/UA part given as an attribute and a WTPDF declaration given as element text must
    // both produce claims.
    #[test]
    fn test_should_parse_pdfua_and_wtpdf_claims() -> crate::Result<()> {
        let xml = br#"<x:xmpmeta xmlns:x="adobe:ns:meta/">
 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
 <rdf:Description xmlns:pdfuaid="http://www.aiim.org/pdfua/ns/id/"
 xmlns:pdfd="http://pdfa.org/declarations/"
 pdfuaid:part="2">
 <pdfd:conformsTo>http://pdfa.org/declarations/wtpdf#reuse1.0</pdfd:conformsTo>
 </rdf:Description>
 </rdf:RDF>
</x:xmpmeta>"#;

        let packet = XmpParser.parse_packet(key(), xml, &ResourceLimits::default())?;
        let flavours = packet
            .identification
            .iter()
            .map(|claim| claim.display_flavour.as_str())
            .collect::<Vec<_>>();

        assert!(flavours.contains(&"pdfua-2-iso32005"));
        assert!(flavours.contains(&"wtpdf-1-0-reuse"));
        Ok(())
    }

    // `id` is rebound to the PDF/UA namespace inside `wrapper` only; after `wrapper` closes,
    // `id:part` / `id:conformance` must resolve to the PDF/A namespace again.
    #[test]
    fn test_should_restore_scoped_namespace_bindings() -> crate::Result<()> {
        let xml = br#"<x:xmpmeta xmlns:x="adobe:ns:meta/">
 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
 <rdf:Description xmlns:id="http://www.aiim.org/pdfa/ns/id/">
 <wrapper xmlns:id="http://www.aiim.org/pdfua/ns/id/">
 <id:part>2</id:part>
 </wrapper>
 <id:part>3</id:part>
 <id:conformance>U</id:conformance>
 </rdf:Description>
 </rdf:RDF>
</x:xmpmeta>"#;

        let packet = XmpParser.parse_packet(key(), xml, &ResourceLimits::default())?;

        assert!(packet.identification.iter().any(|claim| {
            claim.display_flavour.as_str() == "pdfa-3u"
                && claim.kind == super::XmpIdentificationKind::PdfA
        }));
        Ok(())
    }

    // A DOCTYPE with an external entity must make parsing fail.
    #[test]
    fn test_should_reject_xmp_doctype_and_entities() {
        let xml = br#"<!DOCTYPE x [ <!ENTITY ext SYSTEM "file:///etc/passwd"> ]>
<x:xmpmeta xmlns:x="adobe:ns:meta/">&ext;</x:xmpmeta>"#;

        let result = XmpParser.parse_packet(key(), xml, &ResourceLimits::default());

        assert!(result.is_err());
    }

    // A 12-byte packet against an 8-byte cap must fail with the `max_xmp_bytes` limit.
    #[test]
    fn test_should_enforce_xmp_byte_cap() {
        let limits = ResourceLimits {
            max_xmp_bytes: 8,
            ..ResourceLimits::default()
        };

        let result = XmpParser.parse_packet(key(), b"<x:xmpmeta/>", &limits);

        assert!(matches!(
            result,
            Err(crate::PdfvError::Parse(crate::ParseError::LimitExceeded {
                limit: "max_xmp_bytes"
            }))
        ));
    }
}