1#![forbid(unsafe_code)]
2#![warn(rust_2024_compatibility, missing_docs, missing_debug_implementations)]
3mod generated_profiles;
18mod parser;
19mod profile;
20mod validation;
21mod xmp;
22
23use std::{
24 collections::BTreeMap,
25 fmt,
26 io::{self, Write},
27 num::{NonZeroU32, NonZeroU64},
28 path::{Path, PathBuf},
29 time::Duration,
30};
31
32pub use parser::{
33 CosObject, DecodeParams, DecoderRegistry, Dictionary, IndirectObject, ObjectStore,
34 ParseOptions, ParsedDocument, Parser, PdfName, PdfSource, PdfString, SourceStorage,
35 StreamDecoder, StreamObject, Trailer,
36};
37#[cfg(feature = "custom-profiles")]
38pub use profile::CustomProfileRepository;
39pub use profile::{
40 BinaryOp, BuiltinFunction, BuiltinProfileRepository, ErrorTemplate, ModelValue, ObjectTypeName,
41 ProfileCatalogEntry, ProfileImportSummary, ProfileRepository, PropertyName, PropertyPath, Rule,
42 RuleEvaluator, RuleExpr, RuleOutcome, UnaryOp, ValidationProfile, display_flavour,
43 import_verapdf_profile_xml,
44};
45use secrecy::{ExposeSecret, SecretString};
46use serde::{Deserialize, Serialize};
47use thiserror::Error;
48use typed_builder::TypedBuilder;
49pub use validation::{
50 AnnotationModel, CatalogModel, ContentStreamModel, FeatureSelection, FontModel, InputName,
51 LinkName, MetadataModel, ModelGraph, ModelObject, ModelObjectRef, ObjectIdentity,
52 OutputIntentModel, PageModel, Validator,
53};
54pub use xmp::{
55 DetectedFlavours, FlavourClaim, FlavourDetector, NamespaceBinding, XmpIdentificationKind,
56 XmpPacket, XmpParser,
57};
58
/// Version of this crate, read from `CARGO_PKG_VERSION` at compile time.
pub const ENGINE_VERSION: &str = env!("CARGO_PKG_VERSION");

/// Maximum byte length accepted by `Identifier::new`.
const MAX_IDENTIFIER_BYTES: usize = 128;
/// Byte limit applied when a `BoundedText` is built from a `String` via `TryFrom`.
const MAX_TEXT_BYTES: usize = 4096;
/// Limit used by `PasswordSecret::new` and default for `ResourceLimits::max_password_bytes`.
const DEFAULT_MAX_PASSWORD_BYTES: usize = 1024;
/// Largest `max_bytes` that `PasswordSecret::new_with_limit` accepts.
const HARD_MAX_PASSWORD_BYTES: usize = 4096;
/// Default for `max_string_bytes` and `max_decrypted_string_bytes` (1 MiB).
const DEFAULT_MAX_STRING_BYTES: usize = 1_048_576;
/// Default for `max_stream_decode_bytes` and `max_decrypted_stream_bytes` (256 MiB).
const DEFAULT_MAX_STREAM_DECODE_BYTES: u64 = 256 * 1024 * 1024;
/// Default for `ResourceLimits::max_encryption_dict_entries`.
const DEFAULT_MAX_ENCRYPTION_DICT_ENTRIES: u64 = 64;
/// Default for `ResourceLimits::memory_source_threshold_bytes` (16 MiB).
const DEFAULT_MEMORY_SOURCE_THRESHOLD_BYTES: u64 = 16 * 1024 * 1024;
/// Default for `ResourceLimits::max_xmp_bytes` (4 MiB).
const DEFAULT_MAX_XMP_BYTES: u64 = 4 * 1024 * 1024;
/// Default for `ResourceLimits::max_xmp_elements`.
const DEFAULT_MAX_XMP_ELEMENTS: u64 = 50_000;
/// Default for `ResourceLimits::max_xmp_depth`.
const DEFAULT_MAX_XMP_DEPTH: u32 = 32;
/// Default for `ResourceLimits::max_xmp_attributes`.
const DEFAULT_MAX_XMP_ATTRIBUTES: usize = 64;
/// Default for `ResourceLimits::max_xmp_namespaces`.
const DEFAULT_MAX_XMP_NAMESPACES: usize = 256;
/// Default for `ResourceLimits::max_xmp_text_bytes`.
const DEFAULT_MAX_XMP_TEXT_BYTES: usize = 4096;

/// Crate-wide result alias whose error type is [`PdfvError`].
pub type Result<T> = std::result::Result<T, PdfvError>;
79
/// Top-level error type carried by the crate's [`Result`] alias.
///
/// Every variant except [`PdfvError::Io`] wraps one subsystem error enum and
/// can be created from it through `From` (enabling `?` conversion).
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum PdfvError {
    /// An I/O operation failed; the message includes the path when one is set.
    #[error("I/O error{path}: {source}", path = format_optional_path(.path.as_ref()))]
    Io {
        /// Path associated with the failed operation, if known.
        path: Option<PathBuf>,
        /// Underlying I/O error.
        #[source]
        source: std::io::Error,
    },
    /// Wraps a [`ParseError`].
    #[error("parse error: {0}")]
    Parse(#[from] ParseError),
    /// Wraps a [`ProfileError`].
    #[error("profile error: {0}")]
    Profile(#[from] ProfileError),
    /// Wraps a [`ValidationError`].
    #[error("validation error: {0}")]
    Validation(#[from] ValidationError),
    /// Wraps a [`PolicyError`].
    #[error("policy error: {0}")]
    Policy(#[from] PolicyError),
    /// Wraps a [`RepairError`].
    #[error("repair error: {0}")]
    Repair(#[from] RepairError),
    /// Wraps a [`ReportError`].
    #[error("report error: {0}")]
    Report(#[from] ReportError),
    /// Wraps a [`ConfigError`].
    #[error("configuration error: {0}")]
    Configuration(#[from] ConfigError),
}
115
/// Errors reported while parsing PDF input.
#[derive(Debug, Error, Clone, Eq, PartialEq)]
#[non_exhaustive]
pub enum ParseError {
    /// A resource limit was exceeded.
    #[error("resource limit exceeded: {limit}")]
    LimitExceeded {
        /// Static label of the limit that was exceeded.
        limit: &'static str,
    },
    /// An arithmetic operation overflowed during parsing.
    #[error("arithmetic overflow while parsing {context}")]
    ArithmeticOverflow {
        /// Static label of what was being parsed.
        context: &'static str,
    },
    /// The PDF syntax was malformed.
    #[error("malformed PDF syntax: {message}")]
    Malformed {
        /// Description of the syntax problem.
        message: BoundedText,
    },
    /// A PDF object was missing.
    #[error("missing PDF object: {message}")]
    MissingObject {
        /// Description of the missing object.
        message: BoundedText,
    },
    /// A stream used a filter that is not supported.
    #[error("unsupported stream filter: {filter}")]
    UnsupportedFilter {
        /// Text identifying the unsupported filter.
        filter: BoundedText,
    },
    /// Decoding a stream failed.
    #[error("stream decode failed: {message}")]
    StreamDecode {
        /// Description of the decode failure.
        message: BoundedText,
    },
}
157
/// Errors related to validation profiles and rule evaluation.
#[derive(Debug, Error, Clone, Eq, PartialEq)]
#[non_exhaustive]
pub enum ProfileError {
    /// The requested profile selection is not supported.
    #[error("unsupported profile selection")]
    UnsupportedSelection,
    /// A profile field held an invalid value.
    #[error("invalid profile field {field}: {reason}")]
    InvalidField {
        /// Static name of the offending field.
        field: &'static str,
        /// Why the value was rejected.
        reason: BoundedText,
    },
    /// A rule evaluation budget was exceeded.
    #[error("rule evaluation budget exceeded: {budget}")]
    BudgetExceeded {
        /// Static label of the exhausted budget.
        budget: &'static str,
    },
    /// A rule referenced a model property that is not known.
    #[error("unknown model property {property}")]
    UnknownProperty {
        /// Text identifying the unknown property.
        property: BoundedText,
    },
    /// A rule expression had mismatched types.
    #[error("rule expression type mismatch: {message}")]
    TypeMismatch {
        /// Description of the mismatch.
        message: BoundedText,
    },
    /// A rule expression is not supported.
    #[error("unsupported rule expression: {reason}")]
    UnsupportedRule {
        /// Why the expression is unsupported.
        reason: BoundedText,
    },
    /// Profile XML was invalid.
    #[error("invalid profile XML: {reason}")]
    InvalidXml {
        /// Why the XML was rejected.
        reason: BoundedText,
    },
}
204
/// Errors raised by the validation stage.
#[derive(Debug, Error, Clone, Eq, PartialEq)]
#[non_exhaustive]
pub enum ValidationError {
    /// A validation subsystem is unavailable.
    #[error("validation subsystem is unavailable: {subsystem}")]
    SubsystemUnavailable {
        /// Static name of the unavailable subsystem.
        subsystem: &'static str,
    },
    /// A traversal limit was exceeded during validation.
    #[error("validation traversal limit exceeded: {limit}")]
    LimitExceeded {
        /// Static label of the limit that was exceeded.
        limit: &'static str,
    },
}
222
/// Errors related to policy definitions and their evaluation.
#[derive(Debug, Error, Clone, Eq, PartialEq)]
#[non_exhaustive]
pub enum PolicyError {
    /// A policy field held an invalid value (returned by `PolicySet::validate`).
    #[error("invalid policy field {field}: {reason}")]
    InvalidField {
        /// Static name of the offending field.
        field: &'static str,
        /// Why the value was rejected.
        reason: BoundedText,
    },
    /// A policy rule could not be evaluated.
    #[error("policy rule could not be evaluated: {reason}")]
    Evaluation {
        /// Why evaluation failed.
        reason: BoundedText,
    },
}
242
/// Errors related to metadata repair configuration and execution.
#[derive(Debug, Error, Clone, Eq, PartialEq)]
#[non_exhaustive]
pub enum RepairError {
    /// A repair input (output directory, prefix, path, or file name) was invalid.
    #[error("invalid repair field {field}: {reason}")]
    InvalidField {
        /// Static name of the offending field.
        field: &'static str,
        /// Why the value was rejected.
        reason: BoundedText,
    },
    /// The metadata repair failed.
    #[error("metadata repair failed: {reason}")]
    Failed {
        /// Why the repair failed.
        reason: BoundedText,
    },
}
262
/// Errors raised while serializing or writing a report.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum ReportError {
    /// JSON serialization failed; convertible from `serde_json::Error`.
    #[error("JSON serialization failed")]
    Json {
        /// Underlying `serde_json` error.
        #[from]
        source: serde_json::Error,
    },
    /// XML serialization failed.
    #[error("XML serialization failed: {message}")]
    Xml {
        /// Description of the XML failure.
        message: BoundedText,
    },
    /// Writing report output failed.
    #[error("report output write failed")]
    Write {
        /// Underlying I/O error.
        #[source]
        source: std::io::Error,
    },
}
288
/// Errors raised when a configuration value fails validation.
#[derive(Debug, Error, Clone, Eq, PartialEq)]
#[non_exhaustive]
pub enum ConfigError {
    /// A configuration value was rejected.
    #[error("invalid configuration value {field}: {reason}")]
    InvalidValue {
        /// Static name of the offending field.
        field: &'static str,
        /// Why the value was rejected.
        reason: BoundedText,
    },
}
302
303#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Deserialize, Serialize)]
305#[serde(try_from = "String", into = "String")]
306pub struct BoundedText(String);
307
308impl BoundedText {
309 pub fn new(
315 value: impl Into<String>,
316 max_bytes: usize,
317 ) -> std::result::Result<Self, ConfigError> {
318 let value = value.into();
319 if value.len() > max_bytes {
320 return Err(ConfigError::InvalidValue {
321 field: "text",
322 reason: Self::unchecked("value exceeds byte limit"),
323 });
324 }
325 Ok(Self(value))
326 }
327
328 #[must_use]
330 pub fn as_str(&self) -> &str {
331 &self.0
332 }
333
334 pub(crate) fn unchecked(value: impl Into<String>) -> Self {
335 Self(value.into())
336 }
337}
338
339impl fmt::Display for BoundedText {
340 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
341 formatter.write_str(&self.0)
342 }
343}
344
345impl TryFrom<String> for BoundedText {
346 type Error = ConfigError;
347
348 fn try_from(value: String) -> std::result::Result<Self, Self::Error> {
349 Self::new(value, MAX_TEXT_BYTES)
350 }
351}
352
353impl From<BoundedText> for String {
354 fn from(value: BoundedText) -> Self {
355 value.0
356 }
357}
358
359#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Deserialize, Serialize)]
361#[serde(try_from = "String", into = "String")]
362pub struct Identifier(String);
363
364impl Identifier {
365 pub fn new(value: impl Into<String>) -> std::result::Result<Self, ConfigError> {
372 let value = value.into();
373 let valid_charset = value
374 .bytes()
375 .all(|byte| byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.' | b':'));
376 if value.is_empty() || value.len() > MAX_IDENTIFIER_BYTES || !valid_charset {
377 return Err(ConfigError::InvalidValue {
378 field: "identifier",
379 reason: BoundedText::unchecked("identifier violates byte or charset policy"),
380 });
381 }
382 Ok(Self(value))
383 }
384
385 #[must_use]
387 pub fn as_str(&self) -> &str {
388 &self.0
389 }
390
391 pub(crate) fn unchecked(value: impl Into<String>) -> Self {
392 Self(value.into())
393 }
394}
395
396impl TryFrom<String> for Identifier {
397 type Error = ConfigError;
398
399 fn try_from(value: String) -> std::result::Result<Self, Self::Error> {
400 Self::new(value)
401 }
402}
403
404impl From<Identifier> for String {
405 fn from(value: Identifier) -> Self {
406 value.0
407 }
408}
409
410#[derive(Clone)]
412pub struct PasswordSecret(SecretString);
413
414impl PasswordSecret {
415 pub fn new(value: impl Into<String>) -> std::result::Result<Self, ConfigError> {
421 Self::new_with_limit(value, DEFAULT_MAX_PASSWORD_BYTES)
422 }
423
424 pub fn new_with_limit(
431 value: impl Into<String>,
432 max_bytes: usize,
433 ) -> std::result::Result<Self, ConfigError> {
434 if max_bytes > HARD_MAX_PASSWORD_BYTES {
435 return Err(ConfigError::InvalidValue {
436 field: "maxPasswordBytes",
437 reason: BoundedText::unchecked("value exceeds hard cap"),
438 });
439 }
440 let value = value.into();
441 if value.len() > max_bytes {
442 return Err(ConfigError::InvalidValue {
443 field: "password",
444 reason: BoundedText::unchecked("password exceeds byte limit"),
445 });
446 }
447 Ok(Self(SecretString::from(value)))
448 }
449
450 pub(crate) fn expose_secret_bytes(&self) -> &[u8] {
451 self.0.expose_secret().as_bytes()
452 }
453}
454
455impl fmt::Debug for PasswordSecret {
456 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
457 formatter.write_str("PasswordSecret([REDACTED])")
458 }
459}
460
/// Options passed to the validator.
///
/// Built through the generated typed builder; every field has a builder
/// default. Serialized with camelCase keys, and unknown keys are rejected on
/// deserialization.
#[derive(Clone, Debug, Deserialize, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ValidationOptions {
    /// How the validation flavour is chosen. Builder default: `FlavourSelection::default()`.
    #[builder(default)]
    pub flavour: FlavourSelection,
    /// Resource limits. Builder default: `ResourceLimits::default()`.
    #[builder(default)]
    pub resource_limits: ResourceLimits,
    /// Optional document password. Never serialized; deserialization always
    /// yields `None`.
    #[builder(default)]
    #[serde(skip, default)]
    pub password: Option<PasswordSecret>,
    /// Builder default: `MaxDisplayedFailures::default()`.
    // NOTE(review): presumably caps failed assertions reported per rule — confirm in `validation`.
    #[builder(default)]
    pub max_failed_assertions_per_rule: MaxDisplayedFailures,
    /// Builder default: `false`.
    #[builder(default)]
    pub record_passed_assertions: bool,
    /// Builder default: `true`.
    #[builder(default = true)]
    pub report_parse_warnings: bool,
    /// Builder default: `FeatureSelection::default()`.
    #[builder(default)]
    pub feature_selection: FeatureSelection,
    /// Optional policy set; omitted from serialized output when `None`.
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub policy: Option<PolicySet>,
}

impl Default for ValidationOptions {
    /// Equivalent to building with every builder default.
    fn default() -> Self {
        Self::builder().build()
    }
}
499
500#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
502#[non_exhaustive]
503#[serde(rename_all = "camelCase", deny_unknown_fields)]
504pub enum FlavourSelection {
505 Auto {
507 default: Option<ValidationFlavour>,
509 },
510 Explicit {
512 flavour: ValidationFlavour,
514 },
515 CustomProfile {
517 profile_path: PathBuf,
519 },
520}
521
522impl Default for FlavourSelection {
523 fn default() -> Self {
524 Self::Auto {
525 default: Some(ValidationFlavour {
526 family: Identifier::unchecked("pdfa"),
527 part: NonZeroU32::MIN,
528 conformance: Identifier::unchecked("b"),
529 }),
530 }
531 }
532}
533
534#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
536#[non_exhaustive]
537#[serde(rename_all = "camelCase", deny_unknown_fields)]
538pub struct ValidationFlavour {
539 pub family: Identifier,
541 pub part: NonZeroU32,
543 pub conformance: Identifier,
545}
546
547impl ValidationFlavour {
548 pub fn new(
554 family: impl Into<String>,
555 part: NonZeroU32,
556 conformance: impl Into<String>,
557 ) -> std::result::Result<Self, ConfigError> {
558 Ok(Self {
559 family: Identifier::new(family)?,
560 part,
561 conformance: Identifier::new(conformance)?,
562 })
563 }
564}
565
/// Numeric limits applied by the engine.
///
/// Fields carrying `#[builder(default = …)]` are optional in the typed builder
/// and, via the matching `#[serde(default = …)]`, may be omitted from
/// serialized input; all other fields must be supplied in both cases. The
/// values quoted below are the ones produced by the `Default` impl. Where each
/// limit is enforced is not visible in this file.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ResourceLimits {
    /// `Default`: 256 MiB.
    pub max_file_bytes: u64,
    /// `Default`: 1,000,000.
    pub max_objects: u64,
    /// `Default`: 128.
    pub max_object_depth: u32,
    /// `Default`: 65,536.
    pub max_array_len: u64,
    /// `Default`: 16,384.
    pub max_dict_entries: u64,
    /// `Default`: 127.
    pub max_name_bytes: usize,
    /// `Default`: 1 MiB (`DEFAULT_MAX_STRING_BYTES`).
    pub max_string_bytes: usize,
    /// `Default`: 1024 (`DEFAULT_MAX_PASSWORD_BYTES`).
    #[builder(default = DEFAULT_MAX_PASSWORD_BYTES)]
    #[serde(default = "default_max_password_bytes")]
    pub max_password_bytes: usize,
    /// `Default`: 1 MiB (`DEFAULT_MAX_STRING_BYTES`).
    #[builder(default = DEFAULT_MAX_STRING_BYTES)]
    #[serde(default = "default_max_decrypted_string_bytes")]
    pub max_decrypted_string_bytes: usize,
    /// `Default`: 128 MiB.
    pub max_stream_declared_bytes: u64,
    /// `Default`: 256 MiB (`DEFAULT_MAX_STREAM_DECODE_BYTES`).
    pub max_stream_decode_bytes: u64,
    /// `Default`: 256 MiB (`DEFAULT_MAX_STREAM_DECODE_BYTES`).
    #[builder(default = DEFAULT_MAX_STREAM_DECODE_BYTES)]
    #[serde(default = "default_max_decrypted_stream_bytes")]
    pub max_decrypted_stream_bytes: u64,
    /// `Default`: 64.
    #[builder(default = DEFAULT_MAX_ENCRYPTION_DICT_ENTRIES)]
    #[serde(default = "default_max_encryption_dict_entries")]
    pub max_encryption_dict_entries: u64,
    /// `Default`: 16 MiB.
    #[builder(default = DEFAULT_MEMORY_SOURCE_THRESHOLD_BYTES)]
    #[serde(default = "default_memory_source_threshold_bytes")]
    pub memory_source_threshold_bytes: u64,
    /// `Default`: 100,000.
    pub max_parse_facts: usize,
    /// `Default`: 4 MiB.
    #[builder(default = DEFAULT_MAX_XMP_BYTES)]
    #[serde(default = "default_max_xmp_bytes")]
    pub max_xmp_bytes: u64,
    /// `Default`: 50,000.
    #[builder(default = DEFAULT_MAX_XMP_ELEMENTS)]
    #[serde(default = "default_max_xmp_elements")]
    pub max_xmp_elements: u64,
    /// `Default`: 32.
    #[builder(default = DEFAULT_MAX_XMP_DEPTH)]
    #[serde(default = "default_max_xmp_depth")]
    pub max_xmp_depth: u32,
    /// `Default`: 64.
    #[builder(default = DEFAULT_MAX_XMP_ATTRIBUTES)]
    #[serde(default = "default_max_xmp_attributes")]
    pub max_xmp_attributes: usize,
    /// `Default`: 256.
    #[builder(default = DEFAULT_MAX_XMP_NAMESPACES)]
    #[serde(default = "default_max_xmp_namespaces")]
    pub max_xmp_namespaces: usize,
    /// `Default`: 4096.
    #[builder(default = DEFAULT_MAX_XMP_TEXT_BYTES)]
    #[serde(default = "default_max_xmp_text_bytes")]
    pub max_xmp_text_bytes: usize,
}

impl Default for ResourceLimits {
    /// Returns the default limits; fields backed by a `DEFAULT_*` constant use
    /// the same constant as their builder default.
    fn default() -> Self {
        Self {
            max_file_bytes: 256 * 1024 * 1024,
            max_objects: 1_000_000,
            max_object_depth: 128,
            max_array_len: 65_536,
            max_dict_entries: 16_384,
            max_name_bytes: 127,
            max_string_bytes: DEFAULT_MAX_STRING_BYTES,
            max_password_bytes: DEFAULT_MAX_PASSWORD_BYTES,
            max_decrypted_string_bytes: DEFAULT_MAX_STRING_BYTES,
            max_stream_declared_bytes: 128 * 1024 * 1024,
            max_stream_decode_bytes: DEFAULT_MAX_STREAM_DECODE_BYTES,
            max_decrypted_stream_bytes: DEFAULT_MAX_STREAM_DECODE_BYTES,
            max_encryption_dict_entries: DEFAULT_MAX_ENCRYPTION_DICT_ENTRIES,
            memory_source_threshold_bytes: DEFAULT_MEMORY_SOURCE_THRESHOLD_BYTES,
            max_parse_facts: 100_000,
            max_xmp_bytes: DEFAULT_MAX_XMP_BYTES,
            max_xmp_elements: DEFAULT_MAX_XMP_ELEMENTS,
            max_xmp_depth: DEFAULT_MAX_XMP_DEPTH,
            max_xmp_attributes: DEFAULT_MAX_XMP_ATTRIBUTES,
            max_xmp_namespaces: DEFAULT_MAX_XMP_NAMESPACES,
            max_xmp_text_bytes: DEFAULT_MAX_XMP_TEXT_BYTES,
        }
    }
}
664
// Serde `default = "…"` needs a function path, so each defaulted
// `ResourceLimits` field has a small helper returning its constant.

/// Serde default for `ResourceLimits::max_password_bytes`.
fn default_max_password_bytes() -> usize {
    DEFAULT_MAX_PASSWORD_BYTES
}

/// Serde default for `ResourceLimits::max_decrypted_string_bytes`.
fn default_max_decrypted_string_bytes() -> usize {
    DEFAULT_MAX_STRING_BYTES
}

/// Serde default for `ResourceLimits::max_decrypted_stream_bytes`.
fn default_max_decrypted_stream_bytes() -> u64 {
    DEFAULT_MAX_STREAM_DECODE_BYTES
}

/// Serde default for `ResourceLimits::max_encryption_dict_entries`.
fn default_max_encryption_dict_entries() -> u64 {
    DEFAULT_MAX_ENCRYPTION_DICT_ENTRIES
}

/// Serde default for `ResourceLimits::memory_source_threshold_bytes`.
fn default_memory_source_threshold_bytes() -> u64 {
    DEFAULT_MEMORY_SOURCE_THRESHOLD_BYTES
}

/// Serde default for `ResourceLimits::max_xmp_bytes`.
fn default_max_xmp_bytes() -> u64 {
    DEFAULT_MAX_XMP_BYTES
}

/// Serde default for `ResourceLimits::max_xmp_elements`.
fn default_max_xmp_elements() -> u64 {
    DEFAULT_MAX_XMP_ELEMENTS
}

/// Serde default for `ResourceLimits::max_xmp_depth`.
fn default_max_xmp_depth() -> u32 {
    DEFAULT_MAX_XMP_DEPTH
}

/// Serde default for `ResourceLimits::max_xmp_attributes`.
fn default_max_xmp_attributes() -> usize {
    DEFAULT_MAX_XMP_ATTRIBUTES
}

/// Serde default for `ResourceLimits::max_xmp_namespaces`.
fn default_max_xmp_namespaces() -> usize {
    DEFAULT_MAX_XMP_NAMESPACES
}

/// Serde default for `ResourceLimits::max_xmp_text_bytes`.
fn default_max_xmp_text_bytes() -> usize {
    DEFAULT_MAX_XMP_TEXT_BYTES
}
708
709#[derive(Clone, Copy, Debug, Deserialize, Eq, Ord, PartialEq, PartialOrd, Serialize)]
711#[serde(try_from = "u32", into = "u32")]
712pub struct MaxDisplayedFailures(NonZeroU32);
713
714impl MaxDisplayedFailures {
715 #[must_use]
717 pub fn new(value: NonZeroU32) -> Self {
718 Self(value)
719 }
720
721 #[must_use]
723 pub fn get(self) -> u32 {
724 self.0.get()
725 }
726}
727
728impl Default for MaxDisplayedFailures {
729 fn default() -> Self {
730 Self(NonZeroU32::MIN)
731 }
732}
733
734impl TryFrom<u32> for MaxDisplayedFailures {
735 type Error = ConfigError;
736
737 fn try_from(value: u32) -> std::result::Result<Self, Self::Error> {
738 let Some(value) = NonZeroU32::new(value) else {
739 return Err(ConfigError::InvalidValue {
740 field: "maxFailedAssertionsPerRule",
741 reason: BoundedText::unchecked("value must be greater than zero"),
742 });
743 };
744 Ok(Self(value))
745 }
746}
747
748impl From<MaxDisplayedFailures> for u32 {
749 fn from(value: MaxDisplayedFailures) -> Self {
750 value.get()
751 }
752}
753
/// Result of validating one input.
///
/// Serialized with camelCase keys; unknown keys are rejected on
/// deserialization. Built through the generated typed builder.
#[derive(Clone, Debug, Deserialize, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ValidationReport {
    /// Version string of the engine that produced the report.
    pub engine_version: String,
    /// Summary of the validated input.
    pub source: InputSummary,
    /// Overall validation status.
    pub status: ValidationStatus,
    /// Flavours associated with this validation run.
    pub flavours: Vec<ValidationFlavour>,
    /// One report per evaluated profile.
    pub profile_reports: Vec<ProfileReport>,
    /// Facts recorded by the parser.
    pub parse_facts: Vec<ParseFact>,
    /// Warnings raised during validation.
    pub warnings: Vec<ValidationWarning>,
    /// Optional feature report; omitted from serialized output when `None`.
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub feature_report: Option<FeatureReport>,
    /// Optional policy report; omitted from serialized output when `None`.
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub policy_report: Option<PolicyReport>,
    /// Recorded task durations.
    pub task_durations: Vec<TaskDuration>,
}
784
/// Feature objects collected from a document.
#[derive(Clone, Debug, Deserialize, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct FeatureReport {
    /// Collected feature objects.
    pub objects: Vec<FeatureObject>,
    /// Number of objects visited.
    pub visited_objects: u64,
    /// Object families that were selected for reporting.
    pub selected_families: Vec<ObjectTypeName>,
    /// Truncation flag.
    // NOTE(review): presumably set when collection stopped at a limit — confirm with the producer.
    pub truncated: bool,
}
799
/// One reported feature object with its properties.
#[derive(Clone, Debug, Deserialize, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct FeatureObject {
    /// Object family this entry belongs to.
    pub family: ObjectTypeName,
    /// Location of the object.
    pub location: ObjectLocation,
    /// Context text for the object.
    pub context: BoundedText,
    /// Property values keyed by property name, in sorted key order.
    pub properties: BTreeMap<PropertyName, FeatureValue>,
}
814
/// Value of a feature property.
///
/// Serialized adjacently tagged: the variant name (camelCase) goes in `type`
/// and the payload in `value`.
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", tag = "type", content = "value")]
pub enum FeatureValue {
    /// No value.
    Null,
    /// Boolean value.
    Bool(bool),
    /// Floating-point number.
    Number(f64),
    /// Text value.
    String(BoundedText),
    /// A string whose content is not included; only a byte count is carried.
    RedactedString {
        /// Byte count (presumably the length of the withheld string — confirm).
        bytes: u64,
    },
    /// Reference to an object by key.
    ObjectKey(ObjectKey),
    /// Nested list of values.
    List(Vec<FeatureValue>),
}
838
839#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
841#[non_exhaustive]
842#[serde(rename_all = "camelCase", deny_unknown_fields)]
843pub struct PolicySet {
844 #[serde(default, skip_serializing_if = "Option::is_none")]
846 pub name: Option<BoundedText>,
847 pub rules: Vec<PolicyRule>,
849}
850
851impl PolicySet {
852 pub fn validate(&self) -> std::result::Result<(), PolicyError> {
858 const MAX_POLICY_RULES: usize = 1024;
859 if self.rules.is_empty() {
860 return Err(PolicyError::InvalidField {
861 field: "rules",
862 reason: BoundedText::unchecked("policy must contain at least one rule"),
863 });
864 }
865 if self.rules.len() > MAX_POLICY_RULES {
866 return Err(PolicyError::InvalidField {
867 field: "rules",
868 reason: BoundedText::unchecked("policy rule count exceeds limit"),
869 });
870 }
871 Ok(())
872 }
873}
874
/// A single policy rule: an operator applied to one field of an object family.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct PolicyRule {
    /// Rule identifier.
    pub id: Identifier,
    /// Human-readable description.
    pub description: BoundedText,
    /// Object family the rule targets.
    pub family: ObjectTypeName,
    /// Property the rule inspects.
    pub field: PropertyName,
    /// Operator applied to the property.
    pub operator: PolicyOperator,
    /// Optional comparison operand; may be absent from input and is omitted
    /// from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub value: Option<PolicyValue>,
}
894
/// Operator used by a [`PolicyRule`]; serialized in camelCase.
// NOTE(review): the variant descriptions below follow the variant names; the
// evaluation semantics live outside this file — confirm against the evaluator.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum PolicyOperator {
    /// The field exists.
    Exists,
    /// The field is absent.
    Absent,
    /// The field equals the rule value.
    Equals,
    /// The field differs from the rule value.
    NotEquals,
    /// Minimum bound.
    Min,
    /// Maximum bound.
    Max,
}
913
/// Operand of a [`PolicyRule`].
///
/// Serialized adjacently tagged: the variant name (camelCase) goes in `type`
/// and the payload in `value`.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", tag = "type", content = "value")]
pub enum PolicyValue {
    /// Boolean operand.
    Bool(bool),
    /// Signed 32-bit integer operand.
    Number(i32),
    /// Text operand.
    String(BoundedText),
}
926
/// Outcome of evaluating a policy.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct PolicyReport {
    /// Optional policy name; omitted from serialized output when `None`.
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<BoundedText>,
    /// Overall compliance flag.
    pub is_compliant: bool,
    /// Per-rule results.
    pub results: Vec<PolicyRuleResult>,
}
941
/// Result of evaluating one policy rule.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct PolicyRuleResult {
    /// Rule identifier.
    pub id: Identifier,
    /// Rule description.
    pub description: BoundedText,
    /// Whether the rule passed.
    pub passed: bool,
    /// Match count reported for the rule.
    pub matches: u64,
    /// Result message.
    pub message: BoundedText,
}
958
959#[derive(Clone, Debug, Deserialize, Serialize, TypedBuilder)]
961#[non_exhaustive]
962#[serde(rename_all = "camelCase", deny_unknown_fields)]
963pub struct RepairReport {
964 pub engine_version: String,
966 pub source: InputSummary,
968 #[serde(skip_serializing_if = "Option::is_none")]
970 pub output_path: Option<PathBuf>,
971 pub status: RepairStatus,
973 pub actions: Vec<RepairAction>,
975 #[serde(skip_serializing_if = "Option::is_none")]
977 pub refusal: Option<RepairRefusal>,
978 pub warnings: Vec<ValidationWarning>,
980 pub task_durations: Vec<TaskDuration>,
982}
983
984impl RepairReport {
985 #[must_use]
987 pub fn wrote_output(&self) -> bool {
988 matches!(
989 self.status,
990 RepairStatus::Succeeded | RepairStatus::NoAction
991 ) && self.output_path.is_some()
992 }
993}
994
995#[derive(Clone, Debug)]
997pub struct MetadataRepairOptions {
998 pub validation_options: ValidationOptions,
1000 pub output_dir: PathBuf,
1002 pub prefix: String,
1004}
1005
1006impl MetadataRepairOptions {
1007 pub fn new(
1014 validation_options: ValidationOptions,
1015 output_dir: impl AsRef<Path>,
1016 prefix: impl Into<String>,
1017 ) -> Result<Self> {
1018 Ok(Self {
1019 validation_options,
1020 output_dir: validate_repair_output_dir(output_dir.as_ref())?,
1021 prefix: validate_repair_prefix(&prefix.into())?,
1022 })
1023 }
1024}
1025
1026#[derive(Debug)]
1028pub struct MetadataRepairer {
1029 validator: Validator,
1030 output_dir: PathBuf,
1031 prefix: String,
1032}
1033
1034impl MetadataRepairer {
1035 pub fn new(options: MetadataRepairOptions) -> Result<Self> {
1041 Ok(Self {
1042 validator: Validator::new(options.validation_options)?,
1043 output_dir: options.output_dir,
1044 prefix: options.prefix,
1045 })
1046 }
1047
1048 pub fn repair_path(&self, path: impl AsRef<Path>) -> Result<RepairReport> {
1054 repair_metadata_path(
1055 &self.validator,
1056 path.as_ref(),
1057 &self.output_dir,
1058 &self.prefix,
1059 )
1060 }
1061}
1062
1063#[derive(Clone, Debug, Deserialize, Serialize)]
1065#[non_exhaustive]
1066#[serde(rename_all = "camelCase", deny_unknown_fields)]
1067pub struct RepairBatchReport {
1068 pub items: Vec<RepairReport>,
1070 pub summary: RepairBatchSummary,
1072 pub warnings: Vec<ValidationWarning>,
1074}
1075
1076impl RepairBatchReport {
1077 #[must_use]
1079 pub fn from_items(
1080 items: Vec<RepairReport>,
1081 warnings: Vec<ValidationWarning>,
1082 elapsed: Duration,
1083 ) -> Self {
1084 let summary = RepairBatchSummary::from_items(&items, elapsed);
1085 Self {
1086 items,
1087 summary,
1088 warnings,
1089 }
1090 }
1091}
1092
1093#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize, TypedBuilder)]
1095#[non_exhaustive]
1096#[serde(rename_all = "camelCase", deny_unknown_fields)]
1097pub struct RepairBatchSummary {
1098 pub total_files: u64,
1100 pub succeeded: u64,
1102 pub no_action: u64,
1104 pub refused: u64,
1106 pub failed: u64,
1108 pub elapsed_millis: u64,
1110 pub worst_exit_category: ExitCategory,
1112}
1113
1114impl RepairBatchSummary {
1115 #[must_use]
1117 pub fn from_items(items: &[RepairReport], elapsed: Duration) -> Self {
1118 let mut summary = Self {
1119 total_files: u64::try_from(items.len()).unwrap_or(u64::MAX),
1120 elapsed_millis: duration_millis(elapsed),
1121 ..Self::default()
1122 };
1123 for item in items {
1124 match item.status {
1125 RepairStatus::Succeeded => summary.succeeded = summary.succeeded.saturating_add(1),
1126 RepairStatus::NoAction => summary.no_action = summary.no_action.saturating_add(1),
1127 RepairStatus::Refused => summary.refused = summary.refused.saturating_add(1),
1128 RepairStatus::Failed => summary.failed = summary.failed.saturating_add(1),
1129 }
1130 }
1131 summary.worst_exit_category = if summary.failed > 0 {
1132 ExitCategory::InternalError
1133 } else if summary.refused > 0 {
1134 ExitCategory::ProcessingFailed
1135 } else {
1136 ExitCategory::Success
1137 };
1138 summary
1139 }
1140}
1141
/// Overall status of a repair attempt; serialized in camelCase.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum RepairStatus {
    /// The repair succeeded.
    Succeeded,
    /// No repair was needed; this file's repair routine reports it after
    /// copying the input unchanged.
    NoAction,
    /// The repair was refused; the report carries a [`RepairRefusal`].
    Refused,
    /// The repair was attempted and failed.
    Failed,
}
1156
/// An action taken during repair.
///
/// Serialized internally tagged, with the camelCase variant name in `kind`.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", tag = "kind")]
pub enum RepairAction {
    /// The input was copied to the output without changes.
    CopiedUnchanged,
    /// Metadata was rewritten.
    MetadataRewritten {
        /// Description of the rewrite.
        description: BoundedText,
    },
}
1170
/// Reason a repair was refused.
///
/// Serialized internally tagged, with the camelCase variant name in `kind`.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", tag = "kind")]
pub enum RepairRefusal {
    /// Validation reported that parsing failed.
    ParseFailed {
        /// Message describing the parse failure.
        reason: BoundedText,
    },
    /// Validation reported the document as encrypted.
    Encrypted,
    /// The number of selected flavours/profiles was not exactly one.
    AmbiguousFlavour {
        /// How many were selected.
        selected: u64,
    },
    /// Validation finished with a status other than `Valid`.
    UnsupportedValidationStatus {
        /// The status that was reported.
        status: ValidationStatus,
    },
    /// The computed output path equals the canonical input path.
    OutputWouldModifyInput,
    /// The output path is unusable (for example, it already exists).
    InvalidOutputPath {
        /// Why the path was rejected.
        reason: BoundedText,
    },
}
1201
/// Validates `path` and, when it is valid under exactly one flavour/profile,
/// copies it unchanged to `<output_dir>/<prefix><file name>`.
///
/// Refusals (output would overwrite the input, output already exists, parse
/// failure, encryption, ambiguous flavour, non-valid status) are returned as
/// `Ok` reports with status `Refused`. A failed copy is returned as an `Ok`
/// report built by `failed_repair_report`.
///
/// # Errors
///
/// Returns `Err` when the input cannot be inspected or canonicalized, when the
/// output file name is invalid, when validation itself errors, or when cleanup
/// after a failed copy fails.
#[allow(
    clippy::disallowed_methods,
    reason = "metadata repair is an explicit synchronous file rewrite API, not an async service \
              path"
)]
fn repair_metadata_path(
    validator: &Validator,
    path: &Path,
    output_dir: &Path,
    prefix: &str,
) -> Result<RepairReport> {
    let source = input_summary_for_path(path)?;
    let output_path = repair_output_path(path, output_dir, prefix)?;
    // Compare against the canonical input so the repair never targets the file it reads.
    // NOTE(review): `output_path` itself is not canonicalized here; the comparison
    // relies on `output_dir` already being canonical — confirm for all callers.
    let input_canonical = std::fs::canonicalize(path).map_err(|source| PdfvError::Io {
        path: Some(path.to_path_buf()),
        source,
    })?;
    if input_canonical == output_path {
        return Ok(refused_repair_report(
            source,
            RepairRefusal::OutputWouldModifyInput,
        ));
    }
    // Existing outputs are never overwritten by design of this check.
    // NOTE(review): the check and the later write are not atomic; a file created
    // in between would be replaced by `atomic_copy`.
    if output_path.exists() {
        return Ok(refused_repair_report(
            source,
            RepairRefusal::InvalidOutputPath {
                reason: BoundedText::unchecked("output path already exists"),
            },
        ));
    }

    // The recorded duration covers validation and the copy, not the checks above.
    let started = std::time::Instant::now();
    let validation = validator.validate_path(path)?;
    if matches!(validation.status, ValidationStatus::ParseFailed) {
        return Ok(refused_repair_report(
            source,
            RepairRefusal::ParseFailed {
                // Use the first validation warning as the reason, or a default text.
                reason: validation
                    .warnings
                    .first()
                    .map_or_else(default_parse_failed_text, ValidationWarning::message_text),
            },
        ));
    }
    if matches!(validation.status, ValidationStatus::Encrypted) {
        return Ok(refused_repair_report(source, RepairRefusal::Encrypted));
    }
    // Fall back to the number of profile reports when no flavours are listed.
    let selected_profiles = if validation.flavours.is_empty() {
        validation.profile_reports.len()
    } else {
        validation.flavours.len()
    };
    if selected_profiles != 1 {
        return Ok(refused_repair_report(
            source,
            RepairRefusal::AmbiguousFlavour {
                selected: u64::try_from(selected_profiles).unwrap_or(u64::MAX),
            },
        ));
    }
    // Only documents that validated as `Valid` reach the copy step.
    if !matches!(validation.status, ValidationStatus::Valid) {
        return Ok(refused_repair_report(
            source,
            RepairRefusal::UnsupportedValidationStatus {
                status: validation.status,
            },
        ));
    }

    match atomic_copy(path, &output_path) {
        Ok(()) => Ok(RepairReport::builder()
            .engine_version(ENGINE_VERSION.to_owned())
            .source(source)
            .output_path(Some(output_path))
            .status(RepairStatus::NoAction)
            .actions(vec![RepairAction::CopiedUnchanged])
            .refusal(None)
            .warnings(Vec::new())
            .task_durations(vec![TaskDuration::from_duration(
                Identifier::new("repairMetadata")?,
                started.elapsed(),
            )])
            .build()),
        Err(error) => {
            // A cleanup failure is returned as `Err` and replaces the copy error.
            remove_failed_output(&output_path)?;
            Ok(failed_repair_report(
                source,
                Some(output_path),
                &error.to_string(),
            ))
        }
    }
}
1296
1297#[allow(
1298 clippy::disallowed_methods,
1299 reason = "metadata repair reports filesystem input size synchronously"
1300)]
1301fn input_summary_for_path(path: &Path) -> Result<InputSummary> {
1302 let metadata = std::fs::metadata(path).map_err(|source| PdfvError::Io {
1303 path: Some(path.to_path_buf()),
1304 source,
1305 })?;
1306 Ok(InputSummary::new(
1307 InputKind::File,
1308 Some(path.to_path_buf()),
1309 Some(metadata.len()),
1310 ))
1311}
1312
1313#[allow(
1314 clippy::disallowed_methods,
1315 reason = "metadata repair validates a caller-selected filesystem output directory"
1316)]
1317fn validate_repair_output_dir(path: &Path) -> Result<PathBuf> {
1318 let metadata = std::fs::metadata(path).map_err(|source| PdfvError::Io {
1319 path: Some(path.to_path_buf()),
1320 source,
1321 })?;
1322 if !metadata.is_dir() {
1323 return Err(RepairError::InvalidField {
1324 field: "outputDir",
1325 reason: BoundedText::unchecked("output directory is not a directory"),
1326 }
1327 .into());
1328 }
1329 std::fs::canonicalize(path).map_err(|source| PdfvError::Io {
1330 path: Some(path.to_path_buf()),
1331 source,
1332 })
1333}
1334
1335fn validate_repair_prefix(prefix: &str) -> Result<String> {
1336 const MAX_REPAIR_PREFIX_BYTES: usize = 64;
1337 let valid = prefix.len() <= MAX_REPAIR_PREFIX_BYTES
1338 && prefix
1339 .bytes()
1340 .all(|byte| byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.'));
1341 if valid {
1342 Ok(prefix.to_owned())
1343 } else {
1344 Err(RepairError::InvalidField {
1345 field: "prefix",
1346 reason: BoundedText::unchecked(
1347 "prefix must be ASCII letters, digits, dot, dash, or underscore and at most 64 \
1348 bytes",
1349 ),
1350 }
1351 .into())
1352 }
1353}
1354
1355fn repair_output_path(path: &Path, output_dir: &Path, prefix: &str) -> Result<PathBuf> {
1356 let file_name = path
1357 .file_name()
1358 .and_then(|name| name.to_str())
1359 .ok_or_else(|| RepairError::InvalidField {
1360 field: "paths",
1361 reason: BoundedText::unchecked("input path must have a UTF-8 file name"),
1362 })?;
1363 validate_output_filename(file_name)?;
1364 let output_name = format!("{prefix}{file_name}");
1365 validate_output_filename(&output_name)?;
1366 Ok(output_dir.join(output_name))
1367}
1368
1369fn validate_output_filename(name: &str) -> Result<()> {
1370 const MAX_OUTPUT_FILENAME_BYTES: usize = 255;
1371 let valid = !name.is_empty()
1372 && name.len() <= MAX_OUTPUT_FILENAME_BYTES
1373 && !name.contains("..")
1374 && name
1375 .bytes()
1376 .all(|byte| byte != b'\0' && byte != b'/' && byte != b'\\');
1377 if valid {
1378 Ok(())
1379 } else {
1380 Err(RepairError::InvalidField {
1381 field: "output",
1382 reason: BoundedText::unchecked("output filename is invalid"),
1383 }
1384 .into())
1385 }
1386}
1387
1388#[allow(
1389 clippy::disallowed_methods,
1390 clippy::disallowed_types,
1391 reason = "metadata repair performs synchronous atomic file output by design"
1392)]
1393fn atomic_copy(input: &Path, output_path: &Path) -> Result<()> {
1394 let Some(parent) = output_path.parent() else {
1395 return Err(RepairError::InvalidField {
1396 field: "outputDir",
1397 reason: BoundedText::unchecked("output path has no parent"),
1398 }
1399 .into());
1400 };
1401 let mut source = std::fs::File::open(input).map_err(|source| PdfvError::Io {
1402 path: Some(input.to_path_buf()),
1403 source,
1404 })?;
1405 let mut temp = tempfile::NamedTempFile::new_in(parent).map_err(|source| PdfvError::Io {
1406 path: Some(parent.to_path_buf()),
1407 source,
1408 })?;
1409 io::copy(&mut source, &mut temp).map_err(|source| PdfvError::Io {
1410 path: Some(input.to_path_buf()),
1411 source,
1412 })?;
1413 temp.flush().map_err(|source| PdfvError::Io {
1414 path: Some(output_path.to_path_buf()),
1415 source,
1416 })?;
1417 temp.persist(output_path).map_err(|error| PdfvError::Io {
1418 path: Some(output_path.to_path_buf()),
1419 source: error.error,
1420 })?;
1421 Ok(())
1422}
1423
1424#[allow(
1425 clippy::disallowed_methods,
1426 reason = "metadata repair removes failed synchronous output artifacts"
1427)]
1428fn remove_failed_output(output_path: &Path) -> Result<()> {
1429 match std::fs::remove_file(output_path) {
1430 Ok(()) => Ok(()),
1431 Err(error) if error.kind() == std::io::ErrorKind::NotFound => Ok(()),
1432 Err(source) => Err(PdfvError::Io {
1433 path: Some(output_path.to_path_buf()),
1434 source,
1435 }),
1436 }
1437}
1438
1439fn refused_repair_report(source: InputSummary, refusal: RepairRefusal) -> RepairReport {
1440 RepairReport::builder()
1441 .engine_version(ENGINE_VERSION.to_owned())
1442 .source(source)
1443 .output_path(None)
1444 .status(RepairStatus::Refused)
1445 .actions(Vec::new())
1446 .refusal(Some(refusal))
1447 .warnings(Vec::new())
1448 .task_durations(Vec::new())
1449 .build()
1450}
1451
1452fn failed_repair_report(
1453 source: InputSummary,
1454 output_path: Option<PathBuf>,
1455 reason: &str,
1456) -> RepairReport {
1457 RepairReport::builder()
1458 .engine_version(ENGINE_VERSION.to_owned())
1459 .source(source)
1460 .output_path(output_path)
1461 .status(RepairStatus::Failed)
1462 .actions(Vec::new())
1463 .refusal(None)
1464 .warnings(vec![ValidationWarning::General {
1465 message: BoundedText::new(reason, 512)
1466 .unwrap_or_else(|_| BoundedText::unchecked("metadata repair failed")),
1467 }])
1468 .task_durations(Vec::new())
1469 .build()
1470}
1471
1472fn default_parse_failed_text() -> BoundedText {
1473 BoundedText::unchecked("parse failed")
1474}
1475
/// Summary of an input that a validation or repair report refers to.
///
/// Serialized with camelCase keys; unknown keys are rejected when deserializing.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct InputSummary {
    /// Kind of input, see [`InputKind`].
    pub kind: InputKind,
    /// Path associated with the input, if any; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub path: Option<PathBuf>,
    /// Byte count, if known; omitted from output when `None`.
    /// NOTE(review): presumably the input size in bytes — confirm against callers.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bytes: Option<u64>,
}

impl InputSummary {
    /// Creates a summary from its three fields, stored unchanged.
    #[must_use]
    pub fn new(kind: InputKind, path: Option<PathBuf>, bytes: Option<u64>) -> Self {
        Self { kind, path, bytes }
    }
}
1498
/// Kind of input recorded in an [`InputSummary`].
///
/// Variant names serialize in camelCase.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum InputKind {
    /// Serialized as `"file"`.
    File,
    /// Serialized as `"memory"`.
    Memory,
}
1509
/// Overall status of a single validation report.
///
/// Variant names serialize in camelCase. [`BatchSummary`] keeps one counter per
/// variant and derives its worst exit category from those counters.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum ValidationStatus {
    /// Counted in `BatchSummary::valid`.
    Valid,
    /// Counted in `BatchSummary::invalid`.
    Invalid,
    /// Counted in `BatchSummary::encrypted`.
    Encrypted,
    /// Counted in `BatchSummary::incomplete`.
    Incomplete,
    /// Counted in `BatchSummary::parse_failures`.
    ParseFailed,
}
1526
/// Per-profile section of a validation report.
///
/// Serialized with camelCase keys; unknown keys are rejected when deserializing.
/// A `TypedBuilder` is derived for construction.
#[derive(Clone, Debug, Deserialize, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ProfileReport {
    /// Identity of the profile this section refers to.
    pub profile: ProfileIdentity,
    /// Compliance verdict recorded for this profile.
    pub is_compliant: bool,
    /// Number of checks executed.
    pub checks_executed: u64,
    /// Number of rules executed.
    pub rules_executed: u64,
    /// Number of rules that failed.
    pub failed_rules: u64,
    /// Assertions recorded as failed.
    pub failed_assertions: Vec<Assertion>,
    /// Assertions recorded as passed.
    pub passed_assertions: Vec<Assertion>,
    /// Rules reported as unsupported.
    pub unsupported_rules: Vec<UnsupportedRule>,
}
1549
/// Identifies a validation profile inside a report.
///
/// Serialized with camelCase keys; unknown keys are rejected when deserializing.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ProfileIdentity {
    /// Profile identifier.
    pub id: Identifier,
    /// Profile name.
    pub name: BoundedText,
    /// Profile version, if any; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub version: Option<Identifier>,
}
1563
/// One recorded rule assertion (passed or failed) in a profile report.
///
/// Serialized with camelCase keys; unknown keys are rejected when deserializing.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct Assertion {
    /// Non-zero ordinal of the assertion.
    /// NOTE(review): presumably a 1-based sequence number within the report — confirm.
    pub ordinal: NonZeroU64,
    /// Identifier of the rule that produced this assertion.
    pub rule_id: RuleId,
    /// Whether the assertion passed or failed.
    pub status: AssertionStatus,
    /// Description text for the assertion.
    pub description: BoundedText,
    /// Location the assertion refers to.
    pub location: ObjectLocation,
    /// Optional object context text; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub object_context: Option<BoundedText>,
    /// Optional message text; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub message: Option<BoundedText>,
    /// Named arguments attached to the assertion.
    pub error_arguments: Vec<ErrorArgument>,
}
1588
/// Outcome of a single [`Assertion`]; variant names serialize in camelCase.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum AssertionStatus {
    /// Serialized as `"passed"`.
    Passed,
    /// Serialized as `"failed"`.
    Failed,
}
1599
/// Newtype around an [`Identifier`] naming a rule.
///
/// `#[serde(transparent)]` makes it serialize exactly like the wrapped identifier.
#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
#[serde(transparent)]
pub struct RuleId(pub Identifier);
1604
/// Location information attached to assertions and parse facts.
///
/// Every field is optional and omitted from serialized output when `None`.
/// Serialized with camelCase keys; unknown keys are rejected when deserializing.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ObjectLocation {
    /// Object key of the referenced object, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub object: Option<ObjectKey>,
    /// Numeric offset, if any.
    /// NOTE(review): presumably a byte offset into the input — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub offset: Option<u64>,
    /// Textual path, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub path: Option<BoundedText>,
}
1620
/// Key of an object: a non-zero object number plus a generation number.
///
/// The derived ordering compares `number` first, then `generation` (field order).
/// Serialized with camelCase keys; unknown keys are rejected when deserializing.
#[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ObjectKey {
    /// Object number; never zero.
    pub number: NonZeroU32,
    /// Generation number.
    pub generation: u16,
}

impl ObjectKey {
    /// Creates a key from an object number and a generation number.
    #[must_use]
    pub fn new(number: NonZeroU32, generation: u16) -> Self {
        Self { number, generation }
    }
}
1639
/// Named argument attached to an [`Assertion`].
///
/// Serialized with camelCase keys; unknown keys are rejected when deserializing.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ErrorArgument {
    /// Argument name.
    pub name: Identifier,
    /// Argument value as text.
    pub value: BoundedText,
}
1650
/// Record of a rule reported as unsupported in a [`ProfileReport`].
///
/// Serialized with camelCase keys; unknown keys are rejected when deserializing.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct UnsupportedRule {
    /// Identifier of the profile the rule belongs to.
    pub profile_id: Identifier,
    /// Identifier of the rule.
    pub rule_id: RuleId,
    /// Optional expression fragment; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub expression_fragment: Option<BoundedText>,
    /// Reason text; the text report prints it next to the rule id.
    pub reason: BoundedText,
    /// Specification references attached to the rule.
    pub references: Vec<SpecReference>,
}
1668
/// Reference to a clause of a specification.
///
/// Serialized with camelCase keys; unknown keys are rejected when deserializing.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct SpecReference {
    /// Specification text.
    pub specification: BoundedText,
    /// Clause text.
    pub clause: BoundedText,
}
1679
/// Fact recorded about an input while parsing it.
///
/// Serialized as an internally tagged enum: the variant name, in camelCase, is
/// stored under the `kind` key next to the variant's fields.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", tag = "kind")]
pub enum ParseFact {
    /// Fact about the file header.
    Header {
        /// Offset recorded for the header.
        offset: u64,
        /// PDF version recorded for the header.
        version: PdfVersion,
        /// Leading-bytes flag; serialized under the key `hadLeadingBytes`.
        #[serde(rename = "hadLeadingBytes")]
        had_leading_bytes: bool,
    },
    /// Fact about data after the end-of-file marker.
    PostEofData {
        /// Byte count recorded for the trailing data.
        bytes: u64,
    },
    /// Fact about a cross-reference section.
    Xref {
        /// Location of the section.
        section: ObjectLocation,
        /// The cross-reference fact itself.
        fact: XrefFact,
    },
    /// Fact about a stream object.
    Stream {
        /// Key of the stream object.
        object: ObjectKey,
        /// The stream fact itself.
        fact: StreamFact,
    },
    /// Fact about encryption.
    Encryption {
        /// Encryption flag.
        encrypted: bool,
        /// Handler identifier, if any; omitted from output when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        handler: Option<Identifier>,
        /// Version number, if any; omitted from output when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        version: Option<u8>,
        /// Revision number, if any; omitted from output when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        revision: Option<u8>,
        /// Algorithm identifier, if any; omitted from output when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        algorithm: Option<Identifier>,
        /// Decryption flag.
        decrypted: bool,
    },
    /// Fact about an XMP metadata object.
    Xmp {
        /// Key of the object.
        object: ObjectKey,
        /// The XMP fact itself.
        fact: XmpFact,
    },
}
1741
/// PDF version as a major/minor pair.
///
/// Serialized with camelCase keys; unknown keys are rejected when deserializing.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct PdfVersion {
    /// Major version number.
    pub major: u8,
    /// Minor version number.
    pub minor: u8,
}
1752
/// Fact about a cross-reference section, carried by `ParseFact::Xref`.
///
/// Variant names serialize in camelCase.
///
/// NOTE(review): a container-level `rename_all` on an enum renames variants
/// only, so the `compressed_entries` field keeps its snake_case key — confirm
/// this is the intended wire format.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum XrefFact {
    /// Serialized as `"eolMarkersComply"`.
    EolMarkersComply,
    /// Serialized as `"malformedClassic"`.
    MalformedClassic,
    /// Serialized as `"xrefStreamUnsupported"`.
    XrefStreamUnsupported,
    /// A cross-reference stream was parsed.
    XrefStreamParsed {
        /// Entry count.
        entries: u64,
        /// Compressed-entry count.
        compressed_entries: u64,
    },
    /// Fact about a previous-section link.
    PrevChain {
        /// Offset recorded for the link.
        offset: u64,
    },
    /// Fact about a hybrid reference.
    HybridReference {
        /// Offset recorded for the reference.
        offset: u64,
    },
    /// Serialized as `"objectStreamParsed"`.
    ObjectStreamParsed,
}
1784
/// Fact about a stream object, carried by `ParseFact::Stream`.
///
/// Serialized as an internally tagged enum: the variant name, in camelCase, is
/// stored under the `fact` key. Unknown keys are rejected when deserializing.
///
/// NOTE(review): a container-level `rename_all` on an enum renames variants
/// only, so `input_bytes` and `output_bytes` keep their snake_case keys —
/// confirm this is the intended wire format.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields, tag = "fact")]
pub enum StreamFact {
    /// Declared versus discovered stream length.
    Length {
        /// Declared length.
        declared: u64,
        /// Discovered length.
        discovered: u64,
    },
    /// Compliance flags for the spacing around the stream keywords.
    KeywordSpacing {
        /// Serialized under the key `streamKeywordCRLFCompliant`.
        #[serde(rename = "streamKeywordCRLFCompliant")]
        stream_keyword_crlf_compliant: bool,
        /// Serialized under the key `endstreamKeywordEolCompliant`.
        #[serde(rename = "endstreamKeywordEolCompliant")]
        endstream_keyword_eol_compliant: bool,
    },
    /// The stream was decoded.
    Decoded {
        /// Byte count recorded for the decoded data.
        bytes: u64,
    },
    /// A single filter was applied.
    FilterDecoded {
        /// Filter identifier.
        filter: Identifier,
        /// Input byte count.
        input_bytes: u64,
        /// Output byte count.
        output_bytes: u64,
    },
    /// A filter was handled in metadata mode.
    /// NOTE(review): the exact meaning of "metadata mode" is defined by the
    /// decoder, not visible here — confirm.
    FilterMetadataMode {
        /// Filter identifier.
        filter: Identifier,
        /// Byte count recorded for the stream.
        bytes: u64,
    },
}
1828
/// Fact about an XMP packet, carried by `ParseFact::Xmp`.
///
/// Serialized as an internally tagged enum: the variant name, in camelCase, is
/// stored under the `fact` key. Unknown keys are rejected when deserializing.
///
/// NOTE(review): a container-level `rename_all` on an enum renames variants
/// only, so `display_flavour` and `namespace_uri` keep their snake_case keys —
/// confirm this is the intended wire format.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields, tag = "fact")]
pub enum XmpFact {
    /// A packet was parsed.
    PacketParsed {
        /// Byte count.
        bytes: u64,
        /// Namespace count.
        namespaces: u64,
        /// Claim count.
        claims: u64,
    },
    /// Serialized as `"missingPacketWrapper"`.
    MissingPacketWrapper,
    /// A flavour claim was found.
    FlavourClaim {
        /// Family identifier.
        family: Identifier,
        /// Display text of the flavour.
        display_flavour: BoundedText,
        /// Namespace URI text.
        namespace_uri: BoundedText,
    },
    /// The packet was malformed.
    Malformed {
        /// Reason text.
        reason: BoundedText,
    },
    /// The XML was rejected as hostile.
    HostileXmlRejected {
        /// Reason text.
        reason: BoundedText,
    },
}
1865
/// Warning attached to a validation, batch, or repair report.
///
/// Serialized as an internally tagged enum: the variant name, in camelCase, is
/// stored under the `kind` key.
///
/// NOTE(review): a container-level `rename_all` on an enum renames variants
/// only, so `profile_id` keeps its snake_case key — confirm this is the
/// intended wire format.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", tag = "kind")]
pub enum ValidationWarning {
    /// The parse fact cap was reached.
    ParseFactCapReached {
        /// The cap value.
        cap: usize,
    },
    /// A profile was incompatible.
    IncompatibleProfile {
        /// Identifier of the profile.
        profile_id: Identifier,
        /// Reason text.
        reason: BoundedText,
    },
    /// Warning related to auto detection.
    AutoDetection {
        /// Warning text.
        message: BoundedText,
    },
    /// Free-form warning.
    General {
        /// Warning text.
        message: BoundedText,
    },
}
1894
1895impl ValidationWarning {
1896 #[must_use]
1898 pub fn message_text(&self) -> BoundedText {
1899 match self {
1900 Self::ParseFactCapReached { cap } => {
1901 BoundedText::unchecked(format!("parse fact cap reached: {cap}"))
1902 }
1903 Self::IncompatibleProfile { profile_id, reason } => BoundedText::unchecked(format!(
1904 "incompatible profile {}: {}",
1905 profile_id.as_str(),
1906 reason.as_str()
1907 )),
1908 Self::AutoDetection { message } => {
1909 BoundedText::unchecked(format!("auto detection: {}", message.as_str()))
1910 }
1911 Self::General { message } => message.clone(),
1912 }
1913 }
1914}
1915
/// Duration of a named task, in whole milliseconds.
///
/// Serialized with camelCase keys; unknown keys are rejected when deserializing.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct TaskDuration {
    /// Task identifier.
    pub task: Identifier,
    /// Duration in milliseconds.
    pub millis: u64,
}

impl TaskDuration {
    /// Creates a record from a [`Duration`].
    ///
    /// The duration is converted to whole milliseconds; a value that does not
    /// fit in `u64` is stored as `u64::MAX`.
    #[must_use]
    pub fn from_duration(task: Identifier, duration: Duration) -> Self {
        // `as_millis` returns `u128`; saturate instead of truncating.
        let millis = u64::try_from(duration.as_millis()).unwrap_or(u64::MAX);
        Self { task, millis }
    }
}
1937
/// Report covering several validated inputs.
///
/// Serialized with camelCase keys; unknown keys are rejected when deserializing.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct BatchReport {
    /// One validation report per input.
    pub items: Vec<ValidationReport>,
    /// Aggregated counters for the batch.
    pub summary: BatchSummary,
    /// Batch-level warnings.
    pub warnings: Vec<ValidationWarning>,
}
1950
1951impl BatchReport {
1952 #[must_use]
1954 pub fn from_items(
1955 items: Vec<ValidationReport>,
1956 warnings: Vec<ValidationWarning>,
1957 elapsed: Duration,
1958 ) -> Self {
1959 let summary = BatchSummary::from_items(&items, elapsed);
1960 Self {
1961 items,
1962 summary,
1963 warnings,
1964 }
1965 }
1966
1967 #[must_use]
1969 pub fn from_items_with_internal_errors(
1970 items: Vec<ValidationReport>,
1971 warnings: Vec<ValidationWarning>,
1972 elapsed: Duration,
1973 internal_errors: u64,
1974 ) -> Self {
1975 let summary =
1976 BatchSummary::from_items_with_internal_errors(&items, elapsed, internal_errors);
1977 Self {
1978 items,
1979 summary,
1980 warnings,
1981 }
1982 }
1983}
1984
/// Aggregated counters for a [`BatchReport`].
///
/// Serialized with camelCase keys; unknown keys are rejected when deserializing.
/// `Default` yields all-zero counters with `ExitCategory::Success`.
#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize, TypedBuilder)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct BatchSummary {
    /// Total number of files accounted for.
    pub total_files: u64,
    /// Number of items with status `Valid`.
    pub valid: u64,
    /// Number of items with status `Invalid`.
    pub invalid: u64,
    /// Number of items with status `ParseFailed`.
    pub parse_failures: u64,
    /// Number of items with status `Encrypted`.
    pub encrypted: u64,
    /// Number of items with status `Incomplete`.
    pub incomplete: u64,
    /// Number of internal errors.
    pub internal_errors: u64,
    /// Elapsed time in milliseconds.
    pub elapsed_millis: u64,
    /// Worst exit category across the batch.
    pub worst_exit_category: ExitCategory,
}
2009
2010impl BatchSummary {
2011 #[must_use]
2013 pub fn from_items(items: &[ValidationReport], elapsed: Duration) -> Self {
2014 let mut summary = Self {
2015 total_files: u64::try_from(items.len()).unwrap_or(u64::MAX),
2016 elapsed_millis: duration_millis(elapsed),
2017 ..Self::default()
2018 };
2019 summary.apply_items(items);
2020 summary.finish()
2021 }
2022
2023 #[must_use]
2025 pub fn from_items_with_internal_errors(
2026 items: &[ValidationReport],
2027 elapsed: Duration,
2028 internal_errors: u64,
2029 ) -> Self {
2030 let mut summary = Self {
2031 total_files: u64::try_from(items.len())
2032 .unwrap_or(u64::MAX)
2033 .saturating_add(internal_errors),
2034 elapsed_millis: duration_millis(elapsed),
2035 internal_errors,
2036 ..Self::default()
2037 };
2038 summary.apply_items(items);
2039 summary.finish()
2040 }
2041
2042 fn apply_items(&mut self, items: &[ValidationReport]) {
2043 for report in items {
2044 match report.status {
2045 ValidationStatus::Valid => self.valid = self.valid.saturating_add(1),
2046 ValidationStatus::Invalid => self.invalid = self.invalid.saturating_add(1),
2047 ValidationStatus::ParseFailed => {
2048 self.parse_failures = self.parse_failures.saturating_add(1);
2049 }
2050 ValidationStatus::Encrypted => {
2051 self.encrypted = self.encrypted.saturating_add(1);
2052 }
2053 ValidationStatus::Incomplete => {
2054 self.incomplete = self.incomplete.saturating_add(1);
2055 }
2056 }
2057 }
2058 }
2059
2060 fn finish(mut self) -> Self {
2061 self.worst_exit_category = if self.parse_failures > 0
2062 || self.encrypted > 0
2063 || self.incomplete > 0
2064 || self.internal_errors > 0
2065 {
2066 ExitCategory::ProcessingFailed
2067 } else if self.invalid > 0 {
2068 ExitCategory::ValidationFailed
2069 } else {
2070 ExitCategory::Success
2071 };
2072 self
2073 }
2074}
2075
/// Coarse outcome category; variant names serialize in camelCase.
///
/// The default is [`ExitCategory::Success`].
#[derive(Clone, Copy, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum ExitCategory {
    /// Default category; serialized as `"success"`.
    #[default]
    Success,
    /// Serialized as `"validationFailed"`.
    ValidationFailed,
    /// Serialized as `"processingFailed"`.
    ProcessingFailed,
    /// Serialized as `"internalError"`.
    InternalError,
}
2091
/// Output format selector for reports; variant names serialize in camelCase.
///
/// Each variant is dispatched to the matching [`ReportWriter`] implementation.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum ReportFormat {
    /// Compact JSON via `JsonReportWriter::compact`.
    Json,
    /// Pretty-printed JSON via `JsonReportWriter::pretty`.
    JsonPretty,
    /// Plain text via `TextReportWriter`.
    Text,
    /// XML via `XmlReportWriter`.
    Xml,
    /// Raw XML via `RawXmlReportWriter`.
    RawXml,
    /// HTML via `HtmlReportWriter`.
    Html,
}
2110
impl ReportFormat {
    /// Writes a single validation report to `out` in this format.
    ///
    /// # Errors
    ///
    /// Returns whatever error the selected writer reports.
    pub fn write_report<W: Write>(&self, report: &ValidationReport, out: W) -> Result<()> {
        match self {
            Self::Json => JsonReportWriter::compact().write_report(report, out),
            Self::JsonPretty => JsonReportWriter::pretty().write_report(report, out),
            Self::Text => TextReportWriter.write_report(report, out),
            Self::Xml => XmlReportWriter.write_report(report, out),
            Self::RawXml => RawXmlReportWriter.write_report(report, out),
            Self::Html => HtmlReportWriter.write_report(report, out),
        }
    }

    /// Writes a batch validation report to `out` in this format.
    ///
    /// # Errors
    ///
    /// Returns whatever error the selected writer reports.
    pub fn write_batch<W: Write>(&self, report: &BatchReport, out: W) -> Result<()> {
        match self {
            Self::Json => JsonReportWriter::compact().write_batch(report, out),
            Self::JsonPretty => JsonReportWriter::pretty().write_batch(report, out),
            Self::Text => TextReportWriter.write_batch(report, out),
            Self::Xml => XmlReportWriter.write_batch(report, out),
            Self::RawXml => RawXmlReportWriter.write_batch(report, out),
            Self::Html => HtmlReportWriter.write_batch(report, out),
        }
    }

    /// Writes a single repair report to `out` in this format.
    ///
    /// # Errors
    ///
    /// Returns whatever error the selected writer reports.
    pub fn write_repair_report<W: Write>(&self, report: &RepairReport, out: W) -> Result<()> {
        match self {
            Self::Json => JsonReportWriter::compact().write_repair_report(report, out),
            Self::JsonPretty => JsonReportWriter::pretty().write_repair_report(report, out),
            Self::Text => TextReportWriter.write_repair_report(report, out),
            Self::Xml => XmlReportWriter.write_repair_report(report, out),
            Self::RawXml => RawXmlReportWriter.write_repair_report(report, out),
            Self::Html => HtmlReportWriter.write_repair_report(report, out),
        }
    }

    /// Writes a batch repair report to `out` in this format.
    ///
    /// # Errors
    ///
    /// Returns whatever error the selected writer reports.
    pub fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, out: W) -> Result<()> {
        match self {
            Self::Json => JsonReportWriter::compact().write_repair_batch(report, out),
            Self::JsonPretty => JsonReportWriter::pretty().write_repair_batch(report, out),
            Self::Text => TextReportWriter.write_repair_batch(report, out),
            Self::Xml => XmlReportWriter.write_repair_batch(report, out),
            Self::RawXml => RawXmlReportWriter.write_repair_batch(report, out),
            Self::Html => HtmlReportWriter.write_repair_batch(report, out),
        }
    }
}
2176
/// Serializes validation and repair reports to a [`Write`] sink.
///
/// One implementation exists per output format selectable through `ReportFormat`.
pub trait ReportWriter {
    /// Writes a single validation report to `out`.
    ///
    /// # Errors
    ///
    /// Returns an error when the report cannot be written.
    fn write_report<W: Write>(&self, report: &ValidationReport, out: W) -> Result<()>;

    /// Writes a batch validation report to `out`.
    ///
    /// # Errors
    ///
    /// Returns an error when the report cannot be written.
    fn write_batch<W: Write>(&self, report: &BatchReport, out: W) -> Result<()>;

    /// Writes a single repair report to `out`.
    ///
    /// # Errors
    ///
    /// Returns an error when the report cannot be written.
    fn write_repair_report<W: Write>(&self, report: &RepairReport, out: W) -> Result<()>;

    /// Writes a batch repair report to `out`.
    ///
    /// # Errors
    ///
    /// Returns an error when the report cannot be written.
    fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, out: W) -> Result<()>;
}
2207
/// Report writer that serializes reports as JSON through `write_json`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct JsonReportWriter {
    // Selects pretty-printed output when true, compact output otherwise.
    pretty: bool,
}

impl JsonReportWriter {
    /// Creates a writer that emits compact JSON.
    #[must_use]
    pub fn compact() -> Self {
        Self { pretty: false }
    }

    /// Creates a writer that emits pretty-printed JSON.
    #[must_use]
    pub fn pretty() -> Self {
        Self { pretty: true }
    }
}

// Every method serializes the report value as-is; only the `pretty` flag varies.
impl ReportWriter for JsonReportWriter {
    fn write_report<W: Write>(&self, report: &ValidationReport, out: W) -> Result<()> {
        write_json(out, report, self.pretty)
    }

    fn write_batch<W: Write>(&self, report: &BatchReport, out: W) -> Result<()> {
        write_json(out, report, self.pretty)
    }

    fn write_repair_report<W: Write>(&self, report: &RepairReport, out: W) -> Result<()> {
        write_json(out, report, self.pretty)
    }

    fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, out: W) -> Result<()> {
        write_json(out, report, self.pretty)
    }
}
2245
2246#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
2248pub struct TextReportWriter;
2249
2250impl ReportWriter for TextReportWriter {
2251 fn write_report<W: Write>(&self, report: &ValidationReport, mut out: W) -> Result<()> {
2252 write_text_report(report, &mut out)
2253 }
2254
2255 fn write_batch<W: Write>(&self, report: &BatchReport, mut out: W) -> Result<()> {
2256 writeln!(
2257 out,
2258 "batch: {}",
2259 exit_category_text(report.summary.worst_exit_category)
2260 )
2261 .map_err(write_error)?;
2262 writeln!(out, "files: {}", report.summary.total_files).map_err(write_error)?;
2263 writeln!(
2264 out,
2265 "summary: {} valid, {} invalid, {} parse failed, {} encrypted, {} incomplete, {} \
2266 internal errors",
2267 report.summary.valid,
2268 report.summary.invalid,
2269 report.summary.parse_failures,
2270 report.summary.encrypted,
2271 report.summary.incomplete,
2272 report.summary.internal_errors,
2273 )
2274 .map_err(write_error)?;
2275 if !report.warnings.is_empty() {
2276 writeln!(out, "warnings: {}", report.warnings.len()).map_err(write_error)?;
2277 }
2278 writeln!(out, "items:").map_err(write_error)?;
2279 for item in &report.items {
2280 writeln!(
2281 out,
2282 " {}: {}",
2283 source_name(&item.source),
2284 status_text(item.status)
2285 )
2286 .map_err(write_error)?;
2287 }
2288 Ok(())
2289 }
2290
2291 fn write_repair_report<W: Write>(&self, report: &RepairReport, mut out: W) -> Result<()> {
2292 write_text_repair_report(report, &mut out)
2293 }
2294
2295 fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, mut out: W) -> Result<()> {
2296 writeln!(
2297 out,
2298 "repair batch: {}",
2299 exit_category_text(report.summary.worst_exit_category)
2300 )
2301 .map_err(write_error)?;
2302 writeln!(out, "files: {}", report.summary.total_files).map_err(write_error)?;
2303 writeln!(
2304 out,
2305 "summary: {} repaired, {} unchanged, {} refused, {} failed",
2306 report.summary.succeeded,
2307 report.summary.no_action,
2308 report.summary.refused,
2309 report.summary.failed,
2310 )
2311 .map_err(write_error)?;
2312 writeln!(out, "items:").map_err(write_error)?;
2313 for item in &report.items {
2314 writeln!(
2315 out,
2316 " {}: {}",
2317 source_name(&item.source),
2318 repair_status_text(item.status),
2319 )
2320 .map_err(write_error)?;
2321 }
2322 Ok(())
2323 }
2324}
2325
/// Report writer that emits XML through `write_xml_batch` and
/// `write_xml_repair_batch` (root element `repairReport`).
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct XmlReportWriter;

impl ReportWriter for XmlReportWriter {
    fn write_report<W: Write>(&self, report: &ValidationReport, mut out: W) -> Result<()> {
        // A single report is rendered as a one-item batch (cloned report, no
        // batch warnings, zero elapsed time).
        let batch = BatchReport::from_items(vec![report.clone()], Vec::new(), Duration::ZERO);
        write_xml_batch(&batch, &mut out)
    }

    fn write_batch<W: Write>(&self, report: &BatchReport, mut out: W) -> Result<()> {
        write_xml_batch(report, &mut out)
    }

    fn write_repair_report<W: Write>(&self, report: &RepairReport, mut out: W) -> Result<()> {
        // Same one-item-batch wrapping as `write_report`.
        let batch = RepairBatchReport::from_items(vec![report.clone()], Vec::new(), Duration::ZERO);
        write_xml_repair_batch(&batch, &mut out, "repairReport")
    }

    fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, mut out: W) -> Result<()> {
        write_xml_repair_batch(report, &mut out, "repairReport")
    }
}
2349
/// Report writer that emits raw XML through `write_raw_xml_batch` and
/// `write_xml_repair_batch` (root element `rawRepairReport`).
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct RawXmlReportWriter;

impl ReportWriter for RawXmlReportWriter {
    fn write_report<W: Write>(&self, report: &ValidationReport, mut out: W) -> Result<()> {
        // A single report is rendered as a one-item batch (cloned report, no
        // batch warnings, zero elapsed time).
        let batch = BatchReport::from_items(vec![report.clone()], Vec::new(), Duration::ZERO);
        write_raw_xml_batch(&batch, &mut out)
    }

    fn write_batch<W: Write>(&self, report: &BatchReport, mut out: W) -> Result<()> {
        write_raw_xml_batch(report, &mut out)
    }

    fn write_repair_report<W: Write>(&self, report: &RepairReport, mut out: W) -> Result<()> {
        // Same one-item-batch wrapping as `write_report`.
        let batch = RepairBatchReport::from_items(vec![report.clone()], Vec::new(), Duration::ZERO);
        write_xml_repair_batch(&batch, &mut out, "rawRepairReport")
    }

    fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, mut out: W) -> Result<()> {
        write_xml_repair_batch(report, &mut out, "rawRepairReport")
    }
}
2373
/// Report writer that emits HTML through `write_html_batch` and
/// `write_html_repair_batch`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct HtmlReportWriter;

impl ReportWriter for HtmlReportWriter {
    fn write_report<W: Write>(&self, report: &ValidationReport, mut out: W) -> Result<()> {
        // A single report is rendered as a one-item batch (cloned report, no
        // batch warnings, zero elapsed time).
        let batch = BatchReport::from_items(vec![report.clone()], Vec::new(), Duration::ZERO);
        write_html_batch(&batch, &mut out)
    }

    fn write_batch<W: Write>(&self, report: &BatchReport, mut out: W) -> Result<()> {
        write_html_batch(report, &mut out)
    }

    fn write_repair_report<W: Write>(&self, report: &RepairReport, mut out: W) -> Result<()> {
        // Same one-item-batch wrapping as `write_report`.
        let batch = RepairBatchReport::from_items(vec![report.clone()], Vec::new(), Duration::ZERO);
        write_html_repair_batch(&batch, &mut out)
    }

    fn write_repair_batch<W: Write>(&self, report: &RepairBatchReport, mut out: W) -> Result<()> {
        write_html_repair_batch(report, &mut out)
    }
}
2397
2398fn write_json<W, T>(out: W, value: &T, pretty: bool) -> Result<()>
2399where
2400 W: Write,
2401 T: Serialize,
2402{
2403 if pretty {
2404 serde_json::to_writer_pretty(out, value).map_err(ReportError::from)?;
2405 } else {
2406 serde_json::to_writer(out, value).map_err(ReportError::from)?;
2407 }
2408 Ok(())
2409}
2410
2411fn write_text_report<W: Write>(report: &ValidationReport, out: &mut W) -> Result<()> {
2412 writeln!(
2413 out,
2414 "{}: {}",
2415 source_name(&report.source),
2416 status_text(report.status),
2417 )
2418 .map_err(write_error)?;
2419 writeln!(out, "profiles: {}", profile_list(report)).map_err(write_error)?;
2420 let checks = check_counts(report);
2421 writeln!(
2422 out,
2423 "checks: {} passed, {} failed, {} unsupported",
2424 checks.passed, checks.failed, checks.unsupported,
2425 )
2426 .map_err(write_error)?;
2427 let failures = report
2428 .profile_reports
2429 .iter()
2430 .flat_map(|profile| profile.failed_assertions.iter())
2431 .take(5)
2432 .collect::<Vec<_>>();
2433 if !failures.is_empty() {
2434 writeln!(out, "first failures:").map_err(write_error)?;
2435 for assertion in failures {
2436 writeln!(
2437 out,
2438 " {} at {}: {}",
2439 assertion.rule_id.0.as_str(),
2440 location_text(&assertion.location),
2441 assertion_message(assertion),
2442 )
2443 .map_err(write_error)?;
2444 }
2445 }
2446 let unsupported = report
2447 .profile_reports
2448 .iter()
2449 .flat_map(|profile| profile.unsupported_rules.iter())
2450 .take(5)
2451 .collect::<Vec<_>>();
2452 if !unsupported.is_empty() {
2453 writeln!(out, "unsupported rules:").map_err(write_error)?;
2454 for rule in unsupported {
2455 writeln!(
2456 out,
2457 " {}: {}{}",
2458 rule.rule_id.0.as_str(),
2459 rule.reason.as_str(),
2460 reference_suffix(&rule.references),
2461 )
2462 .map_err(write_error)?;
2463 }
2464 }
2465 if !report.warnings.is_empty() {
2466 writeln!(out, "warnings: {}", report.warnings.len()).map_err(write_error)?;
2467 }
2468 if let Some(features) = &report.feature_report {
2469 writeln!(out, "features: {} objects", features.objects.len()).map_err(write_error)?;
2470 }
2471 if let Some(policy) = &report.policy_report {
2472 writeln!(
2473 out,
2474 "policy: {}",
2475 if policy.is_compliant {
2476 "compliant"
2477 } else {
2478 "non-compliant"
2479 }
2480 )
2481 .map_err(write_error)?;
2482 }
2483 Ok(())
2484}
2485
2486fn write_text_repair_report<W: Write>(report: &RepairReport, out: &mut W) -> Result<()> {
2487 writeln!(
2488 out,
2489 "{}: {}",
2490 source_name(&report.source),
2491 repair_status_text(report.status),
2492 )
2493 .map_err(write_error)?;
2494 if let Some(output_path) = &report.output_path {
2495 writeln!(out, "output: {}", output_path.display()).map_err(write_error)?;
2496 }
2497 if !report.actions.is_empty() {
2498 writeln!(out, "actions: {}", report.actions.len()).map_err(write_error)?;
2499 for action in &report.actions {
2500 writeln!(out, " {}", repair_action_text(action)).map_err(write_error)?;
2501 }
2502 }
2503 if let Some(refusal) = &report.refusal {
2504 writeln!(out, "refusal: {}", repair_refusal_text(refusal)).map_err(write_error)?;
2505 }
2506 if !report.warnings.is_empty() {
2507 writeln!(out, "warnings: {}", report.warnings.len()).map_err(write_error)?;
2508 }
2509 Ok(())
2510}
2511
/// Writes a batch validation report as an XML document rooted at `<report>`.
///
/// Emits the XML declaration, a `<buildInformation>` element carrying
/// `ENGINE_VERSION`, one job per item (via `write_xml_job`) inside `<jobs>`,
/// then the batch summary and the batch-level warnings.
///
/// # Errors
///
/// Propagates write failures (mapped through `write_error`), errors from
/// `XmlEscapedAttr::new`, and errors from the helper writers. Output written
/// before the failure is not rolled back.
fn write_xml_batch<W: Write>(report: &BatchReport, out: &mut W) -> Result<()> {
    writeln!(out, r#"<?xml version="1.0" encoding="utf-8"?>"#).map_err(write_error)?;
    writeln!(out, "<report>").map_err(write_error)?;
    writeln!(out, " <buildInformation>").map_err(write_error)?;
    writeln!(
        out,
        r#" <releaseDetails id="pdfv-core" version="{}"></releaseDetails>"#,
        XmlEscapedAttr::new(ENGINE_VERSION)?,
    )
    .map_err(write_error)?;
    writeln!(out, " </buildInformation>").map_err(write_error)?;
    writeln!(out, " <jobs>").map_err(write_error)?;
    for item in &report.items {
        write_xml_job(item, out)?;
    }
    writeln!(out, " </jobs>").map_err(write_error)?;
    write_xml_batch_summary(&report.summary, out)?;
    // NOTE(review): the trailing `2` is presumably the indentation width for
    // the warnings block — confirm against `write_xml_warnings`.
    write_xml_warnings(&report.warnings, out, 2)?;
    writeln!(out, "</report>").map_err(write_error)?;
    Ok(())
}
2533
/// Writes a batch validation report as an XML document rooted at `<rawReport>`.
///
/// Emits the XML declaration, the root element carrying `ENGINE_VERSION`, a
/// `<processorConfig>` element listing the tasks from `raw_validation_tasks`,
/// and one `<processorResult>` per item. Each result holds the item's source,
/// profile reports, optional feature and policy reports, parse facts, and
/// warnings. The batch summary follows the results.
///
/// # Errors
///
/// Propagates write failures (mapped through `write_error`), errors from
/// `XmlEscapedAttr::new`, and errors from the helper writers. Output written
/// before the failure is not rolled back.
fn write_raw_xml_batch<W: Write>(report: &BatchReport, out: &mut W) -> Result<()> {
    writeln!(out, r#"<?xml version="1.0" encoding="utf-8"?>"#).map_err(write_error)?;
    writeln!(
        out,
        r#"<rawReport engine="pdfv-core" version="{}">"#,
        XmlEscapedAttr::new(ENGINE_VERSION)?,
    )
    .map_err(write_error)?;
    writeln!(
        out,
        r#" <processorConfig tasks="{}"></processorConfig>"#,
        XmlEscapedAttr::new(&raw_validation_tasks(report))?,
    )
    .map_err(write_error)?;
    writeln!(out, " <processorResults>").map_err(write_error)?;
    for item in &report.items {
        // The status text is written into the attribute without escaping.
        writeln!(
            out,
            r#" <processorResult status="{}">"#,
            status_text(item.status),
        )
        .map_err(write_error)?;
        write_xml_item(&item.source, out)?;
        for profile in &item.profile_reports {
            write_xml_validation_report(item.status, profile, out)?;
        }
        if let Some(feature_report) = &item.feature_report {
            write_xml_feature_report(feature_report, out)?;
        }
        if let Some(policy_report) = &item.policy_report {
            write_xml_policy_report(policy_report, out)?;
        }
        write_xml_parse_facts(&item.parse_facts, out)?;
        write_xml_warnings(&item.warnings, out, 6)?;
        writeln!(out, " </processorResult>").map_err(write_error)?;
    }
    writeln!(out, " </processorResults>").map_err(write_error)?;
    write_xml_batch_summary(&report.summary, out)?;
    writeln!(out, "</rawReport>").map_err(write_error)?;
    Ok(())
}
2575
/// Writes a batch repair report as an XML document whose root element is `root`.
///
/// `root` is inserted into the opening and closing tags without escaping. The
/// callers visible in this file pass the literals `"repairReport"` and
/// `"rawRepairReport"`. When `root` is `"rawRepairReport"`, an extra
/// `<processorConfig tasks="metadata">` element is emitted.
///
/// # Errors
///
/// Propagates write failures (mapped through `write_error`), errors from
/// `XmlEscapedAttr::new`, and errors from the helper writers. Output written
/// before the failure is not rolled back.
fn write_xml_repair_batch<W: Write>(
    report: &RepairBatchReport,
    out: &mut W,
    root: &str,
) -> Result<()> {
    writeln!(out, r#"<?xml version="1.0" encoding="utf-8"?>"#).map_err(write_error)?;
    writeln!(
        out,
        r#"<{root} engine="pdfv-core" version="{}">"#,
        XmlEscapedAttr::new(ENGINE_VERSION)?,
    )
    .map_err(write_error)?;
    if root == "rawRepairReport" {
        writeln!(
            out,
            r#" <processorConfig tasks="metadata"></processorConfig>"#,
        )
        .map_err(write_error)?;
    }
    writeln!(out, " <items>").map_err(write_error)?;
    for item in &report.items {
        write_xml_repair_item(item, out)?;
    }
    writeln!(out, " </items>").map_err(write_error)?;
    write_xml_repair_summary(&report.summary, out)?;
    write_xml_warnings(&report.warnings, out, 2)?;
    writeln!(out, "</{root}>").map_err(write_error)?;
    Ok(())
}
2605
/// Writes one repair report as a `<repairItem>` element.
///
/// The element carries the status as an attribute and contains the source
/// item, an optional `<output>` path, an optional `<actions>` list, an optional
/// `<refusal>`, and the item's warnings. Text content goes through
/// `XmlEscapedText`. The status and `kind` attribute values are written as
/// returned by their helper functions, without escaping.
///
/// # Errors
///
/// Propagates write failures (mapped through `write_error`), errors from
/// `XmlEscapedText::new`, and errors from the helper writers.
fn write_xml_repair_item<W: Write>(report: &RepairReport, out: &mut W) -> Result<()> {
    writeln!(
        out,
        r#" <repairItem status="{}">"#,
        repair_status_text(report.status),
    )
    .map_err(write_error)?;
    write_xml_item(&report.source, out)?;
    if let Some(output_path) = &report.output_path {
        writeln!(
            out,
            " <output>{}</output>",
            XmlEscapedText::new(&output_path.display().to_string())?,
        )
        .map_err(write_error)?;
    }
    if !report.actions.is_empty() {
        writeln!(out, " <actions>").map_err(write_error)?;
        for action in &report.actions {
            writeln!(
                out,
                r#" <action kind="{}">{}</action>"#,
                repair_action_kind(action),
                XmlEscapedText::new(&repair_action_text(action))?,
            )
            .map_err(write_error)?;
        }
        writeln!(out, " </actions>").map_err(write_error)?;
    }
    if let Some(refusal) = &report.refusal {
        writeln!(
            out,
            r#" <refusal kind="{}">{}</refusal>"#,
            repair_refusal_kind(refusal),
            XmlEscapedText::new(&repair_refusal_text(refusal))?,
        )
        .map_err(write_error)?;
    }
    write_xml_warnings(&report.warnings, out, 6)?;
    writeln!(out, " </repairItem>").map_err(write_error)?;
    Ok(())
}
2648
2649fn write_xml_repair_summary<W: Write>(summary: &RepairBatchSummary, out: &mut W) -> Result<()> {
2650 writeln!(
2651 out,
2652 r#" <repairSummary totalJobs="{}" succeeded="{}" noAction="{}" refused="{}" failed="{}" elapsedMillis="{}"></repairSummary>"#,
2653 summary.total_files,
2654 summary.succeeded,
2655 summary.no_action,
2656 summary.refused,
2657 summary.failed,
2658 summary.elapsed_millis,
2659 )
2660 .map_err(write_error)?;
2661 Ok(())
2662}
2663
2664fn raw_validation_tasks(report: &BatchReport) -> String {
2665 let has_features = report
2666 .items
2667 .iter()
2668 .any(|item| item.feature_report.is_some());
2669 let has_policy = report.items.iter().any(|item| item.policy_report.is_some());
2670 let mut tasks = vec!["validation"];
2671 if has_features {
2672 tasks.push("features");
2673 }
2674 if has_policy {
2675 tasks.push("policy");
2676 }
2677 tasks.join(",")
2678}
2679
2680fn write_html_batch<W: Write>(report: &BatchReport, out: &mut W) -> Result<()> {
2681 write_html_start(out, "pdfv validation report")?;
2682 writeln!(out, "<h1>Validation Report</h1>").map_err(write_error)?;
2683 writeln!(
2684 out,
2685 "<p>{} valid, {} invalid, {} parse failed, {} encrypted, {} incomplete.</p>",
2686 report.summary.valid,
2687 report.summary.invalid,
2688 report.summary.parse_failures,
2689 report.summary.encrypted,
2690 report.summary.incomplete,
2691 )
2692 .map_err(write_error)?;
2693 writeln!(
2694 out,
2695 "<table><thead><tr><th>Input</th><th>Status</th><th>Profiles</th><th>Features</\
2696 th><th>Policy</th></tr></thead><tbody>"
2697 )
2698 .map_err(write_error)?;
2699 for item in &report.items {
2700 let features = item
2701 .feature_report
2702 .as_ref()
2703 .map_or(String::from("-"), |features| {
2704 features.objects.len().to_string()
2705 });
2706 let policy = item.policy_report.as_ref().map_or("-", |policy| {
2707 if policy.is_compliant {
2708 "compliant"
2709 } else {
2710 "non-compliant"
2711 }
2712 });
2713 writeln!(
2714 out,
2715 "<tr><td>{}</td><td>{}</td><td>{}</td><td>{}</td><td>{}</td></tr>",
2716 HtmlEscapedText::new(&source_name(&item.source))?,
2717 status_text(item.status),
2718 HtmlEscapedText::new(&profile_list(item))?,
2719 features,
2720 policy,
2721 )
2722 .map_err(write_error)?;
2723 }
2724 writeln!(out, "</tbody></table>").map_err(write_error)?;
2725 write_html_end(out)
2726}
2727
2728fn write_html_repair_batch<W: Write>(report: &RepairBatchReport, out: &mut W) -> Result<()> {
2729 write_html_start(out, "pdfv metadata repair report")?;
2730 writeln!(out, "<h1>Metadata Repair Report</h1>").map_err(write_error)?;
2731 writeln!(
2732 out,
2733 "<p>{} repaired, {} unchanged, {} refused, {} failed.</p>",
2734 report.summary.succeeded,
2735 report.summary.no_action,
2736 report.summary.refused,
2737 report.summary.failed,
2738 )
2739 .map_err(write_error)?;
2740 writeln!(
2741 out,
2742 "<table><thead><tr><th>Input</th><th>Status</th><th>Output</th><th>Reason</th></tr></\
2743 thead><tbody>"
2744 )
2745 .map_err(write_error)?;
2746 for item in &report.items {
2747 let output = item
2748 .output_path
2749 .as_ref()
2750 .map_or_else(String::new, |path| path.display().to_string());
2751 let reason = item
2752 .refusal
2753 .as_ref()
2754 .map_or_else(String::new, repair_refusal_text);
2755 writeln!(
2756 out,
2757 "<tr><td>{}</td><td>{}</td><td>{}</td><td>{}</td></tr>",
2758 HtmlEscapedText::new(&source_name(&item.source))?,
2759 repair_status_text(item.status),
2760 HtmlEscapedText::new(&output)?,
2761 HtmlEscapedText::new(&reason)?,
2762 )
2763 .map_err(write_error)?;
2764 }
2765 writeln!(out, "</tbody></table>").map_err(write_error)?;
2766 write_html_end(out)
2767}
2768
2769fn write_html_start<W: Write>(out: &mut W, title: &str) -> Result<()> {
2770 writeln!(out, "<!doctype html>").map_err(write_error)?;
2771 writeln!(
2772 out,
2773 r#"<html lang="en"><head><meta charset="utf-8"><title>{}</title><style>body{{font-family:system-ui,sans-serif;margin:2rem;color:#1f2937}}table{{border-collapse:collapse;width:100%}}th,td{{border:1px solid #d1d5db;padding:.4rem;text-align:left}}th{{background:#f3f4f6}}</style></head><body>"#,
2774 HtmlEscapedText::new(title)?,
2775 )
2776 .map_err(write_error)?;
2777 Ok(())
2778}
2779
2780fn write_html_end<W: Write>(out: &mut W) -> Result<()> {
2781 writeln!(out, "</body></html>").map_err(write_error)?;
2782 Ok(())
2783}
2784
2785fn write_xml_job<W: Write>(report: &ValidationReport, out: &mut W) -> Result<()> {
2786 writeln!(out, " <job>").map_err(write_error)?;
2787 write_xml_item(&report.source, out)?;
2788 for profile in &report.profile_reports {
2789 write_xml_validation_report(report.status, profile, out)?;
2790 }
2791 if report.profile_reports.is_empty() {
2792 writeln!(
2793 out,
2794 r#" <validationReport profileName="" statement="{}" isCompliant="false">"#,
2795 XmlEscapedAttr::new(status_statement(report.status))?,
2796 )
2797 .map_err(write_error)?;
2798 writeln!(
2799 out,
2800 r#" <details passedRules="0" failedRules="0" passedChecks="0" failedChecks="0" unsupportedRules="0"></details>"#,
2801 )
2802 .map_err(write_error)?;
2803 writeln!(out, " </validationReport>").map_err(write_error)?;
2804 }
2805 write_xml_parse_facts(&report.parse_facts, out)?;
2806 if let Some(feature_report) = &report.feature_report {
2807 write_xml_feature_report(feature_report, out)?;
2808 }
2809 if let Some(policy_report) = &report.policy_report {
2810 write_xml_policy_report(policy_report, out)?;
2811 }
2812 write_xml_warnings(&report.warnings, out, 6)?;
2813 writeln!(out, " </job>").map_err(write_error)?;
2814 Ok(())
2815}
2816
2817fn write_xml_item<W: Write>(source: &InputSummary, out: &mut W) -> Result<()> {
2818 let size = source
2819 .bytes
2820 .map_or_else(String::new, |bytes| format!(r#" size="{bytes}""#));
2821 writeln!(out, " <item{size}>").map_err(write_error)?;
2822 let name = source_name(source);
2823 writeln!(out, " <name>{}</name>", XmlEscapedText::new(&name)?).map_err(write_error)?;
2824 writeln!(out, " </item>").map_err(write_error)?;
2825 Ok(())
2826}
2827
2828fn write_xml_validation_report<W: Write>(
2829 status: ValidationStatus,
2830 profile: &ProfileReport,
2831 out: &mut W,
2832) -> Result<()> {
2833 writeln!(
2834 out,
2835 r#" <validationReport profileName="{}" statement="{}" isCompliant="{}">"#,
2836 XmlEscapedAttr::new(profile.profile.name.as_str())?,
2837 XmlEscapedAttr::new(status_statement(status))?,
2838 profile.is_compliant,
2839 )
2840 .map_err(write_error)?;
2841 let failed_checks = u64::try_from(profile.failed_assertions.len()).unwrap_or(u64::MAX);
2842 let unsupported_rules = u64::try_from(profile.unsupported_rules.len()).unwrap_or(u64::MAX);
2843 let passed_checks = profile.checks_executed.saturating_sub(failed_checks);
2844 let passed_rules = profile.rules_executed.saturating_sub(profile.failed_rules);
2845 writeln!(
2846 out,
2847 r#" <details passedRules="{passed_rules}" failedRules="{}" passedChecks="{passed_checks}" failedChecks="{failed_checks}" unsupportedRules="{unsupported_rules}"></details>"#,
2848 profile.failed_rules,
2849 )
2850 .map_err(write_error)?;
2851 write_xml_assertions("failedChecks", &profile.failed_assertions, out)?;
2852 write_xml_assertions("passedChecks", &profile.passed_assertions, out)?;
2853 write_xml_unsupported_rules(&profile.unsupported_rules, out)?;
2854 writeln!(out, " </validationReport>").map_err(write_error)?;
2855 Ok(())
2856}
2857
2858fn write_xml_assertions<W: Write>(
2859 element: &str,
2860 assertions: &[Assertion],
2861 out: &mut W,
2862) -> Result<()> {
2863 if assertions.is_empty() {
2864 return Ok(());
2865 }
2866 writeln!(out, " <{element}>").map_err(write_error)?;
2867 for assertion in assertions {
2868 writeln!(
2869 out,
2870 r#" <check ruleId="{}" status="{}" location="{}">"#,
2871 XmlEscapedAttr::new(assertion.rule_id.0.as_str())?,
2872 assertion_status_text(assertion.status),
2873 XmlEscapedAttr::new(&location_text(&assertion.location))?,
2874 )
2875 .map_err(write_error)?;
2876 writeln!(
2877 out,
2878 " <description>{}</description>",
2879 XmlEscapedText::new(assertion.description.as_str())?,
2880 )
2881 .map_err(write_error)?;
2882 if let Some(message) = &assertion.message {
2883 writeln!(
2884 out,
2885 " <message>{}</message>",
2886 XmlEscapedText::new(message.as_str())?,
2887 )
2888 .map_err(write_error)?;
2889 }
2890 if !assertion.error_arguments.is_empty() {
2891 writeln!(out, " <errorArguments>").map_err(write_error)?;
2892 for argument in &assertion.error_arguments {
2893 writeln!(
2894 out,
2895 r#" <argument name="{}">{}</argument>"#,
2896 XmlEscapedAttr::new(argument.name.as_str())?,
2897 XmlEscapedText::new(argument.value.as_str())?,
2898 )
2899 .map_err(write_error)?;
2900 }
2901 writeln!(out, " </errorArguments>").map_err(write_error)?;
2902 }
2903 writeln!(out, " </check>").map_err(write_error)?;
2904 }
2905 writeln!(out, " </{element}>").map_err(write_error)?;
2906 Ok(())
2907}
2908
2909fn write_xml_unsupported_rules<W: Write>(rules: &[UnsupportedRule], out: &mut W) -> Result<()> {
2910 if rules.is_empty() {
2911 return Ok(());
2912 }
2913 writeln!(out, " <unsupportedRules>").map_err(write_error)?;
2914 for rule in rules {
2915 writeln!(
2916 out,
2917 r#" <rule profileId="{}" ruleId="{}">"#,
2918 XmlEscapedAttr::new(rule.profile_id.as_str())?,
2919 XmlEscapedAttr::new(rule.rule_id.0.as_str())?,
2920 )
2921 .map_err(write_error)?;
2922 if let Some(fragment) = &rule.expression_fragment {
2923 writeln!(
2924 out,
2925 " <expression>{}</expression>",
2926 XmlEscapedText::new(fragment.as_str())?,
2927 )
2928 .map_err(write_error)?;
2929 }
2930 writeln!(
2931 out,
2932 " <reason>{}</reason>",
2933 XmlEscapedText::new(rule.reason.as_str())?,
2934 )
2935 .map_err(write_error)?;
2936 if !rule.references.is_empty() {
2937 writeln!(out, " <references>").map_err(write_error)?;
2938 for reference in &rule.references {
2939 writeln!(
2940 out,
2941 r#" <reference specification="{}" clause="{}"></reference>"#,
2942 XmlEscapedAttr::new(reference.specification.as_str())?,
2943 XmlEscapedAttr::new(reference.clause.as_str())?,
2944 )
2945 .map_err(write_error)?;
2946 }
2947 writeln!(out, " </references>").map_err(write_error)?;
2948 }
2949 writeln!(out, " </rule>").map_err(write_error)?;
2950 }
2951 writeln!(out, " </unsupportedRules>").map_err(write_error)?;
2952 Ok(())
2953}
2954
2955fn write_xml_feature_report<W: Write>(report: &FeatureReport, out: &mut W) -> Result<()> {
2956 writeln!(
2957 out,
2958 r#" <featureReport visitedObjects="{}" extractedObjects="{}" truncated="{}">"#,
2959 report.visited_objects,
2960 report.objects.len(),
2961 report.truncated,
2962 )
2963 .map_err(write_error)?;
2964 for object in &report.objects {
2965 writeln!(
2966 out,
2967 r#" <featureObject family="{}" location="{}">"#,
2968 XmlEscapedAttr::new(object.family.as_str())?,
2969 XmlEscapedAttr::new(&location_text(&object.location))?,
2970 )
2971 .map_err(write_error)?;
2972 for (name, value) in &object.properties {
2973 writeln!(
2974 out,
2975 r#" <property name="{}">"#,
2976 XmlEscapedAttr::new(name.as_str())?,
2977 )
2978 .map_err(write_error)?;
2979 write_xml_feature_value(value, out, 12)?;
2980 writeln!(out, " </property>").map_err(write_error)?;
2981 }
2982 writeln!(out, " </featureObject>").map_err(write_error)?;
2983 }
2984 writeln!(out, " </featureReport>").map_err(write_error)?;
2985 Ok(())
2986}
2987
2988fn write_xml_policy_report<W: Write>(report: &PolicyReport, out: &mut W) -> Result<()> {
2989 writeln!(
2990 out,
2991 r#" <policyReport name="{}" isCompliant="{}">"#,
2992 XmlEscapedAttr::new(report.name.as_ref().map_or("", BoundedText::as_str))?,
2993 report.is_compliant,
2994 )
2995 .map_err(write_error)?;
2996 for result in &report.results {
2997 writeln!(
2998 out,
2999 r#" <rule id="{}" passed="{}" matches="{}">"#,
3000 XmlEscapedAttr::new(result.id.as_str())?,
3001 result.passed,
3002 result.matches,
3003 )
3004 .map_err(write_error)?;
3005 writeln!(
3006 out,
3007 " <description>{}</description>",
3008 XmlEscapedText::new(result.description.as_str())?,
3009 )
3010 .map_err(write_error)?;
3011 writeln!(
3012 out,
3013 " <message>{}</message>",
3014 XmlEscapedText::new(result.message.as_str())?,
3015 )
3016 .map_err(write_error)?;
3017 writeln!(out, " </rule>").map_err(write_error)?;
3018 }
3019 writeln!(out, " </policyReport>").map_err(write_error)?;
3020 Ok(())
3021}
3022
3023fn reference_suffix(references: &[SpecReference]) -> String {
3024 let Some(reference) = references.first() else {
3025 return String::new();
3026 };
3027 format!(
3028 " [{} {}]",
3029 reference.specification.as_str(),
3030 reference.clause.as_str()
3031 )
3032}
3033
3034fn write_xml_feature_value<W: Write>(
3035 value: &FeatureValue,
3036 out: &mut W,
3037 indent: usize,
3038) -> Result<()> {
3039 let spaces = " ".repeat(indent);
3040 match value {
3041 FeatureValue::Null => writeln!(out, r#"{spaces}<value type="null"></value>"#),
3042 FeatureValue::Bool(value) => {
3043 writeln!(out, r#"{spaces}<value type="bool">{value}</value>"#)
3044 }
3045 FeatureValue::Number(value) => {
3046 writeln!(out, r#"{spaces}<value type="number">{value}</value>"#)
3047 }
3048 FeatureValue::String(value) => writeln!(
3049 out,
3050 r#"{spaces}<value type="string">{}</value>"#,
3051 XmlEscapedText::new(value.as_str())?,
3052 ),
3053 FeatureValue::RedactedString { bytes } => writeln!(
3054 out,
3055 r#"{spaces}<value type="redactedString" bytes="{bytes}"></value>"#
3056 ),
3057 FeatureValue::ObjectKey(value) => writeln!(
3058 out,
3059 r#"{spaces}<value type="objectKey" number="{}" generation="{}"></value>"#,
3060 value.number, value.generation,
3061 ),
3062 FeatureValue::List(values) => {
3063 writeln!(out, r#"{spaces}<value type="list">"#).map_err(write_error)?;
3064 for item in values {
3065 write_xml_feature_value(item, out, indent.saturating_add(2))?;
3066 }
3067 writeln!(out, "{spaces}</value>")
3068 }
3069 }
3070 .map_err(write_error)?;
3071 Ok(())
3072}
3073
3074fn write_xml_parse_facts<W: Write>(facts: &[ParseFact], out: &mut W) -> Result<()> {
3075 if facts.is_empty() {
3076 return Ok(());
3077 }
3078 writeln!(out, " <parseFacts>").map_err(write_error)?;
3079 for fact in facts {
3080 match fact {
3081 ParseFact::Header {
3082 offset,
3083 version,
3084 had_leading_bytes,
3085 } => writeln!(
3086 out,
3087 r#" <header offset="{offset}" version="{}.{}" hadLeadingBytes="{had_leading_bytes}"></header>"#,
3088 version.major,
3089 version.minor,
3090 )
3091 .map_err(write_error)?,
3092 ParseFact::PostEofData { bytes } => {
3093 writeln!(out, r#" <postEofData bytes="{bytes}"></postEofData>"#)
3094 .map_err(write_error)?;
3095 }
3096 ParseFact::Xref { section, fact } => writeln!(
3097 out,
3098 r#" <xref location="{}" fact="{}"></xref>"#,
3099 XmlEscapedAttr::new(&location_text(section))?,
3100 XmlEscapedAttr::new(&xref_fact_text(fact))?,
3101 )
3102 .map_err(write_error)?,
3103 ParseFact::Stream { object, fact } => writeln!(
3104 out,
3105 r#" <stream object="{} {}" fact="{}"></stream>"#,
3106 object.number,
3107 object.generation,
3108 XmlEscapedAttr::new(&stream_fact_text(fact))?,
3109 )
3110 .map_err(write_error)?,
3111 ParseFact::Encryption {
3112 encrypted,
3113 handler,
3114 version,
3115 revision,
3116 algorithm,
3117 decrypted,
3118 } => writeln!(
3119 out,
3120 r#" <encryption encrypted="{encrypted}" handler="{}" version="{}" revision="{}" algorithm="{}" decrypted="{decrypted}"></encryption>"#,
3121 XmlEscapedAttr::new(handler.as_ref().map_or("", Identifier::as_str))?,
3122 version.map_or_else(String::new, |value| value.to_string()),
3123 revision.map_or_else(String::new, |value| value.to_string()),
3124 XmlEscapedAttr::new(algorithm.as_ref().map_or("", Identifier::as_str))?,
3125 )
3126 .map_err(write_error)?,
3127 ParseFact::Xmp { object, fact } => writeln!(
3128 out,
3129 r#" <xmp object="{} {}" fact="{}"></xmp>"#,
3130 object.number,
3131 object.generation,
3132 XmlEscapedAttr::new(&xmp_fact_text(fact))?,
3133 )
3134 .map_err(write_error)?,
3135 }
3136 }
3137 writeln!(out, " </parseFacts>").map_err(write_error)?;
3138 Ok(())
3139}
3140
3141fn write_xml_warnings<W: Write>(
3142 warnings: &[ValidationWarning],
3143 out: &mut W,
3144 indent: usize,
3145) -> Result<()> {
3146 if warnings.is_empty() {
3147 return Ok(());
3148 }
3149 let spaces = " ".repeat(indent);
3150 writeln!(out, "{spaces}<warnings>").map_err(write_error)?;
3151 for warning in warnings {
3152 writeln!(
3153 out,
3154 "{spaces} <warning>{}</warning>",
3155 XmlEscapedText::new(&warning_text(warning))?,
3156 )
3157 .map_err(write_error)?;
3158 }
3159 writeln!(out, "{spaces}</warnings>").map_err(write_error)?;
3160 Ok(())
3161}
3162
3163fn write_xml_batch_summary<W: Write>(summary: &BatchSummary, out: &mut W) -> Result<()> {
3164 writeln!(
3165 out,
3166 r#" <batchSummary totalJobs="{}" failedToParse="{}" encrypted="{}" incomplete="{}" internalErrors="{}">"#,
3167 summary.total_files,
3168 summary.parse_failures,
3169 summary.encrypted,
3170 summary.incomplete,
3171 summary.internal_errors,
3172 )
3173 .map_err(write_error)?;
3174 writeln!(
3175 out,
3176 r#" <validationReports compliant="{}" nonCompliant="{}" failedJobs="{}">{}</validationReports>"#,
3177 summary.valid,
3178 summary.invalid,
3179 summary
3180 .parse_failures
3181 .saturating_add(summary.encrypted)
3182 .saturating_add(summary.incomplete)
3183 .saturating_add(summary.internal_errors),
3184 summary.valid.saturating_add(summary.invalid),
3185 )
3186 .map_err(write_error)?;
3187 writeln!(
3188 out,
3189 r#" <duration elapsedMillis="{}"></duration>"#,
3190 summary.elapsed_millis,
3191 )
3192 .map_err(write_error)?;
3193 writeln!(out, " </batchSummary>").map_err(write_error)?;
3194 Ok(())
3195}
3196
/// Aggregated counters produced by `check_counts` for one validation report.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
struct CheckCounts {
    /// Accumulated "passed" count.
    passed: u64,
    /// Accumulated "failed" count.
    failed: u64,
    /// Accumulated number of unsupported rules.
    unsupported: u64,
}
3203
3204fn check_counts(report: &ValidationReport) -> CheckCounts {
3205 report
3206 .profile_reports
3207 .iter()
3208 .fold(CheckCounts::default(), |mut counts, profile| {
3209 let failed = profile.failed_rules;
3210 let unsupported = u64::try_from(profile.unsupported_rules.len()).unwrap_or(u64::MAX);
3211 counts.failed = counts.failed.saturating_add(failed);
3212 counts.unsupported = counts.unsupported.saturating_add(unsupported);
3213 counts.passed = counts
3214 .passed
3215 .saturating_add(profile.checks_executed.saturating_sub(failed));
3216 counts
3217 })
3218}
3219
3220fn profile_list(report: &ValidationReport) -> String {
3221 let profiles = report
3222 .profile_reports
3223 .iter()
3224 .map(|profile| profile.profile.id.as_str())
3225 .collect::<Vec<_>>();
3226 if profiles.is_empty() {
3227 String::from("-")
3228 } else {
3229 profiles.join(", ")
3230 }
3231}
3232
3233fn source_name(source: &InputSummary) -> String {
3234 source.path.as_ref().map_or_else(
3235 || String::from("<memory>"),
3236 |path| path.display().to_string(),
3237 )
3238}
3239
3240fn status_text(status: ValidationStatus) -> &'static str {
3241 match status {
3242 ValidationStatus::Valid => "valid",
3243 ValidationStatus::Invalid => "invalid",
3244 ValidationStatus::Encrypted => "encrypted",
3245 ValidationStatus::Incomplete => "incomplete",
3246 ValidationStatus::ParseFailed => "parse failed",
3247 }
3248}
3249
3250fn repair_status_text(status: RepairStatus) -> &'static str {
3251 match status {
3252 RepairStatus::Succeeded => "succeeded",
3253 RepairStatus::NoAction => "no action",
3254 RepairStatus::Refused => "refused",
3255 RepairStatus::Failed => "failed",
3256 }
3257}
3258
3259fn repair_action_kind(action: &RepairAction) -> &'static str {
3260 match action {
3261 RepairAction::CopiedUnchanged => "copiedUnchanged",
3262 RepairAction::MetadataRewritten { .. } => "metadataRewritten",
3263 }
3264}
3265
3266fn repair_action_text(action: &RepairAction) -> String {
3267 match action {
3268 RepairAction::CopiedUnchanged => String::from("copied unchanged"),
3269 RepairAction::MetadataRewritten { description } => description.as_str().to_owned(),
3270 }
3271}
3272
3273fn repair_refusal_kind(refusal: &RepairRefusal) -> &'static str {
3274 match refusal {
3275 RepairRefusal::ParseFailed { .. } => "parseFailed",
3276 RepairRefusal::Encrypted => "encrypted",
3277 RepairRefusal::AmbiguousFlavour { .. } => "ambiguousFlavour",
3278 RepairRefusal::UnsupportedValidationStatus { .. } => "unsupportedValidationStatus",
3279 RepairRefusal::OutputWouldModifyInput => "outputWouldModifyInput",
3280 RepairRefusal::InvalidOutputPath { .. } => "invalidOutputPath",
3281 }
3282}
3283
3284fn repair_refusal_text(refusal: &RepairRefusal) -> String {
3285 match refusal {
3286 RepairRefusal::ParseFailed { reason } => {
3287 format!("input could not be parsed: {}", reason.as_str())
3288 }
3289 RepairRefusal::Encrypted => String::from("encrypted inputs are not repaired"),
3290 RepairRefusal::AmbiguousFlavour { selected } => {
3291 format!("repair requires exactly one selected flavour, got {selected}")
3292 }
3293 RepairRefusal::UnsupportedValidationStatus { status } => {
3294 format!(
3295 "metadata repair is unsupported for {} inputs",
3296 status_text(*status)
3297 )
3298 }
3299 RepairRefusal::OutputWouldModifyInput => {
3300 String::from("output path would modify input in place")
3301 }
3302 RepairRefusal::InvalidOutputPath { reason } => reason.as_str().to_owned(),
3303 }
3304}
3305
3306fn exit_category_text(category: ExitCategory) -> &'static str {
3307 match category {
3308 ExitCategory::Success => "success",
3309 ExitCategory::ValidationFailed => "validation failed",
3310 ExitCategory::ProcessingFailed => "processing failed",
3311 ExitCategory::InternalError => "internal error",
3312 }
3313}
3314
3315fn location_text(location: &ObjectLocation) -> String {
3316 if let Some(path) = &location.path {
3317 return path.to_string();
3318 }
3319 if let Some(object) = location.object {
3320 return format!("object {} {}", object.number, object.generation);
3321 }
3322 if let Some(offset) = location.offset {
3323 return format!("offset {offset}");
3324 }
3325 String::from("unknown")
3326}
3327
3328fn assertion_message(assertion: &Assertion) -> &str {
3329 assertion
3330 .message
3331 .as_ref()
3332 .unwrap_or(&assertion.description)
3333 .as_str()
3334}
3335
3336fn assertion_status_text(status: AssertionStatus) -> &'static str {
3337 match status {
3338 AssertionStatus::Passed => "passed",
3339 AssertionStatus::Failed => "failed",
3340 }
3341}
3342
3343fn status_statement(status: ValidationStatus) -> &'static str {
3344 match status {
3345 ValidationStatus::Valid => "PDF file is compliant with Validation Profile requirements.",
3346 ValidationStatus::Invalid => {
3347 "PDF file is not compliant with Validation Profile requirements."
3348 }
3349 ValidationStatus::Encrypted => "PDF file is encrypted and could not be validated.",
3350 ValidationStatus::Incomplete => "Validation did not complete for all required rules.",
3351 ValidationStatus::ParseFailed => "PDF file could not be parsed.",
3352 }
3353}
3354
3355fn xref_fact_text(fact: &XrefFact) -> String {
3356 match fact {
3357 XrefFact::EolMarkersComply => String::from("eolMarkersComply"),
3358 XrefFact::MalformedClassic => String::from("malformedClassic"),
3359 XrefFact::XrefStreamUnsupported => String::from("xrefStreamUnsupported"),
3360 XrefFact::XrefStreamParsed {
3361 entries,
3362 compressed_entries,
3363 } => format!("xrefStreamParsed entries={entries} compressedEntries={compressed_entries}"),
3364 XrefFact::PrevChain { offset } => format!("prevChain offset={offset}"),
3365 XrefFact::HybridReference { offset } => format!("hybridReference offset={offset}"),
3366 XrefFact::ObjectStreamParsed => String::from("objectStreamParsed"),
3367 }
3368}
3369
3370fn stream_fact_text(fact: &StreamFact) -> String {
3371 match fact {
3372 StreamFact::Length {
3373 declared,
3374 discovered,
3375 } => format!("length declared={declared} discovered={discovered}"),
3376 StreamFact::KeywordSpacing {
3377 stream_keyword_crlf_compliant,
3378 endstream_keyword_eol_compliant,
3379 } => format!(
3380 "keywordSpacing streamKeywordCRLFCompliant={stream_keyword_crlf_compliant} \
3381 endstreamKeywordEolCompliant={endstream_keyword_eol_compliant}"
3382 ),
3383 StreamFact::Decoded { bytes } => format!("decoded bytes={bytes}"),
3384 StreamFact::FilterDecoded {
3385 filter,
3386 input_bytes,
3387 output_bytes,
3388 } => format!(
3389 "filterDecoded filter={} inputBytes={input_bytes} outputBytes={output_bytes}",
3390 filter.as_str()
3391 ),
3392 StreamFact::FilterMetadataMode { filter, bytes } => {
3393 format!(
3394 "filterMetadataMode filter={} bytes={bytes}",
3395 filter.as_str()
3396 )
3397 }
3398 }
3399}
3400
3401fn xmp_fact_text(fact: &XmpFact) -> String {
3402 match fact {
3403 XmpFact::PacketParsed {
3404 bytes,
3405 namespaces,
3406 claims,
3407 } => format!("packetParsed bytes={bytes} namespaces={namespaces} claims={claims}"),
3408 XmpFact::MissingPacketWrapper => String::from("missingPacketWrapper"),
3409 XmpFact::FlavourClaim {
3410 family,
3411 display_flavour,
3412 namespace_uri,
3413 } => format!(
3414 "flavourClaim family={} displayFlavour={} namespaceUri={}",
3415 family.as_str(),
3416 display_flavour.as_str(),
3417 namespace_uri.as_str()
3418 ),
3419 XmpFact::Malformed { reason } => format!("malformed reason={}", reason.as_str()),
3420 XmpFact::HostileXmlRejected { reason } => {
3421 format!("hostileXmlRejected reason={}", reason.as_str())
3422 }
3423 }
3424}
3425
3426fn warning_text(warning: &ValidationWarning) -> String {
3427 warning.message_text().to_string()
3428}
3429
/// Converts a duration to whole milliseconds, clamping to `u64::MAX` when the
/// value does not fit in a `u64`.
fn duration_millis(duration: Duration) -> u64 {
    match u64::try_from(duration.as_millis()) {
        Ok(millis) => millis,
        Err(_) => u64::MAX,
    }
}
3433
3434fn write_error(source: std::io::Error) -> PdfvError {
3435 ReportError::Write { source }.into()
3436}
3437
/// Formats an optional path as the suffix ` at <path>`, or as an empty string
/// when no path is given.
fn format_optional_path(path: Option<&PathBuf>) -> String {
    match path {
        Some(location) => format!(" at {}", location.display()),
        None => String::new(),
    }
}
3442
3443#[derive(Clone, Copy, Debug)]
3444struct XmlEscapedText<'a>(&'a str);
3445
3446impl<'a> XmlEscapedText<'a> {
3447 fn new(value: &'a str) -> Result<Self> {
3448 ensure_xml_text(value)?;
3449 Ok(Self(value))
3450 }
3451}
3452
3453impl fmt::Display for XmlEscapedText<'_> {
3454 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
3455 for character in self.0.chars() {
3456 match character {
3457 '&' => formatter.write_str("&")?,
3458 '<' => formatter.write_str("<")?,
3459 '>' => formatter.write_str(">")?,
3460 '"' => formatter.write_str(""")?,
3461 '\'' => formatter.write_str("'")?,
3462 _ => formatter.write_str(character.encode_utf8(&mut [0; 4]))?,
3463 }
3464 }
3465 Ok(())
3466 }
3467}
3468
3469#[derive(Clone, Copy, Debug)]
3470struct XmlEscapedAttr<'a>(&'a str);
3471
3472impl<'a> XmlEscapedAttr<'a> {
3473 fn new(value: &'a str) -> Result<Self> {
3474 ensure_xml_text(value)?;
3475 Ok(Self(value))
3476 }
3477}
3478
3479impl fmt::Display for XmlEscapedAttr<'_> {
3480 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
3481 XmlEscapedText(self.0).fmt(formatter)
3482 }
3483}
3484
3485#[derive(Clone, Copy, Debug)]
3486struct HtmlEscapedText<'a>(&'a str);
3487
3488impl<'a> HtmlEscapedText<'a> {
3489 fn new(value: &'a str) -> Result<Self> {
3490 ensure_xml_text(value)?;
3491 Ok(Self(value))
3492 }
3493}
3494
3495impl fmt::Display for HtmlEscapedText<'_> {
3496 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
3497 XmlEscapedText(self.0).fmt(formatter)
3498 }
3499}
3500
3501fn ensure_xml_text(value: &str) -> Result<()> {
3502 if value.chars().all(is_xml_char) {
3503 return Ok(());
3504 }
3505 Err(ReportError::Xml {
3506 message: BoundedText::unchecked("text contains characters forbidden by XML 1.0"),
3507 }
3508 .into())
3509}
3510
/// Reports whether `character` is in the accepted set: tab, line feed,
/// carriage return, or one of the ranges U+0020–U+D7FF, U+E000–U+FFFD and
/// U+10000–U+10FFFF.
fn is_xml_char(character: char) -> bool {
    matches!(
        character,
        '\u{09}'
            | '\u{0A}'
            | '\u{0D}'
            | '\u{20}'..='\u{D7FF}'
            | '\u{E000}'..='\u{FFFD}'
            | '\u{10000}'..='\u{10FFFF}'
    )
}
3517
3518#[cfg(test)]
3519mod tests {
3520 use std::{
3521 collections::BTreeMap,
3522 error::Error as StdError,
3523 num::{NonZeroU32, NonZeroU64},
3524 path::PathBuf,
3525 time::Duration,
3526 };
3527
3528 use super::{
3529 Assertion, AssertionStatus, BatchReport, BoundedText, ErrorArgument, ExitCategory,
3530 FeatureObject, FeatureReport, FeatureValue, HtmlReportWriter, Identifier, InputKind,
3531 InputSummary, JsonReportWriter, MaxDisplayedFailures, MetadataRepairOptions,
3532 MetadataRepairer, ObjectLocation, ObjectTypeName, PdfVersion, PolicyReport,
3533 PolicyRuleResult, ProfileIdentity, ProfileReport, PropertyName, RawXmlReportWriter,
3534 RepairAction, RepairBatchReport, RepairRefusal, RepairReport, RepairStatus, ReportFormat,
3535 ReportWriter, RuleId, TextReportWriter, ValidationOptions, ValidationReport,
3536 ValidationStatus, XmlReportWriter,
3537 };
3538
3539 fn sample_report() -> std::result::Result<ValidationReport, Box<dyn StdError>> {
3540 let profile_id = Identifier::new("pdfa-1b")?;
3541 let rule_id = RuleId(Identifier::new("6.1.2-1")?);
3542 Ok(ValidationReport::builder()
3543 .engine_version("0.1.0".to_owned())
3544 .source(InputSummary::new(InputKind::Memory, None, Some(42)))
3545 .status(ValidationStatus::Invalid)
3546 .flavours(vec![super::ValidationFlavour::new(
3547 "pdfa",
3548 NonZeroU32::MIN,
3549 "b",
3550 )?])
3551 .profile_reports(vec![
3552 ProfileReport::builder()
3553 .profile(ProfileIdentity {
3554 id: profile_id.clone(),
3555 name: BoundedText::new("PDF/A-1B", 64)?,
3556 version: None,
3557 })
3558 .is_compliant(false)
3559 .checks_executed(1)
3560 .rules_executed(1)
3561 .failed_rules(1)
3562 .failed_assertions(vec![Assertion {
3563 ordinal: NonZeroU64::MIN,
3564 rule_id,
3565 status: AssertionStatus::Failed,
3566 description: BoundedText::new("Header must start at byte zero", 128)?,
3567 location: ObjectLocation {
3568 object: None,
3569 offset: Some(0),
3570 path: None,
3571 },
3572 object_context: None,
3573 message: Some(BoundedText::new("Header offset is non-zero", 128)?),
3574 error_arguments: vec![ErrorArgument {
3575 name: Identifier::new("offset")?,
3576 value: BoundedText::new("12", 16)?,
3577 }],
3578 }])
3579 .passed_assertions(Vec::new())
3580 .unsupported_rules(Vec::new())
3581 .build(),
3582 ])
3583 .parse_facts(vec![super::ParseFact::Header {
3584 offset: 12,
3585 version: PdfVersion { major: 1, minor: 7 },
3586 had_leading_bytes: true,
3587 }])
3588 .warnings(Vec::new())
3589 .task_durations(Vec::new())
3590 .build())
3591 }
3592
3593 fn sample_feature_policy_report() -> std::result::Result<ValidationReport, Box<dyn StdError>> {
3594 let mut report = sample_report()?;
3595 let mut properties = BTreeMap::new();
3596 properties.insert(PropertyName::new("hasMetadata")?, FeatureValue::Bool(false));
3597 report.feature_report = Some(
3598 FeatureReport::builder()
3599 .objects(vec![
3600 FeatureObject::builder()
3601 .family(ObjectTypeName::new("catalog".to_owned())?)
3602 .location(ObjectLocation {
3603 object: None,
3604 offset: None,
3605 path: Some(BoundedText::new("root/catalog[0]", 64)?),
3606 })
3607 .context(BoundedText::new("root/catalog[0]", 64)?)
3608 .properties(properties)
3609 .build(),
3610 ])
3611 .visited_objects(1)
3612 .selected_families(vec![ObjectTypeName::new("catalog".to_owned())?])
3613 .truncated(false)
3614 .build(),
3615 );
3616 report.policy_report = Some(
3617 PolicyReport::builder()
3618 .name(Some(BoundedText::new("catalog-policy", 64)?))
3619 .is_compliant(true)
3620 .results(vec![
3621 PolicyRuleResult::builder()
3622 .id(Identifier::new("catalog-has-no-metadata")?)
3623 .description(BoundedText::new("Catalog metadata is absent", 128)?)
3624 .passed(true)
3625 .matches(1)
3626 .message(BoundedText::new(
3627 "policy rule catalog-has-no-metadata passed with 1 matching feature \
3628 objects",
3629 128,
3630 )?)
3631 .build(),
3632 ])
3633 .build(),
3634 );
3635 Ok(report)
3636 }
3637
3638 fn sample_repair_report() -> RepairReport {
3639 RepairReport::builder()
3640 .engine_version("0.1.0".to_owned())
3641 .source(InputSummary::new(
3642 InputKind::File,
3643 Some(PathBuf::from("input.pdf")),
3644 Some(42),
3645 ))
3646 .output_path(Some(PathBuf::from("out/repaired-input.pdf")))
3647 .status(RepairStatus::NoAction)
3648 .actions(vec![RepairAction::CopiedUnchanged])
3649 .refusal(None)
3650 .warnings(Vec::new())
3651 .task_durations(Vec::new())
3652 .build()
3653 }
3654
3655 #[test]
3656 fn test_should_apply_validation_options_defaults() {
3657 let options = ValidationOptions::default();
3658
3659 assert!(options.report_parse_warnings);
3660 assert!(!options.record_passed_assertions);
3661 assert_eq!(options.max_failed_assertions_per_rule.get(), 1);
3662 }
3663
3664 #[test]
3665 fn test_should_reject_zero_max_displayed_failures() {
3666 let result = MaxDisplayedFailures::try_from(0);
3667
3668 assert!(result.is_err());
3669 }
3670
3671 #[test]
3672 fn test_should_reject_invalid_identifier() {
3673 let result = Identifier::new("bad identifier");
3674
3675 assert!(result.is_err());
3676 }
3677
    /// Serializes the sample validation report with `serde_json::to_string_pretty` and
    /// compares the result against an exact expected document with camelCase keys.
    ///
    /// # Errors
    ///
    /// Fails if the sample report cannot be built or serialization fails.
    #[test]
    fn test_should_serialize_validation_report_as_camel_case_json()
    -> std::result::Result<(), Box<dyn StdError>> {
        let report = sample_report()?;
        let json = serde_json::to_string_pretty(&report)?;
        // NOTE(review): `expected` is compared with `assert_eq!`, so every character of the
        // literal, including leading whitespace on each line, must match the serializer
        // output exactly. Confirm the indentation if this literal is ever reformatted.
        let expected = r#"{
 "engineVersion": "0.1.0",
 "source": {
 "kind": "memory",
 "bytes": 42
 },
 "status": "invalid",
 "flavours": [
 {
 "family": "pdfa",
 "part": 1,
 "conformance": "b"
 }
 ],
 "profileReports": [
 {
 "profile": {
 "id": "pdfa-1b",
 "name": "PDF/A-1B"
 },
 "isCompliant": false,
 "checksExecuted": 1,
 "rulesExecuted": 1,
 "failedRules": 1,
 "failedAssertions": [
 {
 "ordinal": 1,
 "ruleId": "6.1.2-1",
 "status": "failed",
 "description": "Header must start at byte zero",
 "location": {
 "offset": 0
 },
 "message": "Header offset is non-zero",
 "errorArguments": [
 {
 "name": "offset",
 "value": "12"
 }
 ]
 }
 ],
 "passedAssertions": [],
 "unsupportedRules": []
 }
 ],
 "parseFacts": [
 {
 "kind": "header",
 "offset": 12,
 "version": {
 "major": 1,
 "minor": 7
 },
 "hadLeadingBytes": true
 }
 ],
 "warnings": [],
 "taskDurations": []
}"#;

        // Whole-document equality: any added, removed, or renamed key fails the test.
        assert_eq!(json, expected);
        Ok(())
    }
3747
3748 #[test]
3749 fn test_should_write_compact_json_report() -> std::result::Result<(), Box<dyn StdError>> {
3750 let report = sample_report()?;
3751 let mut output = Vec::new();
3752
3753 JsonReportWriter::compact()
3754 .write_report(&report, &mut output)
3755 .map_err(Box::<dyn StdError>::from)?;
3756
3757 let json = String::from_utf8(output)?;
3758 assert!(json.contains("\"engineVersion\":\"0.1.0\""));
3759 Ok(())
3760 }
3761
    /// Renders the sample report through `TextReportWriter` and compares the output with an
    /// exact expected text block.
    ///
    /// # Errors
    ///
    /// Fails if the sample report cannot be built, writing fails, or the output is not UTF-8.
    #[test]
    fn test_should_write_text_report() -> std::result::Result<(), Box<dyn StdError>> {
        let report = sample_report()?;
        let mut output = Vec::new();

        TextReportWriter
            .write_report(&report, &mut output)
            .map_err(Box::<dyn StdError>::from)?;

        let text = String::from_utf8(output)?;
        // The leading `\` suppresses the newline after the opening quote, so the expected text
        // starts directly at `<memory>`. Every later line break and leading space is literal.
        // NOTE(review): the indentation of the failure line is significant; confirm it matches
        // the writer's output if this literal is ever reformatted.
        let expected = "\
<memory>: invalid
profiles: pdfa-1b
checks: 0 passed, 1 failed, 0 unsupported
first failures:
 6.1.2-1 at offset 0: Header offset is non-zero
";
        assert_eq!(text, expected);
        Ok(())
    }
3782
3783 #[test]
3784 fn test_should_write_xml_report() -> std::result::Result<(), Box<dyn StdError>> {
3785 let report = sample_report()?;
3786 let mut output = Vec::new();
3787
3788 XmlReportWriter
3789 .write_report(&report, &mut output)
3790 .map_err(Box::<dyn StdError>::from)?;
3791
3792 let xml = String::from_utf8(output)?;
3793 assert!(xml.contains(r#"<?xml version="1.0" encoding="utf-8"?>"#));
3794 assert!(xml.contains("<report>"));
3795 assert!(xml.contains(r#"<validationReport profileName="PDF/A-1B""#));
3796 assert!(xml.contains(r#"<details passedRules="0" failedRules="1""#));
3797 assert!(xml.contains(r#"<check ruleId="6.1.2-1" status="failed" location="offset 0">"#));
3798 assert!(xml.contains(r#"<batchSummary totalJobs="1""#));
3799 Ok(())
3800 }
3801
    /// Writes the feature-and-policy sample report through `RawXmlReportWriter` and compares
    /// the whole document against an exact expected string.
    ///
    /// The expected document interpolates `super::ENGINE_VERSION` into the `version`
    /// attribute of the root element.
    ///
    /// # Errors
    ///
    /// Fails if the sample report cannot be built, writing fails, or the output is not UTF-8.
    #[test]
    fn test_should_write_raw_xml_report_with_feature_and_policy_sections()
    -> std::result::Result<(), Box<dyn StdError>> {
        let report = sample_feature_policy_report()?;
        let mut output = Vec::new();

        RawXmlReportWriter
            .write_report(&report, &mut output)
            .map_err(Box::<dyn StdError>::from)?;

        let xml = String::from_utf8(output)?;
        // NOTE(review): the literal below is compared byte-for-byte, so element indentation and
        // any character escaping inside it must match the writer's output exactly. Confirm
        // both if this literal is ever reformatted.
        let expected = format!(
            r#"<?xml version="1.0" encoding="utf-8"?>
<rawReport engine="pdfv-core" version="{version}">
 <processorConfig tasks="validation,features,policy"></processorConfig>
 <processorResults>
 <processorResult status="invalid">
 <item size="42">
 <name><memory></name>
 </item>
 <validationReport profileName="PDF/A-1B" statement="PDF file is not compliant with Validation Profile requirements." isCompliant="false">
 <details passedRules="0" failedRules="1" passedChecks="0" failedChecks="1" unsupportedRules="0"></details>
 <failedChecks>
 <check ruleId="6.1.2-1" status="failed" location="offset 0">
 <description>Header must start at byte zero</description>
 <message>Header offset is non-zero</message>
 <errorArguments>
 <argument name="offset">12</argument>
 </errorArguments>
 </check>
 </failedChecks>
 </validationReport>
 <featureReport visitedObjects="1" extractedObjects="1" truncated="false">
 <featureObject family="catalog" location="root/catalog[0]">
 <property name="hasMetadata">
 <value type="bool">false</value>
 </property>
 </featureObject>
 </featureReport>
 <policyReport name="catalog-policy" isCompliant="true">
 <rule id="catalog-has-no-metadata" passed="true" matches="1">
 <description>Catalog metadata is absent</description>
 <message>policy rule catalog-has-no-metadata passed with 1 matching feature objects</message>
 </rule>
 </policyReport>
 <parseFacts>
 <header offset="12" version="1.7" hadLeadingBytes="true"></header>
 </parseFacts>
 </processorResult>
 </processorResults>
 <batchSummary totalJobs="1" failedToParse="0" encrypted="0" incomplete="0" internalErrors="0">
 <validationReports compliant="0" nonCompliant="1" failedJobs="0">1</validationReports>
 <duration elapsedMillis="0"></duration>
 </batchSummary>
</rawReport>
"#,
            version = super::ENGINE_VERSION,
        );
        assert_eq!(xml, expected);
        Ok(())
    }
3863
    /// Renders the sample report through `HtmlReportWriter` and compares the page against an
    /// exact expected string.
    ///
    /// # Errors
    ///
    /// Fails if the sample report cannot be built, writing fails, or the output is not UTF-8.
    #[test]
    fn test_should_write_static_html_report() -> std::result::Result<(), Box<dyn StdError>> {
        let report = sample_report()?;
        let mut output = Vec::new();

        HtmlReportWriter
            .write_report(&report, &mut output)
            .map_err(Box::<dyn StdError>::from)?;

        let html = String::from_utf8(output)?;
        // A trailing `\` joins a source line to the next one and drops the next line's leading
        // whitespace, so the long `<head>` and table-header lines are each one line in the
        // expected string. Line breaks without a trailing `\` are literal.
        // NOTE(review): compared byte-for-byte; confirm any character escaping inside table
        // cells matches the writer's output if this literal is ever reformatted.
        let expected = "\
<!doctype html>
<html lang=\"en\"><head><meta charset=\"utf-8\"><title>pdfv validation \
 report</title><style>body{font-family:system-ui,sans-serif;margin:2rem;\
 color:#1f2937}table{border-collapse:collapse;width:100%}th,td{border:1px \
 solid #d1d5db;padding:.4rem;text-align:left}th{background:#f3f4f6}</\
 style></head><body>
<h1>Validation Report</h1>
<p>0 valid, 1 invalid, 0 parse failed, 0 encrypted, 0 incomplete.</p>
<table><thead><tr><th>Input</th><th>Status</th><th>Profiles</th><th>Features</th><th>Policy</th></\
 tr></thead><tbody>
<tr><td><memory></td><td>invalid</td><td>pdfa-1b</td><td>-</td><td>-</td></tr>
</tbody></table>
</body></html>
";
        assert_eq!(html, expected);
        Ok(())
    }
3892
3893 #[test]
3894 fn test_should_serialize_repair_report_and_summary()
3895 -> std::result::Result<(), Box<dyn StdError>> {
3896 let report = sample_repair_report();
3897 let json = serde_json::to_string_pretty(&report)?;
3898 assert!(json.contains(r#""status": "noAction""#));
3899 assert!(json.contains(r#""kind": "copiedUnchanged""#));
3900
3901 let refused = RepairReport::builder()
3902 .engine_version("0.1.0".to_owned())
3903 .source(InputSummary::new(
3904 InputKind::File,
3905 Some(PathBuf::from("bad.pdf")),
3906 None,
3907 ))
3908 .output_path(None)
3909 .status(RepairStatus::Refused)
3910 .actions(Vec::new())
3911 .refusal(Some(RepairRefusal::Encrypted))
3912 .warnings(Vec::new())
3913 .task_durations(Vec::new())
3914 .build();
3915 let batch =
3916 RepairBatchReport::from_items(vec![report, refused], Vec::new(), Duration::ZERO);
3917
3918 assert_eq!(batch.summary.no_action, 1);
3919 assert_eq!(batch.summary.refused, 1);
3920 assert_eq!(
3921 batch.summary.worst_exit_category,
3922 ExitCategory::ProcessingFailed
3923 );
3924 Ok(())
3925 }
3926
    /// Writes the sample repair report via `ReportFormat::RawXml` and `ReportFormat::Html`
    /// and compares both outputs against exact expected documents.
    ///
    /// The expected raw XML interpolates `super::ENGINE_VERSION` into the root `version`
    /// attribute.
    ///
    /// # Errors
    ///
    /// Fails if either writer fails or produces output that is not UTF-8.
    #[test]
    fn test_should_write_repair_raw_xml_and_html() -> std::result::Result<(), Box<dyn StdError>> {
        let report = sample_repair_report();
        let mut raw = Vec::new();
        let mut html = Vec::new();

        ReportFormat::RawXml
            .write_repair_report(&report, &mut raw)
            .map_err(Box::<dyn StdError>::from)?;
        ReportFormat::Html
            .write_repair_report(&report, &mut html)
            .map_err(Box::<dyn StdError>::from)?;

        // Shadow the byte buffers with their decoded text.
        let raw = String::from_utf8(raw)?;
        let html = String::from_utf8(html)?;
        // NOTE(review): both literals below are compared byte-for-byte, so leading whitespace
        // on each raw XML line is significant. Confirm it if this literal is ever reformatted.
        let expected_raw = format!(
            r#"<?xml version="1.0" encoding="utf-8"?>
<rawRepairReport engine="pdfv-core" version="{version}">
 <processorConfig tasks="metadata"></processorConfig>
 <items>
 <repairItem status="no action">
 <item size="42">
 <name>input.pdf</name>
 </item>
 <output>out/repaired-input.pdf</output>
 <actions>
 <action kind="copiedUnchanged">copied unchanged</action>
 </actions>
 </repairItem>
 </items>
 <repairSummary totalJobs="1" succeeded="0" noAction="1" refused="0" failed="0" elapsedMillis="0"></repairSummary>
</rawRepairReport>
"#,
            version = super::ENGINE_VERSION,
        );
        // A trailing `\` joins a source line to the next and drops that line's leading
        // whitespace; line breaks without it are part of the expected HTML.
        let expected_html =
            "\
<!doctype html>
<html lang=\"en\"><head><meta charset=\"utf-8\"><title>pdfv metadata repair \
 report</title><style>body{font-family:system-ui,sans-serif;margin:2rem;color:#\
 1f2937}table{border-collapse:collapse;width:100%}th,td{border:1px solid \
 #d1d5db;padding:.4rem;text-align:left}th{background:#f3f4f6}</style></head><body>
<h1>Metadata Repair Report</h1>
<p>0 repaired, 1 unchanged, 0 refused, 0 failed.</p>
<table><thead><tr><th>Input</th><th>Status</th><th>Output</th><th>Reason</th></tr></thead><tbody>
<tr><td>input.pdf</td><td>no action</td><td>out/repaired-input.pdf</td><td></td></tr>
</tbody></table>
</body></html>
";
        assert_eq!(raw, expected_raw);
        assert_eq!(html, expected_html);
        Ok(())
    }
3980
3981 #[test]
3982 #[allow(
3983 clippy::disallowed_methods,
3984 reason = "unit test creates local repair files synchronously"
3985 )]
3986 fn test_should_refuse_repair_when_output_already_exists_without_removing_it()
3987 -> std::result::Result<(), Box<dyn StdError>> {
3988 let temp = tempfile::tempdir()?;
3989 let input = temp.path().join("input.pdf");
3990 let output_dir = temp.path().join("out");
3991 let output = output_dir.join("input.pdf");
3992 std::fs::create_dir(&output_dir)?;
3993 std::fs::write(&input, b"not a valid pdf")?;
3994 std::fs::write(&output, b"existing output")?;
3995 let repairer = MetadataRepairer::new(MetadataRepairOptions::new(
3996 ValidationOptions::default(),
3997 &output_dir,
3998 "",
3999 )?)?;
4000
4001 let report = repairer.repair_path(&input)?;
4002
4003 assert_eq!(report.status, RepairStatus::Refused);
4004 assert!(matches!(
4005 report.refusal,
4006 Some(RepairRefusal::InvalidOutputPath { .. })
4007 ));
4008 assert_eq!(std::fs::read(&output)?, b"existing output");
4009 Ok(())
4010 }
4011
4012 #[test]
4013 fn test_should_reject_xml_forbidden_text() -> std::result::Result<(), Box<dyn StdError>> {
4014 let mut report = sample_report()?;
4015 let Some(profile) = report.profile_reports.first_mut() else {
4016 return Err("sample report must contain profile".into());
4017 };
4018 profile.profile.name = BoundedText::unchecked("bad\u{1}profile");
4019 let mut output = Vec::new();
4020
4021 let result = XmlReportWriter.write_report(&report, &mut output);
4022
4023 assert!(matches!(
4024 result,
4025 Err(super::PdfvError::Report(super::ReportError::Xml { .. }))
4026 ));
4027 Ok(())
4028 }
4029
4030 #[test]
4031 fn test_should_dispatch_pretty_json_report_format() -> std::result::Result<(), Box<dyn StdError>>
4032 {
4033 let report = sample_report()?;
4034 let mut output = Vec::new();
4035
4036 ReportFormat::JsonPretty
4037 .write_report(&report, &mut output)
4038 .map_err(Box::<dyn StdError>::from)?;
4039
4040 let json = String::from_utf8(output)?;
4041 assert!(json.contains("\n \"engineVersion\": \"0.1.0\""));
4042 Ok(())
4043 }
4044
4045 #[test]
4046 fn test_should_dispatch_xml_report_format() -> std::result::Result<(), Box<dyn StdError>> {
4047 let report = sample_report()?;
4048 let mut output = Vec::new();
4049
4050 ReportFormat::Xml
4051 .write_report(&report, &mut output)
4052 .map_err(Box::<dyn StdError>::from)?;
4053
4054 let xml = String::from_utf8(output)?;
4055 assert!(xml.contains("<validationReport"));
4056 Ok(())
4057 }
4058
4059 #[test]
4060 fn test_should_compute_batch_summary() -> std::result::Result<(), Box<dyn StdError>> {
4061 let valid = ValidationReport::builder()
4062 .engine_version("0.1.0".to_owned())
4063 .source(InputSummary::new(InputKind::Memory, None, Some(42)))
4064 .status(ValidationStatus::Valid)
4065 .flavours(Vec::new())
4066 .profile_reports(Vec::new())
4067 .parse_facts(Vec::new())
4068 .warnings(Vec::new())
4069 .task_durations(Vec::new())
4070 .build();
4071 let invalid = sample_report()?;
4072
4073 let batch = BatchReport::from_items(vec![valid, invalid], Vec::new(), Duration::ZERO);
4074
4075 assert_eq!(batch.summary.total_files, 2);
4076 assert_eq!(batch.summary.valid, 1);
4077 assert_eq!(batch.summary.invalid, 1);
4078 assert_eq!(
4079 batch.summary.worst_exit_category,
4080 ExitCategory::ValidationFailed
4081 );
4082 Ok(())
4083 }
4084}